go to gmp 4.3.2 (branch: baserock/pedroalvarez/gcc-5.3.0-gmp432)

author: Pedro Alvarez <pedro.alvarez@codethink.co.uk> 2016-05-27 17:39:31 +0100
committer: Pedro Alvarez <pedro.alvarez@codethink.co.uk> 2016-05-27 17:53:32 +0100
commit: 26c75cf8267919f81a1759c9c965a52c660233f9 (patch)
tree: cf2a39cf56c2c8ac45760854413ab233e6263974 /gmp/mpn
parent: 56892c1d217baea02092b51a09bbc924130ca84c (diff)
download: gcc-tarball-baserock/pedroalvarez/gcc-5.3.0-gmp432.tar.gz
1227 files changed, 30206 insertions, 116133 deletions
diff --git a/gmp/mpn/Makeasm.am b/gmp/mpn/Makeasm.am
index 5d7306c221..bb66700384 100644
--- a/gmp/mpn/Makeasm.am
+++ b/gmp/mpn/Makeasm.am
@@ -1,32 +1,22 @@
 ## Automake asm file rules.
 
-# Copyright 1996, 1998-2002 Free Software Foundation, Inc.
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
+# Inc.
 #
-#  This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or
-#
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 # COMPILE minus CC.
diff --git a/gmp/mpn/Makefile.am b/gmp/mpn/Makefile.am
index 20b8a4a116..073b89e988 100644
--- a/gmp/mpn/Makefile.am
+++ b/gmp/mpn/Makefile.am
@@ -1,32 +1,22 @@
 ## Process this file with automake to generate Makefile.in
 
-# Copyright 1996, 1998-2002, 2005, 2011, 2013 Free Software Foundation, Inc.
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2005 Free Software Foundation,
+# Inc.
 #
-#  This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or
-#
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir) \
@@ -34,14 +24,42 @@ INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir) \
 
 OFILES = @mpn_objects@
 
+
+# All possible mpn normal and optional function files are listed here, to
+# get automake to generate ansi2knr rules for each.  Such rules will be
+# ignored for any that are instead implemented with a .asm (or whatever) for
+# a particular target.
+#
+nodist_EXTRA_libmpn_la_SOURCES =					    \
+  add.c add_1.c add_n.c							    \
+  addmul_1.c addmul_2.c addmul_3.c addmul_4.c addmul_5.c addmul_6.c	    \
+  addmul_7.c addmul_8.c							    \
+  and_n.c andn_n.c bdivmod.c						    \
+  cmp.c com_n.c copyd.c copyi.c						    \
+  dc_divrem_n.c dive_1.c diveby3.c divis.c divrem.c divrem_1.c divrem_2.c   \
+  dump.c fib2_ui.c gcd.c						    \
+  gcd_1.c gcdext.c get_d.c get_str.c					    \
+  hamdist.c hgcd2.c hgcd.c invert_limb.c				    \
+  ior_n.c iorn_n.c jacbase.c lshift.c \
+  matrix22_mul.c mod_1.c mod_34lsub1.c mode1o.c	    \
+  mod_1_1.c mod_1_2.c mod_1_3.c mod_1_4.c				    \
+  mul.c mul_1.c mul_2.c mul_3.c mul_4.c mul_fft.c mul_n.c mul_basecase.c    \
+  mul_toom22.c mul_toom32.c mul_toom42.c				    \
+  mullow_n.c mullow_basecase.c nand_n.c neg_n.c nior_n.c perfsqr.c	    \
+  popcount.c pre_divrem_1.c pre_mod_1.c pow_1.c random.c random2.c rshift.c \
+  rootrem.c sb_divrem_mn.c scan0.c scan1.c set_str.c			    \
+  sqr_basecase.c sqr_diagonal.c						    \
+  sqrtrem.c sub.c sub_1.c sub_n.c submul_1.c				    \
+  tdiv_qr.c udiv_qrnnd.c udiv_w_sdiv.c xor_n.c xnor_n.c
+
 noinst_LTLIBRARIES = libmpn.la
 nodist_libmpn_la_SOURCES = fib_table.c mp_bases.c
 libmpn_la_LIBADD = $(OFILES)
 libmpn_la_DEPENDENCIES = $(OFILES)
 
-TARG_DIST = alpha arm arm64 cray generic ia64 lisp m68k m88k \
-  minithres mips32 mips64 pa32 pa64 power powerpc32 powerpc64 \
-  s390_32 s390_64 sh sparc32 sparc64 thumb vax x86 x86_64
+TARG_DIST = a29k alpha arm clipper cray generic i960 ia64 lisp m68k m88k \
+  minithres mips32 mips64 ns32k pa32 pa64 power powerpc32 powerpc64 pyr s390 \
+  sh sparc32 sparc64 thumb vax x86 x86_64 z8000 z8000x
 
 EXTRA_DIST = asm-defs.m4 cpp-ccas m4-ccas $(TARG_DIST)
 
@@ -56,4 +74,7 @@ mp_bases.c:
 perfsqr.h:
 	cd ..; $(MAKE) $(AM_MAKEFLAGS) mpn/perfsqr.h
 
+tune-gcd-p: gcd.c
+	$(COMPILE) -g -O1 -I $(top_srcdir)/tune -DTUNE_GCD_P=1 gcd.c -o tune-gcd-p -L ../.libs -L../tune/.libs -lspeed -lgmp -lm
+
 include Makeasm.am
diff --git a/gmp/mpn/Makefile.in b/gmp/mpn/Makefile.in
index 099abf26ab..e9817864fc 100644
--- a/gmp/mpn/Makefile.in
+++ b/gmp/mpn/Makefile.in
@@ -1,9 +1,8 @@
-# Makefile.in generated by automake 1.11.6 from Makefile.am.
+# Makefile.in generated by automake 1.8.4 from Makefile.am.
 # @configure_input@
 
 # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
+# 2003, 2004  Free Software Foundation, Inc.
 # This Makefile.in is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
@@ -15,85 +14,53 @@
 
 @SET_MAKE@
 
-# Copyright 1996, 1998-2002, 2005, 2011, 2013 Free Software Foundation, Inc.
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2005 Free Software Foundation,
+# Inc.
 #
-#  This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or
-#
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
-# Copyright 1996, 1998-2002 Free Software Foundation, Inc.
-#
-#  This file is part of the GNU MP Library.
-#
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
+# Inc.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# This file is part of the GNU MP Library.
 #
-#  or
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
 #
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+SOURCES = $(nodist_libmpn_la_SOURCES) $(nodist_EXTRA_libmpn_la_SOURCES)
 
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
 VPATH = @srcdir@
-am__make_dryrun = \
-  { \
-    am__dry=no; \
-    case $$MAKEFLAGS in \
-      *\\[\ \	]*) \
-        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
-          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
-      *) \
-        for am__flg in $$MAKEFLAGS; do \
-          case $$am__flg in \
-            *=*|--*) ;; \
-            *n*) am__dry=yes; break;; \
-          esac; \
-        done;; \
-    esac; \
-    test $$am__dry = yes; \
-  }
 pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
 pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
 am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
 install_sh_DATA = $(install_sh) -c -m 644
 install_sh_PROGRAM = $(install_sh) -c
 install_sh_SCRIPT = $(install_sh) -c
@@ -105,43 +72,37 @@ POST_INSTALL = :
 NORMAL_UNINSTALL = :
 PRE_UNINSTALL = :
 POST_UNINSTALL = :
-build_triplet = @build@
 host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
 DIST_COMMON = README $(srcdir)/Makeasm.am $(srcdir)/Makefile.am \
 	$(srcdir)/Makefile.in
 subdir = mpn
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-	$(top_srcdir)/configure.ac
+	$(top_srcdir)/configure.in
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
 	$(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
+mkinstalldirs = $(mkdir_p)
 CONFIG_HEADER = $(top_builddir)/config.h
 CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
 LTLIBRARIES = $(noinst_LTLIBRARIES)
 am__DEPENDENCIES_1 =
-nodist_libmpn_la_OBJECTS = fib_table.lo mp_bases.lo
+nodist_libmpn_la_OBJECTS = fib_table$U.lo mp_bases$U.lo
 libmpn_la_OBJECTS = $(nodist_libmpn_la_OBJECTS)
-DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
 depcomp =
 am__depfiles_maybe =
 COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
 	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
-	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
 CCLD = $(CC)
-LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
-	$(LDFLAGS) -o $@
-SOURCES = $(nodist_libmpn_la_SOURCES)
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+SOURCES = $(nodist_libmpn_la_SOURCES) \
+	$(nodist_EXTRA_libmpn_la_SOURCES)
 DIST_SOURCES =
-am__can_run_installinfo = \
-  case $$AM_UPDATE_INFO_DIR in \
-    n|no|NO) false;; \
-    *) (install-info --version) >/dev/null 2>&1;; \
-  esac
 ETAGS = etags
 CTAGS = ctags
 DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -155,6 +116,7 @@ AUTOCONF = @AUTOCONF@
 AUTOHEADER = @AUTOHEADER@
 AUTOMAKE = @AUTOMAKE@
 AWK = @AWK@
+BITS_PER_MP_LIMB = @BITS_PER_MP_LIMB@
 CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
 CC = @CC@
 CCAS = @CCAS@
@@ -170,17 +132,16 @@ CYGPATH_W = @CYGPATH_W@
 DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
 DEFS = @DEFS@
 DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
+ECHO = @ECHO@
 ECHO_C = @ECHO_C@
 ECHO_N = @ECHO_N@
 ECHO_T = @ECHO_T@
 EGREP = @EGREP@
+ENABLE_STATIC_FALSE = @ENABLE_STATIC_FALSE@
+ENABLE_STATIC_TRUE = @ENABLE_STATIC_TRUE@
 EXEEXT = @EXEEXT@
 EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
-FGREP = @FGREP@
 GMP_LDFLAGS = @GMP_LDFLAGS@
-GMP_LIMB_BITS = @GMP_LIMB_BITS@
 GMP_NAIL_BITS = @GMP_NAIL_BITS@
 GREP = @GREP@
 HAVE_CLOCK_01 = @HAVE_CLOCK_01@
@@ -194,12 +155,10 @@ HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
 HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
 HAVE_STACK_T_01 = @HAVE_STACK_T_01@
 HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
-INSTALL = @INSTALL@
 INSTALL_DATA = @INSTALL_DATA@
 INSTALL_PROGRAM = @INSTALL_PROGRAM@
 INSTALL_SCRIPT = @INSTALL_SCRIPT@
 INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LD = @LD@
 LDFLAGS = @LDFLAGS@
 LEX = @LEX@
 LEXLIB = @LEXLIB@
@@ -214,26 +173,20 @@ LIBOBJS = @LIBOBJS@
 LIBREADLINE = @LIBREADLINE@
 LIBS = @LIBS@
 LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
 LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 M4 = @M4@
 MAINT = @MAINT@
+MAINTAINER_MODE_FALSE = @MAINTAINER_MODE_FALSE@
+MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_TRUE@
 MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
 OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
 PACKAGE = @PACKAGE@
 PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
 PACKAGE_NAME = @PACKAGE_NAME@
 PACKAGE_STRING = @PACKAGE_STRING@
 PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
 PACKAGE_VERSION = @PACKAGE_VERSION@
 PATH_SEPARATOR = @PATH_SEPARATOR@
 RANLIB = @RANLIB@
@@ -243,31 +196,26 @@ SHELL = @SHELL@
 SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
 STRIP = @STRIP@
 TAL_OBJECT = @TAL_OBJECT@
-TUNE_LIBS = @TUNE_LIBS@
 TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
 U_FOR_BUILD = @U_FOR_BUILD@
 VERSION = @VERSION@
+WANT_CXX_FALSE = @WANT_CXX_FALSE@
+WANT_CXX_TRUE = @WANT_CXX_TRUE@
+WANT_MPBSD_FALSE = @WANT_MPBSD_FALSE@
+WANT_MPBSD_TRUE = @WANT_MPBSD_TRUE@
 WITH_READLINE_01 = @WITH_READLINE_01@
 YACC = @YACC@
 YFLAGS = @YFLAGS@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
 ac_ct_CC = @ac_ct_CC@
 ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
 am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
 bindir = @bindir@
 build = @build@
 build_alias = @build_alias@
 build_cpu = @build_cpu@
 build_os = @build_os@
 build_vendor = @build_vendor@
-builddir = @builddir@
 datadir = @datadir@
 datarootdir = @datarootdir@
 docdir = @docdir@
@@ -291,6 +239,7 @@ mandir = @mandir@
 mkdir_p = @mkdir_p@
 mpn_objects = @mpn_objects@
 mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
 oldincludedir = @oldincludedir@
 pdfdir = @pdfdir@
 prefix = @prefix@
@@ -298,23 +247,47 @@ program_transform_name = @program_transform_name@
 psdir = @psdir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
 sysconfdir = @sysconfdir@
 target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
 INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir) \
   -DOPERATION_`echo $* | sed 's/_$$//'`
 
 OFILES = @mpn_objects@
+
+# All possible mpn normal and optional function files are listed here, to
+# get automake to generate ansi2knr rules for each.  Such rules will be
+# ignored for any that are instead implemented with a .asm (or whatever) for
+# a particular target.
+#
+nodist_EXTRA_libmpn_la_SOURCES = \
+  add.c add_1.c add_n.c							    \
+  addmul_1.c addmul_2.c addmul_3.c addmul_4.c addmul_5.c addmul_6.c	    \
+  addmul_7.c addmul_8.c							    \
+  and_n.c andn_n.c bdivmod.c						    \
+  cmp.c com_n.c copyd.c copyi.c						    \
+  dc_divrem_n.c dive_1.c diveby3.c divis.c divrem.c divrem_1.c divrem_2.c   \
+  dump.c fib2_ui.c gcd.c						    \
+  gcd_1.c gcdext.c get_d.c get_str.c					    \
+  hamdist.c hgcd2.c hgcd.c invert_limb.c				    \
+  ior_n.c iorn_n.c jacbase.c lshift.c \
+  matrix22_mul.c mod_1.c mod_34lsub1.c mode1o.c	    \
+  mod_1_1.c mod_1_2.c mod_1_3.c mod_1_4.c				    \
+  mul.c mul_1.c mul_2.c mul_3.c mul_4.c mul_fft.c mul_n.c mul_basecase.c    \
+  mul_toom22.c mul_toom32.c mul_toom42.c				    \
+  mullow_n.c mullow_basecase.c nand_n.c neg_n.c nior_n.c perfsqr.c	    \
+  popcount.c pre_divrem_1.c pre_mod_1.c pow_1.c random.c random2.c rshift.c \
+  rootrem.c sb_divrem_mn.c scan0.c scan1.c set_str.c			    \
+  sqr_basecase.c sqr_diagonal.c						    \
+  sqrtrem.c sub.c sub_1.c sub_n.c submul_1.c				    \
+  tdiv_qr.c udiv_qrnnd.c udiv_w_sdiv.c xor_n.c xnor_n.c
+
 noinst_LTLIBRARIES = libmpn.la
 nodist_libmpn_la_SOURCES = fib_table.c mp_bases.c
 libmpn_la_LIBADD = $(OFILES)
 libmpn_la_DEPENDENCIES = $(OFILES)
-TARG_DIST = alpha arm arm64 cray generic ia64 lisp m68k m88k \
-  minithres mips32 mips64 pa32 pa64 power powerpc32 powerpc64 \
-  s390_32 s390_64 sh sparc32 sparc64 thumb vax x86 x86_64
+TARG_DIST = a29k alpha arm clipper cray generic i960 ia64 lisp m68k m88k \
+  minithres mips32 mips64 ns32k pa32 pa64 power powerpc32 powerpc64 pyr s390 \
+  sh sparc32 sparc64 thumb vax x86 x86_64 z8000 z8000x
 
 EXTRA_DIST = asm-defs.m4 cpp-ccas m4-ccas $(TARG_DIST)
 
@@ -354,14 +327,14 @@ $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/Ma
 	@for dep in $?; do \
 	  case '$(am__configure_deps)' in \
 	    *$$dep*) \
-	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
-	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
 	      exit 1;; \
 	  esac; \
 	done; \
-	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps mpn/Makefile'; \
-	$(am__cd) $(top_srcdir) && \
-	  $(AUTOMAKE) --gnu --ignore-deps mpn/Makefile
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu  --ignore-deps mpn/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu  --ignore-deps mpn/Makefile
 .PRECIOUS: Makefile
 Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
 	@case '$?' in \
@@ -371,7 +344,6 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
 	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
 	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
 	esac;
-$(srcdir)/Makeasm.am:
 
 $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
 	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
@@ -380,24 +352,28 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
 	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
 	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
 
 clean-noinstLTLIBRARIES:
 	-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
 	@list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
 	  dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
-	  test "$$dir" != "$$p" || dir=.; \
+	  test "$$dir" = "$$p" && dir=.; \
 	  echo "rm -f \"$${dir}/so_locations\""; \
 	  rm -f "$${dir}/so_locations"; \
 	done
-libmpn.la: $(libmpn_la_OBJECTS) $(libmpn_la_DEPENDENCIES) $(EXTRA_libmpn_la_DEPENDENCIES) 
-	$(LINK)  $(libmpn_la_OBJECTS) $(libmpn_la_LIBADD) $(LIBS)
+libmpn.la: $(libmpn_la_OBJECTS) $(libmpn_la_DEPENDENCIES) 
+	$(LINK)  $(libmpn_la_LDFLAGS) $(libmpn_la_OBJECTS) $(libmpn_la_LIBADD) $(LIBS)
 
 mostlyclean-compile:
 	-rm -f *.$(OBJEXT)
 
 distclean-compile:
 	-rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ansi2knr
+
+mostlyclean-kr:
+	-test "$U" = "" || rm -f *_.c
 
 .c.o:
 	$(COMPILE) -c $<
@@ -407,6 +383,233 @@ distclean-compile:
 
 .c.lo:
 	$(LTCOMPILE) -c -o $@ $<
+add_.c: add.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add.c; then echo $(srcdir)/add.c; else echo add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+add_1_.c: add_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_1.c; then echo $(srcdir)/add_1.c; else echo add_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+add_n_.c: add_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_n.c; then echo $(srcdir)/add_n.c; else echo add_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_1_.c: addmul_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_1.c; then echo $(srcdir)/addmul_1.c; else echo addmul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_2_.c: addmul_2.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_2.c; then echo $(srcdir)/addmul_2.c; else echo addmul_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_3_.c: addmul_3.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_3.c; then echo $(srcdir)/addmul_3.c; else echo addmul_3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_4_.c: addmul_4.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_4.c; then echo $(srcdir)/addmul_4.c; else echo addmul_4.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_5_.c: addmul_5.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_5.c; then echo $(srcdir)/addmul_5.c; else echo addmul_5.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_6_.c: addmul_6.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_6.c; then echo $(srcdir)/addmul_6.c; else echo addmul_6.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_7_.c: addmul_7.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_7.c; then echo $(srcdir)/addmul_7.c; else echo addmul_7.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+addmul_8_.c: addmul_8.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_8.c; then echo $(srcdir)/addmul_8.c; else echo addmul_8.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+and_n_.c: and_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/and_n.c; then echo $(srcdir)/and_n.c; else echo and_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+andn_n_.c: andn_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/andn_n.c; then echo $(srcdir)/andn_n.c; else echo andn_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+bdivmod_.c: bdivmod.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bdivmod.c; then echo $(srcdir)/bdivmod.c; else echo bdivmod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+cmp_.c: cmp.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp.c; then echo $(srcdir)/cmp.c; else echo cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+com_n_.c: com_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/com_n.c; then echo $(srcdir)/com_n.c; else echo com_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+copyd_.c: copyd.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/copyd.c; then echo $(srcdir)/copyd.c; else echo copyd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+copyi_.c: copyi.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/copyi.c; then echo $(srcdir)/copyi.c; else echo copyi.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dc_divrem_n_.c: dc_divrem_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dc_divrem_n.c; then echo $(srcdir)/dc_divrem_n.c; else echo dc_divrem_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dive_1_.c: dive_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dive_1.c; then echo $(srcdir)/dive_1.c; else echo dive_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+diveby3_.c: diveby3.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/diveby3.c; then echo $(srcdir)/diveby3.c; else echo diveby3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divis_.c: divis.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divis.c; then echo $(srcdir)/divis.c; else echo divis.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem_.c: divrem.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem.c; then echo $(srcdir)/divrem.c; else echo divrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem_1_.c: divrem_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_1.c; then echo $(srcdir)/divrem_1.c; else echo divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem_2_.c: divrem_2.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_2.c; then echo $(srcdir)/divrem_2.c; else echo divrem_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dump_.c: dump.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dump.c; then echo $(srcdir)/dump.c; else echo dump.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fib2_ui_.c: fib2_ui.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib2_ui.c; then echo $(srcdir)/fib2_ui.c; else echo fib2_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+fib_table_.c: fib_table.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib_table.c; then echo $(srcdir)/fib_table.c; else echo fib_table.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcd_.c: gcd.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd.c; then echo $(srcdir)/gcd.c; else echo gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcd_1_.c: gcd_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd_1.c; then echo $(srcdir)/gcd_1.c; else echo gcd_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdext_.c: gcdext.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext.c; then echo $(srcdir)/gcdext.c; else echo gcdext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_d_.c: get_d.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d.c; then echo $(srcdir)/get_d.c; else echo get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_str_.c: get_str.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+hamdist_.c: hamdist.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hamdist.c; then echo $(srcdir)/hamdist.c; else echo hamdist.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+hgcd_.c: hgcd.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd.c; then echo $(srcdir)/hgcd.c; else echo hgcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+hgcd2_.c: hgcd2.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd2.c; then echo $(srcdir)/hgcd2.c; else echo hgcd2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+invert_limb_.c: invert_limb.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invert_limb.c; then echo $(srcdir)/invert_limb.c; else echo invert_limb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+ior_n_.c: ior_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ior_n.c; then echo $(srcdir)/ior_n.c; else echo ior_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+iorn_n_.c: iorn_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iorn_n.c; then echo $(srcdir)/iorn_n.c; else echo iorn_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+jacbase_.c: jacbase.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase.c; then echo $(srcdir)/jacbase.c; else echo jacbase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+lshift_.c: lshift.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lshift.c; then echo $(srcdir)/lshift.c; else echo lshift.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+matrix22_mul_.c: matrix22_mul.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/matrix22_mul.c; then echo $(srcdir)/matrix22_mul.c; else echo matrix22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_.c: mod_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1.c; then echo $(srcdir)/mod_1.c; else echo mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_1_.c: mod_1_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_1.c; then echo $(srcdir)/mod_1_1.c; else echo mod_1_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_2_.c: mod_1_2.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_2.c; then echo $(srcdir)/mod_1_2.c; else echo mod_1_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_3_.c: mod_1_3.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_3.c; then echo $(srcdir)/mod_1_3.c; else echo mod_1_3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_4_.c: mod_1_4.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_4.c; then echo $(srcdir)/mod_1_4.c; else echo mod_1_4.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_34lsub1_.c: mod_34lsub1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_34lsub1.c; then echo $(srcdir)/mod_34lsub1.c; else echo mod_34lsub1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mode1o_.c: mode1o.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mode1o.c; then echo $(srcdir)/mode1o.c; else echo mode1o.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mp_bases_.c: mp_bases.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_bases.c; then echo $(srcdir)/mp_bases.c; else echo mp_bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_.c: mul.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_1_.c: mul_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_1.c; then echo $(srcdir)/mul_1.c; else echo mul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_2_.c: mul_2.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_2.c; then echo $(srcdir)/mul_2.c; else echo mul_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_3_.c: mul_3.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_3.c; then echo $(srcdir)/mul_3.c; else echo mul_3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_4_.c: mul_4.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_4.c; then echo $(srcdir)/mul_4.c; else echo mul_4.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_basecase_.c: mul_basecase.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_basecase.c; then echo $(srcdir)/mul_basecase.c; else echo mul_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_fft_.c: mul_fft.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_fft.c; then echo $(srcdir)/mul_fft.c; else echo mul_fft.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_n_.c: mul_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_n.c; then echo $(srcdir)/mul_n.c; else echo mul_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_toom22_.c: mul_toom22.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_toom22.c; then echo $(srcdir)/mul_toom22.c; else echo mul_toom22.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_toom32_.c: mul_toom32.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_toom32.c; then echo $(srcdir)/mul_toom32.c; else echo mul_toom32.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_toom42_.c: mul_toom42.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_toom42.c; then echo $(srcdir)/mul_toom42.c; else echo mul_toom42.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mullow_basecase_.c: mullow_basecase.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullow_basecase.c; then echo $(srcdir)/mullow_basecase.c; else echo mullow_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mullow_n_.c: mullow_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullow_n.c; then echo $(srcdir)/mullow_n.c; else echo mullow_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+nand_n_.c: nand_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nand_n.c; then echo $(srcdir)/nand_n.c; else echo nand_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+neg_n_.c: neg_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/neg_n.c; then echo $(srcdir)/neg_n.c; else echo neg_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+nior_n_.c: nior_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nior_n.c; then echo $(srcdir)/nior_n.c; else echo nior_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+perfsqr_.c: perfsqr.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/perfsqr.c; then echo $(srcdir)/perfsqr.c; else echo perfsqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+popcount_.c: popcount.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/popcount.c; then echo $(srcdir)/popcount.c; else echo popcount.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+pow_1_.c: pow_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pow_1.c; then echo $(srcdir)/pow_1.c; else echo pow_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+pre_divrem_1_.c: pre_divrem_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_divrem_1.c; then echo $(srcdir)/pre_divrem_1.c; else echo pre_divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+pre_mod_1_.c: pre_mod_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_mod_1.c; then echo $(srcdir)/pre_mod_1.c; else echo pre_mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+random_.c: random.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random.c; then echo $(srcdir)/random.c; else echo random.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+random2_.c: random2.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random2.c; then echo $(srcdir)/random2.c; else echo random2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+rootrem_.c: rootrem.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rootrem.c; then echo $(srcdir)/rootrem.c; else echo rootrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+rshift_.c: rshift.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rshift.c; then echo $(srcdir)/rshift.c; else echo rshift.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sb_divrem_mn_.c: sb_divrem_mn.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sb_divrem_mn.c; then echo $(srcdir)/sb_divrem_mn.c; else echo sb_divrem_mn.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+scan0_.c: scan0.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan0.c; then echo $(srcdir)/scan0.c; else echo scan0.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+scan1_.c: scan1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan1.c; then echo $(srcdir)/scan1.c; else echo scan1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_str_.c: set_str.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqr_basecase_.c: sqr_basecase.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_basecase.c; then echo $(srcdir)/sqr_basecase.c; else echo sqr_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqr_diagonal_.c: sqr_diagonal.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_diagonal.c; then echo $(srcdir)/sqr_diagonal.c; else echo sqr_diagonal.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqrtrem_.c: sqrtrem.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrtrem.c; then echo $(srcdir)/sqrtrem.c; else echo sqrtrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sub_.c: sub.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub.c; then echo $(srcdir)/sub.c; else echo sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sub_1_.c: sub_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_1.c; then echo $(srcdir)/sub_1.c; else echo sub_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sub_n_.c: sub_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_n.c; then echo $(srcdir)/sub_n.c; else echo sub_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+submul_1_.c: submul_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/submul_1.c; then echo $(srcdir)/submul_1.c; else echo submul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_qr_.c: tdiv_qr.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr.c; then echo $(srcdir)/tdiv_qr.c; else echo tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+udiv_qrnnd_.c: udiv_qrnnd.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/udiv_qrnnd.c; then echo $(srcdir)/udiv_qrnnd.c; else echo udiv_qrnnd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+udiv_w_sdiv_.c: udiv_w_sdiv.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/udiv_w_sdiv.c; then echo $(srcdir)/udiv_w_sdiv.c; else echo udiv_w_sdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+xnor_n_.c: xnor_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xnor_n.c; then echo $(srcdir)/xnor_n.c; else echo xnor_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+xor_n_.c: xor_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xor_n.c; then echo $(srcdir)/xor_n.c; else echo xor_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+add_.$(OBJEXT) add_.lo add_1_.$(OBJEXT) add_1_.lo add_n_.$(OBJEXT) \
+add_n_.lo addmul_1_.$(OBJEXT) addmul_1_.lo addmul_2_.$(OBJEXT) \
+addmul_2_.lo addmul_3_.$(OBJEXT) addmul_3_.lo addmul_4_.$(OBJEXT) \
+addmul_4_.lo addmul_5_.$(OBJEXT) addmul_5_.lo addmul_6_.$(OBJEXT) \
+addmul_6_.lo addmul_7_.$(OBJEXT) addmul_7_.lo addmul_8_.$(OBJEXT) \
+addmul_8_.lo and_n_.$(OBJEXT) and_n_.lo andn_n_.$(OBJEXT) andn_n_.lo \
+bdivmod_.$(OBJEXT) bdivmod_.lo cmp_.$(OBJEXT) cmp_.lo com_n_.$(OBJEXT) \
+com_n_.lo copyd_.$(OBJEXT) copyd_.lo copyi_.$(OBJEXT) copyi_.lo \
+dc_divrem_n_.$(OBJEXT) dc_divrem_n_.lo dive_1_.$(OBJEXT) dive_1_.lo \
+diveby3_.$(OBJEXT) diveby3_.lo divis_.$(OBJEXT) divis_.lo \
+divrem_.$(OBJEXT) divrem_.lo divrem_1_.$(OBJEXT) divrem_1_.lo \
+divrem_2_.$(OBJEXT) divrem_2_.lo dump_.$(OBJEXT) dump_.lo \
+fib2_ui_.$(OBJEXT) fib2_ui_.lo fib_table_.$(OBJEXT) fib_table_.lo \
+gcd_.$(OBJEXT) gcd_.lo gcd_1_.$(OBJEXT) gcd_1_.lo gcdext_.$(OBJEXT) \
+gcdext_.lo get_d_.$(OBJEXT) get_d_.lo get_str_.$(OBJEXT) get_str_.lo \
+hamdist_.$(OBJEXT) hamdist_.lo hgcd_.$(OBJEXT) hgcd_.lo \
+hgcd2_.$(OBJEXT) hgcd2_.lo invert_limb_.$(OBJEXT) invert_limb_.lo \
+ior_n_.$(OBJEXT) ior_n_.lo iorn_n_.$(OBJEXT) iorn_n_.lo \
+jacbase_.$(OBJEXT) jacbase_.lo lshift_.$(OBJEXT) lshift_.lo \
+matrix22_mul_.$(OBJEXT) matrix22_mul_.lo mod_1_.$(OBJEXT) mod_1_.lo \
+mod_1_1_.$(OBJEXT) mod_1_1_.lo mod_1_2_.$(OBJEXT) mod_1_2_.lo \
+mod_1_3_.$(OBJEXT) mod_1_3_.lo mod_1_4_.$(OBJEXT) mod_1_4_.lo \
+mod_34lsub1_.$(OBJEXT) mod_34lsub1_.lo mode1o_.$(OBJEXT) mode1o_.lo \
+mp_bases_.$(OBJEXT) mp_bases_.lo mul_.$(OBJEXT) mul_.lo \
+mul_1_.$(OBJEXT) mul_1_.lo mul_2_.$(OBJEXT) mul_2_.lo mul_3_.$(OBJEXT) \
+mul_3_.lo mul_4_.$(OBJEXT) mul_4_.lo mul_basecase_.$(OBJEXT) \
+mul_basecase_.lo mul_fft_.$(OBJEXT) mul_fft_.lo mul_n_.$(OBJEXT) \
+mul_n_.lo mul_toom22_.$(OBJEXT) mul_toom22_.lo mul_toom32_.$(OBJEXT) \
+mul_toom32_.lo mul_toom42_.$(OBJEXT) mul_toom42_.lo \
+mullow_basecase_.$(OBJEXT) mullow_basecase_.lo mullow_n_.$(OBJEXT) \
+mullow_n_.lo nand_n_.$(OBJEXT) nand_n_.lo neg_n_.$(OBJEXT) neg_n_.lo \
+nior_n_.$(OBJEXT) nior_n_.lo perfsqr_.$(OBJEXT) perfsqr_.lo \
+popcount_.$(OBJEXT) popcount_.lo pow_1_.$(OBJEXT) pow_1_.lo \
+pre_divrem_1_.$(OBJEXT) pre_divrem_1_.lo pre_mod_1_.$(OBJEXT) \
+pre_mod_1_.lo random_.$(OBJEXT) random_.lo random2_.$(OBJEXT) \
+random2_.lo rootrem_.$(OBJEXT) rootrem_.lo rshift_.$(OBJEXT) \
+rshift_.lo sb_divrem_mn_.$(OBJEXT) sb_divrem_mn_.lo scan0_.$(OBJEXT) \
+scan0_.lo scan1_.$(OBJEXT) scan1_.lo set_str_.$(OBJEXT) set_str_.lo \
+sqr_basecase_.$(OBJEXT) sqr_basecase_.lo sqr_diagonal_.$(OBJEXT) \
+sqr_diagonal_.lo sqrtrem_.$(OBJEXT) sqrtrem_.lo sub_.$(OBJEXT) sub_.lo \
+sub_1_.$(OBJEXT) sub_1_.lo sub_n_.$(OBJEXT) sub_n_.lo \
+submul_1_.$(OBJEXT) submul_1_.lo tdiv_qr_.$(OBJEXT) tdiv_qr_.lo \
+udiv_qrnnd_.$(OBJEXT) udiv_qrnnd_.lo udiv_w_sdiv_.$(OBJEXT) \
+udiv_w_sdiv_.lo xnor_n_.$(OBJEXT) xnor_n_.lo xor_n_.$(OBJEXT) \
+xor_n_.lo : $(ANSI2KNR)
 
 mostlyclean-libtool:
 	-rm -f *.lo
@@ -414,85 +617,82 @@ mostlyclean-libtool:
 clean-libtool:
 	-rm -rf .libs _libs
 
+distclean-libtool:
+	-rm -f libtool
+uninstall-info-am:
+
 ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
 	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
 	unique=`for i in $$list; do \
 	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
 	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
 	mkid -fID $$unique
 tags: TAGS
 
 TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
 		$(TAGS_FILES) $(LISP)
-	set x; \
+	tags=; \
 	here=`pwd`; \
 	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
 	unique=`for i in $$list; do \
 	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
 	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
-	shift; \
-	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
-	  test -n "$$unique" || unique=$$empty_fix; \
-	  if test $$# -gt 0; then \
-	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-	      "$$@" $$unique; \
-	  else \
-	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-	      $$unique; \
-	  fi; \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+	  test -z "$$unique" && unique=$$empty_fix; \
+	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	    $$tags $$unique; \
 	fi
 ctags: CTAGS
 CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
 		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
 	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
 	unique=`for i in $$list; do \
 	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
 	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
-	test -z "$(CTAGS_ARGS)$$unique" \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	test -z "$(CTAGS_ARGS)$$tags$$unique" \
 	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
-	     $$unique
+	     $$tags $$unique
 
 GTAGS:
 	here=`$(am__cd) $(top_builddir) && pwd` \
-	  && $(am__cd) $(top_srcdir) \
-	  && gtags -i $(GTAGS_ARGS) "$$here"
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
 
 distclean-tags:
 	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
 distdir: $(DISTFILES)
-	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-	list='$(DISTFILES)'; \
-	  dist_files=`for file in $$list; do echo $$file; done | \
-	  sed -e "s|^$$srcdirstrip/||;t" \
-	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
-	case $$dist_files in \
-	  */*) $(MKDIR_P) `echo "$$dist_files" | \
-			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
-			   sort -u` ;; \
-	esac; \
-	for file in $$dist_files; do \
+	@srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
+	list='$(DISTFILES)'; for file in $$list; do \
+	  case $$file in \
+	    $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
+	    $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
+	  esac; \
 	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+	  if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+	    dir="/$$dir"; \
+	    $(mkdir_p) "$(distdir)$$dir"; \
+	  else \
+	    dir=''; \
+	  fi; \
 	  if test -d $$d/$$file; then \
-	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
-	    if test -d "$(distdir)/$$file"; then \
-	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
-	    fi; \
 	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
-	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
-	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
 	    fi; \
-	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
 	  else \
-	    test -f "$(distdir)/$$file" \
-	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
 	    || exit 1; \
 	  fi; \
 	done
@@ -510,22 +710,16 @@ install-am: all-am
 
 installcheck: installcheck-am
 install-strip:
-	if test -z '$(STRIP)'; then \
-	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-	      install; \
-	else \
-	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
-	fi
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
 mostlyclean-generic:
 
 clean-generic:
 
 distclean-generic:
-	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f $(CONFIG_CLEAN_FILES)
 
 maintainer-clean-generic:
 	@echo "This command is intended for maintainers to use"
@@ -538,7 +732,7 @@ clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
 distclean: distclean-am
 	-rm -f Makefile
 distclean-am: clean-am distclean-compile distclean-generic \
-	distclean-tags
+	distclean-libtool distclean-tags
 
 dvi: dvi-am
 
@@ -546,38 +740,18 @@ dvi-am:
 
 html: html-am
 
-html-am:
-
 info: info-am
 
 info-am:
 
 install-data-am:
 
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
 install-exec-am:
 
-install-html: install-html-am
-
-install-html-am:
-
 install-info: install-info-am
 
-install-info-am:
-
 install-man:
 
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
 installcheck-am:
 
 maintainer-clean: maintainer-clean-am
@@ -586,7 +760,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
 
 mostlyclean: mostlyclean-am
 
-mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
 	mostlyclean-libtool
 
 pdf: pdf-am
@@ -597,22 +771,19 @@ ps: ps-am
 
 ps-am:
 
-uninstall-am:
-
-.MAKE: install-am install-strip
+uninstall-am: uninstall-info-am
 
 .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
 	clean-libtool clean-noinstLTLIBRARIES ctags distclean \
 	distclean-compile distclean-generic distclean-libtool \
 	distclean-tags distdir dvi dvi-am html html-am info info-am \
-	install install-am install-data install-data-am install-dvi \
-	install-dvi-am install-exec install-exec-am install-html \
-	install-html-am install-info install-info-am install-man \
-	install-pdf install-pdf-am install-ps install-ps-am \
+	install install-am install-data install-data-am install-exec \
+	install-exec-am install-info install-info-am install-man \
 	install-strip installcheck installcheck-am installdirs \
 	maintainer-clean maintainer-clean-generic mostlyclean \
-	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
-	pdf pdf-am ps ps-am tags uninstall uninstall-am
+	mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+	mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+	uninstall-am uninstall-info-am
 
 
 # These are BUILT_SOURCES at the top-level, so normally they're built before
@@ -625,6 +796,9 @@ mp_bases.c:
 perfsqr.h:
 	cd ..; $(MAKE) $(AM_MAKEFLAGS) mpn/perfsqr.h
 
+tune-gcd-p: gcd.c
+	$(COMPILE) -g -O1 -I $(top_srcdir)/tune -DTUNE_GCD_P=1 gcd.c -o tune-gcd-p -L ../.libs -L../tune/.libs -lspeed -lgmp -lm
+
 # .s assembler, no preprocessing.
 #
 .s.o:
@@ -680,7 +854,6 @@ perfsqr.h:
 	$(RM_TMP) tmp-$*.s
 .asm.lo:
 	$(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/m4-ccas --m4="$(M4)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
-
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
 .NOEXPORT:
diff --git a/gmp/mpn/README b/gmp/mpn/README
index bc046be732..32fc007e40 100644
--- a/gmp/mpn/README
+++ b/gmp/mpn/README
@@ -3,28 +3,17 @@ Copyright 1996, 1999 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/a29k/add_n.s b/gmp/mpn/a29k/add_n.s
new file mode 100644
index 0000000000..2d926047fd
--- /dev/null
+++ b/gmp/mpn/a29k/add_n.s
@@ -0,0 +1,118 @@
+; 29000 mpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; s2_ptr	lr4
+; size		lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+; The 29k has no addition or subtraction instructions that doesn't
+; affect carry, so we need to save and restore that as soon as we
+; adjust the pointers.  gr116 is used for this purpose.  Note that
+; gr116==0 means that carry should be set.
+
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___gmpn_add_n
+	.word	0x60000
+___gmpn_add_n:
+	srl	gr117,lr5,3		; gr117 = size / 8 = number of full 8-limb chunks
+	sub	gr118,gr117,1		; negative when there is no full chunk
+	jmpt	gr118,Ltail		; no full chunk: go straight to the tail code
+	 constn	gr116,-1		; init cy reg (delay slot): -1 = no carry pending
+	sub	gr117,gr117,2		; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop:	mtsrim	cr,(8-1)		; transfer count for loadm
+	loadm	0,0,gr96,lr3		; gr96.. = next 8 limbs from s1_ptr
+	add	lr3,lr3,32		; advance s1_ptr by 8 limbs (32 bytes)
+	mtsrim	cr,(8-1)
+	loadm	0,0,gr104,lr4		; gr104.. = next 8 limbs from s2_ptr
+	add	lr4,lr4,32		; advance s2_ptr by 8 limbs (32 bytes)
+
+	subr	gr116,gr116,0		; restore carry
+	addc	gr96,gr96,gr104
+	addc	gr97,gr97,gr105
+	addc	gr98,gr98,gr106
+	addc	gr99,gr99,gr107
+	addc	gr100,gr100,gr108
+	addc	gr101,gr101,gr109
+	addc	gr102,gr102,gr110
+	addc	gr103,gr103,gr111
+	subc	gr116,gr116,gr116	; gr116 = not(cy)
+
+	mtsrim	cr,(8-1)
+	storem	0,0,gr96,lr2		; write the 8 sum limbs to res_ptr
+	jmpfdec	gr117,Loop		; next chunk while the counter is non-negative
+	 add	lr2,lr2,32		; (delay slot) advance res_ptr by 8 limbs
+
+; Code for the last up-to-7 limbs.
+; This code might look very strange, but it's hard to write it
+; differently without major slowdown.
+
+	and	lr5,lr5,(8-1)		; lr5 = limbs left after the full chunks
+Ltail:	sub	gr118,lr5,1		; count for CR
+	jmpt	gr118,Lend		; nothing left: return
+	 sub	gr117,lr5,2		; count for jmpfdec
+
+	mtsr	cr,gr118
+	loadm	0,0,gr96,lr3		; remaining s1 limbs into gr96..
+	mtsr	cr,gr118
+	loadm	0,0,gr104,lr4		; remaining s2 limbs into gr104..
+
+	subr	gr116,gr116,0		; restore carry
+
+	jmpfdec	gr117,L1		; more limbs remain: continue at L1
+	 addc	gr96,gr96,gr104		; (delay slot) limb 0
+	jmp	Lstore
+	 mtsr	cr,gr118		; (delay slot) count for the storem at Lstore
+L1:	jmpfdec	gr117,L2
+	 addc	gr97,gr97,gr105		; (delay slot) limb 1
+	jmp	Lstore
+	 mtsr	cr,gr118
+L2:	jmpfdec	gr117,L3
+	 addc	gr98,gr98,gr106		; (delay slot) limb 2
+	jmp	Lstore
+	 mtsr	cr,gr118
+L3:	jmpfdec	gr117,L4
+	 addc	gr99,gr99,gr107		; (delay slot) limb 3
+	jmp	Lstore
+	 mtsr	cr,gr118
+L4:	jmpfdec	gr117,L5
+	 addc	gr100,gr100,gr108	; (delay slot) limb 4
+	jmp	Lstore
+	 mtsr	cr,gr118
+L5:	jmpfdec	gr117,L6
+	 addc	gr101,gr101,gr109	; (delay slot) limb 5
+	jmp	Lstore
+	 mtsr	cr,gr118
+L6:	addc	gr102,gr102,gr110	; limb 6 (at most 7 limbs in the tail)
+
+Lstore:	storem	0,0,gr96,lr2		; write the tail sum limbs to res_ptr
+	subc	gr116,gr116,gr116	; gr116 = not(cy)
+
+Lend:	jmpi	lr0			; return
+	 add	gr96,gr116,1		; (delay slot) return gr116 + 1: 1 if carry out, else 0
diff --git a/gmp/mpn/a29k/addmul_1.s b/gmp/mpn/a29k/addmul_1.s
new file mode 100644
index 0000000000..fcf7fc2f39
--- /dev/null
+++ b/gmp/mpn/a29k/addmul_1.s
@@ -0,0 +1,111 @@
+; 29000 __gmpn_addmul_1 -- Multiply a limb vector with a single limb and
+; add the product to a second limb vector.
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; size		lr4
+; s2_limb	lr5
+
+	.cputype 29050
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___gmpn_addmul_1
+	.word	0x60000
+___gmpn_addmul_1:
+	sub	lr4,lr4,8		; lr4 = size - 8
+	jmpt	lr4,Ltail		; fewer than 8 limbs: only the tail loop runs
+	 const	gr120,0			; init cylimb reg
+
+	srl	gr117,lr4,3		; divide by 8
+	sub	gr117,gr117,1		; count for jmpfdec
+
+Loop:	mtsrim	cr,(8-1)		; transfer count for loadm
+	loadm	0,0,gr96,lr3		; gr96.. = next 8 limbs from s1_ptr
+	add	lr3,lr3,32		; advance s1_ptr by 8 limbs (32 bytes)
+
+	multiplu gr104,gr96,lr5		; gr104..gr111 = low product words
+	multmu	 gr96,gr96,lr5		; gr96..gr103 = high product words
+	multiplu gr105,gr97,lr5
+	multmu	 gr97,gr97,lr5
+	multiplu gr106,gr98,lr5
+	multmu	 gr98,gr98,lr5
+	multiplu gr107,gr99,lr5
+	multmu	 gr99,gr99,lr5
+	multiplu gr108,gr100,lr5
+	multmu	 gr100,gr100,lr5
+	multiplu gr109,gr101,lr5
+	multmu	 gr101,gr101,lr5
+	multiplu gr110,gr102,lr5
+	multmu	 gr102,gr102,lr5
+	multiplu gr111,gr103,lr5
+	multmu	 gr103,gr103,lr5
+
+	add	gr104,gr104,gr120	; add carry limb from the previous chunk
+	addc	gr105,gr105,gr96	; low word + previous limb's high word
+	addc	gr106,gr106,gr97
+	addc	gr107,gr107,gr98
+	addc	gr108,gr108,gr99
+	addc	gr109,gr109,gr100
+	addc	gr110,gr110,gr101
+	addc	gr111,gr111,gr102
+	addc	gr120,gr103,0		; new carry limb = last high word + carry
+
+	mtsrim	cr,(8-1)
+	loadm	0,0,gr96,lr2		; gr96.. = 8 limbs currently at res_ptr
+
+	add	gr104,gr96,gr104	; accumulate the product into the result limbs
+	addc	gr105,gr97,gr105
+	addc	gr106,gr98,gr106
+	addc	gr107,gr99,gr107
+	addc	gr108,gr100,gr108
+	addc	gr109,gr101,gr109
+	addc	gr110,gr102,gr110
+	addc	gr111,gr103,gr111
+	addc	gr120,gr120,0		; fold the carry out into the carry limb
+
+	mtsrim	cr,(8-1)
+	storem	0,0,gr104,lr2		; write the 8 updated limbs back to res_ptr
+	jmpfdec	gr117,Loop		; next chunk while the counter is non-negative
+	 add	lr2,lr2,32		; (delay slot) advance res_ptr by 8 limbs
+
+Ltail:	and	lr4,lr4,(8-1)		; lr4 = limbs left after the full chunks
+	sub	gr118,lr4,1		; negative when no limbs remain
+	jmpt	gr118,Lend		; nothing left: return
+	 sub	lr4,lr4,2		; (delay slot) count for jmpfdec
+	sub	lr2,lr2,4		; offset res_ptr by one limb
+
+Loop2:	load	0,0,gr116,lr3		; gr116 = next s1 limb
+	add	lr3,lr3,4
+	multiplu gr117,gr116,lr5	; gr117 = low product word
+	multmu	gr118,gr116,lr5		; gr118 = high product word
+	add	lr2,lr2,4		; pre-increment res_ptr (was offset above)
+	load	0,0,gr119,lr2		; gr119 = current result limb
+	add	gr117,gr117,gr120	; add incoming carry limb
+	addc	gr118,gr118,0
+	add	gr117,gr117,gr119	; add the result limb
+	store	0,0,gr117,lr2
+	jmpfdec	lr4,Loop2
+	 addc	gr120,gr118,0		; (delay slot) carry limb for the next limb
+
+Lend:	jmpi	lr0			; return
+	 or	gr96,gr120,0		; copy
diff --git a/gmp/mpn/a29k/lshift.s b/gmp/mpn/a29k/lshift.s
new file mode 100644
index 0000000000..3df6dabfe4
--- /dev/null
+++ b/gmp/mpn/a29k/lshift.s
@@ -0,0 +1,91 @@
+; 29000 __gmpn_lshift --
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; size		lr4
+; cnt		lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs per iteration, until fewer than 8 limbs remain.
+
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___gmpn_lshift
+	.word	0x60000
+___gmpn_lshift:
+	sll	gr116,lr4,2		; gr116 = size * 4 = operand length in bytes
+	add	lr3,gr116,lr3		; s1_ptr -> just past the last source limb
+	add	lr2,gr116,lr2		; res_ptr -> just past the last result limb
+	sub	lr3,lr3,4
+	load	0,0,gr119,lr3		; gr119 = highest-addressed source limb
+
+	subr	gr116,lr5,32		; gr116 = 32 - cnt
+	srl	gr96,gr119,gr116	; return value
+	sub	lr4,lr4,1		; actual loop count is SIZE - 1
+
+	srl	gr117,lr4,3		; chunk count = (actual count) / 8
+	cpeq	gr118,gr117,0
+	jmpt	gr118,Ltail		; no full chunk: only the tail loop runs
+	 mtsr	fc,lr5			; (delay slot) funnel shift count = cnt
+
+	sub	gr117,gr117,2		; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop:	sub	lr3,lr3,32		; step s1_ptr down by 8 limbs (32 bytes)
+	mtsrim	cr,(8-1)
+	loadm	0,0,gr100,lr3		; gr100.. = next 8 source limbs
+
+	extract	gr109,gr119,gr107	; each result limb is extracted from two adjacent limbs
+	extract	gr108,gr107,gr106
+	extract	gr107,gr106,gr105
+	extract	gr106,gr105,gr104
+	extract	gr105,gr104,gr103
+	extract	gr104,gr103,gr102
+	extract	gr103,gr102,gr101
+	extract	gr102,gr101,gr100
+
+	sub	lr2,lr2,32		; step res_ptr down by 8 limbs
+	mtsrim	cr,(8-1)
+	storem	0,0,gr102,lr2		; store the 8 result limbs held from gr102 up
+	jmpfdec	gr117,Loop
+	 or	gr119,gr100,0		; (delay slot) lowest loaded limb feeds the next round
+
+; Code for the last up-to-7 limbs.
+
+	and	lr4,lr4,(8-1)		; lr4 = limbs left after the full chunks
+Ltail:	cpeq	gr118,lr4,0
+	jmpt	gr118,Lend		; nothing left: finish with the last limb
+	 sub	lr4,lr4,2		; count for jmpfdec
+
+Loop2:	sub	lr3,lr3,4
+	load	0,0,gr116,lr3		; gr116 = next lower source limb
+	extract	gr117,gr119,gr116
+	sub	lr2,lr2,4
+	store	0,0,gr117,lr2
+	jmpfdec	lr4,Loop2
+	 or	gr119,gr116,0		; (delay slot) keep this limb for the next extract
+
+Lend:	extract	gr117,gr119,0		; lowest result limb: zero bits come in from below
+	sub	lr2,lr2,4
+	jmpi	lr0			; return
+	 store	0,0,gr117,lr2		; (delay slot) store the lowest result limb
diff --git a/gmp/mpn/a29k/mul_1.s b/gmp/mpn/a29k/mul_1.s
new file mode 100644
index 0000000000..a55fe3e367
--- /dev/null
+++ b/gmp/mpn/a29k/mul_1.s
@@ -0,0 +1,95 @@
+; 29000 __gmpn_mul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; size		lr4
+; s2_limb	lr5
+
+	.cputype 29050
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___gmpn_mul_1
+	.word	0x60000
+___gmpn_mul_1:
+	sub	lr4,lr4,8		; lr4 = size - 8
+	jmpt	lr4,Ltail		; fewer than 8 limbs: only the tail loop runs
+	 const	gr120,0			; init cylimb reg
+
+	srl	gr117,lr4,3		; divide by 8
+	sub	gr117,gr117,1		; count for jmpfdec
+
+Loop:	mtsrim	cr,(8-1)		; transfer count for loadm
+	loadm	0,0,gr96,lr3		; gr96.. = next 8 limbs from s1_ptr
+	add	lr3,lr3,32		; advance s1_ptr by 8 limbs (32 bytes)
+
+	multiplu gr104,gr96,lr5		; gr104..gr111 = low product words
+	multmu	 gr96,gr96,lr5		; gr96..gr103 = high product words
+	multiplu gr105,gr97,lr5
+	multmu	 gr97,gr97,lr5
+	multiplu gr106,gr98,lr5
+	multmu	 gr98,gr98,lr5
+	multiplu gr107,gr99,lr5
+	multmu	 gr99,gr99,lr5
+	multiplu gr108,gr100,lr5
+	multmu	 gr100,gr100,lr5
+	multiplu gr109,gr101,lr5
+	multmu	 gr101,gr101,lr5
+	multiplu gr110,gr102,lr5
+	multmu	 gr102,gr102,lr5
+	multiplu gr111,gr103,lr5
+	multmu	 gr103,gr103,lr5
+
+	add	gr104,gr104,gr120	; add carry limb from the previous chunk
+	addc	gr105,gr105,gr96	; low word + previous limb's high word
+	addc	gr106,gr106,gr97
+	addc	gr107,gr107,gr98
+	addc	gr108,gr108,gr99
+	addc	gr109,gr109,gr100
+	addc	gr110,gr110,gr101
+	addc	gr111,gr111,gr102
+	addc	gr120,gr103,0		; new carry limb = last high word + carry
+
+	mtsrim	cr,(8-1)
+	storem	0,0,gr104,lr2		; write the 8 product limbs to res_ptr
+	jmpfdec	gr117,Loop		; next chunk while the counter is non-negative
+	 add	lr2,lr2,32		; (delay slot) advance res_ptr by 8 limbs
+
+Ltail:	and	lr4,lr4,(8-1)		; lr4 = limbs left after the full chunks
+	sub	gr118,lr4,1		; negative when no limbs remain
+	jmpt	gr118,Lend		; nothing left: return
+	 sub	lr4,lr4,2		; (delay slot) count for jmpfdec
+	sub	lr2,lr2,4		; offset res_ptr by one limb
+
+Loop2:	load	0,0,gr116,lr3		; gr116 = next s1 limb
+	add	lr3,lr3,4
+	multiplu gr117,gr116,lr5	; gr117 = low product word
+	multmu	gr118,gr116,lr5		; gr118 = high product word
+	add	lr2,lr2,4		; pre-increment res_ptr (was offset above)
+	add	gr117,gr117,gr120	; add incoming carry limb
+	store	0,0,gr117,lr2
+	jmpfdec	lr4,Loop2
+	 addc	gr120,gr118,0		; (delay slot) carry limb for the next limb
+
+Lend:	jmpi	lr0			; return
+	 or	gr96,gr120,0		; copy
diff --git a/gmp/mpn/a29k/rshift.s b/gmp/mpn/a29k/rshift.s
new file mode 100644
index 0000000000..8a3086755a
--- /dev/null
+++ b/gmp/mpn/a29k/rshift.s
@@ -0,0 +1,87 @@
+; 29000 __gmpn_rshift --
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; size		lr4
+; cnt		lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs per iteration, until fewer than 8 limbs remain.
+
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___gmpn_rshift
+	.word	0x60000
+___gmpn_rshift:
+	load	0,0,gr119,lr3		; gr119 = lowest-addressed source limb
+	add	lr3,lr3,4
+
+	subr	gr116,lr5,32		; gr116 = 32 - cnt
+	sll	gr96,gr119,gr116	; return value
+	sub	lr4,lr4,1		; actual loop count is SIZE - 1
+
+	srl	gr117,lr4,3		; chunk count = (actual count) / 8
+	cpeq	gr118,gr117,0
+	jmpt	gr118,Ltail		; no full chunk: only the tail loop runs
+	 mtsr	fc,gr116		; (delay slot) funnel shift count = 32 - cnt
+
+	sub	gr117,gr117,2		; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop:	mtsrim	cr,(8-1)
+	loadm	0,0,gr100,lr3		; gr100.. = next 8 source limbs
+	add	lr3,lr3,32		; advance s1_ptr by 8 limbs (32 bytes)
+
+	extract	gr98,gr100,gr119	; each result limb is extracted from two adjacent limbs
+	extract	gr99,gr101,gr100
+	extract	gr100,gr102,gr101
+	extract	gr101,gr103,gr102
+	extract	gr102,gr104,gr103
+	extract	gr103,gr105,gr104
+	extract	gr104,gr106,gr105
+	extract	gr105,gr107,gr106
+
+	mtsrim	cr,(8-1)
+	storem	0,0,gr98,lr2		; store the 8 result limbs held from gr98 up
+	add	lr2,lr2,32		; advance res_ptr by 8 limbs
+	jmpfdec	gr117,Loop
+	 or	gr119,gr107,0		; (delay slot) highest loaded limb feeds the next round
+
+; Code for the last up-to-7 limbs.
+
+	and	lr4,lr4,(8-1)		; lr4 = limbs left after the full chunks
+Ltail:	cpeq	gr118,lr4,0
+	jmpt	gr118,Lend		; nothing left: finish with the last limb
+	 sub	lr4,lr4,2		; count for jmpfdec
+
+Loop2:	load	0,0,gr100,lr3		; gr100 = next higher source limb
+	add	lr3,lr3,4
+	extract	gr117,gr100,gr119
+	store	0,0,gr117,lr2
+	add	lr2,lr2,4
+	jmpfdec	lr4,Loop2
+	 or	gr119,gr100,0		; (delay slot) keep this limb for the next extract
+
+Lend:	srl	gr117,gr119,lr5		; highest result limb: plain shift by cnt
+	jmpi	lr0			; return
+	 store	0,0,gr117,lr2		; (delay slot) store the highest result limb
diff --git a/gmp/mpn/a29k/sub_n.s b/gmp/mpn/a29k/sub_n.s
new file mode 100644
index 0000000000..42072a494d
--- /dev/null
+++ b/gmp/mpn/a29k/sub_n.s
@@ -0,0 +1,118 @@
+; 29000 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; s2_ptr	lr4
+; size		lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+; The 29k has no addition or subtraction instructions that doesn't
+; affect carry, so we need to save and restore that as soon as we
+; adjust the pointers.  gr116 is used for this purpose.  Note that
+; gr116==0 means that carry should be set.
+
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___gmpn_sub_n
+	.word	0x60000
+___gmpn_sub_n:
+	srl	gr117,lr5,3		; gr117 = size / 8 = number of full 8-limb chunks
+	sub	gr118,gr117,1		; negative when there is no full chunk
+	jmpt	gr118,Ltail		; no full chunk: go straight to the tail code
+	 constn	gr116,-1		; init cy reg -- NOTE(review): same value as add_n; confirm borrow polarity for subc
+	sub	gr117,gr117,2		; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop:	mtsrim	cr,(8-1)		; transfer count for loadm
+	loadm	0,0,gr96,lr3		; gr96.. = next 8 limbs from s1_ptr
+	add	lr3,lr3,32		; advance s1_ptr by 8 limbs (32 bytes)
+	mtsrim	cr,(8-1)
+	loadm	0,0,gr104,lr4		; gr104.. = next 8 limbs from s2_ptr
+	add	lr4,lr4,32		; advance s2_ptr by 8 limbs (32 bytes)
+
+	subr	gr116,gr116,0		; restore carry
+	subc	gr96,gr96,gr104
+	subc	gr97,gr97,gr105
+	subc	gr98,gr98,gr106
+	subc	gr99,gr99,gr107
+	subc	gr100,gr100,gr108
+	subc	gr101,gr101,gr109
+	subc	gr102,gr102,gr110
+	subc	gr103,gr103,gr111
+	subc	gr116,gr116,gr116	; gr116 = not(cy)
+
+	mtsrim	cr,(8-1)
+	storem	0,0,gr96,lr2		; write the 8 difference limbs to res_ptr
+	jmpfdec	gr117,Loop		; next chunk while the counter is non-negative
+	 add	lr2,lr2,32		; (delay slot) advance res_ptr by 8 limbs
+
+; Code for the last up-to-7 limbs.
+; This code might look very strange, but it's hard to write it
+; differently without major slowdown.
+
+	and	lr5,lr5,(8-1)		; lr5 = limbs left after the full chunks
+Ltail:	sub	gr118,lr5,1		; count for CR
+	jmpt	gr118,Lend		; nothing left: return
+	 sub	gr117,lr5,2		; count for jmpfdec
+
+	mtsr	cr,gr118
+	loadm	0,0,gr96,lr3		; remaining s1 limbs into gr96..
+	mtsr	cr,gr118
+	loadm	0,0,gr104,lr4		; remaining s2 limbs into gr104..
+
+	subr	gr116,gr116,0		; restore carry
+
+	jmpfdec	gr117,L1		; more limbs remain: continue at L1
+	 subc	gr96,gr96,gr104		; (delay slot) limb 0
+	jmp	Lstore
+	 mtsr	cr,gr118		; (delay slot) count for the storem at Lstore
+L1:	jmpfdec	gr117,L2
+	 subc	gr97,gr97,gr105		; (delay slot) limb 1
+	jmp	Lstore
+	 mtsr	cr,gr118
+L2:	jmpfdec	gr117,L3
+	 subc	gr98,gr98,gr106		; (delay slot) limb 2
+	jmp	Lstore
+	 mtsr	cr,gr118
+L3:	jmpfdec	gr117,L4
+	 subc	gr99,gr99,gr107		; (delay slot) limb 3
+	jmp	Lstore
+	 mtsr	cr,gr118
+L4:	jmpfdec	gr117,L5
+	 subc	gr100,gr100,gr108	; (delay slot) limb 4
+	jmp	Lstore
+	 mtsr	cr,gr118
+L5:	jmpfdec	gr117,L6
+	 subc	gr101,gr101,gr109	; (delay slot) limb 5
+	jmp	Lstore
+	 mtsr	cr,gr118
+L6:	subc	gr102,gr102,gr110	; limb 6 (at most 7 limbs in the tail)
+
+Lstore:	storem	0,0,gr96,lr2		; write the tail difference limbs to res_ptr
+	subc	gr116,gr116,gr116	; gr116 = not(cy)
+
+Lend:	jmpi	lr0			; return
+	 add	gr96,gr116,1		; (delay slot) return gr116 + 1 -- NOTE(review): verify this yields the borrow
diff --git a/gmp/mpn/a29k/submul_1.s b/gmp/mpn/a29k/submul_1.s
new file mode 100644
index 0000000000..7955b89537
--- /dev/null
+++ b/gmp/mpn/a29k/submul_1.s
@@ -0,0 +1,114 @@
+; 29000 __gmpn_submul_1 -- Multiply a limb vector with a single limb and
+; subtract the product from a second limb vector.
+
+; Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; size		lr4
+; s2_limb	lr5
+
+	.cputype 29050
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___gmpn_submul_1
+	.word	0x60000
+___gmpn_submul_1:
+	sub	lr4,lr4,8		; lr4 = size - 8
+	jmpt	lr4,Ltail		; fewer than 8 limbs: only the tail loop runs
+	 const	gr120,0			; init cylimb reg
+
+	srl	gr117,lr4,3		; divide by 8
+	sub	gr117,gr117,1		; count for jmpfdec
+
+Loop:	mtsrim	cr,(8-1)		; transfer count for loadm
+	loadm	0,0,gr96,lr3		; gr96.. = next 8 limbs from s1_ptr
+	add	lr3,lr3,32		; advance s1_ptr by 8 limbs (32 bytes)
+
+	multiplu gr104,gr96,lr5		; gr104..gr111 = low product words
+	multmu	 gr96,gr96,lr5		; gr96..gr103 = high product words
+	multiplu gr105,gr97,lr5
+	multmu	 gr97,gr97,lr5
+	multiplu gr106,gr98,lr5
+	multmu	 gr98,gr98,lr5
+	multiplu gr107,gr99,lr5
+	multmu	 gr99,gr99,lr5
+	multiplu gr108,gr100,lr5
+	multmu	 gr100,gr100,lr5
+	multiplu gr109,gr101,lr5
+	multmu	 gr101,gr101,lr5
+	multiplu gr110,gr102,lr5
+	multmu	 gr102,gr102,lr5
+	multiplu gr111,gr103,lr5
+	multmu	 gr103,gr103,lr5
+
+	add	gr104,gr104,gr120	; add carry limb from the previous chunk
+	addc	gr105,gr105,gr96	; low word + previous limb's high word
+	addc	gr106,gr106,gr97
+	addc	gr107,gr107,gr98
+	addc	gr108,gr108,gr99
+	addc	gr109,gr109,gr100
+	addc	gr110,gr110,gr101
+	addc	gr111,gr111,gr102
+	addc	gr120,gr103,0		; new carry limb = last high word + carry
+
+	mtsrim	cr,(8-1)
+	loadm	0,0,gr96,lr2		; gr96.. = 8 limbs currently at res_ptr
+
+	sub	gr96,gr96,gr104		; subtract the product from the result limbs
+	subc	gr97,gr97,gr105
+	subc	gr98,gr98,gr106
+	subc	gr99,gr99,gr107
+	subc	gr100,gr100,gr108
+	subc	gr101,gr101,gr109
+	subc	gr102,gr102,gr110
+	subc	gr103,gr103,gr111
+
+	add	gr104,gr103,gr111	; invert carry from previous sub
+	addc	gr120,gr120,0		; fold it into the carry limb
+
+	mtsrim	cr,(8-1)
+	storem	0,0,gr96,lr2		; write the 8 updated limbs back to res_ptr
+	jmpfdec	gr117,Loop		; next chunk while the counter is non-negative
+	 add	lr2,lr2,32		; (delay slot) advance res_ptr by 8 limbs
+
+Ltail:	and	lr4,lr4,(8-1)		; lr4 = limbs left after the full chunks
+	sub	gr118,lr4,1		; negative when no limbs remain
+	jmpt	gr118,Lend		; nothing left: return
+	 sub	lr4,lr4,2		; (delay slot) count for jmpfdec
+	sub	lr2,lr2,4		; offset res_ptr by one limb
+
+Loop2:	load	0,0,gr116,lr3		; gr116 = next s1 limb
+	add	lr3,lr3,4
+	multiplu gr117,gr116,lr5	; gr117 = low product word
+	multmu	gr118,gr116,lr5		; gr118 = high product word
+	add	lr2,lr2,4		; pre-increment res_ptr (was offset above)
+	load	0,0,gr119,lr2		; gr119 = current result limb
+	add	gr117,gr117,gr120	; add incoming carry limb
+	addc	gr118,gr118,0
+	sub	gr119,gr119,gr117	; subtract the product word from the result limb
+	add	gr104,gr119,gr117	; invert carry from previous sub
+	store	0,0,gr119,lr2
+	jmpfdec	lr4,Loop2
+	 addc	gr120,gr118,0		; (delay slot) carry limb for the next limb
+
+Lend:	jmpi	lr0			; return
+	 or	gr96,gr120,0		; copy
diff --git a/gmp/mpn/a29k/udiv.s b/gmp/mpn/a29k/udiv.s
new file mode 100644
index 0000000000..82c3925a42
--- /dev/null
+++ b/gmp/mpn/a29k/udiv.s
@@ -0,0 +1,28 @@
+; Copyright 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+	.sect .lit,lit
+	.text
+	.align 4
+	.global ___udiv_qrnnd
+	.word 0x60000
+___udiv_qrnnd:
+	mtsr q,lr3		; Q = lr3 (presumably the high dividend word -- confirm)
+	dividu gr96,lr4,lr5	; gr96 = result of dividing by lr5; returned to the caller
+	mfsr gr116,q		; gr116 = Q after the divide (presumably the remainder)
+	jmpi lr0		; return
+	store 0,0,gr116,lr2	; (delay slot) store gr116 through the pointer in lr2
diff --git a/gmp/mpn/a29k/umul.s b/gmp/mpn/a29k/umul.s
new file mode 100644
index 0000000000..02c34e9151
--- /dev/null
+++ b/gmp/mpn/a29k/umul.s
@@ -0,0 +1,27 @@
+; Copyright 1999, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+	.sect .lit,lit
+	.text
+	.align 4
+	.global ___umul_ppmm
+	.word 0x50000
+___umul_ppmm:
+	multiplu gr116,lr3,lr4	; gr116 = low word of lr3 * lr4
+	multmu gr96,lr3,lr4	; gr96 = high word of lr3 * lr4; returned to the caller
+	jmpi lr0		; return
+	store 0,0,gr116,lr2	; (delay slot) store the low word through the pointer in lr2
diff --git a/gmp/mpn/alpha/README b/gmp/mpn/alpha/README
index 09c2f04047..3578c53b85 100644
--- a/gmp/mpn/alpha/README
+++ b/gmp/mpn/alpha/README
@@ -1,30 +1,20 @@
-Copyright 1996, 1997, 1999-2005 Free Software Foundation, Inc.
+Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
@@ -46,7 +36,7 @@ Cray T3 code is very very different...
 them to "$6" or "$f6" where necessary.
 
 "0x" introduces a hex constant in gas and DEC as, but on Unicos "^X" is
-required.  The X() macro accommodates this difference.
+required.  The X() macro accommodates this difference.
 
 "cvttqc" is required by DEC as, "cvttq/c" is required by Unicos, and gas will
 accept either.  We use cvttqc and have an m4 define expand to cvttq/c where
@@ -70,7 +60,7 @@ RELEVANT OPTIMIZATION ISSUES
 EV4
 
 1. This chip has very limited store bandwidth.  The on-chip L1 cache is write-
-   through, and a cache line is transferred from the store buffer to the off-
+   through, and a cache line is transferred from the store buffer to the off-
    chip L2 in as much 15 cycles on most systems.  This delay hurts mpn_add_n,
    mpn_sub_n, mpn_lshift, and mpn_rshift.
 
diff --git a/gmp/mpn/alpha/add_n.asm b/gmp/mpn/alpha/add_n.asm
index bc572a57a9..77d4cad2ef 100644
--- a/gmp/mpn/alpha/add_n.asm
+++ b/gmp/mpn/alpha/add_n.asm
@@ -1,164 +1,117 @@
 dnl  Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
 dnl  store sum in a third limb vector.
 
-dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
+dnl  Copyright 1995, 2000, 2002, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C      cycles/limb
-C EV4:     ?
-C EV5:     4.75
-C EV6:     3
+C EV4:     7.75
+C EV5:     5.75
+C EV6:     4
 
-dnl  INPUT PARAMETERS
-dnl  res_ptr	r16
-dnl  s1_ptr	r17
-dnl  s2_ptr	r18
-dnl  size	r19
+C  INPUT PARAMETERS
+C  rp	r16
+C  up	r17
+C  vp	r18
+C  n	r19
 
 ASM_START()
-PROLOGUE(mpn_add_nc)
-	bis	r20,r31,r25
-	br	L(com)
-EPILOGUE()
 PROLOGUE(mpn_add_n)
-	bis	r31,r31,r25		C clear cy
-L(com):	subq	r19,4,r19		C decr loop cnt
-	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
-C Start software pipeline for 1st loop
-	ldq	r0,0(r18)
-	ldq	r4,0(r17)
-	ldq	r1,8(r18)
-	ldq	r5,8(r17)
-	addq	r17,32,r17		C update s1_ptr
-	addq	r0,r4,r28		C 1st main add
-	ldq	r2,16(r18)
-	addq	r25,r28,r20		C 1st carry add
-	ldq	r3,24(r18)
-	cmpult	r28,r4,r8		C compute cy from last add
-	ldq	r6,-16(r17)
-	cmpult	r20,r28,r25		C compute cy from last add
-	ldq	r7,-8(r17)
-	bis	r8,r25,r25		C combine cy from the two adds
-	subq	r19,4,r19		C decr loop cnt
-	addq	r1,r5,r28		C 2nd main add
-	addq	r18,32,r18		C update s2_ptr
-	addq	r28,r25,r21		C 2nd carry add
-	cmpult	r28,r5,r8		C compute cy from last add
-	blt	r19,$Lend1		C if less than 4 limbs remain, jump
-C 1st loop handles groups of 4 limbs in a software pipeline
-	ALIGN(16)
-$Loop:	cmpult	r21,r28,r25		C compute cy from last add
-	ldq	r0,0(r18)
-	bis	r8,r25,r25		C combine cy from the two adds
-	ldq	r1,8(r18)
-	addq	r2,r6,r28		C 3rd main add
-	ldq	r4,0(r17)
-	addq	r28,r25,r22		C 3rd carry add
-	ldq	r5,8(r17)
-	cmpult	r28,r6,r8		C compute cy from last add
-	cmpult	r22,r28,r25		C compute cy from last add
-	stq	r20,0(r16)
-	bis	r8,r25,r25		C combine cy from the two adds
-	stq	r21,8(r16)
-	addq	r3,r7,r28		C 4th main add
-	addq	r28,r25,r23		C 4th carry add
-	cmpult	r28,r7,r8		C compute cy from last add
-	cmpult	r23,r28,r25		C compute cy from last add
-		addq	r17,32,r17		C update s1_ptr
-	bis	r8,r25,r25		C combine cy from the two adds
-		addq	r16,32,r16		C update res_ptr
-	addq	r0,r4,r28		C 1st main add
-	ldq	r2,16(r18)
-	addq	r25,r28,r20		C 1st carry add
-	ldq	r3,24(r18)
-	cmpult	r28,r4,r8		C compute cy from last add
-	ldq	r6,-16(r17)
-	cmpult	r20,r28,r25		C compute cy from last add
-	ldq	r7,-8(r17)
-	bis	r8,r25,r25		C combine cy from the two adds
-	subq	r19,4,r19		C decr loop cnt
-	stq	r22,-16(r16)
-	addq	r1,r5,r28		C 2nd main add
-	stq	r23,-8(r16)
-	addq	r25,r28,r21		C 2nd carry add
-		addq	r18,32,r18		C update s2_ptr
-	cmpult	r28,r5,r8		C compute cy from last add
-	bge	r19,$Loop
-C Finish software pipeline for 1st loop
-$Lend1:	cmpult	r21,r28,r25		C compute cy from last add
-	bis	r8,r25,r25		C combine cy from the two adds
-	addq	r2,r6,r28		C 3rd main add
-	addq	r28,r25,r22		C 3rd carry add
-	cmpult	r28,r6,r8		C compute cy from last add
-	cmpult	r22,r28,r25		C compute cy from last add
-	stq	r20,0(r16)
-	bis	r8,r25,r25		C combine cy from the two adds
-	stq	r21,8(r16)
-	addq	r3,r7,r28		C 4th main add
-	addq	r28,r25,r23		C 4th carry add
-	cmpult	r28,r7,r8		C compute cy from last add
-	cmpult	r23,r28,r25		C compute cy from last add
-	bis	r8,r25,r25		C combine cy from the two adds
-	addq	r16,32,r16		C update res_ptr
-	stq	r22,-16(r16)
-	stq	r23,-8(r16)
-$Lend2:	addq	r19,4,r19		C restore loop cnt
-	beq	r19,$Lret
-C Start software pipeline for 2nd loop
-	ldq	r0,0(r18)
-	ldq	r4,0(r17)
+	ldq	r3,0(r17)
+	ldq	r4,0(r18)
+
 	subq	r19,1,r19
-	beq	r19,$Lend0
-C 2nd loop handles remaining 1-3 limbs
-	ALIGN(16)
-$Loop0:	addq	r0,r4,r28		C main add
-	ldq	r0,8(r18)
-	cmpult	r28,r4,r8		C compute cy from last add
-	ldq	r4,8(r17)
-	addq	r28,r25,r20		C carry add
-	addq	r18,8,r18
+	and	r19,4-1,r2	C number of limbs in first loop
+	bis	r31,r31,r0
+	beq	r2,$L0		C if multiple of 4 limbs, skip first loop
+
+	subq	r19,r2,r19
+
+$Loop0:	subq	r2,1,r2
+	ldq	r5,8(r17)
+	addq	r4,r0,r4
+	ldq	r6,8(r18)
+	cmpult	r4,r0,r1
+	addq	r3,r4,r4
+	cmpult	r4,r3,r0
+	stq	r4,0(r16)
+	bis	r0,r1,r0
+
 	addq	r17,8,r17
-	stq	r20,0(r16)
-	cmpult	r20,r28,r25		C compute cy from last add
-	subq	r19,1,r19		C decr loop cnt
-	bis	r8,r25,r25		C combine cy from the two adds
+	addq	r18,8,r18
+	bis	r5,r5,r3
+	bis	r6,r6,r4
 	addq	r16,8,r16
-	bne	r19,$Loop0
-$Lend0:	addq	r0,r4,r28		C main add
-	addq	r28,r25,r20		C carry add
-	cmpult	r28,r4,r8		C compute cy from last add
-	cmpult	r20,r28,r25		C compute cy from last add
-	stq	r20,0(r16)
-	bis	r8,r25,r25		C combine cy from the two adds
-
-$Lret:	bis	r25,r31,r0		C return cy
+	bne	r2,$Loop0
+
+$L0:	beq	r19,$Lend
+
+	ALIGN(8)
+$Loop:	subq	r19,4,r19
+
+	ldq	r5,8(r17)
+	addq	r4,r0,r4
+	ldq	r6,8(r18)
+	cmpult	r4,r0,r1
+	addq	r3,r4,r4
+	cmpult	r4,r3,r0
+	stq	r4,0(r16)
+	bis	r0,r1,r0
+
+	ldq	r3,16(r17)
+	addq	r6,r0,r6
+	ldq	r4,16(r18)
+	cmpult	r6,r0,r1
+	addq	r5,r6,r6
+	cmpult	r6,r5,r0
+	stq	r6,8(r16)
+	bis	r0,r1,r0
+
+	ldq	r5,24(r17)
+	addq	r4,r0,r4
+	ldq	r6,24(r18)
+	cmpult	r4,r0,r1
+	addq	r3,r4,r4
+	cmpult	r4,r3,r0
+	stq	r4,16(r16)
+	bis	r0,r1,r0
+
+	ldq	r3,32(r17)
+	addq	r6,r0,r6
+	ldq	r4,32(r18)
+	cmpult	r6,r0,r1
+	addq	r5,r6,r6
+	cmpult	r6,r5,r0
+	stq	r6,24(r16)
+	bis	r0,r1,r0
+
+	addq	r17,32,r17
+	addq	r18,32,r18
+	addq	r16,32,r16
+	bne	r19,$Loop
+
+$Lend:	addq	r4,r0,r4
+	cmpult	r4,r0,r1
+	addq	r3,r4,r4
+	cmpult	r4,r3,r0
+	stq	r4,0(r16)
+	bis	r0,r1,r0
 	ret	r31,(r26),1
-EPILOGUE()
+EPILOGUE(mpn_add_n)
 ASM_END()
diff --git a/gmp/mpn/alpha/addmul_1.asm b/gmp/mpn/alpha/addmul_1.asm
index c4e6834b61..22c41a5c74 100644
--- a/gmp/mpn/alpha/addmul_1.asm
+++ b/gmp/mpn/alpha/addmul_1.asm
@@ -4,30 +4,19 @@ dnl result to a second limb vector.
 dnl  Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/alpha-defs.m4 b/gmp/mpn/alpha/alpha-defs.m4
index af34c9294c..b2f9a242a8 100644
--- a/gmp/mpn/alpha/alpha-defs.m4
+++ b/gmp/mpn/alpha/alpha-defs.m4
@@ -3,32 +3,21 @@ divert(-1)
 dnl  m4 macros for Alpha assembler.
 
 dnl  Copyright 2003, 2004 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  Usage: ASSERT([reg] [,code])
diff --git a/gmp/mpn/alpha/aorslsh1_n.asm b/gmp/mpn/alpha/aorslsh1_n.asm
index 9525e669db..3694f78761 100644
--- a/gmp/mpn/alpha/aorslsh1_n.asm
+++ b/gmp/mpn/alpha/aorslsh1_n.asm
@@ -1,40 +1,36 @@
 dnl  Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
 
-dnl  Copyright 2003, 2013 Free Software Foundation, Inc.
+dnl  Copyright 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C      cycles/limb
-C EV4:     ?
+C EV4:    12.5
 C EV5:     6.25
-C EV6:     4.5
+C EV6:     4.375 (i.e., worse than separate mpn_lshift and mpn_add_n at 3.875)
 
+C TODO
+C  * Write special version for ev6, as this is a slowdown for 100 < n < 2200
+C    compared to separate mpn_lshift and mpn_add_n.
+C  * Use addq instead of sll for left shift, and similarly cmplt instead of srl
+C    for right shift.
+
+dnl  INPUT PARAMETERS
 define(`rp',`r16')
 define(`up',`r17')
 define(`vp',`r18')
@@ -42,8 +38,12 @@ define(`n', `r19')
 
 define(`u0', `r8')
 define(`u1', `r1')
+define(`u2', `r2')
+define(`u3', `r3')
 define(`v0', `r4')
 define(`v1', `r5')
+define(`v2', `r6')
+define(`v3', `r7')
 
 define(`cy0', `r0')
 define(`cy1', `r20')
@@ -67,98 +67,168 @@ MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
 
 ASM_START()
 PROLOGUE(func)
-	and	n, 2, cy0
-	blbs	n, L(bx1)
-L(bx0):	ldq	v1, 0(vp)
+	lda	n, -4(n)
+	bis	r31, r31, cy1
+	and	n, 3, r1
+	beq	r1, $Lb00
+	cmpeq	r1, 1, r2
+	bne	r2, $Lb01
+	cmpeq	r1, 2, r2
+	bne	r2, $Lb10
+$Lb11:	C n = 3, 7, 11, ...
+	ldq	v0, 0(vp)
+	ldq	u0, 0(up)
+	ldq	v1, 8(vp)
+	ldq	u1, 8(up)
+	ldq	v2, 16(vp)
+	ldq	u2, 16(up)
+	lda	vp, 24(vp)
+	lda	up, 24(up)
+	bge	n, $Loop
+	br	r31, $Lcj3
+$Lb10:	C n = 2, 6, 10, ...
+	bis	r31, r31, cy0
+	ldq	v1, 0(vp)
 	ldq	u1, 0(up)
-	nop
-	bne	cy0, L(b10)
-
-L(b00):	lda	vp, 48(vp)
-	lda	up, -16(up)
+	ldq	v2, 8(vp)
+	ldq	u2, 8(up)
 	lda	rp, -8(rp)
-	br	r31, L(lo0)
-
-L(b10):	lda	vp, 32(vp)
+	blt	n, $Lcj2
+	ldq	v3, 16(vp)
+	ldq	u3, 16(up)
+	lda	vp, 48(vp)
+	lda	up, 16(up)
+	br	r31, $LL10
+$Lb01:	C n = 1, 5, 9, ...
+	ldq	v2, 0(vp)
+	ldq	u2, 0(up)
+	lda	rp, -16(rp)
+	blt	n, $Lcj1
+	ldq	v3, 8(vp)
+	ldq	u3, 8(up)
+	ldq	v0, 16(vp)
+	ldq	u0, 16(up)
+	lda	vp, 40(vp)
+	lda	up, 8(up)
+	lda	rp, 32(rp)
+	br	r31, $LL01
+$Lb00:	C n = 4, 8, 12, ...
+	bis	r31, r31, cy0
+	ldq	v3, 0(vp)
+	ldq	u3, 0(up)
+	ldq	v0, 8(vp)
+	ldq	u0, 8(up)
+	ldq	v1, 16(vp)
+	ldq	u1, 16(up)
+	lda	vp, 32(vp)
 	lda	rp, 8(rp)
-	lda	cy0, 0(r31)
-	br	r31, L(lo2)
-
-L(bx1):	ldq	v0, 0(vp)
-	ldq	u0, 0(up)
-	lda	cy1, 0(r31)
-	beq	cy0, L(b01)
-
-L(b11):	lda	vp, 40(vp)
-	lda	up, -24(up)
-	lda	rp, 16(rp)
-	br	r31, L(lo3)
-
-L(b01):	lda	n, -4(n)
-	ble	n, L(end)
-	lda	vp, 24(vp)
-	lda	up, -8(up)
-
+	br	r31, $LL00x
 	ALIGN(16)
-L(top):	addq	v0, v0, sl	C left shift vlimb
-	ldq	v1, -16(vp)
+C 0
+$Loop:	sll	v0, 1, sl	C left shift vlimb
+	ldq	v3, 0(vp)
+C 1
 	ADDSUB	u0, sl, ps	C ulimb + (vlimb << 1)
-	cmplt	v0, r31, cy0	C carry out #1
-	ldq	u1, 16(up)
+	ldq	u3, 0(up)
+C 2
 	ADDSUB	ps, cy1, rr	C consume carry from previous operation
+	srl	v0, 63, cy0	C carry out #1
+C 3
 	CARRY(	ps, u0, cy)	C carry out #2
 	stq	rr, 0(rp)
+C 4
 	addq	cy, cy0, cy0	C combine carry out #1 and #2
 	CARRY(	rr, ps, cy)	C carry out #3
+C 5
 	addq	cy, cy0, cy0	C final carry out
 	lda	vp, 32(vp)	C bookkeeping
-L(lo0):	addq	v1, v1, sl
-	ldq	v0, -40(vp)
+C 6
+$LL10:	sll	v1, 1, sl
+	ldq	v0, -24(vp)
+C 7
 	ADDSUB	u1, sl, ps
-	cmplt	v1, r31, cy1
-	ldq	u0, 24(up)
+	ldq	u0, 8(up)
+C 8
 	ADDSUB	ps, cy0, rr
+	srl	v1, 63, cy1
+C 9
 	CARRY(	ps, u1, cy)
 	stq	rr, 8(rp)
+C 10
 	addq	cy, cy1, cy1
 	CARRY(	rr, ps, cy)
+C 11
 	addq	cy, cy1, cy1
 	lda	rp, 32(rp)	C bookkeeping
-L(lo3):	addq	v0, v0, sl
-	ldq	v1, -32(vp)
-	ADDSUB	u0, sl, ps
-	cmplt	v0, r31, cy0
-	ldq	u1, 32(up)
+C 12
+$LL01:	sll	v2, 1, sl
+	ldq	v1, -16(vp)
+C 13
+	ADDSUB	u2, sl, ps
+	ldq	u1, 16(up)
+C 14
 	ADDSUB	ps, cy1, rr
-	CARRY(	ps, u0, cy)
+	srl	v2, 63, cy0
+C 15
+	CARRY(	ps, u2, cy)
 	stq	rr, -16(rp)
+C 16
 	addq	cy, cy0, cy0
 	CARRY(	rr, ps, cy)
+C 17
 	addq	cy, cy0, cy0
-	lda	up, 32(up)	C bookkeeping
-L(lo2):	addq	v1, v1, sl
-	ldq	v0, -24(vp)
-	ADDSUB	u1, sl, ps
-	cmplt	v1, r31, cy1
-	ldq	u0, 8(up)
+$LL00x:	lda	up, 32(up)	C bookkeeping
+C 18
+	sll	v3, 1, sl
+	ldq	v2, -8(vp)
+C 19
+	ADDSUB	u3, sl, ps
+	ldq	u2, -8(up)
+C 20
 	ADDSUB	ps, cy0, rr
-	CARRY(	ps, u1, cy)
+	srl	v3, 63, cy1
+C 21
+	CARRY(	ps, u3, cy)
 	stq	rr, -8(rp)
+C 22
 	addq	cy, cy1, cy1
 	CARRY(	rr, ps, cy)
+C 23
 	addq	cy, cy1, cy1
 	lda	n, -4(n)	C bookkeeping
-	bgt	n, L(top)
+C 24
+	bge	n, $Loop
 
-L(end):	addq	v0, v0, sl
+$Lcj3:	sll	v0, 1, sl
 	ADDSUB	u0, sl, ps
 	ADDSUB	ps, cy1, rr
-	cmplt	v0, r31, cy0
+	srl	v0, 63, cy0
 	CARRY(	ps, u0, cy)
 	stq	rr, 0(rp)
 	addq	cy, cy0, cy0
 	CARRY(	rr, ps, cy)
-	addq	cy, cy0, r0
+	addq	cy, cy0, cy0
+
+$Lcj2:	sll	v1, 1, sl
+	ADDSUB	u1, sl, ps
+	ADDSUB	ps, cy0, rr
+	srl	v1, 63, cy1
+	CARRY(	ps, u1, cy)
+	stq	rr, 8(rp)
+	addq	cy, cy1, cy1
+	CARRY(	rr, ps, cy)
+	addq	cy, cy1, cy1
+
+$Lcj1:	sll	v2, 1, sl
+	ADDSUB	u2, sl, ps
+	ADDSUB	ps, cy1, rr
+	srl	v2, 63, cy0
+	CARRY(	ps, u2, cy)
+	stq	rr, 16(rp)
+	addq	cy, cy0, cy0
+	CARRY(	rr, ps, cy)
+	addq	cy, cy0, cy0
+
 	ret	r31,(r26),1
 EPILOGUE()
 ASM_END()
diff --git a/gmp/mpn/alpha/aorslsh2_n.asm b/gmp/mpn/alpha/aorslsh2_n.asm
deleted file mode 100644
index bdee1d6d02..0000000000
--- a/gmp/mpn/alpha/aorslsh2_n.asm
+++ /dev/null
@@ -1,167 +0,0 @@
-dnl  Alpha mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).
-
-dnl  Copyright 2003, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C      cycles/limb
-C EV4:     ?
-C EV5:     6
-C EV6:     3.75
-
-C TODO
-C  * Tune to reach 3.5 c/l on ev6 and 5.75 c/l on ev5.
-
-define(`rp',`r16')
-define(`up',`r17')
-define(`vp',`r18')
-define(`n', `r19')
-
-define(`u0', `r8')
-define(`u1', `r1')
-define(`v0', `r4')
-define(`v1', `r5')
-
-define(`cy0', `r0')
-define(`cy1', `r20')
-define(`cy', `r22')
-define(`rr', `r24')
-define(`ps', `r25')
-define(`sl', `r28')
-
-ifdef(`OPERATION_addlsh2_n',`
-  define(ADDSUB,       addq)
-  define(CARRY,       `cmpult $1,$2,$3')
-  define(func, mpn_addlsh2_n)
-')
-ifdef(`OPERATION_sublsh2_n',`
-  define(ADDSUB,       subq)
-  define(CARRY,       `cmpult $2,$1,$3')
-  define(func, mpn_sublsh2_n)
-')
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n)
-
-ASM_START()
-PROLOGUE(func)
-	and	n, 2, cy0
-	blbs	n, L(bx1)
-L(bx0):	ldq	v1, 0(vp)
-	ldq	u1, 0(up)
-	bis	r31, r31, r2
-	bne	cy0, L(b10)
-
-L(b00):	lda	vp, 48(vp)
-	lda	up, -16(up)
-	lda	rp, -8(rp)
-	s4addq	v1, r31, sl
-	br	r31, L(lo0)
-
-L(b10):	lda	vp, 32(vp)
-	lda	rp, 8(rp)
-	lda	cy0, 0(r31)
-	br	r31, L(lo2)
-
-L(bx1):	ldq	v0, 0(vp)
-	ldq	u0, 0(up)
-	lda	cy1, 0(r31)
-	bis	r31, r31, r3
-	nop
-	beq	cy0, L(b01)
-
-L(b11):	lda	vp, 40(vp)
-	lda	up, -24(up)
-	lda	rp, 16(rp)
-	br	r31, L(lo3)
-
-L(b01):	lda	n, -4(n)
-	ble	n, L(end)
-	lda	vp, 24(vp)
-	lda	up, -8(up)
-
-	ALIGN(16)
-L(top):	s4addq	v0, r3, sl	C combined vlimb
-	ldq	v1, -16(vp)
-	ADDSUB	u0, sl, ps	C ulimb + (vlimb << 1)
-	ldq	u1, 16(up)
-	srl	v0, 62, r2	C high v bits
-	ADDSUB	ps, cy1, rr	C consume carry from previous operation
-	CARRY(	ps, u0, cy0)	C carry out #2
-	stq	rr, 0(rp)
-	CARRY(	rr, ps, cy)	C carry out #3
-	lda	vp, 32(vp)	C bookkeeping
-	addq	cy, cy0, cy0	C final carry out
-	s4addq	v1, r2, sl
-L(lo0):	ldq	v0, -40(vp)
-	ADDSUB	u1, sl, ps
-	ldq	u0, 24(up)
-	srl	v1, 62, r3
-	ADDSUB	ps, cy0, rr
-	CARRY(	ps, u1, cy1)
-	stq	rr, 8(rp)
-	CARRY(	rr, ps, cy)
-	lda	rp, 32(rp)	C bookkeeping
-	addq	cy, cy1, cy1
-L(lo3):	s4addq	v0, r3, sl
-	ldq	v1, -32(vp)
-	ADDSUB	u0, sl, ps
-	ldq	u1, 32(up)
-	srl	v0, 62, r2
-	ADDSUB	ps, cy1, rr
-	CARRY(	ps, u0, cy0)
-	stq	rr, -16(rp)
-	CARRY(	rr, ps, cy)
-	lda	up, 32(up)	C bookkeeping
-	addq	cy, cy0, cy0
-L(lo2):	s4addq	v1, r2, sl
-	ldq	v0, -24(vp)
-	ADDSUB	u1, sl, ps
-	ldq	u0, 8(up)
-	srl	v1, 62, r3
-	ADDSUB	ps, cy0, rr
-	CARRY(	ps, u1, cy1)
-	stq	rr, -8(rp)
-	CARRY(	rr, ps, cy)
-	lda	n, -4(n)	C bookkeeping
-	addq	cy, cy1, cy1
-	bgt	n, L(top)
-
-L(end):	s4addq	v0, r3, sl
-	ADDSUB	u0, sl, ps
-	srl	v0, 62, r2
-	ADDSUB	ps, cy1, rr
-	CARRY(	ps, u0, cy0)
-	stq	rr, 0(rp)
-	CARRY(	rr, ps, cy)
-	addq	cy, cy0, cy0
-	addq	cy0, r2, r0
-
-	ret	r31,(r26),1
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/alpha/bdiv_dbm1c.asm b/gmp/mpn/alpha/bdiv_dbm1c.asm
index 472966ca98..e5f11dbf48 100644
--- a/gmp/mpn/alpha/bdiv_dbm1c.asm
+++ b/gmp/mpn/alpha/bdiv_dbm1c.asm
@@ -3,30 +3,19 @@ dnl  Alpha mpn_bdiv_dbm1c.
 dnl  Copyright 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/cntlz.asm b/gmp/mpn/alpha/cntlz.asm
index 25af19b131..2bfd923e5e 100644
--- a/gmp/mpn/alpha/cntlz.asm
+++ b/gmp/mpn/alpha/cntlz.asm
@@ -3,30 +3,19 @@ dnl  Alpha auxiliary for longlong.h's count_leading_zeros
 dnl  Copyright 1997, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/copyd.asm b/gmp/mpn/alpha/copyd.asm
index b41b5366cc..ba8fa1c633 100644
--- a/gmp/mpn/alpha/copyd.asm
+++ b/gmp/mpn/alpha/copyd.asm
@@ -3,30 +3,19 @@ dnl  Alpha mpn_copyd -- copy, decrementing.
 dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/copyi.asm b/gmp/mpn/alpha/copyi.asm
index f7e2ad6f6a..425804127e 100644
--- a/gmp/mpn/alpha/copyi.asm
+++ b/gmp/mpn/alpha/copyi.asm
@@ -3,30 +3,19 @@ dnl  Alpha mpn_copyi -- copy, incrementing.
 dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/default.m4 b/gmp/mpn/alpha/default.m4
index 8fe7c4e122..e7aae2eeea 100644
--- a/gmp/mpn/alpha/default.m4
+++ b/gmp/mpn/alpha/default.m4
@@ -3,33 +3,22 @@ divert(-1)
 dnl  m4 macros for alpha assembler (everywhere except unicos).
 
 
-dnl  Copyright 2000, 2002-2004, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
+dnl  Copyright 2000, 2002, 2003, 2004 Free Software Foundation, Inc.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  Usage: ASM_START()
@@ -64,9 +53,8 @@ ifelse(`$2',noalign,,`	ALIGN(16)')
 	.globl	$1
 	.ent	$1
 $1:
-	.frame r30,0,r26,0
-ifelse(`$2',gp,`	ldgp	r29, 0(r27)
-`$'$1..ng:')
+ifelse(`$2',gp,`	ldgp	r29,0(r27)')
+	.frame r30,0,r26
 	.prologue ifelse(`$2',gp,1,0)')
 
 define(`EPILOGUE_cpu',
@@ -102,13 +90,12 @@ forloop(i,0,31,`defreg(`r'i,$i)')
 forloop(i,0,31,`deflit(`f'i,``$f''i)')
 
 
-dnl  Usage: DATASTART(name,align)  or  DATASTART(name)
+dnl  Usage: DATASTART(name)
 dnl         DATAEND()
 
 define(`DATASTART',
-m4_assert_numargs_range(1,2)
-`	RODATA
-	ALIGN(ifelse($#,1,2,$2))
+m4_assert_numargs(1)
+`	DATA
 $1:')
 define(`DATAEND',
 m4_assert_numargs(0)
@@ -117,7 +104,7 @@ m4_assert_numargs(0)
 dnl  Load a symbolic address into a register
 define(`LEA',
 m4_assert_numargs(2)
-`lda	$1, $2')
+`lda   $1,  $2')
 
 dnl  Usage: ASM_END()
 define(`ASM_END',
diff --git a/gmp/mpn/alpha/dive_1.c b/gmp/mpn/alpha/dive_1.c
index 88b82db2f7..a915c58a9e 100644
--- a/gmp/mpn/alpha/dive_1.c
+++ b/gmp/mpn/alpha/dive_1.c
@@ -4,33 +4,22 @@
    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
    FUTURE GNU MP RELEASES.
 
-Copyright 2000-2003 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/mpn/alpha/ev5/diveby3.asm b/gmp/mpn/alpha/diveby3.asm
index 3758188e02..e2d1c6beee 100644
--- a/gmp/mpn/alpha/ev5/diveby3.asm
+++ b/gmp/mpn/alpha/diveby3.asm
@@ -1,42 +1,32 @@
 dnl  Alpha mpn_divexact_by3c -- mpn division by 3, expecting no remainder.
 
-dnl  Copyright 2004, 2005, 2009 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C      cycles/limb
 C EV4:    22
 C EV5:    11.5
-C EV6:     6.3		Note that mpn_bdiv_dbm1c is faster
+C EV6:     6.3
 
 C TODO
-C  * Remove the unops, they benefit just ev6, which no longer uses this file.
+C  * Trim this to 6.0 c/l for ev6.
+C  * Write special ev5 version, should reach 9 c/l, and could be smaller.
 C  * Try prefetch for destination, using lds.
 C  * Improve feed-in code, by moving initial mulq earlier; make initial load
 C    to u0/u0 to save some copying.
@@ -50,7 +40,7 @@ define(`cy',	`r19')
 
 ASM_START()
 
-DATASTART(L(LC),8)
+DATASTART(L(LC))
 	.quad	0xAAAAAAAAAAAAAAAB
 	.quad	0x5555555555555555
 	.quad	0xAAAAAAAAAAAAAAAA
diff --git a/gmp/mpn/alpha/divrem_2.asm b/gmp/mpn/alpha/divrem_2.asm
index 046b246a95..b68468bca0 100644
--- a/gmp/mpn/alpha/divrem_2.asm
+++ b/gmp/mpn/alpha/divrem_2.asm
@@ -1,32 +1,21 @@
 dnl  Alpha mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
 
-dnl  Copyright 2007, 2008, 2013 Free Software Foundation, Inc.
+dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -52,7 +41,8 @@ define(`un_param',	`r19')
 define(`dp',		`r20')
 
 ASM_START()
-PROLOGUE(mpn_divrem_2,gp)
+PROLOGUE(mpn_divrem_2)
+	ldgp	r29, 0(r27)
 	lda	r30, -80(r30)
 	stq	r26, 0(r30)
 	stq	r9, 8(r30)
@@ -90,7 +80,7 @@ L(L8):	stq	r3, 72(r30)
 	blt	r19, L(L10)
 	bis	r31, r12, r16
 	jsr	r26, mpn_invert_limb
-	LDGP(	r29, 0(r26))
+	ldgp	r29, 0(r26)
 	mulq	r0, r12, r4		C t0 = LO(di * d1)
 	umulh	r0, r10, r2		C s1 = HI(di * d0)
 	addq	r4, r10, r4		C t0 += d0
diff --git a/gmp/mpn/alpha/ev5/add_n.asm b/gmp/mpn/alpha/ev5/add_n.asm
new file mode 100644
index 0000000000..626e713ccb
--- /dev/null
+++ b/gmp/mpn/alpha/ev5/add_n.asm
@@ -0,0 +1,146 @@
+dnl  Alpha EV5 mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl  store sum in a third limb vector.
+
+dnl  Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     ?
+C EV5:     4.75
+C EV6:     3
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r16
+dnl  s1_ptr	r17
+dnl  s2_ptr	r18
+dnl  size	r19
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+	bis	r31,r31,r25		C clear cy
+	subq	r19,4,r19		C decr loop cnt
+	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+	ldq	r0,0(r18)		C s2 limb 0
+	ldq	r4,0(r17)		C s1 limb 0
+	ldq	r1,8(r18)		C s2 limb 1
+	ldq	r5,8(r17)		C s1 limb 1
+	addq	r17,32,r17		C update s1_ptr
+	ldq	r2,16(r18)		C s2 limb 2
+	addq	r0,r4,r20		C 1st main add
+	ldq	r3,24(r18)		C s2 limb 3
+	subq	r19,4,r19		C decr loop cnt
+	ldq	r6,-16(r17)		C s1 limb 2 (s1_ptr already advanced by 32)
+	cmpult	r20,r0,r25		C compute cy from last add
+	ldq	r7,-8(r17)		C s1 limb 3
+	addq	r1,r5,r28		C 2nd main add
+	addq	r18,32,r18		C update s2_ptr
+	addq	r28,r25,r21		C 2nd carry add
+	cmpult	r28,r5,r8		C compute cy from last add
+	blt	r19,$Lend1		C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+	ALIGN(16)
+$Loop:	cmpult	r21,r28,r25		C compute cy from last add
+	ldq	r0,0(r18)		C s2 limb 0 of next group
+	bis	r8,r25,r25		C combine cy from the two adds
+	ldq	r1,8(r18)		C s2 limb 1 of next group
+	addq	r2,r6,r28		C 3rd main add
+	ldq	r4,0(r17)		C s1 limb 0 of next group
+	addq	r28,r25,r22		C 3rd carry add
+	ldq	r5,8(r17)		C s1 limb 1 of next group
+	cmpult	r28,r6,r8		C compute cy from last add
+	cmpult	r22,r28,r25		C compute cy from last add
+	stq	r20,0(r16)		C store 1st result limb of current group
+	bis	r8,r25,r25		C combine cy from the two adds
+	stq	r21,8(r16)		C store 2nd result limb of current group
+	addq	r3,r7,r28		C 4th main add
+	addq	r28,r25,r23		C 4th carry add
+	cmpult	r28,r7,r8		C compute cy from last add
+	cmpult	r23,r28,r25		C compute cy from last add
+		addq	r17,32,r17		C update s1_ptr
+	bis	r8,r25,r25		C combine cy from the two adds
+		addq	r16,32,r16		C update res_ptr
+	addq	r0,r4,r28		C 1st main add
+	ldq	r2,16(r18)		C s2 limb 2 of next group
+	addq	r25,r28,r20		C 1st carry add
+	ldq	r3,24(r18)		C s2 limb 3 of next group
+	cmpult	r28,r4,r8		C compute cy from last add
+	ldq	r6,-16(r17)		C s1 limb 2 of next group (s1_ptr already advanced)
+	cmpult	r20,r28,r25		C compute cy from last add
+	ldq	r7,-8(r17)		C s1 limb 3 of next group
+	bis	r8,r25,r25		C combine cy from the two adds
+	subq	r19,4,r19		C decr loop cnt
+	stq	r22,-16(r16)		C store 3rd result limb (res_ptr already advanced)
+	addq	r1,r5,r28		C 2nd main add
+	stq	r23,-8(r16)		C store 4th result limb
+	addq	r25,r28,r21		C 2nd carry add
+		addq	r18,32,r18		C update s2_ptr
+	cmpult	r28,r5,r8		C compute cy from last add
+	bge	r19,$Loop		C repeat while loop cnt is still >= 0
+C Finish software pipeline for 1st loop
+$Lend1:	cmpult	r21,r28,r25		C compute cy from last add
+	bis	r8,r25,r25		C combine cy from the two adds
+	addq	r2,r6,r28		C 3rd main add
+	addq	r28,r25,r22		C 3rd carry add
+	cmpult	r28,r6,r8		C compute cy from last add
+	cmpult	r22,r28,r25		C compute cy from last add
+	stq	r20,0(r16)		C store 1st result limb of final group
+	bis	r8,r25,r25		C combine cy from the two adds
+	stq	r21,8(r16)		C store 2nd result limb of final group
+	addq	r3,r7,r28		C 4th main add
+	addq	r28,r25,r23		C 4th carry add
+	cmpult	r28,r7,r8		C compute cy from last add
+	cmpult	r23,r28,r25		C compute cy from last add
+	bis	r8,r25,r25		C combine cy from the two adds
+	addq	r16,32,r16		C update res_ptr
+	stq	r22,-16(r16)		C store 3rd result limb of final group
+	stq	r23,-8(r16)		C store 4th result limb of final group
+$Lend2:	addq	r19,4,r19		C restore loop cnt
+	beq	r19,$Lret		C no limbs left, just return cy
+C Start software pipeline for 2nd loop
+	ldq	r0,0(r18)		C next s2 limb
+	ldq	r4,0(r17)		C next s1 limb
+	subq	r19,1,r19		C decr loop cnt
+	beq	r19,$Lend0		C single limb left, skip 2nd loop
+C 2nd loop handles remaining 1-3 limbs
+	ALIGN(16)
+$Loop0:	addq	r0,r4,r28		C main add
+	ldq	r0,8(r18)		C preload following s2 limb
+	cmpult	r28,r4,r8		C compute cy from last add
+	ldq	r4,8(r17)		C preload following s1 limb (old r4 consumed above)
+	addq	r28,r25,r20		C carry add
+	addq	r18,8,r18		C update s2_ptr
+	addq	r17,8,r17		C update s1_ptr
+	stq	r20,0(r16)		C store result limb
+	cmpult	r20,r28,r25		C compute cy from last add
+	subq	r19,1,r19		C decr loop cnt
+	bis	r8,r25,r25		C combine cy from the two adds
+	addq	r16,8,r16		C update res_ptr
+	bne	r19,$Loop0		C repeat until loop cnt reaches 0
+$Lend0:	addq	r0,r4,r28		C main add
+	addq	r28,r25,r20		C carry add
+	cmpult	r28,r4,r8		C compute cy from last add
+	cmpult	r20,r28,r25		C compute cy from last add
+	stq	r20,0(r16)		C store last result limb
+	bis	r8,r25,r25		C combine cy from the two adds
+
+$Lret:	bis	r25,r31,r0		C return cy
+	ret	r31,(r26),1
+EPILOGUE(mpn_add_n)
+ASM_END()
diff --git a/gmp/mpn/alpha/com.asm b/gmp/mpn/alpha/ev5/com_n.asm
index f084ab5e96..979e711eb8 100644
--- a/gmp/mpn/alpha/com.asm
+++ b/gmp/mpn/alpha/ev5/com_n.asm
@@ -1,32 +1,21 @@
-dnl  Alpha mpn_com -- mpn one's complement.
+dnl  Alpha EV5 mpn_com_n -- mpn one's complement.
 
 dnl  Copyright 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -37,7 +26,7 @@ C EV5:    2.0
 C EV6:    1.5
 
 
-C mp_limb_t mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C mp_limb_t mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
 C
 C For ev5 the main loop is 7 cycles plus 1 taken branch bubble, for a total
 C 2.0 c/l.  In general, a pattern like this unrolled to N limbs per loop
@@ -71,7 +60,7 @@ FLOAT64(L(dat), 2.0)
 
 	ALIGN(16)
 
-PROLOGUE(mpn_com,gp)
+PROLOGUE(mpn_com_n,gp)
 
 	C r16	dst
 	C r17	src
diff --git a/gmp/mpn/alpha/ev5/gmp-mparam.h b/gmp/mpn/alpha/ev5/gmp-mparam.h
index b560c20afe..cbedd4f173 100644
--- a/gmp/mpn/alpha/ev5/gmp-mparam.h
+++ b/gmp/mpn/alpha/ev5/gmp-mparam.h
@@ -1,187 +1,81 @@
 /* Alpha EV5 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2002, 2004, 2005, 2008-2010, 2014 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2008, 2009
+Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
 
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
 
 /* 600 MHz 21164A */
-/* FFT tuning limit = 5000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        22
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
-#define USE_PREINV_DIVREM_1                  1  /* preinv always */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* never */
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           76
-
-#define MUL_TOOM22_THRESHOLD                14
-#define MUL_TOOM33_THRESHOLD                50
-#define MUL_TOOM44_THRESHOLD               118
-#define MUL_TOOM6H_THRESHOLD               157
-#define MUL_TOOM8H_THRESHOLD               236
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      77
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      56
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      70
-
-#define SQR_BASECASE_THRESHOLD               0  /* always */
-#define SQR_TOOM2_THRESHOLD                 22
-#define SQR_TOOM3_THRESHOLD                 73
-#define SQR_TOOM4_THRESHOLD                178
-#define SQR_TOOM6_THRESHOLD                  0  /* always */
-#define SQR_TOOM8_THRESHOLD                260
-
-#define MULMID_TOOM42_THRESHOLD             18
-
-#define MULMOD_BNM1_THRESHOLD                9
-#define SQRMOD_BNM1_THRESHOLD               12
-
-#define MUL_FFT_MODF_THRESHOLD             284  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    284, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
-    {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
-    {     15, 7}, {      8, 6}, {     17, 7}, {     13, 8}, \
-    {      7, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
-    {     11, 7}, {     23, 8}, {     13, 9}, {      7, 8}, \
-    {     19, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
-    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
-    {     23, 8}, {     47,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     47,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     79,10}, {     47, 9}, \
-    {     95,10}, {     55,11}, {     31,10}, {     63, 8}, \
-    {    255, 7}, {    511,10}, {     71, 9}, {    143, 8}, \
-    {    287, 7}, {    575, 9}, {    159, 8}, {    319,11}, \
-    {     47,12}, {     31,11}, {     63, 9}, {    255, 8}, \
-    {    511,10}, {    143, 9}, {    287,11}, {     79,10}, \
-    {    159, 9}, {    319,10}, {    175, 9}, {    351, 8}, \
-    {    703,10}, {    191, 9}, {    383,10}, {    207, 9}, \
-    {    415,12}, {     63,10}, {    255,11}, {    143,10}, \
-    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
-    {    639,11}, {    175,12}, {     95,11}, {    191,10}, \
-    {    383,11}, {    207,10}, {    415,11}, {    223,13}, \
-    {     63,11}, {    287,10}, {    575,12}, {    159,11}, \
-    {    319,10}, {    639,11}, {    351,12}, {    191,11}, \
-    {    415,12}, {    223,11}, {    447,10}, {    895,11}, \
-    {    479,12}, {    287,11}, {    575,12}, {    351,13}, \
-    {    191,12}, {    479,13}, {    255,12}, {    575,13}, \
-    {    319,12}, {    703,13}, {    383,12}, {    831,13}, \
-    {    447,14}, {    255,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 121
-#define MUL_FFT_THRESHOLD                 4224
-
-#define SQR_FFT_MODF_THRESHOLD             240  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    240, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
-    {     14, 5}, {     29, 7}, {      9, 6}, {     19, 7}, \
-    {     13, 6}, {     27, 8}, {      7, 7}, {     21, 8}, \
-    {     11, 7}, {     29, 8}, {     19, 9}, {     11, 8}, \
-    {     27,10}, {      7, 9}, {     15, 8}, {     33, 9}, \
-    {     19, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     47,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255,10}, {     71, 9}, {    143, 8}, \
-    {    287,10}, {     79,11}, {     47,12}, {     31,11}, \
-    {     63,10}, {    127, 9}, {    255,10}, {    143, 9}, \
-    {    287,11}, {     79,10}, {    159, 9}, {    319,10}, \
-    {    175,11}, {     95,10}, {    191, 9}, {    383,10}, \
-    {    207, 9}, {    415,11}, {    111,10}, {    223,12}, \
-    {     63,11}, {    175,12}, {     95,11}, {    207,13}, \
-    {     63,12}, {    127,11}, {    287,12}, {    159,11}, \
-    {    351,12}, {    191,11}, {    415,12}, {    223,11}, \
-    {    447,13}, {    127,12}, {    351,13}, {    191,12}, \
-    {    383,11}, {    767,12}, {    415,11}, {    831,12}, \
-    {    447,14}, {    127,13}, {    255,12}, {    511,11}, \
-    {   1087,12}, {    575,13}, {    319,12}, {    703,13}, \
-    {    383,12}, {    831,13}, {    447,14}, {    255,13}, \
-    {    511,12}, {   1023,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 105
-#define SQR_FFT_THRESHOLD                 3968
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  45
-#define MULLO_MUL_N_THRESHOLD             8397
-
-#define DC_DIV_QR_THRESHOLD                 47
-#define DC_DIVAPPR_Q_THRESHOLD             168
-#define DC_BDIV_QR_THRESHOLD                47
-#define DC_BDIV_Q_THRESHOLD                110
-
-#define INV_MULMOD_BNM1_THRESHOLD           26
-#define INV_NEWTON_THRESHOLD               189
-#define INV_APPR_THRESHOLD                 181
-
-#define BINV_NEWTON_THRESHOLD              196
-#define REDC_1_TO_REDC_N_THRESHOLD          51
-
-#define MU_DIV_QR_THRESHOLD               1558
-#define MU_DIVAPPR_Q_THRESHOLD            1558
-#define MUPI_DIV_QR_THRESHOLD               90
-#define MU_BDIV_QR_THRESHOLD               855
-#define MU_BDIV_Q_THRESHOLD               1078
-
-#define POWM_SEC_TABLE  1,16,90,452,1221
-
-#define MATRIX22_STRASSEN_THRESHOLD         11
-#define HGCD_THRESHOLD                      99
-#define HGCD_APPR_THRESHOLD                103
-#define HGCD_REDUCE_THRESHOLD             2899
-#define GCD_DC_THRESHOLD                   283
-#define GCDEXT_DC_THRESHOLD                201
-#define JACOBI_BASE_METHOD                   3
-
-#define GET_STR_DC_THRESHOLD                13
-#define GET_STR_PRECOMPUTE_THRESHOLD        28
-#define SET_STR_DC_THRESHOLD               426
-#define SET_STR_PRECOMPUTE_THRESHOLD      1505
-
-#define FAC_DSC_THRESHOLD                 1404
-#define FAC_ODD_THRESHOLD                    0  /* always */
+
+/* Generated by tuneup.c, 2009-01-15, gcc 3.4 */
+
+#define MUL_KARATSUBA_THRESHOLD          14
+#define MUL_TOOM3_THRESHOLD              74
+#define MUL_TOOM44_THRESHOLD            118
+
+#define SQR_BASECASE_THRESHOLD            4
+#define SQR_KARATSUBA_THRESHOLD          28
+#define SQR_TOOM3_THRESHOLD              77
+#define SQR_TOOM4_THRESHOLD             136
+
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD              44
+#define MULLOW_MUL_N_THRESHOLD          246
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* preinv always */
+#define DIV_DC_THRESHOLD                 53
+#define POWM_THRESHOLD                   85
+
+#define MATRIX22_STRASSEN_THRESHOLD      17
+#define HGCD_THRESHOLD                  104
+#define GCD_DC_THRESHOLD                321
+#define GCDEXT_DC_THRESHOLD             298
+#define JACOBI_BASE_METHOD                3
+
+#define DIVREM_1_NORM_THRESHOLD           0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD         0  /* always */
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1_THRESHOLD                13
+#define MOD_1_2_THRESHOLD                14
+#define MOD_1_4_THRESHOLD                16
+#define USE_PREINV_DIVREM_1               1  /* preinv always */
+#define USE_PREINV_MOD_1                  1  /* preinv always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             20
+#define GET_STR_PRECOMPUTE_THRESHOLD     32
+#define SET_STR_DC_THRESHOLD            532
+#define SET_STR_PRECOMPUTE_THRESHOLD   1501
+
+#define MUL_FFT_TABLE  { 240, 480, 1344, 1792, 5120, 20480, 81920, 196608, 0 }
+#define MUL_FFT_MODF_THRESHOLD          240
+#define MUL_FFT_THRESHOLD              1920
+
+#define SQR_FFT_TABLE  { 240, 480, 1216, 1792, 5120, 12288, 81920, 196608, 0 }
+#define SQR_FFT_MODF_THRESHOLD          208
+#define SQR_FFT_THRESHOLD              1408
+
+/* These tables need to be updated.  */
+
+#define MUL_FFT_TABLE2 {{1, 4}, {177, 5}, {193, 4}, {209, 5}, {353, 6}, {385, 5}, {417, 6}, {833, 7}, {897, 6}, {961, 7}, {1025, 6}, {1089, 7}, {1665, 8}, {1793, 7}, {2177, 8}, {2305, 7}, {2433, 8}, {2817, 7}, {2945, 8}, {3329, 9}, {3457, 8}, {4865, 9}, {5633, 8}, {6401, 10}, {7169, 9}, {11777, 10}, {12801, 9}, {13825, 10}, {15361, 9}, {19969, 10}, {23553, 9}, {24065, 11}, {30721, 10}, {48129, 11}, {63489, 10}, {72705, 11}, {96257, 12}, {126977, 11}, {194561, 12}, {258049, 11}, {325633, 12}, {389121, 13}, {516097, 12}, {MP_SIZE_T_MAX,0}}
+
+#define SQR_FFT_TABLE2 {{1, 4}, {177, 5}, {193, 4}, {209, 5}, {353, 6}, {385, 5}, {417, 6}, {961, 7}, {1025, 6}, {1089, 7}, {1153, 6}, {1217, 7}, {1665, 8}, {1793, 7}, {2177, 8}, {2305, 7}, {2561, 8}, {2817, 7}, {2945, 8}, {3329, 9}, {3585, 8}, {5377, 9}, {5633, 8}, {6401, 9}, {6657, 10}, {6913, 9}, {11777, 10}, {13313, 9}, {13825, 10}, {15361, 9}, {18945, 10}, {19457, 9}, {19969, 10}, {23553, 9}, {24065, 11}, {30721, 10}, {48129, 11}, {53249, 10}, {56321, 11}, {63489, 10}, {72705, 11}, {73729, 10}, {79873, 11}, {96257, 12}, {126977, 11}, {194561, 12}, {258049, 11}, {325633, 12}, {389121, 13}, {516097, 12}, {1699841, 13}, {1708033, 12}, {1732609, 13}, {1748993, 12}, {1757185, 13}, {1773569, 12}, {1777665, 13}, {1781761, 12}, {1789953, 13}, {1806337, 12}, {1818625, 13}, {1822721, 12}, {1826817, 13}, {1830913, 12}, {1961985, 13}, {MP_SIZE_T_MAX,0}}
diff --git a/gmp/mpn/alpha/ev5/lshift.asm b/gmp/mpn/alpha/ev5/lshift.asm
new file mode 100644
index 0000000000..04385d3484
--- /dev/null
+++ b/gmp/mpn/alpha/ev5/lshift.asm
@@ -0,0 +1,171 @@
+dnl  Alpha EV5 mpn_lshift -- Shift a number left.
+
+dnl  Copyright 1994, 1995, 2000, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     ?
+C EV5:     3.25
+C EV6:     1.75
+
+C  INPUT PARAMETERS
+C  rp	r16
+C  up	r17
+C  n	r18
+C  cnt	r19
+
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+	s8addq	r18,r17,r17	C make r17 point at end of s1
+	ldq	r4,-8(r17)	C load first limb
+	subq	r31,r19,r20	C r20 = -cnt, complementary shift count (shifts use only the low 6 bits)
+	s8addq	r18,r16,r16	C make r16 point at end of RES
+	subq	r18,1,r18	C r18 = n-1, limbs still to be loaded
+	and	r18,4-1,r28	C number of limbs in first loop
+	srl	r4,r20,r0	C compute function result
+
+	beq	r28,$L0		C skip first loop when n-1 is a multiple of 4
+	subq	r18,r28,r18	C r18 = limbs left for the 4-way unrolled code
+
+	ALIGN(8)
+$Loop0:	ldq	r3,-16(r17)	C load next lower limb
+	subq	r16,8,r16	C move result ptr down one limb
+	sll	r4,r19,r5	C current limb shifted left by cnt
+	subq	r17,8,r17	C move source ptr down one limb
+	subq	r28,1,r28	C decr first loop cnt
+	srl	r3,r20,r6	C bits shifted in from the next lower limb
+	bis	r3,r3,r4	C next lower limb becomes the current limb
+	bis	r5,r6,r8	C combine the two parts
+	stq	r8,0(r16)	C store result limb
+	bne	r28,$Loop0	C repeat until first loop cnt reaches 0
+
+$L0:	sll	r4,r19,r24	C current limb shifted left by cnt
+	beq	r18,$Lend	C no limbs left, store the last result limb
+C warm up phase 1
+	ldq	r1,-16(r17)	C load the next 4 lower limbs into r1..r4
+	subq	r18,4,r18	C decr loop cnt
+	ldq	r2,-24(r17)
+	ldq	r3,-32(r17)
+	ldq	r4,-40(r17)
+	beq	r18,$Lend1	C these were the last 4 limbs
+C warm up phase 2
+	srl	r1,r20,r7	C bits shifted in from r1
+	sll	r1,r19,r21	C r1 shifted left by cnt
+	srl	r2,r20,r8	C bits shifted in from r2
+	ldq	r1,-48(r17)	C start loading the following group of 4
+	sll	r2,r19,r22	C r2 shifted left by cnt
+	ldq	r2,-56(r17)
+	srl	r3,r20,r5	C bits shifted in from r3
+	bis	r7,r24,r7	C combine with the limb above
+	sll	r3,r19,r23	C r3 shifted left by cnt
+	bis	r8,r21,r8	C combine with the limb above
+	srl	r4,r20,r6	C bits shifted in from r4
+	ldq	r3,-64(r17)
+	sll	r4,r19,r24	C r4 shifted left by cnt
+	ldq	r4,-72(r17)
+	subq	r18,4,r18	C decr loop cnt
+	beq	r18,$Lend2	C no further group to load, drain the pipeline
+	ALIGN(16)
+C main loop
+$Loop:	stq	r7,-8(r16)	C store result limb
+	bis	r5,r22,r5	C combine with the limb above
+	stq	r8,-16(r16)	C store result limb
+	bis	r6,r23,r6	C combine with the limb above
+
+	srl	r1,r20,r7	C bits shifted in from r1
+	subq	r18,4,r18	C decr loop cnt
+	sll	r1,r19,r21	C r1 shifted left by cnt
+	unop	C ldq	r31,-96(r17)
+
+	srl	r2,r20,r8	C bits shifted in from r2
+	ldq	r1,-80(r17)	C load limb for the following group
+	sll	r2,r19,r22	C r2 shifted left by cnt
+	ldq	r2,-88(r17)	C load limb for the following group
+
+	stq	r5,-24(r16)	C store result limb
+	bis	r7,r24,r7	C combine with the limb above
+	stq	r6,-32(r16)	C store result limb
+	bis	r8,r21,r8	C combine with the limb above
+
+	srl	r3,r20,r5	C bits shifted in from r3
+	unop	C ldq	r31,-96(r17)
+	sll	r3,r19,r23	C r3 shifted left by cnt
+	subq	r16,32,r16	C move result ptr down 4 limbs
+
+	srl	r4,r20,r6	C bits shifted in from r4
+	ldq	r3,-96(r17)	C load limb for the following group
+	sll	r4,r19,r24	C r4 shifted left by cnt
+	ldq	r4,-104(r17)	C load limb for the following group
+
+	subq	r17,32,r17	C move source ptr down 4 limbs
+	bne	r18,$Loop	C repeat until loop cnt reaches 0
+C cool down phase 2/1
+$Lend2:	stq	r7,-8(r16)	C store result limb
+	bis	r5,r22,r5	C combine with the limb above
+	stq	r8,-16(r16)	C store result limb
+	bis	r6,r23,r6	C combine with the limb above
+	srl	r1,r20,r7	C bits shifted in from r1
+	sll	r1,r19,r21	C r1 shifted left by cnt
+	srl	r2,r20,r8	C bits shifted in from r2
+	sll	r2,r19,r22	C r2 shifted left by cnt
+	stq	r5,-24(r16)	C store result limb
+	bis	r7,r24,r7	C combine with the limb above
+	stq	r6,-32(r16)	C store result limb
+	bis	r8,r21,r8	C combine with the limb above
+	srl	r3,r20,r5	C bits shifted in from r3
+	sll	r3,r19,r23	C r3 shifted left by cnt
+	srl	r4,r20,r6	C bits shifted in from r4
+	sll	r4,r19,r24	C r4 shifted left by cnt
+C cool down phase 2/2
+	stq	r7,-40(r16)	C store result limb
+	bis	r5,r22,r5	C combine with the limb above
+	stq	r8,-48(r16)	C store result limb
+	bis	r6,r23,r6	C combine with the limb above
+	stq	r5,-56(r16)	C store result limb
+	stq	r6,-64(r16)	C store result limb
+C cool down phase 2/3
+	stq	r24,-72(r16)	C last result limb, nothing shifted in from below
+	ret	r31,(r26),1
+
+C cool down phase 1/1
+$Lend1:	srl	r1,r20,r7	C bits shifted in from r1
+	sll	r1,r19,r21	C r1 shifted left by cnt
+	srl	r2,r20,r8	C bits shifted in from r2
+	sll	r2,r19,r22	C r2 shifted left by cnt
+	srl	r3,r20,r5	C bits shifted in from r3
+	bis	r7,r24,r7	C combine with the limb above
+	sll	r3,r19,r23	C r3 shifted left by cnt
+	bis	r8,r21,r8	C combine with the limb above
+	srl	r4,r20,r6	C bits shifted in from r4
+	sll	r4,r19,r24	C r4 shifted left by cnt
+C cool down phase 1/2
+	stq	r7,-8(r16)	C store result limb
+	bis	r5,r22,r5	C combine with the limb above
+	stq	r8,-16(r16)	C store result limb
+	bis	r6,r23,r6	C combine with the limb above
+	stq	r5,-24(r16)	C store result limb
+	stq	r6,-32(r16)	C store result limb
+	stq	r24,-40(r16)	C last result limb, nothing shifted in from below
+	ret	r31,(r26),1
+
+$Lend:	stq	r24,-8(r16)	C last result limb, nothing shifted in from below
+	ret	r31,(r26),1
+EPILOGUE(mpn_lshift)
+ASM_END()
diff --git a/gmp/mpn/alpha/ev5/rshift.asm b/gmp/mpn/alpha/ev5/rshift.asm
new file mode 100644
index 0000000000..0244da35a5
--- /dev/null
+++ b/gmp/mpn/alpha/ev5/rshift.asm
@@ -0,0 +1,169 @@
+dnl  Alpha EV5 mpn_rshift -- Shift a number right.
+
+dnl  Copyright 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     ?
+C EV5:     3.25
+C EV6:     1.75
+
+C  INPUT PARAMETERS
+C  rp	r16
+C  up	r17
+C  n	r18
+C  cnt	r19
+
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+	ldq	r4,0(r17)	C load first limb
+	subq	r31,r19,r20	C r20 = -cnt; shifts use only the low 6 bits so this acts as 64-cnt
+	subq	r18,1,r18	C r18 = n-1
+	and	r18,4-1,r28	C number of limbs in first loop, (n-1) mod 4
+	sll	r4,r20,r0	C compute function result: first limb shifted left by 64-cnt
+
+	beq	r28,$L0	C (n-1) is a multiple of 4: skip the one-limb loop
+	subq	r18,r28,r18	C r18 = limbs left for the 4-way code, a multiple of 4
+
+	ALIGN(8)
+$Loop0:	ldq	r3,8(r17)	C r3 = next higher source limb
+	addq	r16,8,r16	C advance rp
+	srl	r4,r19,r5	C low part: current limb >> cnt
+	addq	r17,8,r17	C advance up
+	subq	r28,1,r28	C one fewer limb for this loop
+	sll	r3,r20,r6	C high part: next limb << (64-cnt)
+	bis	r3,r3,r4	C next limb becomes the current limb
+	bis	r5,r6,r8	C combine the two parts
+	stq	r8,-8(r16)	C store result limb (rp was already advanced)
+	bne	r28,$Loop0
+
+$L0:	srl	r4,r19,r24	C r24 = current limb >> cnt, pending low part of the next result
+	beq	r18,$Lend	C no limbs left: r24 alone is the top result limb
+C warm up phase 1: load the next four source limbs
+	ldq	r1,8(r17)
+	subq	r18,4,r18	C account for the four limbs just claimed
+	ldq	r2,16(r17)
+	ldq	r3,24(r17)
+	ldq	r4,32(r17)
+	beq	r18,$Lend1	C only these four remain: finish without the main loop
+C warm up phase 2: shift the first group while loading the second group
+	sll	r1,r20,r7	C r7 = high part taken from r1
+	srl	r1,r19,r21	C r21 = low part taken from r1
+	sll	r2,r20,r8
+	ldq	r1,40(r17)
+	srl	r2,r19,r22
+	ldq	r2,48(r17)
+	sll	r3,r20,r5
+	bis	r7,r24,r7	C result 0 = pending low part | high part of r1
+	srl	r3,r19,r23
+	bis	r8,r21,r8	C result 1
+	sll	r4,r20,r6
+	ldq	r3,56(r17)
+	srl	r4,r19,r24	C r24 = pending low part carried into the following group
+	ldq	r4,64(r17)
+	subq	r18,4,r18
+	beq	r18,$Lend2	C second group is the last one: drain the pipeline
+	ALIGN(16)
+C main loop: per pass store four results, shift four limbs, load four limbs
+$Loop:	stq	r7,0(r16)	C store result 0 of the previous group
+	bis	r5,r22,r5	C result 2 of the previous group
+	stq	r8,8(r16)	C store result 1 of the previous group
+	bis	r6,r23,r6	C result 3 of the previous group
+
+	sll	r1,r20,r7
+	subq	r18,4,r18	C four more limbs consumed
+	srl	r1,r19,r21
+	unop	C ldq	r31,-96(r17)  (disabled load; negative offset looks copied from lshift)
+
+	sll	r2,r20,r8
+	ldq	r1,72(r17)	C loads here fetch the group after the one being shifted
+	srl	r2,r19,r22
+	ldq	r2,80(r17)
+
+	stq	r5,16(r16)	C store result 2 of the previous group
+	bis	r7,r24,r7	C result 0 of the current group
+	stq	r6,24(r16)	C store result 3 of the previous group
+	bis	r8,r21,r8	C result 1 of the current group
+
+	sll	r3,r20,r5
+	unop	C ldq	r31,-96(r17)  (disabled load; negative offset looks copied from lshift)
+	srl	r3,r19,r23
+	addq	r16,32,r16	C advance rp by four limbs
+
+	sll	r4,r20,r6
+	ldq	r3,88(r17)
+	srl	r4,r19,r24	C r24 = pending low part for the next pass
+	ldq	r4,96(r17)
+
+	addq	r17,32,r17	C advance up by four limbs
+	bne	r18,$Loop
+C cool down phase 2/1: store the previous group and shift the last loaded group
+$Lend2:	stq	r7,0(r16)
+	bis	r5,r22,r5
+	stq	r8,8(r16)
+	bis	r6,r23,r6
+	sll	r1,r20,r7
+	srl	r1,r19,r21
+	sll	r2,r20,r8
+	srl	r2,r19,r22
+	stq	r5,16(r16)
+	bis	r7,r24,r7
+	stq	r6,24(r16)
+	bis	r8,r21,r8
+	sll	r3,r20,r5
+	srl	r3,r19,r23
+	sll	r4,r20,r6
+	srl	r4,r19,r24
+C cool down phase 2/2: store the four results of the last group
+	stq	r7,32(r16)
+	bis	r5,r22,r5
+	stq	r8,40(r16)
+	bis	r6,r23,r6
+	stq	r5,48(r16)
+	stq	r6,56(r16)
+C cool down phase 2/3: top result limb is the last source limb >> cnt
+	stq	r24,64(r16)
+	ret	r31,(r26),1
+
+C cool down phase 1/1: exactly four limbs were loaded and no loop pass ran
+$Lend1:	sll	r1,r20,r7
+	srl	r1,r19,r21
+	sll	r2,r20,r8
+	srl	r2,r19,r22
+	sll	r3,r20,r5
+	bis	r7,r24,r7	C result 0 = pending low part | high part of r1
+	srl	r3,r19,r23
+	bis	r8,r21,r8	C result 1
+	sll	r4,r20,r6
+	srl	r4,r19,r24	C r24 = last source limb >> cnt
+C cool down phase 1/2: store the four results and the top limb
+	stq	r7,0(r16)
+	bis	r5,r22,r5	C result 2
+	stq	r8,8(r16)
+	bis	r6,r23,r6	C result 3
+	stq	r5,16(r16)
+	stq	r6,24(r16)
+	stq	r24,32(r16)	C top result limb
+	ret	r31,(r26),1
+
+$Lend:	stq	r24,0(r16)	C store top result limb, last source limb >> cnt
+	ret	r31,(r26),1
+EPILOGUE(mpn_rshift)
+ASM_END()
diff --git a/gmp/mpn/alpha/ev5/sub_n.asm b/gmp/mpn/alpha/ev5/sub_n.asm
new file mode 100644
index 0000000000..2c25fad400
--- /dev/null
+++ b/gmp/mpn/alpha/ev5/sub_n.asm
@@ -0,0 +1,146 @@
+dnl  Alpha EV5 mpn_sub_n -- Subtract two limb vectors of the same length > 0
+dnl  and store difference in a third limb vector.
+
+dnl  Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     ?
+C EV5:     4.75
+C EV6:     3
+
+dnl  INPUT PARAMETERS
+dnl  res_ptr	r16
+dnl  s1_ptr	r17
+dnl  s2_ptr	r18
+dnl  size	r19
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+	bis	r31,r31,r25		C clear cy
+	subq	r19,4,r19		C decr loop cnt
+	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+	ldq	r0,0(r18)		C s2 limb 0
+	ldq	r4,0(r17)		C s1 limb 0
+	ldq	r1,8(r18)		C s2 limb 1
+	ldq	r5,8(r17)		C s1 limb 1
+	addq	r17,32,r17		C update s1_ptr
+	ldq	r2,16(r18)		C s2 limb 2
+	subq	r4,r0,r20		C 1st main subtract (no borrow in yet, so r20 is final)
+	ldq	r3,24(r18)		C s2 limb 3
+	subq	r19,4,r19		C decr loop cnt
+	ldq	r6,-16(r17)		C s1 limb 2 (s1_ptr already advanced)
+	cmpult	r4,r0,r25		C compute cy from last subtract
+	ldq	r7,-8(r17)		C s1 limb 3 (s1_ptr already advanced)
+	subq	r5,r1,r28		C 2nd main subtract
+	addq	r18,32,r18		C update s2_ptr
+	subq	r28,r25,r21		C 2nd carry subtract
+	cmpult	r5,r1,r8		C compute cy from last subtract
+	blt	r19,$Lend1		C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+	ALIGN(16)
+$Loop:	cmpult	r28,r25,r25		C compute cy from last subtract
+	ldq	r0,0(r18)		C s2 limb 0 of next group
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	ldq	r1,8(r18)		C s2 limb 1 of next group
+	subq	r6,r2,r28		C 3rd main subtract
+	ldq	r4,0(r17)		C s1 limb 0 of next group
+	subq	r28,r25,r22		C 3rd carry subtract
+	ldq	r5,8(r17)		C s1 limb 1 of next group
+	cmpult	r6,r2,r8		C compute cy from last subtract
+	cmpult	r28,r25,r25		C compute cy from last subtract
+	stq	r20,0(r16)		C store 1st result limb
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	stq	r21,8(r16)		C store 2nd result limb
+	subq	r7,r3,r28		C 4th main subtract
+	subq	r28,r25,r23		C 4th carry subtract
+	cmpult	r7,r3,r8		C compute cy from last subtract
+	cmpult	r28,r25,r25		C compute cy from last subtract
+		addq	r17,32,r17		C update s1_ptr
+	bis	r8,r25,r25		C combine cy from the two subtracts
+		addq	r16,32,r16		C update res_ptr
+	subq	r4,r0,r28		C 1st main subtract
+	ldq	r2,16(r18)		C s2 limb 2 of next group
+	subq	r28,r25,r20		C 1st carry subtract
+	ldq	r3,24(r18)		C s2 limb 3 of next group
+	cmpult	r4,r0,r8		C compute cy from last subtract
+	ldq	r6,-16(r17)		C s1 limb 2 of next group (s1_ptr already advanced)
+	cmpult	r28,r25,r25		C compute cy from last subtract
+	ldq	r7,-8(r17)		C s1 limb 3 of next group (s1_ptr already advanced)
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	subq	r19,4,r19		C decr loop cnt
+	stq	r22,-16(r16)		C store 3rd result limb (res_ptr already advanced)
+	subq	r5,r1,r28		C 2nd main subtract
+	stq	r23,-8(r16)		C store 4th result limb (res_ptr already advanced)
+	subq	r28,r25,r21		C 2nd carry subtract
+		addq	r18,32,r18		C update s2_ptr
+	cmpult	r5,r1,r8		C compute cy from last subtract
+	bge	r19,$Loop
+C Finish software pipeline for 1st loop
+$Lend1:	cmpult	r28,r25,r25		C compute cy from last subtract
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	subq	r6,r2,r28		C 3rd main subtract
+	subq	r28,r25,r22		C 3rd carry subtract
+	cmpult	r6,r2,r8		C compute cy from last subtract
+	cmpult	r28,r25,r25		C compute cy from last subtract
+	stq	r20,0(r16)		C store 1st result limb
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	stq	r21,8(r16)		C store 2nd result limb
+	subq	r7,r3,r28		C 4th main subtract
+	subq	r28,r25,r23		C 4th carry subtract
+	cmpult	r7,r3,r8		C compute cy from last subtract
+	cmpult	r28,r25,r25		C compute cy from last subtract
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	addq	r16,32,r16		C update res_ptr
+	stq	r22,-16(r16)		C store 3rd result limb
+	stq	r23,-8(r16)		C store 4th result limb
+$Lend2:	addq	r19,4,r19		C restore loop cnt
+	beq	r19,$Lret		C no limbs left, return the current cy
+C Start software pipeline for 2nd loop
+	ldq	r0,0(r18)		C s2 limb
+	ldq	r4,0(r17)		C s1 limb
+	subq	r19,1,r19		C decr loop cnt
+	beq	r19,$Lend0		C single limb left, finish it without looping
+C 2nd loop handles remaining 1-3 limbs
+	ALIGN(16)
+$Loop0:	subq	r4,r0,r28		C main subtract
+	cmpult	r4,r0,r8		C compute cy from last subtract
+	ldq	r0,8(r18)		C next s2 limb
+	ldq	r4,8(r17)		C next s1 limb
+	subq	r28,r25,r20		C carry subtract
+	addq	r18,8,r18		C update s2_ptr
+	addq	r17,8,r17		C update s1_ptr
+	stq	r20,0(r16)		C store result limb
+	cmpult	r28,r25,r25		C compute cy from last subtract
+	subq	r19,1,r19		C decr loop cnt
+	bis	r8,r25,r25		C combine cy from the two subtracts
+	addq	r16,8,r16		C update res_ptr
+	bne	r19,$Loop0
+$Lend0:	subq	r4,r0,r28		C main subtract
+	subq	r28,r25,r20		C carry subtract
+	cmpult	r4,r0,r8		C compute cy from last subtract
+	cmpult	r28,r25,r25		C compute cy from last subtract
+	stq	r20,0(r16)		C store final result limb
+	bis	r8,r25,r25		C combine cy from the two subtracts
+
+$Lret:	bis	r25,r31,r0		C return cy
+	ret	r31,(r26),1
+EPILOGUE(mpn_sub_n)
+ASM_END()
diff --git a/gmp/mpn/alpha/ev6/add_n.asm b/gmp/mpn/alpha/ev6/add_n.asm
index 9261f31b8a..114af73aa0 100644
--- a/gmp/mpn/alpha/ev6/add_n.asm
+++ b/gmp/mpn/alpha/ev6/add_n.asm
@@ -4,30 +4,19 @@ dnl  store sum in a third limb vector.
 dnl  Copyright 2000, 2003, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/ev6/aorslsh1_n.asm b/gmp/mpn/alpha/ev6/aorslsh1_n.asm
deleted file mode 100644
index cb966ce021..0000000000
--- a/gmp/mpn/alpha/ev6/aorslsh1_n.asm
+++ /dev/null
@@ -1,172 +0,0 @@
-dnl  Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
-
-dnl  Copyright 2003, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C      cycles/limb
-C EV4:     ?
-C EV5:     7
-C EV6:     4
-
-C TODO
-C  * Tune to reach 3.75 c/l on ev6.
-
-define(`rp',`r16')
-define(`up',`r17')
-define(`vp',`r18')
-define(`n', `r19')
-
-define(`u0', `r8')
-define(`u1', `r1')
-define(`v0', `r4')
-define(`v1', `r5')
-
-define(`cy0', `r0')
-define(`cy1', `r20')
-define(`cy', `r22')
-define(`rr', `r24')
-define(`ps', `r25')
-define(`sl', `r28')
-
-ifdef(`OPERATION_addlsh1_n',`
-  define(ADDSUB,       addq)
-  define(CARRY,       `cmpult $1,$2,$3')
-  define(func, mpn_addlsh1_n)
-')
-ifdef(`OPERATION_sublsh1_n',`
-  define(ADDSUB,       subq)
-  define(CARRY,       `cmpult $2,$1,$3')
-  define(func, mpn_sublsh1_n)
-')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
-
-ASM_START()
-PROLOGUE(func)
-	and	n, 2, cy0
-	blbs	n, L(bx1)
-L(bx0):	ldq	v1, 0(vp)
-	ldq	u1, 0(up)
-	lda	r2, 0(r31)
-	bne	cy0, L(b10)
-
-L(b00):	lda	vp, 48(vp)
-	lda	up, -16(up)
-	lda	rp, -8(rp)
-	lda	cy0, 0(r31)
-	br	r31, L(lo0)
-
-L(b10):	lda	vp, 32(vp)
-	lda	rp, 8(rp)
-	lda	cy0, 0(r31)
-	br	r31, L(lo2)
-
-L(bx1):	ldq	v0, 0(vp)
-	ldq	u0, 0(up)
-	lda	r3, 0(r31)
-	beq	cy0, L(b01)
-
-L(b11):	lda	vp, 40(vp)
-	lda	up, -24(up)
-	lda	rp, 16(rp)
-	lda	cy1, 0(r31)
-	br	r31, L(lo3)
-
-L(b01):	lda	n, -4(n)
-	lda	cy1, 0(r31)
-	ble	n, L(end)
-	lda	vp, 24(vp)
-	lda	up, -8(up)
-
-	ALIGN(16)
-L(top):	addq	v0, v0, r6
-	ldq	v1, -16(vp)
-	addq	r6, r3, sl	C combined vlimb
-	ldq	u1, 16(up)
-	ADDSUB	u0, sl, ps	C ulimb + (vlimb << 1)
-	cmplt	v0, r31, r2	C high v bits
-	ADDSUB	ps, cy1, rr	C consume carry from previous operation
-	CARRY(	ps, u0, cy0)	C carry out #2
-	stq	rr, 0(rp)
-	CARRY(	rr, ps, cy)	C carry out #3
-	lda	vp, 32(vp)	C bookkeeping
-	addq	cy, cy0, cy0	C final carry out
-L(lo0):	addq	v1, v1, r7
-	ldq	v0, -40(vp)
-	addq	r7, r2, sl
-	ldq	u0, 24(up)
-	ADDSUB	u1, sl, ps
-	cmplt	v1, r31, r3
-	ADDSUB	ps, cy0, rr
-	CARRY(	ps, u1, cy1)
-	stq	rr, 8(rp)
-	CARRY(	rr, ps, cy)
-	lda	rp, 32(rp)	C bookkeeping
-	addq	cy, cy1, cy1
-L(lo3):	addq	v0, v0, r6
-	ldq	v1, -32(vp)
-	addq	r6, r3, sl
-	ldq	u1, 32(up)
-	ADDSUB	u0, sl, ps
-	cmplt	v0, r31, r2
-	ADDSUB	ps, cy1, rr
-	CARRY(	ps, u0, cy0)
-	stq	rr, -16(rp)
-	CARRY(	rr, ps, cy)
-	lda	up, 32(up)	C bookkeeping
-	addq	cy, cy0, cy0
-L(lo2):	addq	v1, v1, r7
-	ldq	v0, -24(vp)
-	addq	r7, r2, sl
-	ldq	u0, 8(up)
-	ADDSUB	u1, sl, ps
-	cmplt	v1, r31, r3
-	ADDSUB	ps, cy0, rr
-	CARRY(	ps, u1, cy1)
-	stq	rr, -8(rp)
-	CARRY(	rr, ps, cy)
-	lda	n, -4(n)	C bookkeeping
-	addq	cy, cy1, cy1
-	bgt	n, L(top)
-
-L(end):	addq	v0, v0, r6
-	addq	r6, r3, sl
-	ADDSUB	u0, sl, ps
-	cmplt	v0, r31, r2
-	ADDSUB	ps, cy1, rr
-	CARRY(	ps, u0, cy0)
-	stq	rr, 0(rp)
-	CARRY(	rr, ps, cy)
-	addq	cy, cy0, cy0
-	addq	cy0, r2, r0
-
-	ret	r31,(r26),1
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/alpha/ev6/aorsmul_1.asm b/gmp/mpn/alpha/ev6/aorsmul_1.asm
index 0e68e6e7ad..eda092b2d5 100644
--- a/gmp/mpn/alpha/ev6/aorsmul_1.asm
+++ b/gmp/mpn/alpha/ev6/aorsmul_1.asm
@@ -1,32 +1,21 @@
 dnl  Alpha ev6 mpn_addmul_1 and mpn_submul_1.
 
-dnl  Copyright 2000, 2003-2005, 2008 Free Software Foundation, Inc.
+dnl  Copyright 2000, 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/ev6/gmp-mparam.h b/gmp/mpn/alpha/ev6/gmp-mparam.h
index e51d6b0d15..a01e977433 100644
--- a/gmp/mpn/alpha/ev6/gmp-mparam.h
+++ b/gmp/mpn/alpha/ev6/gmp-mparam.h
@@ -1,209 +1,76 @@
 /* gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2002, 2004, 2005, 2008-2010, 2014 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2008, 2009
+Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
 
 #define DIVEXACT_BY3_METHOD 0	/* override ../diveby3.asm */
 
-/* 500 MHz 21164 (agnesi.math.su.se) */
-/* FFT tuning limit = 20000000 */
-/* Generated by tuneup.c, 2014-03-14, gcc 3.3 */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        10
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        21
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      7
-#define USE_PREINV_DIVREM_1                  1  /* preinv always */
-#define DIV_QR_1N_PI1_METHOD                 2
-#define DIV_QR_1_NORM_THRESHOLD              5
-#define DIV_QR_1_UNNORM_THRESHOLD            1
-#define DIV_QR_2_PI2_THRESHOLD               8
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           20
-
-#define MUL_TOOM22_THRESHOLD                32
-#define MUL_TOOM33_THRESHOLD               117
-#define MUL_TOOM44_THRESHOLD               124
-#define MUL_TOOM6H_THRESHOLD               230
-#define MUL_TOOM8H_THRESHOLD               357
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     107
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      88
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     105
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     136
-
-#define SQR_BASECASE_THRESHOLD               0  /* always */
-#define SQR_TOOM2_THRESHOLD                 59
-#define SQR_TOOM3_THRESHOLD                123
-#define SQR_TOOM4_THRESHOLD                163
-#define SQR_TOOM6_THRESHOLD                333
-#define SQR_TOOM8_THRESHOLD                  0  /* always */
-
-#define MULMID_TOOM42_THRESHOLD             52
-
-#define MULMOD_BNM1_THRESHOLD               19
-#define SQRMOD_BNM1_THRESHOLD                5
-
-#define MUL_FFT_MODF_THRESHOLD             468  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    468, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     11, 5}, {     23, 6}, {     19, 7}, {     10, 6}, \
-    {     24, 7}, {     13, 6}, {     27, 7}, {     14, 6}, \
-    {     29, 7}, {     17, 6}, {     35, 7}, {     29, 8}, \
-    {     15, 7}, {     32, 8}, {     17, 7}, {     35, 8}, \
-    {     19, 7}, {     39, 8}, {     29, 9}, {     15, 8}, \
-    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     51, 9}, {     27, 8}, {     55, 9}, {     35, 8}, \
-    {     71, 9}, {     39,10}, {     23, 9}, {     55,10}, \
-    {     31, 9}, {     67,10}, {     39, 9}, {     79,10}, \
-    {     47, 9}, {     95,10}, {     55,11}, {     31,10}, \
-    {     79,11}, {     47,10}, {    103,12}, {     31,11}, \
-    {     63,10}, {    135,11}, {     79,10}, {    167,11}, \
-    {     95,10}, {    199,11}, {    111,12}, {     63,11}, \
-    {    143,10}, {    287, 9}, {    575,11}, {    159,10}, \
-    {    319,12}, {     95,11}, {    191,10}, {    383,11}, \
-    {    207,13}, {     63,12}, {    127,11}, {    255,10}, \
-    {    511,11}, {    271,10}, {    543,11}, {    287,10}, \
-    {    575,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    335,10}, {    671,11}, {    351,10}, {    703,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,12}, \
-    {    223,11}, {    447,13}, {    127,12}, {    255,11}, \
-    {    543,12}, {    287,11}, {    575,10}, {   1151,11}, \
-    {    607,12}, {    319,11}, {    671,12}, {    351,11}, \
-    {    703,13}, {    191,12}, {    383,11}, {    767,12}, \
-    {    415,11}, {    831,12}, {    447,14}, {    127,13}, \
-    {    255,12}, {    575,11}, {   1151,12}, {    607,13}, \
-    {    319,12}, {    735,13}, {    383,12}, {    767,11}, \
-    {   1535,12}, {    831,13}, {    447,12}, {    959,14}, \
-    {    255,13}, {    511,12}, {   1087,13}, {    575,12}, \
-    {   1215,13}, {    639,12}, {   1343,13}, {    703,12}, \
-    {   1407,14}, {    383,13}, {    767,12}, {   1535,13}, \
-    {    831,12}, {   1663,13}, {    959,15}, {    255,14}, \
-    {    511,13}, {   1215,14}, {    639,13}, {   1407,14}, \
-    {    767,13}, {   1663,14}, {    895,13}, {   1855,15}, \
-    {    511,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 151
-#define MUL_FFT_THRESHOLD                 5760
-
-#define SQR_FFT_MODF_THRESHOLD             412  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    412, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     11, 5}, {     23, 6}, {     12, 5}, {     25, 6}, \
-    {     27, 7}, {     14, 6}, {     29, 7}, {     28, 8}, \
-    {     15, 7}, {     31, 8}, {     17, 7}, {     36, 8}, \
-    {     19, 7}, {     39, 8}, {     29, 9}, {     15, 8}, \
-    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     49, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     79,10}, {     47, 9}, \
-    {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
-    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255,11}, {     79,10}, {    159, 9}, \
-    {    319,10}, {    167,11}, {     95,10}, {    191, 9}, \
-    {    383,11}, {    111,12}, {     63,11}, {    127,10}, \
-    {    271,11}, {    143,10}, {    287, 9}, {    575,10}, \
-    {    303,11}, {    159,10}, {    319,12}, {     95,11}, \
-    {    191,10}, {    383,11}, {    207,13}, {     63,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
-    {    543,11}, {    287,10}, {    575,11}, {    303,12}, \
-    {    159,11}, {    319,10}, {    639,11}, {    335,10}, \
-    {    671,11}, {    351,10}, {    703,11}, {    367,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,12}, \
-    {    223,11}, {    447,13}, {    127,12}, {    255,11}, \
-    {    543,12}, {    287,11}, {    575,10}, {   1151,11}, \
-    {    607,12}, {    319,11}, {    639,10}, {   1279,11}, \
-    {    671,12}, {    351,11}, {    703,13}, {    191,12}, \
-    {    383,11}, {    767,12}, {    415,11}, {    831,12}, \
-    {    447,11}, {    895,12}, {    479,14}, {    127,13}, \
-    {    255,12}, {    575,11}, {   1151,12}, {    607,13}, \
-    {    319,12}, {    703,11}, {   1407,12}, {    735,13}, \
-    {    383,12}, {    831,13}, {    447,12}, {    959,14}, \
-    {    255,13}, {    511,12}, {   1087,13}, {    575,12}, \
-    {   1151,13}, {    639,12}, {   1279,13}, {    703,12}, \
-    {   1407,14}, {    383,13}, {    767,12}, {   1535,13}, \
-    {    831,12}, {   1663,13}, {    959,15}, {    255,14}, \
-    {    511,13}, {   1215,14}, {    639,13}, {   1407,14}, \
-    {    767,13}, {   1663,14}, {    895,13}, {   1791,15}, \
-    {    511,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 159
-#define SQR_FFT_THRESHOLD                 5056
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                 100
-#define MULLO_MUL_N_THRESHOLD            11355
-
-#define DC_DIV_QR_THRESHOLD                124
-#define DC_DIVAPPR_Q_THRESHOLD             438
-#define DC_BDIV_QR_THRESHOLD               153
-#define DC_BDIV_Q_THRESHOLD                318
-
-#define INV_MULMOD_BNM1_THRESHOLD           62
-#define INV_NEWTON_THRESHOLD               384
-#define INV_APPR_THRESHOLD                 402
-
-#define BINV_NEWTON_THRESHOLD              381
-#define REDC_1_TO_REDC_N_THRESHOLD         110
-
-#define MU_DIV_QR_THRESHOLD               1752
-#define MU_DIVAPPR_Q_THRESHOLD            1895
-#define MUPI_DIV_QR_THRESHOLD              174
-#define MU_BDIV_QR_THRESHOLD              1387
-#define MU_BDIV_Q_THRESHOLD               1787
-
-#define POWM_SEC_TABLE  1,13,66,82,579
-
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                     318
-#define HGCD_APPR_THRESHOLD                363
-#define HGCD_REDUCE_THRESHOLD             2384
-#define GCD_DC_THRESHOLD                  2504
-#define GCDEXT_DC_THRESHOLD                671
-#define JACOBI_BASE_METHOD                   3
-
-#define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        25
-#define SET_STR_DC_THRESHOLD              3754
-#define SET_STR_PRECOMPUTE_THRESHOLD      8097
-
-#define FAC_DSC_THRESHOLD                  951
-#define FAC_ODD_THRESHOLD                   24
+/* 500 MHz 21164 */
+
+/* Generated by tuneup.c, 2009-01-12, gcc 3.3 */
+
+#define MUL_KARATSUBA_THRESHOLD          31
+#define MUL_TOOM3_THRESHOLD             101
+#define MUL_TOOM44_THRESHOLD            168
+
+#define SQR_BASECASE_THRESHOLD            6
+#define SQR_KARATSUBA_THRESHOLD          60
+#define SQR_TOOM3_THRESHOLD             102
+#define SQR_TOOM4_THRESHOLD             172
+
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD             102
+#define MULLOW_MUL_N_THRESHOLD          399
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* preinv always */
+#define DIV_DC_THRESHOLD                134
+#define POWM_THRESHOLD                  257
+
+#define MATRIX22_STRASSEN_THRESHOLD      19
+#define HGCD_THRESHOLD                  303
+#define GCD_DC_THRESHOLD               1258
+#define GCDEXT_DC_THRESHOLD             807
+#define JACOBI_BASE_METHOD                3
+
+#define DIVREM_1_NORM_THRESHOLD           0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD         0  /* always */
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1_THRESHOLD                13
+#define MOD_1_2_THRESHOLD                14
+#define MOD_1_4_THRESHOLD                40
+#define USE_PREINV_DIVREM_1               1  /* preinv always */
+#define USE_PREINV_MOD_1                  1  /* preinv always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             16
+#define GET_STR_PRECOMPUTE_THRESHOLD     23
+#define SET_STR_DC_THRESHOLD           4615
+#define SET_STR_PRECOMPUTE_THRESHOLD   8178
+
+#define MUL_FFT_TABLE  { 432, 864, 1856, 3840, 11264, 28672, 81920, 327680, 0 }
+#define MUL_FFT_MODF_THRESHOLD          448
+#define MUL_FFT_THRESHOLD              4992
+
+#define SQR_FFT_TABLE  { 432, 864, 1728, 3840, 9216, 20480, 81920, 327680, 786432, 0 }
+#define SQR_FFT_MODF_THRESHOLD          344
+#define SQR_FFT_THRESHOLD              3712
diff --git a/gmp/mpn/alpha/ev6/mod_1_4.asm b/gmp/mpn/alpha/ev6/mod_1_4.asm
deleted file mode 100644
index 836de07c0f..0000000000
--- a/gmp/mpn/alpha/ev6/mod_1_4.asm
+++ /dev/null
@@ -1,337 +0,0 @@
-dnl Alpha mpn_mod_1s_4p
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2009, 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C TODO:
-C  * Optimise.  2.75 c/l should be possible.
-C  * Write a proper mpn_mod_1s_4p_cps.  The code below was compiler generated.
-C  * Optimise feed-in code, starting the sw pipeline in switch code.
-C  * Shorten software pipeline.  The mul instructions are scheduled too far
-C    from their users.  Fixing this will allow us to use fewer registers.
-C  * If we cannot reduce register usage, write perhaps small-n basecase.
-C  * Does this work for PIC?
-
-C      cycles/limb
-C EV4:     ?
-C EV5:    23
-C EV6:     3
-
-define(`ap',     `r16')
-define(`n',      `r17')
-define(`pl',     `r24')
-define(`ph',     `r25')
-define(`rl',     `r6')
-define(`rh',     `r7')
-define(`B1modb', `r1')
-define(`B2modb', `r2')
-define(`B3modb', `r3')
-define(`B4modb', `r4')
-define(`B5modb', `r5')
-
-ASM_START()
-PROLOGUE(mpn_mod_1s_4p)
-	lda	r30, -64(r30)
-	stq	r9, 8(r30)
-	ldq	B1modb, 16(r19)
-	stq	r10, 16(r30)
-	ldq	B2modb, 24(r19)
-	stq	r11, 24(r30)
-	ldq	B3modb, 32(r19)
-	stq	r12, 32(r30)
-	ldq	B4modb, 40(r19)
-	stq	r13, 40(r30)
-	ldq	B5modb, 48(r19)
-	s8addq	n, ap, ap		C point ap at vector end
-
-	and	n, 3, r0
-	lda	n, -4(n)
-	beq	r0, L(b0)
-	lda	r6, -2(r0)
-	blt	r6, L(b1)
-	beq	r6, L(b2)
-
-L(b3):	ldq	r21, -16(ap)
-	ldq	r22, -8(ap)
-	ldq	r20, -24(ap)
-	mulq	r21, B1modb, r8
-	umulh	r21, B1modb, r12
-	mulq	r22, B2modb, r9
-	umulh	r22, B2modb, r13
-	addq	r8, r20, pl
-	cmpult	pl, r8, r0
-	addq	r0, r12, ph
-	addq	r9, pl, rl
-	cmpult	rl, r9, r0
-	addq	r13, ph, ph
-	addq	r0, ph, rh
-	lda	ap, -56(ap)
-	br	L(com)
-
-L(b0):	ldq	r21, -24(ap)
-	ldq	r22, -16(ap)
-	ldq	r23, -8(ap)
-	ldq	r20, -32(ap)
-	mulq	r21, B1modb, r8
-	umulh	r21, B1modb, r12
-	mulq	r22, B2modb, r9
-	umulh	r22, B2modb, r13
-	mulq	r23, B3modb, r10
-	umulh	r23, B3modb, r27
-	addq	r8, r20, pl
-	cmpult	pl, r8, r0
-	addq	r0, r12, ph
-	addq	r9, pl, pl
-	cmpult	pl, r9, r0
-	addq	r13, ph, ph
-	addq	r0, ph, ph
-	addq	r10, pl, rl
-	cmpult	rl, r10, r0
-	addq	r27, ph, ph
-	addq	r0, ph, rh
-	lda	ap, -64(ap)
-	br	L(com)
-
-L(b1):	bis	r31, r31, rh
-	ldq	rl, -8(ap)
-	lda	ap, -40(ap)
-	br	L(com)
-
-L(b2):	ldq	rh, -8(ap)
-	ldq	rl, -16(ap)
-	lda	ap, -48(ap)
-
-L(com):	ble	n, L(ed3)
-	ldq	r21, 8(ap)
-	ldq	r22, 16(ap)
-	ldq	r23, 24(ap)
-	ldq	r20, 0(ap)
-	lda	n, -4(n)
-	lda	ap, -32(ap)
-	mulq	r21, B1modb, r8
-	umulh	r21, B1modb, r12
-	mulq	r22, B2modb, r9
-	umulh	r22, B2modb, r13
-	mulq	r23, B3modb, r10
-	umulh	r23, B3modb, r27
-	mulq	rl, B4modb, r11
-	umulh	rl, B4modb, r28
-	ble	n, L(ed2)
-
-	ALIGN(16)
-L(top):	ldq	r21, 8(ap)
-	mulq	rh, B5modb, rl
-	addq	r8, r20, pl
-	ldq	r22, 16(ap)
-	cmpult	pl, r8, r0
-	umulh	rh, B5modb, rh
-	ldq	r23, 24(ap)
-	addq	r0, r12, ph
-	addq	r9, pl, pl
-	mulq	r21, B1modb, r8
-	cmpult	pl, r9, r0
-	addq	r13, ph, ph
-	umulh	r21, B1modb, r12
-	lda	ap, -32(ap)
-	addq	r0, ph, ph
-	addq	r10, pl, pl
-	mulq	r22, B2modb, r9
-	cmpult	pl, r10, r0
-	addq	r27, ph, ph
-	addq	r11, pl, pl
-	umulh	r22, B2modb, r13
-	addq	r0, ph, ph
-	cmpult	pl, r11, r0
-	addq	r28, ph, ph
-	mulq	r23, B3modb, r10
-	ldq	r20, 32(ap)
-	addq	pl, rl, rl
-	umulh	r23, B3modb, r27
-	addq	r0, ph, ph
-	cmpult	rl, pl, r0
-	mulq	rl, B4modb, r11
-	addq	ph, rh, rh
-	umulh	rl, B4modb, r28
-	addq	r0, rh, rh
-	lda	n, -4(n)
-	bgt	n, L(top)
-
-L(ed2):	mulq	rh, B5modb, rl
-	addq	r8, r20, pl
-	umulh	rh, B5modb, rh
-	cmpult	pl, r8, r0
-	addq	r0, r12, ph
-	addq	r9, pl, pl
-	cmpult	pl, r9, r0
-	addq	r13, ph, ph
-	addq	r0, ph, ph
-	addq	r10, pl, pl
-	cmpult	pl, r10, r0
-	addq	r27, ph, ph
-	addq	r11, pl, pl
-	addq	r0, ph, ph
-	cmpult	pl, r11, r0
-	addq	r28, ph, ph
-	addq	pl, rl, rl
-	addq	r0, ph, ph
-	cmpult	rl, pl, r0
-	addq	ph, rh, rh
-	addq	r0, rh, rh
-
-L(ed3):	mulq	rh, B1modb, r8
-	umulh	rh, B1modb, rh
-	addq	r8, rl, rl
-	cmpult	rl, r8, r0
-	addq	r0, rh, rh
-
-	ldq	r24, 8(r19)		C cnt
-	sll	rh, r24, rh
-	subq	r31, r24, r25
-	srl	rl, r25, r2
-	sll	rl, r24, rl
-	or	r2, rh, rh
-
-	ldq	r23, 0(r19)		C bi
-	mulq	rh, r23, r8
-	umulh	rh, r23, r9
-	addq	rh, 1, r7
-	addq	r8, rl, r8		C ql
-	cmpult	r8, rl, r0
-	addq	r9, r7, r9
-	addq	r0, r9, r9		C qh
-	mulq	r9, r18, r21		C qh * b
-	subq	rl, r21, rl
-	cmpult	r8, rl, r0		C rl > ql
-	negq	r0, r0
-	and	r0, r18, r0
-	addq	rl, r0, rl
-	cmpule	r18, rl, r0		C rl >= b
-	negq	r0, r0
-	and	r0, r18, r0
-	subq	rl, r0, rl
-
-	srl	rl, r24, r0
-
-	ldq	r9, 8(r30)
-	ldq	r10, 16(r30)
-	ldq	r11, 24(r30)
-	ldq	r12, 32(r30)
-	ldq	r13, 40(r30)
-	lda	r30, 64(r30)
-	ret	r31, (r26), 1
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1s_4p_cps,gp)
-	lda	r30, -32(r30)
-	stq	r26, 0(r30)
-	stq	r9, 8(r30)
-	stq	r10, 16(r30)
-	stq	r11, 24(r30)
-	mov	r16, r11
-	LEA(	r4, __clz_tab)
-	lda	r10, 65(r31)
-	cmpbge	r31, r17, r1
-	srl	r1, 1, r1
-	xor	r1, 127, r1
-	addq	r1, r4, r1
-	ldq_u	r2, 0(r1)
-	extbl	r2, r1, r2
-	s8subq	r2, 7, r2
-	srl	r17, r2, r3
-	subq	r10, r2, r10
-	addq	r3, r4, r3
-	ldq_u	r1, 0(r3)
-	extbl	r1, r3, r1
-	subq	r10, r1, r10
-	sll	r17, r10, r9
-	mov	r9, r16
-	jsr	r26, mpn_invert_limb
-	ldah	r29, 0(r26)
-	subq	r31, r10, r2
-	lda	r1, 1(r31)
-	sll	r1, r10, r1
-	subq	r31, r9, r3
-	srl	r0, r2, r2
-	ldq	r26, 0(r30)
-	bis	r2, r1, r2
-	lda	r29, 0(r29)
-	stq	r0, 0(r11)
-	stq	r10, 8(r11)
-	mulq	r2, r3, r2
-	srl	r2, r10, r3
-	umulh	r2, r0, r1
-	stq	r3, 16(r11)
-	mulq	r2, r0, r3
-	ornot	r31, r1, r1
-	subq	r1, r2, r1
-	mulq	r1, r9, r1
-	addq	r1, r9, r2
-	cmpule	r1, r3, r3
-	cmoveq	r3, r2, r1
-	srl	r1, r10, r3
-	umulh	r1, r0, r2
-	stq	r3, 24(r11)
-	mulq	r1, r0, r3
-	ornot	r31, r2, r2
-	subq	r2, r1, r2
-	mulq	r2, r9, r2
-	addq	r2, r9, r1
-	cmpule	r2, r3, r3
-	cmoveq	r3, r1, r2
-	srl	r2, r10, r1
-	umulh	r2, r0, r3
-	stq	r1, 32(r11)
-	mulq	r2, r0, r1
-	ornot	r31, r3, r3
-	subq	r3, r2, r3
-	mulq	r3, r9, r3
-	addq	r3, r9, r2
-	cmpule	r3, r1, r1
-	cmoveq	r1, r2, r3
-	srl	r3, r10, r2
-	umulh	r3, r0, r1
-	stq	r2, 40(r11)
-	mulq	r3, r0, r0
-	ornot	r31, r1, r1
-	subq	r1, r3, r1
-	mulq	r1, r9, r1
-	addq	r1, r9, r9
-	cmpule	r1, r0, r0
-	cmoveq	r0, r9, r1
-	ldq	r9, 8(r30)
-	srl	r1, r10, r1
-	ldq	r10, 16(r30)
-	stq	r1, 48(r11)
-	ldq	r11, 24(r30)
-	lda	r30, 32(r30)
-	ret	r31, (r26), 1
-EPILOGUE()
diff --git a/gmp/mpn/alpha/ev6/mul_1.asm b/gmp/mpn/alpha/ev6/mul_1.asm
index 8ee19cd429..841f5083cb 100644
--- a/gmp/mpn/alpha/ev6/mul_1.asm
+++ b/gmp/mpn/alpha/ev6/mul_1.asm
@@ -4,30 +4,19 @@ dnl  result in a second limb vector.
 dnl  Copyright 2000, 2001, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -60,7 +49,7 @@ C   r20,r29,r13-r15  scramble
 C
 C   We're doing 7 of the 8 carry propagations with a br fixup code and 1 with a
 C   put-the-carry-into-hi.  The idea is that these branches are very rarely
-C   taken, and since a non-taken branch consumes no resources, that is better
+C   taken, and since a non-taken branch consumes no resources, that is better
 C   than an addq.
 C
 C   Software pipeline: a load in cycle #09, feeds a mul in cycle #16, feeds an
@@ -137,7 +126,7 @@ $L_9_or_more:
 	mulq	r2,r19,r3	C r3 = prod_low
 	umulh	r2,r19,r21	C r21 = prod_high
 	beq	r20,$Le1b	C jump if size was == 1
-	bis	r31, r31, r0	C FIXME: shouldn't need this
+	bis	r31, r31, r0	C FIXME: shouldn't need this
 	ldq	r2,0(r17)	C r2 = s1_limb
 	lda	r17,8(r17)	C s1_ptr++
 	lda	r20,-1(r20)	C size--
diff --git a/gmp/mpn/alpha/ev6/nails/README b/gmp/mpn/alpha/ev6/nails/README
index b214ac50ad..8b3b357a77 100644
--- a/gmp/mpn/alpha/ev6/nails/README
+++ b/gmp/mpn/alpha/ev6/nails/README
@@ -2,29 +2,18 @@ Copyright 2002, 2005 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/alpha/ev6/nails/addmul_1.asm b/gmp/mpn/alpha/ev6/nails/addmul_1.asm
index 711d4e66e5..149195c6f4 100644
--- a/gmp/mpn/alpha/ev6/nails/addmul_1.asm
+++ b/gmp/mpn/alpha/ev6/nails/addmul_1.asm
@@ -1,32 +1,21 @@
 dnl  Alpha ev6 nails mpn_addmul_1.
 
 dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -36,7 +25,7 @@ C EV5:    18
 C EV6:     4
 
 C TODO
-C  * Reroll loop for 3.75 c/l with current 4-way unrolling.
+C  * Reroll loop for 3.75 c/l with current 4-way unrolling.
 C  * The loop is overscheduled wrt loads and wrt multiplies, in particular
 C    umulh.
 C  * Use FP loop count and multiple exit points, that would simplify feed-in lp0
diff --git a/gmp/mpn/alpha/ev6/nails/addmul_2.asm b/gmp/mpn/alpha/ev6/nails/addmul_2.asm
index 6ff6b3ad6b..9edaed8b3a 100644
--- a/gmp/mpn/alpha/ev6/nails/addmul_2.asm
+++ b/gmp/mpn/alpha/ev6/nails/addmul_2.asm
@@ -1,32 +1,21 @@
 dnl  Alpha ev6 nails mpn_addmul_2.
 
 dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/ev6/nails/addmul_3.asm b/gmp/mpn/alpha/ev6/nails/addmul_3.asm
index a1ffb680ec..1d89769e13 100644
--- a/gmp/mpn/alpha/ev6/nails/addmul_3.asm
+++ b/gmp/mpn/alpha/ev6/nails/addmul_3.asm
@@ -1,32 +1,21 @@
 dnl  Alpha ev6 nails mpn_addmul_3.
 
 dnl  Copyright 2002, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/ev6/nails/addmul_4.asm b/gmp/mpn/alpha/ev6/nails/addmul_4.asm
index 77e02a4316..f19b0232df 100644
--- a/gmp/mpn/alpha/ev6/nails/addmul_4.asm
+++ b/gmp/mpn/alpha/ev6/nails/addmul_4.asm
@@ -1,32 +1,21 @@
 dnl  Alpha ev6 nails mpn_addmul_4.
 
 dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/ev6/nails/aors_n.asm b/gmp/mpn/alpha/ev6/nails/aors_n.asm
index f6586773f5..4958e81ed9 100644
--- a/gmp/mpn/alpha/ev6/nails/aors_n.asm
+++ b/gmp/mpn/alpha/ev6/nails/aors_n.asm
@@ -1,32 +1,21 @@
 dnl  Alpha ev6 nails mpn_add_n and mpn_sub_n.
 
 dnl  Copyright 2002, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  Runs at 2.5 cycles/limb.  It would be possible to reach 2.0 cycles/limb
diff --git a/gmp/mpn/alpha/ev6/nails/gmp-mparam.h b/gmp/mpn/alpha/ev6/nails/gmp-mparam.h
index 7949fe8df8..1bc93b52c6 100644
--- a/gmp/mpn/alpha/ev6/nails/gmp-mparam.h
+++ b/gmp/mpn/alpha/ev6/nails/gmp-mparam.h
@@ -1,43 +1,33 @@
 /* gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2004 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
 
 /* Generated by tuneup.c, 2004-02-07, gcc 3.3 */
 
-#define MUL_TOOM22_THRESHOLD             40
-#define MUL_TOOM33_THRESHOLD            236
+#define MUL_KARATSUBA_THRESHOLD          40
+#define MUL_TOOM3_THRESHOLD             236
 
 #define SQR_BASECASE_THRESHOLD            7  /* karatsuba */
-#define SQR_TOOM2_THRESHOLD               0  /* never sqr_basecase */
+#define SQR_KARATSUBA_THRESHOLD           0  /* never sqr_basecase */
 #define SQR_TOOM3_THRESHOLD             120
 
 #define DIV_SB_PREINV_THRESHOLD       MP_SIZE_T_MAX  /* no preinv with nails */
diff --git a/gmp/mpn/alpha/ev6/nails/mul_1.asm b/gmp/mpn/alpha/ev6/nails/mul_1.asm
index da2ee3d099..cac3776ba0 100644
--- a/gmp/mpn/alpha/ev6/nails/mul_1.asm
+++ b/gmp/mpn/alpha/ev6/nails/mul_1.asm
@@ -1,32 +1,21 @@
 dnl  Alpha ev6 nails mpn_mul_1.
 
 dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -36,10 +25,10 @@ C EV5:    18
 C EV6:     3.25
 
 C TODO
-C  * Reroll loop for 3.0 c/l with current 4-way unrolling.
+C  * Reroll loop for 3.0 c/l with current 4-way unrolling.
 C  * The loop is overscheduled wrt loads and wrt multiplies, in particular
 C    umulh.
-C  * Use FP loop count and multiple exit points, that would simplify feed-in lp0
+C  * Use FP loop count and multiple exit points, that would simplify feed-in lp0
 C    and would work since the loop structure is really regular.
 
 C  INPUT PARAMETERS
diff --git a/gmp/mpn/alpha/ev6/nails/submul_1.asm b/gmp/mpn/alpha/ev6/nails/submul_1.asm
index f473a59ba8..4242517a4a 100644
--- a/gmp/mpn/alpha/ev6/nails/submul_1.asm
+++ b/gmp/mpn/alpha/ev6/nails/submul_1.asm
@@ -1,32 +1,21 @@
 dnl  Alpha ev6 nails mpn_submul_1.
 
 dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -36,10 +25,10 @@ C EV5:    18
 C EV6:     4
 
 C TODO
-C  * Reroll loop for 3.75 c/l with current 4-way unrolling.
+C  * Reroll loop for 3.75 c/l with current 4-way unrolling.
 C  * The loop is overscheduled wrt loads and wrt multiplies, in particular
 C    umulh.
-C  * Use FP loop count and multiple exit points, that would simplify feed-in lp0
+C  * Use FP loop count and multiple exit points, that would simplify feed-in lp0
 C    and would work since the loop structure is really regular.
 
 C  INPUT PARAMETERS
diff --git a/gmp/mpn/alpha/ev6/slot.pl b/gmp/mpn/alpha/ev6/slot.pl
index a4c8a36882..17967e79a2 100755..100644
--- a/gmp/mpn/alpha/ev6/slot.pl
+++ b/gmp/mpn/alpha/ev6/slot.pl
@@ -1,32 +1,21 @@
 #!/usr/bin/perl -w
 
-# Copyright 2000, 2001, 2003-2005, 2011 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
 #
-#  This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or
-#
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 # Usage: slot.pl [filename.o]...
@@ -51,12 +40,9 @@ my %optable =
   (
    'addq'   => 'E',
    'and'    => 'E',
-   'andnot' => 'E',
    'beq'    => 'U',
    'bge'    => 'U',
    'bgt'    => 'U',
-   'bic'    => 'E',
-   'bis'    => 'E',
    'blt'    => 'U',
    'bne'    => 'U',
    'br'     => 'L',
@@ -85,7 +71,6 @@ my %optable =
    'ldt'    => 'L',
    'ret'    => 'L',
    'mov'    => 'E',
-   'mull'   => 'U',
    'mulq'   => 'U',
    'negq'   => 'E',
    'nop'    => 'E',
diff --git a/gmp/mpn/alpha/ev6/sqr_diagonal.asm b/gmp/mpn/alpha/ev6/sqr_diagonal.asm
new file mode 100644
index 0000000000..58d086e624
--- /dev/null
+++ b/gmp/mpn/alpha/ev6/sqr_diagonal.asm
@@ -0,0 +1,115 @@
+dnl  Alpha mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002, 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:      ?
+C EV5:      ?
+C EV6:      2.3
+
+C  INPUT PARAMETERS
+C  rp	r16
+C  up	r17
+C  n	r18
+
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+	lda	r18, -2(r18)	C n -= 2
+	ldq	r0,   0(r17)	C r0 = up[0]
+	mulq	r0, r0, r4	C r4 = low limb of up[0]^2
+	umulh	r0, r0, r20	C r20 = high limb of up[0]^2
+	blt	r18, L(ex1)	C n < 2: only up[0]^2 to store
+	ldq	r1,   8(r17)	C r1 = up[1]
+	mulq	r1, r1, r5	C r5 = low limb of up[1]^2
+	umulh	r1, r1, r21	C r21 = high limb of up[1]^2
+	beq	r18, L(ex2)	C n = 2: two squares to store
+	lda	r18, -2(r18)	C n -= 2
+	ldq	r0,  16(r17)	C r0 = up[2], not yet squared
+	blt	r18, L(ex3)	C n = 3
+	ldq	r1,  24(r17)	C r1 = up[3], not yet squared
+	beq	r18, L(ex4)	C n = 4
+C  Loop invariant: r4/r20 and r5/r21 hold two unstored squares, r0 and r1 hold the next two limbs.
+	ALIGN(16)
+L(top):	lda	r18, -2(r18)	C n -= 2
+	stq	r4,   0(r16)	C store pending square, low limb
+	mulq	r0, r0, r4	C start squaring r0, reusing r4/r20
+	stq	r20,  8(r16)	C store pending square, high limb
+	umulh	r0, r0, r20
+	ldq	r0,  32(r17)	C load the limb two past the one just squared
+	blt	r18, L(x)	C odd tail: the limb just loaded is the last one
+	stq	r5,  16(r16)	C store second pending square
+	mulq	r1, r1, r5	C start squaring r1, reusing r5/r21
+	stq	r21, 24(r16)
+	umulh	r1, r1, r21
+	ldq	r1,  40(r17)	C load the limb two past the one just squared
+	lda	r16, 32(r16)	C rp += 4
+	lda	r17, 16(r17)	C up += 2
+	bne	r18, L(top)	C when n reaches 0, fall through to the ex4 drain
+
+	ALIGN(16)
+L(ex4):	stq	r4,   0(r16)	C drain: 2 pending squares, then square r0 and r1
+	mulq	r0, r0, r4
+	stq	r20,  8(r16)
+	umulh	r0, r0, r20
+	stq	r5,  16(r16)
+	mulq	r1, r1, r5
+	stq	r21, 24(r16)
+	umulh	r1, r1, r21
+	stq	r4,  32(r16)	C square of r0
+	stq	r20, 40(r16)
+	stq	r5,  48(r16)	C square of r1
+	stq	r21, 56(r16)
+	ret	r31, (r26), 1
+	ALIGN(16)
+L(x):	stq	r5,  16(r16)	C odd tail: 0(r16) and 8(r16) were stored in the loop
+	mulq	r1, r1, r5	C square the limb still waiting in r1
+	stq	r21, 24(r16)
+	umulh	r1, r1, r21
+	stq	r4,  32(r16)	C square computed in the loop before the branch
+	mulq	r0, r0, r4	C square the final limb
+	stq	r20, 40(r16)
+	umulh	r0, r0, r20
+	stq	r5,  48(r16)
+	stq	r21, 56(r16)
+	stq	r4,  64(r16)	C square of the final limb
+	stq	r20, 72(r16)
+	ret	r31, (r26), 1
+L(ex1):	stq	r4,   0(r16)	C store up[0]^2 only
+	stq	r20,  8(r16)
+	ret	r31, (r26), 1
+	ALIGN(16)
+L(ex2):	stq	r4,   0(r16)	C store up[0]^2 and up[1]^2
+	stq	r20,  8(r16)
+	stq	r5,  16(r16)
+	stq	r21, 24(r16)
+	ret	r31, (r26), 1
+	ALIGN(16)
+L(ex3):	stq	r4,   0(r16)	C store up[0]^2, then square up[2] held in r0
+	mulq	r0, r0, r4
+	stq	r20,  8(r16)
+	umulh	r0, r0, r20
+	stq	r5,  16(r16)	C up[1]^2
+	stq	r21, 24(r16)
+	stq	r4,  32(r16)	C up[2]^2
+	stq	r20, 40(r16)
+	ret	r31, (r26), 1
+EPILOGUE()
+ASM_END()
diff --git a/gmp/mpn/alpha/ev6/sub_n.asm b/gmp/mpn/alpha/ev6/sub_n.asm
index a35ba40d34..f23ad44a15 100644
--- a/gmp/mpn/alpha/ev6/sub_n.asm
+++ b/gmp/mpn/alpha/ev6/sub_n.asm
@@ -4,30 +4,19 @@ dnl  and store difference in a third limb vector.
 dnl  Copyright 2000, 2003, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/ev67/gcd_1.asm b/gmp/mpn/alpha/ev67/gcd_1.asm
index 55fa7d3673..2e6f0a5e22 100644
--- a/gmp/mpn/alpha/ev67/gcd_1.asm
+++ b/gmp/mpn/alpha/ev67/gcd_1.asm
@@ -4,29 +4,18 @@ dnl  Copyright 2003, 2004 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/ev67/hamdist.asm b/gmp/mpn/alpha/ev67/hamdist.asm
index 4b13e9f14f..a72d95e90b 100644
--- a/gmp/mpn/alpha/ev67/hamdist.asm
+++ b/gmp/mpn/alpha/ev67/hamdist.asm
@@ -4,29 +4,18 @@ dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/ev67/popcount.asm b/gmp/mpn/alpha/ev67/popcount.asm
index 049c1cd239..6ed79cf158 100644
--- a/gmp/mpn/alpha/ev67/popcount.asm
+++ b/gmp/mpn/alpha/ev67/popcount.asm
@@ -4,29 +4,18 @@ dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
 dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/gmp-mparam.h b/gmp/mpn/alpha/gmp-mparam.h
index b850bd24b5..6b6d7bd9c8 100644
--- a/gmp/mpn/alpha/gmp-mparam.h
+++ b/gmp/mpn/alpha/gmp-mparam.h
@@ -1,54 +1,43 @@
 /* Alpha EV4 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2002, 2004, 2005, 2009 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2009
+Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
 
 
 /* 175MHz 21064 */
 
 /* Generated by tuneup.c, 2009-01-15, gcc 3.2 */
 
-#define MUL_TOOM22_THRESHOLD             12
-#define MUL_TOOM33_THRESHOLD             69
+#define MUL_KARATSUBA_THRESHOLD          12
+#define MUL_TOOM3_THRESHOLD              69
 #define MUL_TOOM44_THRESHOLD             88
 
 #define SQR_BASECASE_THRESHOLD            4
-#define SQR_TOOM2_THRESHOLD              20
+#define SQR_KARATSUBA_THRESHOLD          20
 #define SQR_TOOM3_THRESHOLD              62
 #define SQR_TOOM4_THRESHOLD             155
 
-#define MULLO_BASECASE_THRESHOLD          0  /* always */
-#define MULLO_DC_THRESHOLD               40
-#define MULLO_MUL_N_THRESHOLD           202
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD              40
+#define MULLOW_MUL_N_THRESHOLD          202
 
 #define DIV_SB_PREINV_THRESHOLD           0  /* preinv always */
 #define DIV_DC_THRESHOLD                 38
diff --git a/gmp/mpn/alpha/invert_limb.asm b/gmp/mpn/alpha/invert_limb.asm
index afc010f58c..99f51a30d5 100644
--- a/gmp/mpn/alpha/invert_limb.asm
+++ b/gmp/mpn/alpha/invert_limb.asm
@@ -1,95 +1,342 @@
 dnl  Alpha mpn_invert_limb -- Invert a normalized limb.
 
-dnl  Copyright 1996, 2000-2003, 2007, 2011, 2013 Free Software Foundation, Inc.
-
+dnl  Copyright 1996, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl  Inc.
+dnl
 dnl  This file is part of the GNU MP Library.
 dnl
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
 dnl
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C      cycles/limb
-C EV4:     ?
-C EV5:   137/140  (with BWX/without BWX)
-C EV6:    71/72   (with BWX/without BWX)
+C EV4:    ~175
+C EV5:    ~111-126
+C EV6:    ~52-76
 
-C This was compiler generated, with minimal manual edits.  Surely several
-C cycles could be cut with some thought.
+C  This is based on ideas of Peter L. Montgomery.
 
 ASM_START()
+
+FLOAT64($C36,9223372036854775808.0)		C 2^63
+
 PROLOGUE(mpn_invert_limb,gp)
-	LEA(	r2, approx_tab)
-	srl	r16, 54, r1
-	srl	r16, 24, r4
-	and	r16, 1, r5
-	bic	r1, 1, r7
-	lda	r4, 1(r4)
-	srl	r16, 1, r3
-	addq	r7, r2, r1
-ifelse(bwx_available_p,1,`
-	ldwu	r0, -512(r1)
-',`
-	ldq_u	r0, -512(r1)
-	extwl	r0, r7, r0
-')
-	addq	r3, r5, r3
-	mull	r0, r0, r1
-	sll	r0, 11, r0
-	mulq	r1, r4, r1
-	srl	r1, 40, r1
-	subq	r0, r1, r0
-	lda	r0, -1(r0)
-	mulq	r0, r0, r2
-	sll	r0, 60, r1
-	sll	r0, 13, r0
-	mulq	r2, r4, r2
-	subq	r1, r2, r1
-	srl	r1, 47, r1
-	addq	r0, r1, r0
-	mulq	r0, r3, r3
-	srl	r0, 1, r1
-	cmoveq	r5, 0, r1
-	subq	r1, r3, r1
-	umulh	r1, r0, r3
-	sll	r0, 31, r0
-	srl	r3, 1, r1
-	addq	r0, r1, r0
-	mulq	r0, r16, r2
-	umulh	r0, r16, r3
-	addq	r2, r16, r1
-	addq	r3, r16, r16
-	cmpult	r1, r2, r1
-	addq	r16, r1, r3
-	subq	r0, r3, r0
-	ret	r31, (r26), 1
-EPILOGUE()
-DATASTART(approx_tab,8)
-forloop(i,256,512-1,dnl
-`	.word	eval(0x7fd00/i)
-')dnl
-	SIZE(approx_tab, 512)
-	TYPE(approx_tab, object)
+	lda	r30,-16(r30)	C reserve 16 bytes of stack, used to move values between integer and FP registers
+	addq	r16,r16,r1	C r1 = 2*u mod 2^64
+	bne	r1,$73		C nonzero: take the general path
+	lda	r0,-1		C 2*u wrapped to 0: result is all ones
+	br	r31,$Lend
+$73:
+	srl	r16,1,r1	C r1 = u / 2
+	stq	r1,0(r30)	C pass u/2 to the FP unit through the stack slot
+	ldt	f11,0(r30)
+	cvtqt	f11,f1		C f1 = u/2 converted to floating point
+	LEA(r1,$C36)
+	ldt	f10,0(r1)		C f10 = 2^63
+	divt	f10,f1,f10		C f10 = 2^63 / (u / 2)
+	LEA(r2,$invtab-4096)
+	srl	r16,52,r1		C extract high 12 bits
+	addq	r1,r1,r1		C align ...0000bbbbbbbb0
+	addq	r1,r2,r1		C compute array offset
+	ldq_u	r2,0(r1)		C load quadword containing our 16 bits
+bigend(`addq	r1,1,r1')
+	extwl	r2,r1,r2		C extract desired 16 bits
+	sll	r2,48,r0	C r0 = table entry placed in the top 16 bits
+	umulh	r16,r0,r1	C r1 = high limb of u * r0
+	addq	r16,r1,r3	C r3 = u + high limb of u * r0
+	stq	r3,0(r30)	C pass r3 to the FP unit through the stack slot
+	ldt	f11,0(r30)
+	cvtqt	f11,f1
+	mult	f1,f10,f1	C scale by the FP quotient computed above
+	cvttqc	f1,f1		C convert back to integer, chopped
+	stt	f1,0(r30)
+	ldq	r4,0(r30)	C r4 = integer correction term
+	subq	r0,r4,r0	C r0 -= correction
+	umulh	r16,r0,r1	C r1 = high limb of u * r0
+	mulq	r16,r0,r2	C r2 = low limb of u * r0
+	addq	r16,r1,r3	C r3 = high limb + u
+	bge	r3,$Loop2	C r3 >= 0 (signed): go to the decrementing loop
+$Loop1:	addq	r2,r16,r2	C while r3 < 0: add u to r3:r2 and increment r0
+	cmpult	r2,r16,r1	C r1 = carry out of the low limb
+	addq	r3,r1,r3
+	addq	r0,1,r0
+	blt	r3,$Loop1
+$Loop2:	cmpult	r2,r16,r1	C r1 = borrow the low-limb subtract will generate
+	subq	r0,1,r0		C decrement r0 and subtract u from r3:r2 ...
+	subq	r3,r1,r3
+	subq	r2,r16,r2
+	bge	r3,$Loop2	C ... repeating while r3 >= 0 (signed)
+$Lend:
+	lda	r30,16(r30)	C release the stack scratch area
+	ret	r31,(r26),1
+EPILOGUE(mpn_invert_limb)
+DATASTART($invtab)
+	.word 0xffff,0xffc0,0xff80,0xff40,0xff00,0xfec0,0xfe81,0xfe41
+	.word 0xfe01,0xfdc2,0xfd83,0xfd43,0xfd04,0xfcc5,0xfc86,0xfc46
+	.word 0xfc07,0xfbc8,0xfb8a,0xfb4b,0xfb0c,0xfacd,0xfa8e,0xfa50
+	.word 0xfa11,0xf9d3,0xf994,0xf956,0xf918,0xf8d9,0xf89b,0xf85d
+	.word 0xf81f,0xf7e1,0xf7a3,0xf765,0xf727,0xf6ea,0xf6ac,0xf66e
+	.word 0xf631,0xf5f3,0xf5b6,0xf578,0xf53b,0xf4fd,0xf4c0,0xf483
+	.word 0xf446,0xf409,0xf3cc,0xf38f,0xf352,0xf315,0xf2d8,0xf29c
+	.word 0xf25f,0xf222,0xf1e6,0xf1a9,0xf16d,0xf130,0xf0f4,0xf0b8
+	.word 0xf07c,0xf03f,0xf003,0xefc7,0xef8b,0xef4f,0xef14,0xeed8
+	.word 0xee9c,0xee60,0xee25,0xede9,0xedae,0xed72,0xed37,0xecfb
+	.word 0xecc0,0xec85,0xec4a,0xec0e,0xebd3,0xeb98,0xeb5d,0xeb22
+	.word 0xeae8,0xeaad,0xea72,0xea37,0xe9fd,0xe9c2,0xe988,0xe94d
+	.word 0xe913,0xe8d8,0xe89e,0xe864,0xe829,0xe7ef,0xe7b5,0xe77b
+	.word 0xe741,0xe707,0xe6cd,0xe694,0xe65a,0xe620,0xe5e6,0xe5ad
+	.word 0xe573,0xe53a,0xe500,0xe4c7,0xe48d,0xe454,0xe41b,0xe3e2
+	.word 0xe3a9,0xe370,0xe336,0xe2fd,0xe2c5,0xe28c,0xe253,0xe21a
+	.word 0xe1e1,0xe1a9,0xe170,0xe138,0xe0ff,0xe0c7,0xe08e,0xe056
+	.word 0xe01e,0xdfe5,0xdfad,0xdf75,0xdf3d,0xdf05,0xdecd,0xde95
+	.word 0xde5d,0xde25,0xdded,0xddb6,0xdd7e,0xdd46,0xdd0f,0xdcd7
+	.word 0xdca0,0xdc68,0xdc31,0xdbf9,0xdbc2,0xdb8b,0xdb54,0xdb1d
+	.word 0xdae6,0xdaae,0xda78,0xda41,0xda0a,0xd9d3,0xd99c,0xd965
+	.word 0xd92f,0xd8f8,0xd8c1,0xd88b,0xd854,0xd81e,0xd7e8,0xd7b1
+	.word 0xd77b,0xd745,0xd70e,0xd6d8,0xd6a2,0xd66c,0xd636,0xd600
+	.word 0xd5ca,0xd594,0xd55f,0xd529,0xd4f3,0xd4bd,0xd488,0xd452
+	.word 0xd41d,0xd3e7,0xd3b2,0xd37c,0xd347,0xd312,0xd2dd,0xd2a7
+	.word 0xd272,0xd23d,0xd208,0xd1d3,0xd19e,0xd169,0xd134,0xd100
+	.word 0xd0cb,0xd096,0xd061,0xd02d,0xcff8,0xcfc4,0xcf8f,0xcf5b
+	.word 0xcf26,0xcef2,0xcebe,0xce89,0xce55,0xce21,0xcded,0xcdb9
+	.word 0xcd85,0xcd51,0xcd1d,0xcce9,0xccb5,0xcc81,0xcc4e,0xcc1a
+	.word 0xcbe6,0xcbb3,0xcb7f,0xcb4c,0xcb18,0xcae5,0xcab1,0xca7e
+	.word 0xca4b,0xca17,0xc9e4,0xc9b1,0xc97e,0xc94b,0xc918,0xc8e5
+	.word 0xc8b2,0xc87f,0xc84c,0xc819,0xc7e7,0xc7b4,0xc781,0xc74f
+	.word 0xc71c,0xc6e9,0xc6b7,0xc684,0xc652,0xc620,0xc5ed,0xc5bb
+	.word 0xc589,0xc557,0xc524,0xc4f2,0xc4c0,0xc48e,0xc45c,0xc42a
+	.word 0xc3f8,0xc3c7,0xc395,0xc363,0xc331,0xc300,0xc2ce,0xc29c
+	.word 0xc26b,0xc239,0xc208,0xc1d6,0xc1a5,0xc174,0xc142,0xc111
+	.word 0xc0e0,0xc0af,0xc07e,0xc04d,0xc01c,0xbfeb,0xbfba,0xbf89
+	.word 0xbf58,0xbf27,0xbef6,0xbec5,0xbe95,0xbe64,0xbe33,0xbe03
+	.word 0xbdd2,0xbda2,0xbd71,0xbd41,0xbd10,0xbce0,0xbcb0,0xbc80
+	.word 0xbc4f,0xbc1f,0xbbef,0xbbbf,0xbb8f,0xbb5f,0xbb2f,0xbaff
+	.word 0xbacf,0xba9f,0xba6f,0xba40,0xba10,0xb9e0,0xb9b1,0xb981
+	.word 0xb951,0xb922,0xb8f2,0xb8c3,0xb894,0xb864,0xb835,0xb806
+	.word 0xb7d6,0xb7a7,0xb778,0xb749,0xb71a,0xb6eb,0xb6bc,0xb68d
+	.word 0xb65e,0xb62f,0xb600,0xb5d1,0xb5a2,0xb574,0xb545,0xb516
+	.word 0xb4e8,0xb4b9,0xb48a,0xb45c,0xb42e,0xb3ff,0xb3d1,0xb3a2
+	.word 0xb374,0xb346,0xb318,0xb2e9,0xb2bb,0xb28d,0xb25f,0xb231
+	.word 0xb203,0xb1d5,0xb1a7,0xb179,0xb14b,0xb11d,0xb0f0,0xb0c2
+	.word 0xb094,0xb067,0xb039,0xb00b,0xafde,0xafb0,0xaf83,0xaf55
+	.word 0xaf28,0xaefb,0xaecd,0xaea0,0xae73,0xae45,0xae18,0xadeb
+	.word 0xadbe,0xad91,0xad64,0xad37,0xad0a,0xacdd,0xacb0,0xac83
+	.word 0xac57,0xac2a,0xabfd,0xabd0,0xaba4,0xab77,0xab4a,0xab1e
+	.word 0xaaf1,0xaac5,0xaa98,0xaa6c,0xaa40,0xaa13,0xa9e7,0xa9bb
+	.word 0xa98e,0xa962,0xa936,0xa90a,0xa8de,0xa8b2,0xa886,0xa85a
+	.word 0xa82e,0xa802,0xa7d6,0xa7aa,0xa77e,0xa753,0xa727,0xa6fb
+	.word 0xa6d0,0xa6a4,0xa678,0xa64d,0xa621,0xa5f6,0xa5ca,0xa59f
+	.word 0xa574,0xa548,0xa51d,0xa4f2,0xa4c6,0xa49b,0xa470,0xa445
+	.word 0xa41a,0xa3ef,0xa3c4,0xa399,0xa36e,0xa343,0xa318,0xa2ed
+	.word 0xa2c2,0xa297,0xa26d,0xa242,0xa217,0xa1ed,0xa1c2,0xa197
+	.word 0xa16d,0xa142,0xa118,0xa0ed,0xa0c3,0xa098,0xa06e,0xa044
+	.word 0xa01a,0x9fef,0x9fc5,0x9f9b,0x9f71,0x9f47,0x9f1c,0x9ef2
+	.word 0x9ec8,0x9e9e,0x9e74,0x9e4b,0x9e21,0x9df7,0x9dcd,0x9da3
+	.word 0x9d79,0x9d50,0x9d26,0x9cfc,0x9cd3,0x9ca9,0x9c80,0x9c56
+	.word 0x9c2d,0x9c03,0x9bda,0x9bb0,0x9b87,0x9b5e,0x9b34,0x9b0b
+	.word 0x9ae2,0x9ab9,0x9a8f,0x9a66,0x9a3d,0x9a14,0x99eb,0x99c2
+	.word 0x9999,0x9970,0x9947,0x991e,0x98f6,0x98cd,0x98a4,0x987b
+	.word 0x9852,0x982a,0x9801,0x97d8,0x97b0,0x9787,0x975f,0x9736
+	.word 0x970e,0x96e5,0x96bd,0x9695,0x966c,0x9644,0x961c,0x95f3
+	.word 0x95cb,0x95a3,0x957b,0x9553,0x952b,0x9503,0x94db,0x94b3
+	.word 0x948b,0x9463,0x943b,0x9413,0x93eb,0x93c3,0x939b,0x9374
+	.word 0x934c,0x9324,0x92fd,0x92d5,0x92ad,0x9286,0x925e,0x9237
+	.word 0x920f,0x91e8,0x91c0,0x9199,0x9172,0x914a,0x9123,0x90fc
+	.word 0x90d4,0x90ad,0x9086,0x905f,0x9038,0x9011,0x8fea,0x8fc3
+	.word 0x8f9c,0x8f75,0x8f4e,0x8f27,0x8f00,0x8ed9,0x8eb2,0x8e8b
+	.word 0x8e65,0x8e3e,0x8e17,0x8df1,0x8dca,0x8da3,0x8d7d,0x8d56
+	.word 0x8d30,0x8d09,0x8ce3,0x8cbc,0x8c96,0x8c6f,0x8c49,0x8c23
+	.word 0x8bfc,0x8bd6,0x8bb0,0x8b8a,0x8b64,0x8b3d,0x8b17,0x8af1
+	.word 0x8acb,0x8aa5,0x8a7f,0x8a59,0x8a33,0x8a0d,0x89e7,0x89c1
+	.word 0x899c,0x8976,0x8950,0x892a,0x8904,0x88df,0x88b9,0x8893
+	.word 0x886e,0x8848,0x8823,0x87fd,0x87d8,0x87b2,0x878d,0x8767
+	.word 0x8742,0x871d,0x86f7,0x86d2,0x86ad,0x8687,0x8662,0x863d
+	.word 0x8618,0x85f3,0x85ce,0x85a9,0x8583,0x855e,0x8539,0x8514
+	.word 0x84f0,0x84cb,0x84a6,0x8481,0x845c,0x8437,0x8412,0x83ee
+	.word 0x83c9,0x83a4,0x8380,0x835b,0x8336,0x8312,0x82ed,0x82c9
+	.word 0x82a4,0x8280,0x825b,0x8237,0x8212,0x81ee,0x81ca,0x81a5
+	.word 0x8181,0x815d,0x8138,0x8114,0x80f0,0x80cc,0x80a8,0x8084
+	.word 0x8060,0x803c,0x8018,0x7ff4,0x7fd0,0x7fac,0x7f88,0x7f64
+	.word 0x7f40,0x7f1c,0x7ef8,0x7ed4,0x7eb1,0x7e8d,0x7e69,0x7e45
+	.word 0x7e22,0x7dfe,0x7ddb,0x7db7,0x7d93,0x7d70,0x7d4c,0x7d29
+	.word 0x7d05,0x7ce2,0x7cbf,0x7c9b,0x7c78,0x7c55,0x7c31,0x7c0e
+	.word 0x7beb,0x7bc7,0x7ba4,0x7b81,0x7b5e,0x7b3b,0x7b18,0x7af5
+	.word 0x7ad2,0x7aaf,0x7a8c,0x7a69,0x7a46,0x7a23,0x7a00,0x79dd
+	.word 0x79ba,0x7997,0x7975,0x7952,0x792f,0x790c,0x78ea,0x78c7
+	.word 0x78a4,0x7882,0x785f,0x783c,0x781a,0x77f7,0x77d5,0x77b2
+	.word 0x7790,0x776e,0x774b,0x7729,0x7706,0x76e4,0x76c2,0x76a0
+	.word 0x767d,0x765b,0x7639,0x7617,0x75f5,0x75d2,0x75b0,0x758e
+	.word 0x756c,0x754a,0x7528,0x7506,0x74e4,0x74c2,0x74a0,0x747e
+	.word 0x745d,0x743b,0x7419,0x73f7,0x73d5,0x73b4,0x7392,0x7370
+	.word 0x734f,0x732d,0x730b,0x72ea,0x72c8,0x72a7,0x7285,0x7264
+	.word 0x7242,0x7221,0x71ff,0x71de,0x71bc,0x719b,0x717a,0x7158
+	.word 0x7137,0x7116,0x70f5,0x70d3,0x70b2,0x7091,0x7070,0x704f
+	.word 0x702e,0x700c,0x6feb,0x6fca,0x6fa9,0x6f88,0x6f67,0x6f46
+	.word 0x6f26,0x6f05,0x6ee4,0x6ec3,0x6ea2,0x6e81,0x6e60,0x6e40
+	.word 0x6e1f,0x6dfe,0x6dde,0x6dbd,0x6d9c,0x6d7c,0x6d5b,0x6d3a
+	.word 0x6d1a,0x6cf9,0x6cd9,0x6cb8,0x6c98,0x6c77,0x6c57,0x6c37
+	.word 0x6c16,0x6bf6,0x6bd6,0x6bb5,0x6b95,0x6b75,0x6b54,0x6b34
+	.word 0x6b14,0x6af4,0x6ad4,0x6ab4,0x6a94,0x6a73,0x6a53,0x6a33
+	.word 0x6a13,0x69f3,0x69d3,0x69b3,0x6993,0x6974,0x6954,0x6934
+	.word 0x6914,0x68f4,0x68d4,0x68b5,0x6895,0x6875,0x6855,0x6836
+	.word 0x6816,0x67f6,0x67d7,0x67b7,0x6798,0x6778,0x6758,0x6739
+	.word 0x6719,0x66fa,0x66db,0x66bb,0x669c,0x667c,0x665d,0x663e
+	.word 0x661e,0x65ff,0x65e0,0x65c0,0x65a1,0x6582,0x6563,0x6544
+	.word 0x6524,0x6505,0x64e6,0x64c7,0x64a8,0x6489,0x646a,0x644b
+	.word 0x642c,0x640d,0x63ee,0x63cf,0x63b0,0x6391,0x6373,0x6354
+	.word 0x6335,0x6316,0x62f7,0x62d9,0x62ba,0x629b,0x627c,0x625e
+	.word 0x623f,0x6221,0x6202,0x61e3,0x61c5,0x61a6,0x6188,0x6169
+	.word 0x614b,0x612c,0x610e,0x60ef,0x60d1,0x60b3,0x6094,0x6076
+	.word 0x6058,0x6039,0x601b,0x5ffd,0x5fdf,0x5fc0,0x5fa2,0x5f84
+	.word 0x5f66,0x5f48,0x5f2a,0x5f0b,0x5eed,0x5ecf,0x5eb1,0x5e93
+	.word 0x5e75,0x5e57,0x5e39,0x5e1b,0x5dfd,0x5de0,0x5dc2,0x5da4
+	.word 0x5d86,0x5d68,0x5d4a,0x5d2d,0x5d0f,0x5cf1,0x5cd3,0x5cb6
+	.word 0x5c98,0x5c7a,0x5c5d,0x5c3f,0x5c21,0x5c04,0x5be6,0x5bc9
+	.word 0x5bab,0x5b8e,0x5b70,0x5b53,0x5b35,0x5b18,0x5afb,0x5add
+	.word 0x5ac0,0x5aa2,0x5a85,0x5a68,0x5a4b,0x5a2d,0x5a10,0x59f3
+	.word 0x59d6,0x59b8,0x599b,0x597e,0x5961,0x5944,0x5927,0x590a
+	.word 0x58ed,0x58d0,0x58b3,0x5896,0x5879,0x585c,0x583f,0x5822
+	.word 0x5805,0x57e8,0x57cb,0x57ae,0x5791,0x5775,0x5758,0x573b
+	.word 0x571e,0x5702,0x56e5,0x56c8,0x56ac,0x568f,0x5672,0x5656
+	.word 0x5639,0x561c,0x5600,0x55e3,0x55c7,0x55aa,0x558e,0x5571
+	.word 0x5555,0x5538,0x551c,0x5500,0x54e3,0x54c7,0x54aa,0x548e
+	.word 0x5472,0x5456,0x5439,0x541d,0x5401,0x53e5,0x53c8,0x53ac
+	.word 0x5390,0x5374,0x5358,0x533c,0x5320,0x5304,0x52e8,0x52cb
+	.word 0x52af,0x5293,0x5277,0x525c,0x5240,0x5224,0x5208,0x51ec
+	.word 0x51d0,0x51b4,0x5198,0x517c,0x5161,0x5145,0x5129,0x510d
+	.word 0x50f2,0x50d6,0x50ba,0x509f,0x5083,0x5067,0x504c,0x5030
+	.word 0x5015,0x4ff9,0x4fdd,0x4fc2,0x4fa6,0x4f8b,0x4f6f,0x4f54
+	.word 0x4f38,0x4f1d,0x4f02,0x4ee6,0x4ecb,0x4eb0,0x4e94,0x4e79
+	.word 0x4e5e,0x4e42,0x4e27,0x4e0c,0x4df0,0x4dd5,0x4dba,0x4d9f
+	.word 0x4d84,0x4d69,0x4d4d,0x4d32,0x4d17,0x4cfc,0x4ce1,0x4cc6
+	.word 0x4cab,0x4c90,0x4c75,0x4c5a,0x4c3f,0x4c24,0x4c09,0x4bee
+	.word 0x4bd3,0x4bb9,0x4b9e,0x4b83,0x4b68,0x4b4d,0x4b32,0x4b18
+	.word 0x4afd,0x4ae2,0x4ac7,0x4aad,0x4a92,0x4a77,0x4a5d,0x4a42
+	.word 0x4a27,0x4a0d,0x49f2,0x49d8,0x49bd,0x49a3,0x4988,0x496e
+	.word 0x4953,0x4939,0x491e,0x4904,0x48e9,0x48cf,0x48b5,0x489a
+	.word 0x4880,0x4865,0x484b,0x4831,0x4817,0x47fc,0x47e2,0x47c8
+	.word 0x47ae,0x4793,0x4779,0x475f,0x4745,0x472b,0x4711,0x46f6
+	.word 0x46dc,0x46c2,0x46a8,0x468e,0x4674,0x465a,0x4640,0x4626
+	.word 0x460c,0x45f2,0x45d8,0x45be,0x45a5,0x458b,0x4571,0x4557
+	.word 0x453d,0x4523,0x4509,0x44f0,0x44d6,0x44bc,0x44a2,0x4489
+	.word 0x446f,0x4455,0x443c,0x4422,0x4408,0x43ef,0x43d5,0x43bc
+	.word 0x43a2,0x4388,0x436f,0x4355,0x433c,0x4322,0x4309,0x42ef
+	.word 0x42d6,0x42bc,0x42a3,0x428a,0x4270,0x4257,0x423d,0x4224
+	.word 0x420b,0x41f2,0x41d8,0x41bf,0x41a6,0x418c,0x4173,0x415a
+	.word 0x4141,0x4128,0x410e,0x40f5,0x40dc,0x40c3,0x40aa,0x4091
+	.word 0x4078,0x405f,0x4046,0x402d,0x4014,0x3ffb,0x3fe2,0x3fc9
+	.word 0x3fb0,0x3f97,0x3f7e,0x3f65,0x3f4c,0x3f33,0x3f1a,0x3f01
+	.word 0x3ee8,0x3ed0,0x3eb7,0x3e9e,0x3e85,0x3e6c,0x3e54,0x3e3b
+	.word 0x3e22,0x3e0a,0x3df1,0x3dd8,0x3dc0,0x3da7,0x3d8e,0x3d76
+	.word 0x3d5d,0x3d45,0x3d2c,0x3d13,0x3cfb,0x3ce2,0x3cca,0x3cb1
+	.word 0x3c99,0x3c80,0x3c68,0x3c50,0x3c37,0x3c1f,0x3c06,0x3bee
+	.word 0x3bd6,0x3bbd,0x3ba5,0x3b8d,0x3b74,0x3b5c,0x3b44,0x3b2b
+	.word 0x3b13,0x3afb,0x3ae3,0x3acb,0x3ab2,0x3a9a,0x3a82,0x3a6a
+	.word 0x3a52,0x3a3a,0x3a22,0x3a09,0x39f1,0x39d9,0x39c1,0x39a9
+	.word 0x3991,0x3979,0x3961,0x3949,0x3931,0x3919,0x3901,0x38ea
+	.word 0x38d2,0x38ba,0x38a2,0x388a,0x3872,0x385a,0x3843,0x382b
+	.word 0x3813,0x37fb,0x37e3,0x37cc,0x37b4,0x379c,0x3785,0x376d
+	.word 0x3755,0x373e,0x3726,0x370e,0x36f7,0x36df,0x36c8,0x36b0
+	.word 0x3698,0x3681,0x3669,0x3652,0x363a,0x3623,0x360b,0x35f4
+	.word 0x35dc,0x35c5,0x35ae,0x3596,0x357f,0x3567,0x3550,0x3539
+	.word 0x3521,0x350a,0x34f3,0x34db,0x34c4,0x34ad,0x3496,0x347e
+	.word 0x3467,0x3450,0x3439,0x3422,0x340a,0x33f3,0x33dc,0x33c5
+	.word 0x33ae,0x3397,0x3380,0x3368,0x3351,0x333a,0x3323,0x330c
+	.word 0x32f5,0x32de,0x32c7,0x32b0,0x3299,0x3282,0x326c,0x3255
+	.word 0x323e,0x3227,0x3210,0x31f9,0x31e2,0x31cb,0x31b5,0x319e
+	.word 0x3187,0x3170,0x3159,0x3143,0x312c,0x3115,0x30fe,0x30e8
+	.word 0x30d1,0x30ba,0x30a4,0x308d,0x3076,0x3060,0x3049,0x3033
+	.word 0x301c,0x3005,0x2fef,0x2fd8,0x2fc2,0x2fab,0x2f95,0x2f7e
+	.word 0x2f68,0x2f51,0x2f3b,0x2f24,0x2f0e,0x2ef8,0x2ee1,0x2ecb
+	.word 0x2eb4,0x2e9e,0x2e88,0x2e71,0x2e5b,0x2e45,0x2e2e,0x2e18
+	.word 0x2e02,0x2dec,0x2dd5,0x2dbf,0x2da9,0x2d93,0x2d7c,0x2d66
+	.word 0x2d50,0x2d3a,0x2d24,0x2d0e,0x2cf8,0x2ce1,0x2ccb,0x2cb5
+	.word 0x2c9f,0x2c89,0x2c73,0x2c5d,0x2c47,0x2c31,0x2c1b,0x2c05
+	.word 0x2bef,0x2bd9,0x2bc3,0x2bad,0x2b97,0x2b81,0x2b6c,0x2b56
+	.word 0x2b40,0x2b2a,0x2b14,0x2afe,0x2ae8,0x2ad3,0x2abd,0x2aa7
+	.word 0x2a91,0x2a7c,0x2a66,0x2a50,0x2a3a,0x2a25,0x2a0f,0x29f9
+	.word 0x29e4,0x29ce,0x29b8,0x29a3,0x298d,0x2977,0x2962,0x294c
+	.word 0x2937,0x2921,0x290c,0x28f6,0x28e0,0x28cb,0x28b5,0x28a0
+	.word 0x288b,0x2875,0x2860,0x284a,0x2835,0x281f,0x280a,0x27f5
+	.word 0x27df,0x27ca,0x27b4,0x279f,0x278a,0x2774,0x275f,0x274a
+	.word 0x2735,0x271f,0x270a,0x26f5,0x26e0,0x26ca,0x26b5,0x26a0
+	.word 0x268b,0x2676,0x2660,0x264b,0x2636,0x2621,0x260c,0x25f7
+	.word 0x25e2,0x25cd,0x25b8,0x25a2,0x258d,0x2578,0x2563,0x254e
+	.word 0x2539,0x2524,0x250f,0x24fa,0x24e5,0x24d1,0x24bc,0x24a7
+	.word 0x2492,0x247d,0x2468,0x2453,0x243e,0x2429,0x2415,0x2400
+	.word 0x23eb,0x23d6,0x23c1,0x23ad,0x2398,0x2383,0x236e,0x235a
+	.word 0x2345,0x2330,0x231c,0x2307,0x22f2,0x22dd,0x22c9,0x22b4
+	.word 0x22a0,0x228b,0x2276,0x2262,0x224d,0x2239,0x2224,0x2210
+	.word 0x21fb,0x21e6,0x21d2,0x21bd,0x21a9,0x2194,0x2180,0x216c
+	.word 0x2157,0x2143,0x212e,0x211a,0x2105,0x20f1,0x20dd,0x20c8
+	.word 0x20b4,0x20a0,0x208b,0x2077,0x2063,0x204e,0x203a,0x2026
+	.word 0x2012,0x1ffd,0x1fe9,0x1fd5,0x1fc1,0x1fac,0x1f98,0x1f84
+	.word 0x1f70,0x1f5c,0x1f47,0x1f33,0x1f1f,0x1f0b,0x1ef7,0x1ee3
+	.word 0x1ecf,0x1ebb,0x1ea7,0x1e93,0x1e7f,0x1e6a,0x1e56,0x1e42
+	.word 0x1e2e,0x1e1a,0x1e06,0x1df3,0x1ddf,0x1dcb,0x1db7,0x1da3
+	.word 0x1d8f,0x1d7b,0x1d67,0x1d53,0x1d3f,0x1d2b,0x1d18,0x1d04
+	.word 0x1cf0,0x1cdc,0x1cc8,0x1cb5,0x1ca1,0x1c8d,0x1c79,0x1c65
+	.word 0x1c52,0x1c3e,0x1c2a,0x1c17,0x1c03,0x1bef,0x1bdb,0x1bc8
+	.word 0x1bb4,0x1ba0,0x1b8d,0x1b79,0x1b66,0x1b52,0x1b3e,0x1b2b
+	.word 0x1b17,0x1b04,0x1af0,0x1add,0x1ac9,0x1ab6,0x1aa2,0x1a8f
+	.word 0x1a7b,0x1a68,0x1a54,0x1a41,0x1a2d,0x1a1a,0x1a06,0x19f3
+	.word 0x19e0,0x19cc,0x19b9,0x19a5,0x1992,0x197f,0x196b,0x1958
+	.word 0x1945,0x1931,0x191e,0x190b,0x18f8,0x18e4,0x18d1,0x18be
+	.word 0x18ab,0x1897,0x1884,0x1871,0x185e,0x184b,0x1837,0x1824
+	.word 0x1811,0x17fe,0x17eb,0x17d8,0x17c4,0x17b1,0x179e,0x178b
+	.word 0x1778,0x1765,0x1752,0x173f,0x172c,0x1719,0x1706,0x16f3
+	.word 0x16e0,0x16cd,0x16ba,0x16a7,0x1694,0x1681,0x166e,0x165b
+	.word 0x1648,0x1635,0x1623,0x1610,0x15fd,0x15ea,0x15d7,0x15c4
+	.word 0x15b1,0x159f,0x158c,0x1579,0x1566,0x1553,0x1541,0x152e
+	.word 0x151b,0x1508,0x14f6,0x14e3,0x14d0,0x14bd,0x14ab,0x1498
+	.word 0x1485,0x1473,0x1460,0x144d,0x143b,0x1428,0x1416,0x1403
+	.word 0x13f0,0x13de,0x13cb,0x13b9,0x13a6,0x1394,0x1381,0x136f
+	.word 0x135c,0x1349,0x1337,0x1325,0x1312,0x1300,0x12ed,0x12db
+	.word 0x12c8,0x12b6,0x12a3,0x1291,0x127f,0x126c,0x125a,0x1247
+	.word 0x1235,0x1223,0x1210,0x11fe,0x11ec,0x11d9,0x11c7,0x11b5
+	.word 0x11a3,0x1190,0x117e,0x116c,0x1159,0x1147,0x1135,0x1123
+	.word 0x1111,0x10fe,0x10ec,0x10da,0x10c8,0x10b6,0x10a4,0x1091
+	.word 0x107f,0x106d,0x105b,0x1049,0x1037,0x1025,0x1013,0x1001
+	.word 0x0fef,0x0fdc,0x0fca,0x0fb8,0x0fa6,0x0f94,0x0f82,0x0f70
+	.word 0x0f5e,0x0f4c,0x0f3a,0x0f28,0x0f17,0x0f05,0x0ef3,0x0ee1
+	.word 0x0ecf,0x0ebd,0x0eab,0x0e99,0x0e87,0x0e75,0x0e64,0x0e52
+	.word 0x0e40,0x0e2e,0x0e1c,0x0e0a,0x0df9,0x0de7,0x0dd5,0x0dc3
+	.word 0x0db2,0x0da0,0x0d8e,0x0d7c,0x0d6b,0x0d59,0x0d47,0x0d35
+	.word 0x0d24,0x0d12,0x0d00,0x0cef,0x0cdd,0x0ccb,0x0cba,0x0ca8
+	.word 0x0c97,0x0c85,0x0c73,0x0c62,0x0c50,0x0c3f,0x0c2d,0x0c1c
+	.word 0x0c0a,0x0bf8,0x0be7,0x0bd5,0x0bc4,0x0bb2,0x0ba1,0x0b8f
+	.word 0x0b7e,0x0b6c,0x0b5b,0x0b4a,0x0b38,0x0b27,0x0b15,0x0b04
+	.word 0x0af2,0x0ae1,0x0ad0,0x0abe,0x0aad,0x0a9c,0x0a8a,0x0a79
+	.word 0x0a68,0x0a56,0x0a45,0x0a34,0x0a22,0x0a11,0x0a00,0x09ee
+	.word 0x09dd,0x09cc,0x09bb,0x09a9,0x0998,0x0987,0x0976,0x0965
+	.word 0x0953,0x0942,0x0931,0x0920,0x090f,0x08fe,0x08ec,0x08db
+	.word 0x08ca,0x08b9,0x08a8,0x0897,0x0886,0x0875,0x0864,0x0853
+	.word 0x0842,0x0831,0x081f,0x080e,0x07fd,0x07ec,0x07db,0x07ca
+	.word 0x07b9,0x07a8,0x0798,0x0787,0x0776,0x0765,0x0754,0x0743
+	.word 0x0732,0x0721,0x0710,0x06ff,0x06ee,0x06dd,0x06cd,0x06bc
+	.word 0x06ab,0x069a,0x0689,0x0678,0x0668,0x0657,0x0646,0x0635
+	.word 0x0624,0x0614,0x0603,0x05f2,0x05e1,0x05d1,0x05c0,0x05af
+	.word 0x059e,0x058e,0x057d,0x056c,0x055c,0x054b,0x053a,0x052a
+	.word 0x0519,0x0508,0x04f8,0x04e7,0x04d6,0x04c6,0x04b5,0x04a5
+	.word 0x0494,0x0484,0x0473,0x0462,0x0452,0x0441,0x0431,0x0420
+	.word 0x0410,0x03ff,0x03ef,0x03de,0x03ce,0x03bd,0x03ad,0x039c
+	.word 0x038c,0x037b,0x036b,0x035b,0x034a,0x033a,0x0329,0x0319
+	.word 0x0309,0x02f8,0x02e8,0x02d7,0x02c7,0x02b7,0x02a6,0x0296
+	.word 0x0286,0x0275,0x0265,0x0255,0x0245,0x0234,0x0224,0x0214
+	.word 0x0204,0x01f3,0x01e3,0x01d3,0x01c3,0x01b2,0x01a2,0x0192
+	.word 0x0182,0x0172,0x0161,0x0151,0x0141,0x0131,0x0121,0x0111
+	.word 0x0101,0x00f0,0x00e0,0x00d0,0x00c0,0x00b0,0x00a0,0x0090
+	.word 0x0080,0x0070,0x0060,0x0050,0x0040,0x0030,0x0020,0x0010
 DATAEND()
 ASM_END()
diff --git a/gmp/mpn/alpha/lshift.asm b/gmp/mpn/alpha/lshift.asm
index c62a856aea..eb5b2a0b68 100644
--- a/gmp/mpn/alpha/lshift.asm
+++ b/gmp/mpn/alpha/lshift.asm
@@ -1,39 +1,28 @@
 dnl  Alpha mpn_lshift -- Shift a number left.
 
-dnl  Copyright 1994, 1995, 2000, 2003, 2009 Free Software Foundation, Inc.
+dnl  Copyright 1994, 1995, 2000, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C      cycles/limb
-C EV4:     ?
-C EV5:     3.25
-C EV6:     1.75
+C EV4:     4.75
+C EV5:     4
+C EV6:     2
 
 C  INPUT PARAMETERS
 C  rp	r16
@@ -46,137 +35,63 @@ ASM_START()
 PROLOGUE(mpn_lshift)
 	s8addq	r18,r17,r17	C make r17 point at end of s1
 	ldq	r4,-8(r17)	C load first limb
-	subq	r31,r19,r20
+	subq	r17,8,r17	C step r17 down past the limb just loaded
+	subq	r31,r19,r7	C r7 = 0 - r19, count for the complementary srl
 	s8addq	r18,r16,r16	C make r16 point at end of RES
 	subq	r18,1,r18
-	and	r18,4-1,r28	C number of limbs in first loop
-	srl	r4,r20,r0	C compute function result
+	and	r18,4-1,r20	C number of limbs in first loop
+	srl	r4,r7,r0	C compute function result
 
-	beq	r28,L(L0)
-	subq	r18,r28,r18
+	beq	r20,$L0		C first loop has nothing to do
+	subq	r18,r20,r18	C r18 = limbs left for the 4-way loop
 
 	ALIGN(8)
-L(top0):
-	ldq	r3,-16(r17)
+$Loop0:	ldq	r3,-8(r17)	C r3 = next lower source limb
 	subq	r16,8,r16
-	sll	r4,r19,r5
 	subq	r17,8,r17
-	subq	r28,1,r28
-	srl	r3,r20,r6
+	subq	r20,1,r20	C one limb fewer for this loop
+	sll	r4,r19,r5	C upper bits come from the current limb
+	srl	r3,r7,r6	C lower bits come from the limb below it
 	bis	r3,r3,r4
 	bis	r5,r6,r8
 	stq	r8,0(r16)
-	bne	r28,L(top0)
+	bne	r20,$Loop0	C repeat until the first-loop count is zero
 
-L(L0):	sll	r4,r19,r24
-	beq	r18,L(end)
-C warm up phase 1
-	ldq	r1,-16(r17)
-	subq	r18,4,r18
-	ldq	r2,-24(r17)
-	ldq	r3,-32(r17)
-	ldq	r4,-40(r17)
-C warm up phase 2
-	srl	r1,r20,r7
-	sll	r1,r19,r21
-	srl	r2,r20,r8
-	beq	r18,L(end1)
-	ldq	r1,-48(r17)
-	sll	r2,r19,r22
-	ldq	r2,-56(r17)
-	srl	r3,r20,r5
-	bis	r7,r24,r7
-	sll	r3,r19,r23
-	bis	r8,r21,r8
-	srl	r4,r20,r6
-	ldq	r3,-64(r17)
-	sll	r4,r19,r24
-	ldq	r4,-72(r17)
-	subq	r18,4,r18
-	beq	r18,L(end2)
-	ALIGN(16)
-C main loop
-L(top):	stq	r7,-8(r16)
-	bis	r5,r22,r5
-	stq	r8,-16(r16)
-	bis	r6,r23,r6
-
-	srl	r1,r20,r7
-	subq	r18,4,r18
-	sll	r1,r19,r21
-	unop	C ldq	r31,-96(r17)
-
-	srl	r2,r20,r8
-	ldq	r1,-80(r17)
-	sll	r2,r19,r22
-	ldq	r2,-88(r17)
-
-	stq	r5,-24(r16)
-	bis	r7,r24,r7
-	stq	r6,-32(r16)
-	bis	r8,r21,r8
-
-	srl	r3,r20,r5
-	unop	C ldq	r31,-96(r17)
-	sll	r3,r19,r23
+$L0:	beq	r18,$Lend	C no groups of four limbs remain
+
+	ALIGN(8)
+$Loop:	ldq	r3,-8(r17)	C r3 = next lower source limb
 	subq	r16,32,r16
+	subq	r18,4,r18	C four limbs are handled per pass
+	sll	r4,r19,r5	C upper bits come from the current limb
+	srl	r3,r7,r6	C lower bits come from the limb below it
+
+	ldq	r4,-16(r17)
+	sll	r3,r19,r1
+	bis	r5,r6,r8	C merge both parts into one result limb
+	stq	r8,24(r16)	C highest of the four result limbs
+	srl	r4,r7,r2
+
+	ldq	r3,-24(r17)
+	sll	r4,r19,r5
+	bis	r1,r2,r8
+	stq	r8,16(r16)
+	srl	r3,r7,r6
 
-	srl	r4,r20,r6
-	ldq	r3,-96(r17)
-	sll	r4,r19,r24
-	ldq	r4,-104(r17)
+	ldq	r4,-32(r17)	C r4 stays live into the next pass or $Lend
+	sll	r3,r19,r1
+	bis	r5,r6,r8
+	stq	r8,8(r16)
+	srl	r4,r7,r2
 
 	subq	r17,32,r17
-	bne	r18,L(top)
-C cool down phase 2/1
-L(end2):
-	stq	r7,-8(r16)
-	bis	r5,r22,r5
-	stq	r8,-16(r16)
-	bis	r6,r23,r6
-	srl	r1,r20,r7
-	sll	r1,r19,r21
-	srl	r2,r20,r8
-	sll	r2,r19,r22
-	stq	r5,-24(r16)
-	bis	r7,r24,r7
-	stq	r6,-32(r16)
-	bis	r8,r21,r8
-	srl	r3,r20,r5
-	sll	r3,r19,r23
-	srl	r4,r20,r6
-	sll	r4,r19,r24
-C cool down phase 2/2
-	stq	r7,-40(r16)
-	bis	r5,r22,r5
-	stq	r8,-48(r16)
-	bis	r6,r23,r6
-	stq	r5,-56(r16)
-	stq	r6,-64(r16)
-C cool down phase 2/3
-	stq	r24,-72(r16)
-	ret	r31,(r26),1
+	bis	r1,r2,r8
+	stq	r8,0(r16)	C lowest of the four result limbs
 
-C cool down phase 1/1
-L(end1):
-	sll	r2,r19,r22
-	srl	r3,r20,r5
-	bis	r7,r24,r7
-	sll	r3,r19,r23
-	bis	r8,r21,r8
-	srl	r4,r20,r6
-	sll	r4,r19,r24
-C cool down phase 1/2
-	stq	r7,-8(r16)
-	bis	r5,r22,r5
-	stq	r8,-16(r16)
-	bis	r6,r23,r6
-	stq	r5,-24(r16)
-	stq	r6,-32(r16)
-	stq	r24,-40(r16)
-	ret	r31,(r26),1
+	bgt	r18,$Loop	C repeat while limbs remain
 
-L(end):	stq	r24,-8(r16)
+$Lend:	sll	r4,r19,r8	C lowest limb: no limb below contributes bits
+	stq	r8,-8(r16)
 	ret	r31,(r26),1
 EPILOGUE(mpn_lshift)
 ASM_END()
diff --git a/gmp/mpn/alpha/mod_34lsub1.asm b/gmp/mpn/alpha/mod_34lsub1.asm
index 1b03b637d8..e5c1d221f9 100644
--- a/gmp/mpn/alpha/mod_34lsub1.asm
+++ b/gmp/mpn/alpha/mod_34lsub1.asm
@@ -3,30 +3,19 @@ dnl Alpha mpn_mod_34lsub1.
 dnl  Copyright 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/mode1o.asm b/gmp/mpn/alpha/mode1o.asm
index 96dccc73ee..0611cd8acb 100644
--- a/gmp/mpn/alpha/mode1o.asm
+++ b/gmp/mpn/alpha/mode1o.asm
@@ -1,32 +1,21 @@
 dnl  Alpha mpn_modexact_1c_odd -- mpn exact remainder
 
 dnl  Copyright 2003, 2004 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/mul_1.asm b/gmp/mpn/alpha/mul_1.asm
index a7cdbcf8eb..30b17021ba 100644
--- a/gmp/mpn/alpha/mul_1.asm
+++ b/gmp/mpn/alpha/mul_1.asm
@@ -4,30 +4,19 @@ dnl  the result in a second limb vector.
 dnl  Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/rshift.asm b/gmp/mpn/alpha/rshift.asm
index 6e1e214558..ccedff8071 100644
--- a/gmp/mpn/alpha/rshift.asm
+++ b/gmp/mpn/alpha/rshift.asm
@@ -1,39 +1,28 @@
 dnl  Alpha mpn_rshift -- Shift a number right.
 
-dnl  Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.
+dnl  Copyright 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C      cycles/limb
-C EV4:     ?
-C EV5:     3.25
-C EV6:     1.75
+C EV4:     4.75
+C EV5:     3.75
+C EV6:     2
 
 C  INPUT PARAMETERS
 C  rp	r16
@@ -45,136 +34,62 @@ C  cnt	r19
 ASM_START()
 PROLOGUE(mpn_rshift)
 	ldq	r4,0(r17)	C load first limb
-	subq	r31,r19,r20
+	addq	r17,8,r17	C step r17 up past the limb just loaded
+	subq	r31,r19,r7	C r7 = 0 - r19, count for the complementary sll
 	subq	r18,1,r18
-	and	r18,4-1,r28	C number of limbs in first loop
-	sll	r4,r20,r0	C compute function result
+	and	r18,4-1,r20	C number of limbs in first loop
+	sll	r4,r7,r0	C compute function result
 
-	beq	r28,L(L0)
-	subq	r18,r28,r18
+	beq	r20,$L0		C first loop has nothing to do
+	subq	r18,r20,r18	C r18 = limbs left for the 4-way loop
 
 	ALIGN(8)
-L(top0):
-	ldq	r3,8(r17)
+$Loop0:	ldq	r3,0(r17)	C r3 = next higher source limb
 	addq	r16,8,r16
-	srl	r4,r19,r5
 	addq	r17,8,r17
-	subq	r28,1,r28
-	sll	r3,r20,r6
+	subq	r20,1,r20	C one limb fewer for this loop
+	srl	r4,r19,r5	C lower bits come from the current limb
+	sll	r3,r7,r6	C upper bits come from the limb above it
 	bis	r3,r3,r4
 	bis	r5,r6,r8
 	stq	r8,-8(r16)
-	bne	r28,L(top0)
+	bne	r20,$Loop0	C repeat until the first-loop count is zero
 
-L(L0):	srl	r4,r19,r24
-	beq	r18,L(end)
-C warm up phase 1
-	ldq	r1,8(r17)
-	subq	r18,4,r18
-	ldq	r2,16(r17)
-	ldq	r3,24(r17)
-	ldq	r4,32(r17)
-C warm up phase 2
-	sll	r1,r20,r7
-	srl	r1,r19,r21
-	sll	r2,r20,r8
-	beq	r18,L(end1)
-	ldq	r1,40(r17)
-	srl	r2,r19,r22
-	ldq	r2,48(r17)
-	sll	r3,r20,r5
-	bis	r7,r24,r7
-	srl	r3,r19,r23
-	bis	r8,r21,r8
-	sll	r4,r20,r6
-	ldq	r3,56(r17)
-	srl	r4,r19,r24
-	ldq	r4,64(r17)
-	subq	r18,4,r18
-	beq	r18,L(end2)
-	ALIGN(16)
-C main loop
-L(top):	stq	r7,0(r16)
-	bis	r5,r22,r5
-	stq	r8,8(r16)
-	bis	r6,r23,r6
-
-	sll	r1,r20,r7
-	subq	r18,4,r18
-	srl	r1,r19,r21
-	unop	C ldq	r31,-96(r17)
-
-	sll	r2,r20,r8
-	ldq	r1,72(r17)
-	srl	r2,r19,r22
-	ldq	r2,80(r17)
-
-	stq	r5,16(r16)
-	bis	r7,r24,r7
-	stq	r6,24(r16)
-	bis	r8,r21,r8
-
-	sll	r3,r20,r5
-	unop	C ldq	r31,-96(r17)
-	srl	r3,r19,r23
+$L0:	beq	r18,$Lend	C no groups of four limbs remain
+
+	ALIGN(8)
+$Loop:	ldq	r3,0(r17)	C r3 = next higher source limb
 	addq	r16,32,r16
+	subq	r18,4,r18	C four limbs are handled per pass
+	srl	r4,r19,r5	C lower bits come from the current limb
+	sll	r3,r7,r6	C upper bits come from the limb above it
+
+	ldq	r4,8(r17)
+	srl	r3,r19,r1
+	bis	r5,r6,r8	C merge both parts into one result limb
+	stq	r8,-32(r16)	C lowest of the four result limbs
+	sll	r4,r7,r2
+
+	ldq	r3,16(r17)
+	srl	r4,r19,r5
+	bis	r1,r2,r8
+	stq	r8,-24(r16)
+	sll	r3,r7,r6
 
-	sll	r4,r20,r6
-	ldq	r3,88(r17)
-	srl	r4,r19,r24
-	ldq	r4,96(r17)
+	ldq	r4,24(r17)	C r4 stays live into the next pass or $Lend
+	srl	r3,r19,r1
+	bis	r5,r6,r8
+	stq	r8,-16(r16)
+	sll	r4,r7,r2
 
 	addq	r17,32,r17
-	bne	r18,L(top)
-C cool down phase 2/1
-L(end2):
-	stq	r7,0(r16)
-	bis	r5,r22,r5
-	stq	r8,8(r16)
-	bis	r6,r23,r6
-	sll	r1,r20,r7
-	srl	r1,r19,r21
-	sll	r2,r20,r8
-	srl	r2,r19,r22
-	stq	r5,16(r16)
-	bis	r7,r24,r7
-	stq	r6,24(r16)
-	bis	r8,r21,r8
-	sll	r3,r20,r5
-	srl	r3,r19,r23
-	sll	r4,r20,r6
-	srl	r4,r19,r24
-C cool down phase 2/2
-	stq	r7,32(r16)
-	bis	r5,r22,r5
-	stq	r8,40(r16)
-	bis	r6,r23,r6
-	stq	r5,48(r16)
-	stq	r6,56(r16)
-C cool down phase 2/3
-	stq	r24,64(r16)
-	ret	r31,(r26),1
+	bis	r1,r2,r8
+	stq	r8,-8(r16)	C highest of the four result limbs
 
-C cool down phase 1/1
-L(end1):
-	srl	r2,r19,r22
-	sll	r3,r20,r5
-	bis	r7,r24,r7
-	srl	r3,r19,r23
-	bis	r8,r21,r8
-	sll	r4,r20,r6
-	srl	r4,r19,r24
-C cool down phase 1/2
-	stq	r7,0(r16)
-	bis	r5,r22,r5
-	stq	r8,8(r16)
-	bis	r6,r23,r6
-	stq	r5,16(r16)
-	stq	r6,24(r16)
-	stq	r24,32(r16)
-	ret	r31,(r26),1
+	bgt	r18,$Loop	C repeat while limbs remain
 
-L(end):	stq	r24,0(r16)
+$Lend:	srl	r4,r19,r8	C highest limb: no limb above contributes bits
+	stq	r8,0(r16)
 	ret	r31,(r26),1
 EPILOGUE(mpn_rshift)
 ASM_END()
diff --git a/gmp/mpn/alpha/sec_tabselect.asm b/gmp/mpn/alpha/sec_tabselect.asm
deleted file mode 100644
index 679b16926e..0000000000
--- a/gmp/mpn/alpha/sec_tabselect.asm
+++ /dev/null
@@ -1,137 +0,0 @@
-dnl  Alpha mpn_sec_tabselect.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C      cycles/limb
-C EV4:      ?
-C EV5:      2.25
-C EV6:      1.64
-
-define(`rp',     `r16')
-define(`tp',     `r17')
-define(`n',      `r18')
-define(`nents',  `r19')
-define(`which',  `r20')
-
-define(`i',      `r21')
-define(`j',      `r22')
-define(`stride', `r23')
-define(`mask',   `r24')
-define(`k',      `r25')
-
-
-ASM_START()
-PROLOGUE(mpn_sec_tabselect)
-	subq	n, 4, j			C outer loop induction variable
-
-	blt	j, L(outer_end)
-L(outer_top):
-	mov	tp, r8
-	lda	r0, 0(r31)
-	lda	r1, 0(r31)
-	lda	r2, 0(r31)
-	lda	r3, 0(r31)
-	subq	j, 4, j			C outer loop induction variable
-	subq	nents, which, k
-	mov	nents, i
-
-	ALIGN(16)
-L(top):	ldq	r4, 0(tp)
-	ldq	r5, 8(tp)
-	cmpeq	k, i, mask
-	subq	i, 1, i
-	subq	r31, mask, mask
-	ldq	r6, 16(tp)
-	ldq	r7, 24(tp)
-	and	r4, mask, r4
-	and	r5, mask, r5
-	or	r0, r4, r0
-	or	r1, r5, r1
-	and	r6, mask, r6
-	and	r7, mask, r7
-	or	r2, r6, r2
-	or	r3, r7, r3
-	s8addq	n, tp, tp
-	bne	i, L(top)
-
-	stq	r0, 0(rp)
-	stq	r1, 8(rp)
-	stq	r2, 16(rp)
-	stq	r3, 24(rp)
-	addq	r8, 32, tp
-	addq	rp, 32, rp
-	bge	j, L(outer_top)
-L(outer_end):
-
-	and	n, 2, r0
-	beq	r0, L(b0x)
-L(b1x):	mov	tp, r8
-	lda	r0, 0(r31)
-	lda	r1, 0(r31)
-	subq	nents, which, k
-	mov	nents, i
-	ALIGN(16)
-L(tp2):	ldq	r4, 0(tp)
-	ldq	r5, 8(tp)
-	cmpeq	k, i, mask
-	subq	i, 1, i
-	subq	r31, mask, mask
-	and	r4, mask, r4
-	and	r5, mask, r5
-	or	r0, r4, r0
-	or	r1, r5, r1
-	s8addq	n, tp, tp
-	bne	i, L(tp2)
-	stq	r0, 0(rp)
-	stq	r1, 8(rp)
-	addq	r8, 16, tp
-	addq	rp, 16, rp
-
-L(b0x):	and	n, 1, r0
-	beq	r0, L(b00)
-L(b01):	lda	r0, 0(r31)
-	subq	nents, which, k
-	mov	nents, i
-	ALIGN(16)
-L(tp1):	ldq	r4, 0(tp)
-	cmpeq	k, i, mask
-	subq	i, 1, i
-	subq	r31, mask, mask
-	and	r4, mask, r4
-	or	r0, r4, r0
-	s8addq	n, tp, tp
-	bne	i, L(tp1)
-	stq	r0, 0(rp)
-
-L(b00):	ret	r31, (r26), 1
-EPILOGUE()
diff --git a/gmp/mpn/alpha/sqr_diag_addlsh1.asm b/gmp/mpn/alpha/sqr_diag_addlsh1.asm
deleted file mode 100644
index ee219ef7e8..0000000000
--- a/gmp/mpn/alpha/sqr_diag_addlsh1.asm
+++ /dev/null
@@ -1,93 +0,0 @@
-dnl  Alpha mpn_sqr_diag_addlsh1.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C      cycles/limb
-C EV4:      ?
-C EV5:     10.2
-C EV6:      4.5
-
-C Ideally, one-way code could run at 9 c/l (limited by mulq+umulh) on ev5 and
-C about 3.75 c/l on ev6.  Two-way code could run at about 3.25 c/l on ev6.
-
-C Algorithm: We allow ourselves to propagate carry to a product high word
-C without worrying for carry out, since (B-1)^2 = B^2-2B+1 has a high word of
-C B-2, i.e, will not spill.  We propagate carry similarly to a product low word
-C since the problem value B-1 is a quadratic non-residue mod B, but our
-C products are squares.
-
-define(`rp',	`r16')
-define(`tp',	`r17')
-define(`up',	`r18')
-define(`n',	`r19')
-
-ASM_START()
-PROLOGUE(mpn_sqr_diag_addlsh1)
-	ldq	r0, 0(up)
-	bis	r31, r31, r21
-	bis	r31, r31, r3
-	mulq	r0, r0, r7
-	stq	r7, 0(rp)
-	umulh	r0, r0, r6
-	lda	n, -1(n)
-
-	ALIGN(16)
-L(top):	ldq	r0, 8(up)
-	lda	up, 8(up)
-	ldq	r8, 0(tp)
-	ldq	r20, 8(tp)
-	mulq	r0, r0, r7
-	lda	tp, 16(tp)
-	sll	r8, 1, r23
-	srl	r8, 63, r22
-	or	r21, r23, r23
-	sll	r20, 1, r24
-	addq	r3, r6, r6		C cannot carry per comment above
-	or	r22, r24, r24
-	addq	r23, r6, r21
-	umulh	r0, r0, r6
-	cmpult	r21, r23, r1
-	addq	r1, r7, r7		C cannot carry per comment above
-	stq	r21, 8(rp)
-	addq	r24, r7, r22
-	stq	r22, 16(rp)
-	lda	n, -1(n)
-	cmpult	r22, r7, r3
-	srl	r20, 63, r21
-	lda	rp, 16(rp)
-	bne	n, L(top)
-
-	addq	r3, r6, r6		C cannot carry per comment above
-	addq	r21, r6, r21
-	stq	r21, 8(rp)
-	ret	r31, (r26), 1
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/alpha/sqr_diagonal.asm b/gmp/mpn/alpha/sqr_diagonal.asm
new file mode 100644
index 0000000000..2aa7f2e597
--- /dev/null
+++ b/gmp/mpn/alpha/sqr_diagonal.asm
@@ -0,0 +1,65 @@
+dnl  Alpha mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C      cycles/limb
+C EV4:     42
+C EV5:     18
+C EV6:      3.45
+
+C  INPUT PARAMETERS
+C  rp	r16
+C  up	r17
+C  n	r18
+
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+	ldq	r2,0(r17)	C r2 = up[0]
+	lda	r18,-2(r18)	C n -= 2
+	mulq	r2,r2,r3	C r3 = low limb of the square
+	umulh	r2,r2,r4	C r4 = high limb of the square
+	blt	r18,$Lend1	C jump if n was 1
+	ldq	r2,8(r17)	C r2 = up[1]
+	beq	r18,$Lend2	C jump if n was 2
+
+	ALIGN(8)
+$Loop:	stq	r3,0(r16)	C store low limb of the previous square
+	mulq	r2,r2,r3	C r3 = low limb of the next square
+	lda	r18,-1(r18)	C n--
+	stq	r4,8(r16)	C store high limb of the previous square
+	umulh	r2,r2,r4	C r4 = high limb of the next square
+	ldq	r2,16(r17)	C r2 = source limb for the following pass
+	lda	r17,8(r17)	C up += 1 limb
+	lda	r16,16(r16)	C rp += 2 limbs
+	bne	r18,$Loop	C repeat until the remaining count is zero
+
+$Lend2:	stq	r3,0(r16)	C store low limb of the pending square
+	mulq	r2,r2,r3	C r3 = low limb of the last square
+	stq	r4,8(r16)	C store high limb of the pending square
+	umulh	r2,r2,r4	C r4 = high limb of the last square
+	stq	r3,16(r16)	C store low limb of the last square
+	stq	r4,24(r16)	C store high limb of the last square
+	ret	r31,(r26),1
+$Lend1:	stq	r3,0(r16)	C n was 1: store the single square, low limb
+	stq	r4,8(r16)	C then its high limb
+	ret	r31,(r26),1
+EPILOGUE(mpn_sqr_diagonal)
+ASM_END()
diff --git a/gmp/mpn/alpha/sub_n.asm b/gmp/mpn/alpha/sub_n.asm
index 1bb72263f8..842a4f0b54 100644
--- a/gmp/mpn/alpha/sub_n.asm
+++ b/gmp/mpn/alpha/sub_n.asm
@@ -1,164 +1,117 @@
-dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
-dnl  and store difference in a third limb vector.
+dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.
 
-dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
+dnl  Copyright 1995, 2000, 2002, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C      cycles/limb
-C EV4:     ?
-C EV5:     4.75
-C EV6:     3
+C EV4:     7.75
+C EV5:     5.75
+C EV6:     4
 
-dnl  INPUT PARAMETERS
-dnl  res_ptr	r16
-dnl  s1_ptr	r17
-dnl  s2_ptr	r18
-dnl  size	r19
+C  INPUT PARAMETERS
+C  rp	r16
+C  up	r17
+C  vp	r18
+C  n	r19
 
 ASM_START()
-PROLOGUE(mpn_sub_nc)
-	bis	r31,r20,r25
-	br	L(com)
-EPILOGUE()
 PROLOGUE(mpn_sub_n)
-	bis	r31,r31,r25		C clear cy
-L(com):	subq	r19,4,r19		C decr loop cnt
-	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
-C Start software pipeline for 1st loop
-	ldq	r0,0(r18)
-	ldq	r4,0(r17)
-	ldq	r1,8(r18)
-	ldq	r5,8(r17)
-	addq	r17,32,r17		C update s1_ptr
-	subq	r4,r0,r28		C 1st main subtract
-	ldq	r2,16(r18)
-	subq	r28,r25,r20		C 1st carry subtract
-	ldq	r3,24(r18)
-	cmpult	r4,r0,r8		C compute cy from last subtract
-	ldq	r6,-16(r17)
-	cmpult	r28,r25,r25		C compute cy from last subtract
-	ldq	r7,-8(r17)
-	bis	r8,r25,r25		C combine cy from the two subtracts
-	subq	r19,4,r19		C decr loop cnt
-	subq	r5,r1,r28		C 2nd main subtract
-	addq	r18,32,r18		C update s2_ptr
-	subq	r28,r25,r21		C 2nd carry subtract
-	cmpult	r5,r1,r8		C compute cy from last subtract
-	blt	r19,$Lend1		C if less than 4 limbs remain, jump
-C 1st loop handles groups of 4 limbs in a software pipeline
-	ALIGN(16)
-$Loop:	cmpult	r28,r25,r25		C compute cy from last subtract
-	ldq	r0,0(r18)
-	bis	r8,r25,r25		C combine cy from the two subtracts
-	ldq	r1,8(r18)
-	subq	r6,r2,r28		C 3rd main subtract
-	ldq	r4,0(r17)
-	subq	r28,r25,r22		C 3rd carry subtract
-	ldq	r5,8(r17)
-	cmpult	r6,r2,r8		C compute cy from last subtract
-	cmpult	r28,r25,r25		C compute cy from last subtract
-	stq	r20,0(r16)
-	bis	r8,r25,r25		C combine cy from the two subtracts
-	stq	r21,8(r16)
-	subq	r7,r3,r28		C 4th main subtract
-	subq	r28,r25,r23		C 4th carry subtract
-	cmpult	r7,r3,r8		C compute cy from last subtract
-	cmpult	r28,r25,r25		C compute cy from last subtract
-		addq	r17,32,r17		C update s1_ptr
-	bis	r8,r25,r25		C combine cy from the two subtracts
-		addq	r16,32,r16		C update res_ptr
-	subq	r4,r0,r28		C 1st main subtract
-	ldq	r2,16(r18)
-	subq	r28,r25,r20		C 1st carry subtract
-	ldq	r3,24(r18)
-	cmpult	r4,r0,r8		C compute cy from last subtract
-	ldq	r6,-16(r17)
-	cmpult	r28,r25,r25		C compute cy from last subtract
-	ldq	r7,-8(r17)
-	bis	r8,r25,r25		C combine cy from the two subtracts
-	subq	r19,4,r19		C decr loop cnt
-	stq	r22,-16(r16)
-	subq	r5,r1,r28		C 2nd main subtract
-	stq	r23,-8(r16)
-	subq	r28,r25,r21		C 2nd carry subtract
-		addq	r18,32,r18		C update s2_ptr
-	cmpult	r5,r1,r8		C compute cy from last subtract
-	bge	r19,$Loop
-C Finish software pipeline for 1st loop
-$Lend1:	cmpult	r28,r25,r25		C compute cy from last subtract
-	bis	r8,r25,r25		C combine cy from the two subtracts
-	subq	r6,r2,r28		C cy add
-	subq	r28,r25,r22		C 3rd main subtract
-	cmpult	r6,r2,r8		C compute cy from last subtract
-	cmpult	r28,r25,r25		C compute cy from last subtract
-	stq	r20,0(r16)
-	bis	r8,r25,r25		C combine cy from the two subtracts
-	stq	r21,8(r16)
-	subq	r7,r3,r28		C cy add
-	subq	r28,r25,r23		C 4th main subtract
-	cmpult	r7,r3,r8		C compute cy from last subtract
-	cmpult	r28,r25,r25		C compute cy from last subtract
-	bis	r8,r25,r25		C combine cy from the two subtracts
-	addq	r16,32,r16		C update res_ptr
-	stq	r22,-16(r16)
-	stq	r23,-8(r16)
-$Lend2:	addq	r19,4,r19		C restore loop cnt
-	beq	r19,$Lret
-C Start software pipeline for 2nd loop
-	ldq	r0,0(r18)
-	ldq	r4,0(r17)
+	ldq	r3,0(r17)	C r3 = up[0]
+	ldq	r4,0(r18)	C r4 = vp[0]
+
 	subq	r19,1,r19
-	beq	r19,$Lend0
-C 2nd loop handles remaining 1-3 limbs
-	ALIGN(16)
-$Loop0:	subq	r4,r0,r28		C main subtract
-	cmpult	r4,r0,r8		C compute cy from last subtract
-	ldq	r0,8(r18)
-	ldq	r4,8(r17)
-	subq	r28,r25,r20		C carry subtract
-	addq	r18,8,r18
+	and	r19,4-1,r2	C r2 = (n-1) mod 4 = number of limbs in first loop
+	bis	r31,r31,r0	C r0 = 0: no incoming borrow
+	beq	r2,$L0		C if n-1 is a multiple of 4, skip first loop
+
+	subq	r19,r2,r19	C r19 = limbs left for the 4-way loop (final limb excluded)
+
+$Loop0:	subq	r2,1,r2	C one fewer limb left for this loop
+	ldq	r5,8(r17)	C preload next u limb
+	addq	r4,r0,r4	C r4 = v + borrow in
+	ldq	r6,8(r18)	C preload next v limb
+	cmpult	r4,r0,r1	C r1 = 1 if v + borrow wrapped
+	subq	r3,r4,r4	C r4 = u - (v + borrow)
+	cmpult	r3,r4,r0	C r0 = 1 if the subtract wrapped
+	stq	r4,0(r16)	C store difference limb
+	bis	r0,r1,r0	C borrow out = either wrap
+
 	addq	r17,8,r17
-	stq	r20,0(r16)
-	cmpult	r28,r25,r25		C compute cy from last subtract
-	subq	r19,1,r19		C decr loop cnt
-	bis	r8,r25,r25		C combine cy from the two subtracts
+	addq	r18,8,r18	C vp += 1 limb
+	bis	r5,r5,r3	C r3 = preloaded u limb
+	bis	r6,r6,r4	C r4 = preloaded v limb
 	addq	r16,8,r16
-	bne	r19,$Loop0
-$Lend0:	subq	r4,r0,r28		C main subtract
-	subq	r28,r25,r20		C carry subtract
-	cmpult	r4,r0,r8		C compute cy from last subtract
-	cmpult	r28,r25,r25		C compute cy from last subtract
-	stq	r20,0(r16)
-	bis	r8,r25,r25		C combine cy from the two subtracts
-
-$Lret:	bis	r25,r31,r0		C return cy
+	bne	r2,$Loop0
+
+$L0:	beq	r19,$Lend	C only the final limb left
+
+	ALIGN(8)
+$Loop:	subq	r19,4,r19	C four limbs handled per pass
+
+	ldq	r5,8(r17)	C limb 0: u in r3, v in r4; preload limb 1 into r5/r6
+	addq	r4,r0,r4	C v + borrow in
+	ldq	r6,8(r18)
+	cmpult	r4,r0,r1	C r1 = 1 if v + borrow wrapped
+	subq	r3,r4,r4	C u - (v + borrow)
+	cmpult	r3,r4,r0	C r0 = 1 if the subtract wrapped
+	stq	r4,0(r16)
+	bis	r0,r1,r0	C borrow out = either wrap
+
+	ldq	r3,16(r17)	C limb 1: u in r5, v in r6; preload limb 2 into r3/r4
+	addq	r6,r0,r6
+	ldq	r4,16(r18)
+	cmpult	r6,r0,r1
+	subq	r5,r6,r6
+	cmpult	r5,r6,r0
+	stq	r6,8(r16)
+	bis	r0,r1,r0
+
+	ldq	r5,24(r17)	C limb 2: u in r3, v in r4; preload limb 3 into r5/r6
+	addq	r4,r0,r4
+	ldq	r6,24(r18)
+	cmpult	r4,r0,r1
+	subq	r3,r4,r4
+	cmpult	r3,r4,r0
+	stq	r4,16(r16)
+	bis	r0,r1,r0
+
+	ldq	r3,32(r17)	C limb 3: u in r5, v in r6; preload next limb into r3/r4
+	addq	r6,r0,r6
+	ldq	r4,32(r18)
+	cmpult	r6,r0,r1
+	subq	r5,r6,r6
+	cmpult	r5,r6,r0
+	stq	r6,24(r16)
+	bis	r0,r1,r0
+
+	addq	r17,32,r17	C advance up, vp and rp by 4 limbs
+	addq	r18,32,r18
+	addq	r16,32,r16
+	bne	r19,$Loop
+
+$Lend:	addq	r4,r0,r4	C final limb: u in r3, v in r4, nothing left to preload
+	cmpult	r4,r0,r1
+	subq	r3,r4,r4
+	cmpult	r3,r4,r0
+	stq	r4,0(r16)
+	bis	r0,r1,r0	C r0 = final borrow, left in r0 at the ret
 	ret	r31,(r26),1
-EPILOGUE()
+EPILOGUE(mpn_sub_n)
 ASM_END()
diff --git a/gmp/mpn/alpha/submul_1.asm b/gmp/mpn/alpha/submul_1.asm
index 2b63b52fa4..554ccf51b6 100644
--- a/gmp/mpn/alpha/submul_1.asm
+++ b/gmp/mpn/alpha/submul_1.asm
@@ -4,30 +4,19 @@ dnl  the result from a second limb vector.
 dnl  Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/umul.asm b/gmp/mpn/alpha/umul.asm
index 039081ed48..7fa3f008f1 100644
--- a/gmp/mpn/alpha/umul.asm
+++ b/gmp/mpn/alpha/umul.asm
@@ -3,30 +3,19 @@ dnl  mpn_umul_ppmm -- 1x1->2 limb multiplication
 dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/alpha/unicos.m4 b/gmp/mpn/alpha/unicos.m4
index e05cf5cca6..f1f41c18e4 100644
--- a/gmp/mpn/alpha/unicos.m4
+++ b/gmp/mpn/alpha/unicos.m4
@@ -3,33 +3,22 @@ divert(-1)
 dnl  m4 macros for alpha assembler on unicos.
 
 
-dnl  Copyright 2000, 2002-2004, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
+dnl  Copyright 2000, 2002, 2003, 2004 Free Software Foundation, Inc.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  Note that none of the standard GMP_ASM_ autoconf tests are done for
@@ -86,9 +75,8 @@ m4_assert_numargs(1)
 `	.extern	$1')
 
 define(`DATASTART',
-m4_assert_numargs_range(1,2)
+m4_assert_numargs(1)
 `	.psect	$1@crud,data
-	ALIGN(ifelse($#,1,2,$2))
 $1:')
 
 define(`DATAEND',
diff --git a/gmp/mpn/arm/README b/gmp/mpn/arm/README
deleted file mode 100644
index 598baa3f2e..0000000000
--- a/gmp/mpn/arm/README
+++ /dev/null
@@ -1,35 +0,0 @@
-Copyright 2002, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
-
-
-
-
-
-This directory contains mpn functions for ARM processors.  It has been
-optimised for Cortex-A9, but the code in the top-level directory should run
-on all ARM processors at architecture level v4 or later.
diff --git a/gmp/mpn/arm/add_n.asm b/gmp/mpn/arm/add_n.asm
new file mode 100644
index 0000000000..0f0791759d
--- /dev/null
+++ b/gmp/mpn/arm/add_n.asm
@@ -0,0 +1,69 @@
+dnl  ARM mpn_add_n -- Add two limb vectors of the same length > 0 and store sum
+dnl  in a third limb vector.
+dnl  Contributed by Robert Harley.
+
+dnl  Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This code runs at 5 cycles/limb.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`vp',`r2')
+define(`n',`r3')
+
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+	stmfd	sp!, { r8, r9, lr }	C save r8, r9 and the return address
+	movs	n, n, lsr #1	C n = n/2, old low bit of n goes to carry
+	bcc	L(skip1)	C n was even: no single leading limb, carry is clear
+	ldr	r12, [up], #4	C r12 = *up++
+	ldr	lr, [vp], #4	C lr = *vp++
+	adds	r12, r12, lr	C first add starts the carry chain
+	str	r12, [rp], #4	C *rp++ = sum
+L(skip1):
+	tst	n, #1	C n now counts limb pairs: is there an odd pair?
+	beq	L(skip2)
+	ldmia	up!, { r8, r9 }	C two u limbs
+	ldmia	vp!, { r12, lr }	C two v limbs
+	adcs	r8, r8, r12	C add with carry in, carry out feeds the next adcs
+	adcs	r9, r9, lr
+	stmia	rp!, { r8, r9 }	C store two sum limbs
+L(skip2):
+	bics	n, n, #1	C n = pairs left for the loop (even), Z set if none
+	beq	L(return)
+	stmfd	sp!, { r4, r5, r6, r7 }	C loop needs four more registers
+L(add_n_loop):
+	ldmia	up!, { r4, r5, r6, r7 }	C four u limbs
+	ldmia	vp!, { r8, r9, r12, lr }	C four v limbs
+	adcs	r4, r4, r8
+	ldr	r8, [rp, #12]			C cache allocate
+	adcs	r5, r5, r9
+	adcs	r6, r6, r12
+	adcs	r7, r7, lr
+	stmia	rp!, { r4, r5, r6, r7 }	C store four sum limbs
+	sub	n, n, #2	C two pairs done, flags untouched
+	teq	n, #0	C zero test, carry must survive for the next adcs
+	bne	L(add_n_loop)
+	ldmfd	sp!, { r4, r5, r6, r7 }	C restore the loop registers
+L(return):
+	adc	r0, n, #0	C n is 0 here, so r0 = carry out
+	ldmfd	sp!, { r8, r9, pc }	C restore r8, r9 and return
+EPILOGUE(mpn_add_n)
diff --git a/gmp/mpn/arm/addmul_1.asm b/gmp/mpn/arm/addmul_1.asm
new file mode 100644
index 0000000000..de33f2f34b
--- /dev/null
+++ b/gmp/mpn/arm/addmul_1.asm
@@ -0,0 +1,107 @@
+dnl  ARM mpn_addmul_1 -- Multiply a limb vector with a limb and add the result
+dnl  to a second limb vector.
+
+dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C StrongARM:  7.75-9.75  (dependent on vl value)
+C XScale:        8-9     (dependent on vl value, estimated)
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n',`r2')
+define(`vl',`r3')
+define(`rl',`r12')
+define(`ul',`r6')
+define(`r',`lr')
+
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+	stmfd	sp!, { r4-r6, lr }	C save r4-r6 and the return address
+	mov	r4, #0			C r4 = 0: no pending high product limb yet
+	adds	r0, r0, #0		C clear cy
+	tst	n, #1	C odd limb count: handle one limb first
+	beq	L(skip1)
+	ldr	ul, [up], #4	C ul = *up++
+	ldr	rl, [rp, #0]	C rl = *rp
+	umull	r5, r4, ul, vl	C r4:r5 = ul * vl (r5 low, r4 high)
+	adds	r, rl, r5	C add low product limb, starts the carry chain
+	str	r, [rp], #4	C *rp++ = result limb
+L(skip1):
+	tst	n, #2	C two more limbs before the 4-way loop?
+	beq	L(skip2)
+	ldr	ul, [up], #4
+	ldr	rl, [rp, #0]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl	C r5:r4 = ul * vl + previous high limb in r4
+	ldr	ul, [up], #4
+	adcs	r, rl, r4	C add low limb with carry in
+	ldr	rl, [rp, #4]	C next rp limb, read before rp is advanced
+	mov	r4, #0
+	umlal	r5, r4, ul, vl	C r4:r5 = ul * vl + previous high limb in r5
+	str	r, [rp], #4
+	adcs	r, rl, r5
+	str	r, [rp], #4
+L(skip2):
+	bics	r, n, #3	C any whole groups of 4 limbs? (n itself is kept)
+	beq	L(return)
+
+	ldr	ul, [up], #4	C prime the pipeline with the first product of the group
+	ldr	rl, [rp, #0]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl	C r5:r4 = ul * vl + previous high limb in r4
+	b	L(in)
+
+L(loop):
+	ldr	ul, [up], #4
+	adcs	r, rl, r5	C finish the last limb of the previous group
+	ldr	rl, [rp, #4]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl	C r5:r4 = ul * vl + previous high limb in r4
+	str	r, [rp], #4
+L(in):	ldr	ul, [up], #4
+	adcs	r, rl, r4
+	ldr	rl, [rp, #4]
+	mov	r4, #0
+	umlal	r5, r4, ul, vl	C r4:r5 = ul * vl + previous high limb in r5
+	str	r, [rp], #4
+	ldr	ul, [up], #4
+	adcs	r, rl, r5
+	ldr	rl, [rp, #4]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl
+	str	r, [rp], #4
+	ldr	ul, [up], #4
+	adcs	r, rl, r4
+	ldr	rl, [rp, #4]
+	mov	r4, #0
+	umlal	r5, r4, ul, vl
+	str	r, [rp], #4
+	sub	n, n, #4	C one group of 4 limbs done
+	bics	r, n, #3	C more groups left?
+	bne	L(loop)
+
+	adcs	r, rl, r5	C last limb of the final group
+	str	r, [rp], #4
+L(return):
+	adc	r0, r4, #0	C return pending high limb plus final carry
+	ldmfd	sp!, { r4-r6, pc }	C restore r4-r6 and return
+EPILOGUE(mpn_addmul_1)
diff --git a/gmp/mpn/arm/aors_n.asm b/gmp/mpn/arm/aors_n.asm
deleted file mode 100644
index fdad9f7ba6..0000000000
--- a/gmp/mpn/arm/aors_n.asm
+++ /dev/null
@@ -1,112 +0,0 @@
-dnl  ARM mpn_add_n and mpn_sub_n
-
-dnl  Contributed to the GNU project by Robert Harley.
-
-dnl  Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 2.5	slightly fluctuating
-C Cortex-A15	 2.25
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`vp', `r2')
-define(`n',  `r3')
-
-ifdef(`OPERATION_add_n', `
-  define(`ADDSUB',	adds)
-  define(`ADDSUBC',	adcs)
-  define(`CLRCY',	`cmn	r0, #0')
-  define(`SETCY',	`cmp	$1, #1')
-  define(`RETVAL',	`adc	r0, n, #0')
-  define(`func',	mpn_add_n)
-  define(`func_nc',	mpn_add_nc)')
-ifdef(`OPERATION_sub_n', `
-  define(`ADDSUB',	subs)
-  define(`ADDSUBC',	sbcs)
-  define(`CLRCY',	`cmp	r0, r0')
-  define(`SETCY',	`rsbs	$1, $1, #0')
-  define(`RETVAL',	`sbc	r0, r0, r0
-			and	r0, r0, #1')
-  define(`func',	mpn_sub_n)
-  define(`func_nc',	mpn_sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-ASM_START()
-PROLOGUE(func_nc)
-	ldr	r12, [sp, #0]
-	stmfd	sp!, { r8, r9, lr }
-	SETCY(	r12)
-	b	L(ent)
-EPILOGUE()
-PROLOGUE(func)
-	stmfd	sp!, { r8, r9, lr }
-	CLRCY(	r12)
-L(ent):	tst	n, #1
-	beq	L(skip1)
-	ldr	r12, [up], #4
-	ldr	lr, [vp], #4
-	ADDSUBC	r12, r12, lr
-	str	r12, [rp], #4
-L(skip1):
-	tst	n, #2
-	beq	L(skip2)
-	ldmia	up!, { r8, r9 }
-	ldmia	vp!, { r12, lr }
-	ADDSUBC	r8, r8, r12
-	ADDSUBC	r9, r9, lr
-	stmia	rp!, { r8, r9 }
-L(skip2):
-	bics	n, n, #3
-	beq	L(rtn)
-	stmfd	sp!, { r4, r5, r6, r7 }
-
-L(top):	ldmia	up!, { r4, r5, r6, r7 }
-	ldmia	vp!, { r8, r9, r12, lr }
-	ADDSUBC	r4, r4, r8
-	sub	n, n, #4
-	ADDSUBC	r5, r5, r9
-	ADDSUBC	r6, r6, r12
-	ADDSUBC	r7, r7, lr
-	stmia	rp!, { r4, r5, r6, r7 }
-	teq	n, #0
-	bne	L(top)
-
-	ldmfd	sp!, { r4, r5, r6, r7 }
-
-L(rtn):	RETVAL
-	ldmfd	sp!, { r8, r9, pc }
-EPILOGUE()
diff --git a/gmp/mpn/arm/aorslsh1_n.asm b/gmp/mpn/arm/aorslsh1_n.asm
deleted file mode 100644
index 1cbd4ba1af..0000000000
--- a/gmp/mpn/arm/aorslsh1_n.asm
+++ /dev/null
@@ -1,167 +0,0 @@
-dnl  ARM mpn_addlsh1_n and mpn_sublsh1_n
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	      addlsh1_n       sublsh1_n
-C	     cycles/limb     cycles/limb
-C StrongARM	 ?		 ?
-C XScale	 ?		 ?
-C Cortex-A7	 ?		 ?
-C Cortex-A8	 ?		 ?
-C Cortex-A9	 3.12		 3.7
-C Cortex-A15	 ?		 ?
-
-C TODO
-C  * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1.
-C    The sublsh1_n code could surely be tweaked, its REVCY slows down things
-C    very much.  If two insns are really needed, it might help to separate them
-C    for better micro-parallelism.
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`vp', `r2')
-define(`n',  `r3')
-
-ifdef(`OPERATION_addlsh1_n', `
-  define(`ADDSUB',	adds)
-  define(`ADDSUBC',	adcs)
-  define(`SETCY',	`cmp	$1, #1')
-  define(`RETVAL',	`adc	r0, $1, #2')
-  define(`SAVECY',	`sbc	$1, $2, #0')
-  define(`RESTCY',	`cmn	$1, #1')
-  define(`REVCY',	`')
-  define(`INICYR',	`mov	$1, #0')
-  define(`r10r11',	`r11')
-  define(`func',	mpn_addlsh1_n)
-  define(`func_nc',	mpn_addlsh1_nc)')
-ifdef(`OPERATION_sublsh1_n', `
-  define(`ADDSUB',	subs)
-  define(`ADDSUBC',	sbcs)
-  define(`SETCY',	`rsbs	$1, $1, #0')
-  define(`RETVAL',	`adc	r0, $1, #1')
-  define(`SAVECY',	`sbc	$1, $1, $1')
-  define(`RESTCY',	`cmn	$1, #1')
-  define(`REVCY',	`sbc	$1, $1, $1
-			cmn	$1, #1')
-  define(`INICYR',	`mvn	$1, #0')
-  define(`r10r11',	`r10')
-  define(`func',	mpn_sublsh1_n)
-  define(`func_nc',	mpn_sublsh1_nc)')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
-
-ASM_START()
-PROLOGUE(func)
-	push	{r4-r10r11, r14}
-
-ifdef(`OPERATION_addlsh1_n', `
-	mvn	r11, #0
-')
-	INICYR(	r14)
-	subs	n, n, #3
-	blt	L(le2)			C carry clear on branch path
-
-	cmn	r0, #0			C clear carry
-	ldmia	vp!, {r8, r9, r10}
-	b	L(mid)
-
-L(top):	RESTCY(	r14)
-	ADDSUBC	r4, r4, r8
-	ADDSUBC	r5, r5, r9
-	ADDSUBC	r6, r6, r10
-	ldmia	vp!, {r8, r9, r10}
-	stmia	rp!, {r4, r5, r6}
-	REVCY(r14)
-	adcs	r8, r8, r8
-	adcs	r9, r9, r9
-	adcs	r10, r10, r10
-	ldmia	up!, {r4, r5, r6}
-	SAVECY(	r14, r11)
-	subs	n, n, #3
-	blt	L(exi)
-	RESTCY(	r12)
-	ADDSUBC	r4, r4, r8
-	ADDSUBC	r5, r5, r9
-	ADDSUBC	r6, r6, r10
-	ldmia	vp!, {r8, r9, r10}
-	stmia	rp!, {r4, r5, r6}
-	REVCY(r12)
-L(mid):	adcs	r8, r8, r8
-	adcs	r9, r9, r9
-	adcs	r10, r10, r10
-	ldmia	up!, {r4, r5, r6}
-	SAVECY(	r12, r11)
-	subs	n, n, #3
-	bge	L(top)
-
-	mov	r7, r12			C swap alternating...
-	mov	r12, r14		C ...carry-save...
-	mov	r14, r7			C ...registers
-
-L(exi):	RESTCY(	r12)
-	ADDSUBC	r4, r4, r8
-	ADDSUBC	r5, r5, r9
-	ADDSUBC	r6, r6, r10
-	stmia	rp!, {r4, r5, r6}
-
-	REVCY(r12)
-L(le2):	tst	n, #1			C n = {-1,-2,-3} map to [2], [1], [0]
-	beq	L(e1)
-
-L(e02):	tst	n, #2
-	beq	L(rt0)
-	ldm	vp, {r8, r9}
-	adcs	r8, r8, r8
-	adcs	r9, r9, r9
-	ldm	up, {r4, r5}
-	SAVECY(	r12, r11)
-	RESTCY(	r14)
-	ADDSUBC	r4, r4, r8
-	ADDSUBC	r5, r5, r9
-	stm	rp, {r4, r5}
-	b	L(rt1)
-
-L(e1):	ldr	r8, [vp]
-	adcs	r8, r8, r8
-	ldr	r4, [up]
-	SAVECY(	r12, r11)
-	RESTCY(	r14)
-	ADDSUBC	r4, r4, r8
-	str	r4, [rp]
-
-L(rt1):	mov	r14, r12
-	REVCY(r12)
-L(rt0):	RETVAL(	r14)
-	pop	{r4-r10r11, r14}
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/aorsmul_1.asm b/gmp/mpn/arm/aorsmul_1.asm
deleted file mode 100644
index b02fbb3b2a..0000000000
--- a/gmp/mpn/arm/aorsmul_1.asm
+++ /dev/null
@@ -1,135 +0,0 @@
-dnl  ARM mpn_addmul_1 and mpn_submul_1.
-
-dnl  Copyright 1998, 2000, 2001, 2003, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM:     ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 5.25
-C Cortex-A15	 4
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
-define(`vl', `r3')
-define(`rl', `r12')
-define(`ul', `r6')
-define(`r',  `lr')
-
-ifdef(`OPERATION_addmul_1', `
-  define(`ADDSUB',	adds)
-  define(`ADDSUBC',	adcs)
-  define(`CLRRCY',	`mov	$1, #0
-			adds	r0, r0, #0')
-  define(`RETVAL',	`adc	r0, r4, #0')
-  define(`func',	mpn_addmul_1)')
-ifdef(`OPERATION_submul_1', `
-  define(`ADDSUB',	subs)
-  define(`ADDSUBC',	sbcs)
-  define(`CLRRCY',	`subs	$1, r0, r0')
-  define(`RETVAL',	`sbc	r0, r0, r0
-			sub	r0, $1, r0')
-  define(`func',	mpn_submul_1)')
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-ASM_START()
-PROLOGUE(func)
-	stmfd	sp!, { r4-r6, lr }
-	CLRRCY(	r4)
-	tst	n, #1
-	beq	L(skip1)
-	ldr	ul, [up], #4
-	ldr	rl, [rp, #0]
-	umull	r5, r4, ul, vl
-	ADDSUB	r, rl, r5
-	str	r, [rp], #4
-L(skip1):
-	tst	n, #2
-	beq	L(skip2)
-	ldr	ul, [up], #4
-	ldr	rl, [rp, #0]
-	mov	r5, #0
-	umlal	r4, r5, ul, vl
-	ldr	ul, [up], #4
-	ADDSUBC	r, rl, r4
-	ldr	rl, [rp, #4]
-	mov	r4, #0
-	umlal	r5, r4, ul, vl
-	str	r, [rp], #4
-	ADDSUBC	r, rl, r5
-	str	r, [rp], #4
-L(skip2):
-	bics	n, n, #3
-	beq	L(rtn)
-
-	ldr	ul, [up], #4
-	ldr	rl, [rp, #0]
-	mov	r5, #0
-	umlal	r4, r5, ul, vl
-	b	L(in)
-
-L(top):	ldr	ul, [up], #4
-	ADDSUBC	r, rl, r5
-	ldr	rl, [rp, #4]
-	mov	r5, #0
-	umlal	r4, r5, ul, vl
-	str	r, [rp], #4
-L(in):	ldr	ul, [up], #4
-	ADDSUBC	r, rl, r4
-	ldr	rl, [rp, #4]
-	mov	r4, #0
-	umlal	r5, r4, ul, vl
-	str	r, [rp], #4
-	ldr	ul, [up], #4
-	ADDSUBC	r, rl, r5
-	ldr	rl, [rp, #4]
-	mov	r5, #0
-	umlal	r4, r5, ul, vl
-	str	r, [rp], #4
-	ldr	ul, [up], #4
-	ADDSUBC	r, rl, r4
-	ldr	rl, [rp, #4]
-	mov	r4, #0
-	umlal	r5, r4, ul, vl
-	sub	n, n, #4
-	tst	n, n
-	str	r, [rp], #4
-	bne	L(top)
-
-	ADDSUBC	r, rl, r5
-	str	r, [rp]
-
-L(rtn):	RETVAL(	r4)
-	ldmfd	sp!, { r4-r6, pc }
-EPILOGUE()
diff --git a/gmp/mpn/arm/arm-defs.m4 b/gmp/mpn/arm/arm-defs.m4
index 6ca964a245..9d169e822d 100644
--- a/gmp/mpn/arm/arm-defs.m4
+++ b/gmp/mpn/arm/arm-defs.m4
@@ -2,39 +2,28 @@ divert(-1)
 
 dnl  m4 macros for ARM assembler.
 
-dnl  Copyright 2001, 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2001 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  Standard commenting is with @, the default m4 # is for constants and we
 dnl  don't want to disable macro expansions in or after them.
 
-changecom(@&*$)
+changecom(@)
 
 
 dnl  APCS register names.
@@ -58,34 +47,4 @@ deflit(sp,r13)
 deflit(lr,r14)
 deflit(pc,r15)
 
-
-define(`lea_list', `')
-define(`lea_num',0)
-
-dnl  LEA(reg,gmp_symbol)
-dnl
-dnl  Load the address of gmp_symbol into a register.  The gmp_symbol must be
-dnl  either local or protected/hidden, since we assume it has a fixed distance
-dnl  from the point of use.
-
-define(`LEA',`dnl
-ldr	$1, L(ptr`'lea_num)
-ifdef(`PIC',dnl
-`dnl
-L(bas`'lea_num):dnl
-	add	$1, $1, pc`'dnl
-	m4append(`lea_list',`
-L(ptr'lea_num`):	.word	GSYM_PREFIX`'$2-L(bas'lea_num`)-8')
-	define(`lea_num', eval(lea_num+1))dnl
-',`dnl
-	m4append(`lea_list',`
-L(ptr'lea_num`):	.word	GSYM_PREFIX`'$2')
-	define(`lea_num', eval(lea_num+1))dnl
-')dnl
-')
-
-define(`EPILOGUE_cpu',
-`lea_list
-	SIZE(`$1',.-`$1')')
-
 divert
diff --git a/gmp/mpn/arm/bdiv_dbm1c.asm b/gmp/mpn/arm/bdiv_dbm1c.asm
deleted file mode 100644
index ec3de50e8e..0000000000
--- a/gmp/mpn/arm/bdiv_dbm1c.asm
+++ /dev/null
@@ -1,113 +0,0 @@
-dnl  ARM mpn_bdiv_dbm1c.
-
-dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 4.25
-C Cortex-A15	 2.5
-
-C TODO
-C  * Try using umlal or umaal.
-C  * Try using ldm/stm.
-
-define(`qp',	  `r0')
-define(`up',	  `r1')
-define(`n',	  `r2')
-define(`bd',	  `r3')
-define(`cy',	  `sp,#0')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_bdiv_dbm1c)
-	push	{r4, r5, r6, r7, r8}
-	ldr	r4, [up], #4
-	ldr	r5, [sp, #20]
-	ands	r12, n, #3
-	beq	L(fi0)
-	cmp	r12, #2
-	bcc	L(fi1)
-	beq	L(fi2)
-
-L(fi3):	umull	r8, r12, r4, bd
-	ldr	r4, [up], #4
-	b	L(lo3)
-
-L(fi0):	umull	r6, r7, r4, bd
-	ldr	r4, [up], #4
-	b	L(lo0)
-
-L(fi1):	subs	n, n, #1
-	umull	r8, r12, r4, bd
-	bls	L(wd1)
-	ldr	r4, [up], #4
-	b	L(lo1)
-
-L(fi2):	umull	r6, r7, r4, bd
-	ldr	r4, [up], #4
-	b	L(lo2)
-
-L(top):	ldr	r4, [up], #4
-	subs	r5, r5, r6
-	str	r5, [qp], #4
-	sbc	r5, r5, r7
-L(lo1):	umull	r6, r7, r4, bd
-	ldr	r4, [up], #4
-	subs	r5, r5, r8
-	str	r5, [qp], #4
-	sbc	r5, r5, r12
-L(lo0):	umull	r8, r12, r4, bd
-	ldr	r4, [up], #4
-	subs	r5, r5, r6
-	str	r5, [qp], #4
-	sbc	r5, r5, r7
-L(lo3):	umull	r6, r7, r4, bd
-	ldr	r4, [up], #4
-	subs	r5, r5, r8
-	str	r5, [qp], #4
-	sbc	r5, r5, r12
-L(lo2):	subs	n, n, #4
-	umull	r8, r12, r4, bd
-	bhi	L(top)
-
-L(wd2):	subs	r5, r5, r6
-	str	r5, [qp], #4
-	sbc	r5, r5, r7
-L(wd1):	subs	r5, r5, r8
-	str	r5, [qp]
-	sbc	r0, r5, r12
-	pop	{r4, r5, r6, r7, r8}
-	bx	lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/cnd_aors_n.asm b/gmp/mpn/arm/cnd_aors_n.asm
deleted file mode 100644
index e8eb60983a..0000000000
--- a/gmp/mpn/arm/cnd_aors_n.asm
+++ /dev/null
@@ -1,134 +0,0 @@
-dnl  ARM mpn_cnd_add_n, mpn_cnd_sub_n
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 3
-C Cortex-A15	 2.5
-
-define(`cnd',	`r0')
-define(`rp',	`r1')
-define(`up',	`r2')
-define(`vp',	`r3')
-
-define(`n',	`r12')
-
-
-ifdef(`OPERATION_cnd_add_n', `
-	define(`ADDSUB',      adds)
-	define(`ADDSUBC',      adcs)
-	define(`INITCY',      `cmn	r0, #0')
-	define(`RETVAL',      `adc	r0, n, #0')
-	define(func,	      mpn_cnd_add_n)')
-ifdef(`OPERATION_cnd_sub_n', `
-	define(`ADDSUB',      subs)
-	define(`ADDSUBC',      sbcs)
-	define(`INITCY',      `cmp	r0, #0')
-	define(`RETVAL',      `adc	r0, n, #0
-			      rsb	r0, r0, #1')
-	define(func,	      mpn_cnd_sub_n)')
-
-MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
-
-ASM_START()
-PROLOGUE(func)
-	push	{r4-r11}
-	ldr	n, [sp, #32]
-
-	cmp	cnd, #1
-	sbc	cnd, cnd, cnd		C conditionally set to 0xffffffff
-
-	INITCY				C really only needed for n = 0 (mod 4)
-
-	ands	r4, n, #3
-	beq	L(top)
-	cmp	r4, #2
-	bcc	L(b1)
-	beq	L(b2)
-
-L(b3):	ldm	vp!, {r4,r5,r6}
-	ldm	up!, {r8,r9,r10}
-	bic	r4, r4, cnd
-	bic	r5, r5, cnd
-	bic	r6, r6, cnd
-	ADDSUB	r8, r8, r4
-	ADDSUBC	r9, r9, r5
-	ADDSUBC	r10, r10, r6
-	stm	rp!, {r8,r9,r10}
-	sub	n, n, #3
-	teq	n, #0
-	bne	L(top)
-	b	L(end)
-
-L(b2):	ldm	vp!, {r4,r5}
-	ldm	up!, {r8,r9}
-	bic	r4, r4, cnd
-	bic	r5, r5, cnd
-	ADDSUB	r8, r8, r4
-	ADDSUBC	r9, r9, r5
-	stm	rp!, {r8,r9}
-	sub	n, n, #2
-	teq	n, #0
-	bne	L(top)
-	b	L(end)
-
-L(b1):	ldr	r4, [vp], #4
-	ldr	r8, [up], #4
-	bic	r4, r4, cnd
-	ADDSUB	r8, r8, r4
-	str	r8, [rp], #4
-	sub	n, n, #1
-	teq	n, #0
-	beq	L(end)
-
-L(top):	ldm	vp!, {r4,r5,r6,r7}
-	ldm	up!, {r8,r9,r10,r11}
-	bic	r4, r4, cnd
-	bic	r5, r5, cnd
-	bic	r6, r6, cnd
-	bic	r7, r7, cnd
-	ADDSUBC	r8, r8, r4
-	ADDSUBC	r9, r9, r5
-	ADDSUBC	r10, r10, r6
-	ADDSUBC	r11, r11, r7
-	sub	n, n, #4
-	stm	rp!, {r8,r9,r10,r11}
-	teq	n, #0
-	bne	L(top)
-
-L(end):	RETVAL
-	pop	{r4-r11}
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/com.asm b/gmp/mpn/arm/com.asm
deleted file mode 100644
index 42f8e3cbbe..0000000000
--- a/gmp/mpn/arm/com.asm
+++ /dev/null
@@ -1,75 +0,0 @@
-dnl  ARM mpn_com.
-
-dnl  Copyright 2003, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 2.0
-C Cortex-A15	 1.75
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
-
-ASM_START()
-PROLOGUE(mpn_com)
-	tst	n, #1
-	beq	L(skip1)
-	ldr	r3, [up], #4
-	mvn	r3, r3
-	str	r3, [rp], #4
-L(skip1):
-	tst	n, #2
-	beq	L(skip2)
-	ldmia	up!, { r3, r12 }		C load 2 limbs
-	mvn	r3, r3
-	mvn	r12, r12
-	stmia	rp!, { r3, r12 }		C store 2 limbs
-L(skip2):
-	bics	n, n, #3
-	beq	L(rtn)
-	stmfd	sp!, { r7, r8, r9 }		C save regs on stack
-
-L(top):	ldmia	up!, { r3, r8, r9, r12 }	C load 4 limbs
-	subs	n, n, #4
-	mvn	r3, r3
-	mvn	r8, r8
-	mvn	r9, r9
-	mvn	r12, r12
-	stmia	rp!, { r3, r8, r9, r12 }	C store 4 limbs
-	bne	L(top)
-
-	ldmfd	sp!, { r7, r8, r9 }		C restore regs from stack
-L(rtn):	bx	lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/copyd.asm b/gmp/mpn/arm/copyd.asm
index 3ea2035099..718b762b91 100644
--- a/gmp/mpn/arm/copyd.asm
+++ b/gmp/mpn/arm/copyd.asm
@@ -1,59 +1,37 @@
 dnl  ARM mpn_copyd.
 
-dnl  Contributed to the GNU project by Robert Harley and Torbjörn Granlund.
-
-dnl  Copyright 2003, 2012, 2013 Free Software Foundation, Inc.
+dnl  Copyright 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 1.25-1.5
-C Cortex-A15	 1.25
+C This runs at 3 cycles/limb in the StrongARM.
 
-C TODO
-C  * Consider wider unrolling.  Analogous 8-way code runs 10% faster on both A9
-C    and A15.  But it probably slows things down for 8 <= n < a few dozen.
+define(`rp',`r0')
+define(`up',`r1')
+define(`n',`r2')
 
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
 
 ASM_START()
 PROLOGUE(mpn_copyd)
 	mov	r12, n, lsl #2
 	sub	r12, r12, #4
-	add	rp, rp, r12
-	add	up, up, r12
+	add	rp, rp, r12			C make rp point at last limb
+	add	up, up, r12			C make up point at last limb
 
 	tst	n, #1
 	beq	L(skip1)
@@ -62,23 +40,19 @@ PROLOGUE(mpn_copyd)
 L(skip1):
 	tst	n, #2
 	beq	L(skip2)
-	ldmda	up!, { r3,r12 }
-	stmda	rp!, { r3,r12 }
+	ldmda	up!, { r3, r12 }		C load 2 limbs
+	stmda	rp!, { r3, r12 }		C store 2 limbs
 L(skip2):
 	bics	n, n, #3
-	beq	L(rtn)
-
-	push	{ r4-r5 }
+	beq	L(return)
+	stmfd	sp!, { r7, r8, r9 }		C save regs on stack
+L(loop):
+	ldmda	up!, { r3, r8, r9, r12 }	C load 4 limbs
+	ldr	r7, [rp, #-12]			C cache allocate
 	subs	n, n, #4
-	ldmda	up!, { r3,r4,r5,r12 }
-	beq	L(end)
-
-L(top):	subs	n, n, #4
-	stmda	rp!, { r3,r4,r5,r12 }
-	ldmda	up!, { r3,r4,r5,r12 }
-	bne	L(top)
-
-L(end):	stmda	rp, { r3,r4,r5,r12 }
-	pop	{ r4-r5 }
-L(rtn):	bx	lr
-EPILOGUE()
+	stmda	rp!, { r3, r8, r9, r12 }	C store 4 limbs
+	bne	L(loop)
+	ldmfd	sp!, { r7, r8, r9 }		C restore regs from stack
+L(return):
+	mov	pc, lr
+EPILOGUE(mpn_copyd)
diff --git a/gmp/mpn/arm/copyi.asm b/gmp/mpn/arm/copyi.asm
index fa454702c1..5ee93acd4c 100644
--- a/gmp/mpn/arm/copyi.asm
+++ b/gmp/mpn/arm/copyi.asm
@@ -1,52 +1,30 @@
 dnl  ARM mpn_copyi.
 
-dnl  Contributed to the GNU project by Robert Harley and Torbjörn Granlund.
-
-dnl  Copyright 2003, 2012, 2013 Free Software Foundation, Inc.
+dnl  Copyright 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 1.25-1.5
-C Cortex-A15	 1.25
+C This runs at 3 cycles/limb in the StrongARM.
 
-C TODO
-C  * Consider wider unrolling.  Analogous 8-way code runs 10% faster on both A9
-C    and A15.  But it probably slows things down for 8 <= n < a few dozen.
+define(`rp',`r0')
+define(`up',`r1')
+define(`n',`r2')
 
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
 
 ASM_START()
 PROLOGUE(mpn_copyi)
@@ -57,23 +35,19 @@ PROLOGUE(mpn_copyi)
 L(skip1):
 	tst	n, #2
 	beq	L(skip2)
-	ldmia	up!, { r3,r12 }
-	stmia	rp!, { r3,r12 }
+	ldmia	up!, { r3, r12 }		C load 2 limbs
+	stmia	rp!, { r3, r12 }		C store 2 limbs
 L(skip2):
 	bics	n, n, #3
-	beq	L(rtn)
-
-	push	{ r4-r5 }
+	beq	L(return)
+	stmfd	sp!, { r7, r8, r9 }		C save regs on stack
+L(loop):
+	ldmia	up!, { r3, r8, r9, r12 }	C load 4 limbs
+	ldr	r7, [rp, #12]			C cache allocate
 	subs	n, n, #4
-	ldmia	up!, { r3,r4,r5,r12 }
-	beq	L(end)
-
-L(top):	subs	n, n, #4
-	stmia	rp!, { r3,r4,r5,r12 }
-	ldmia	up!, { r3,r4,r5,r12 }
-	bne	L(top)
-
-L(end):	stm	rp, { r3,r4,r5,r12 }
-	pop	{ r4-r5 }
-L(rtn):	bx	lr
-EPILOGUE()
+	stmia	rp!, { r3, r8, r9, r12 }	C store 4 limbs
+	bne	L(loop)
+	ldmfd	sp!, { r7, r8, r9 }		C restore regs from stack
+L(return):
+	mov	pc, lr
+EPILOGUE(mpn_copyi)
diff --git a/gmp/mpn/arm/dive_1.asm b/gmp/mpn/arm/dive_1.asm
deleted file mode 100644
index a695e47c77..0000000000
--- a/gmp/mpn/arm/dive_1.asm
+++ /dev/null
@@ -1,151 +0,0 @@
-dnl  ARM v4 mpn_modexact_1c_odd
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C               cycles/limb       cycles/limb
-C               norm    unorm    modexact_1c_odd
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	10	12
-C Cortex-A15	 9	 9
-
-C Architecture requirements:
-C v5	-
-C v5t	-
-C v5te	-
-C v6	-
-C v6t2	-
-C v7a	-
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
-define(`d',  `r3')
-
-define(`cy', `r7')
-define(`cnt', `r6')
-define(`tnc', `r8')
-
-ASM_START()
-PROLOGUE(mpn_divexact_1)
-	tst	d, #1
-	push	{r4-r9}
-	mov	cnt, #0
-	bne	L(inv)
-
-C count trailing zeros
-	movs	r4, d, lsl #16
-	moveq	d, d, lsr #16
-	moveq	cnt, #16
-	tst	d, #0xff
-	moveq	d, d, lsr #8
-	addeq	cnt, cnt, #8
-	LEA(	r4, ctz_tab)
-	and	r5, d, #0xff
-	ldrb	r4, [r4, r5]
-	mov	d, d, lsr r4
-	add	cnt, cnt, r4
-
-C binvert limb
-L(inv):	LEA(	r4, binvert_limb_table)
-	and	r12, d, #254
-	ldrb	r4, [r4, r12, lsr #1]
-	mul	r12, r4, r4
-	mul	r12, d, r12
-	rsb	r12, r12, r4, lsl #1
-	mul	r4, r12, r12
-	mul	r4, d, r4
-	rsb	r4, r4, r12, lsl #1	C r4 = inverse
-
-	tst	cnt, cnt
-	ldr	r5, [up], #4		C up[0]
-	mov	cy, #0
-	bne	L(unnorm)
-
-L(norm):
-	subs	n, n, #1		C set carry as side-effect
-	beq	L(end)
-
-	ALIGN(16)
-L(top):	sbcs	cy, r5, cy
-	ldr	r5, [up], #4
-	sub	n, n, #1
-	mul	r9, r4, cy
-	tst	n, n
-	umull	r12, cy, d, r9
-	str	r9, [rp], #4
-	bne	L(top)
-
-L(end):	sbc	cy, r5, cy
-	mul	r9, r4, cy
-	str	r9, [rp]
-	pop	{r4-r9}
-	bx	r14
-
-L(unnorm):
-	rsb	tnc, cnt, #32
-	mov	r5, r5, lsr cnt
-	subs	n, n, #1		C set carry as side-effect
-	beq	L(edu)
-
-	ALIGN(16)
-L(tpu):	ldr	r12, [up], #4
-	orr	r9, r5, r12, lsl tnc
-	mov	r5, r12, lsr cnt
-	sbcs	cy, r9, cy		C critical path ->cy->cy->
-	sub	n, n, #1
-	mul	r9, r4, cy		C critical path ->cy->r9->
-	tst	n, n
-	umull	r12, cy, d, r9		C critical path ->r9->cy->
-	str	r9, [rp], #4
-	bne	L(tpu)
-
-L(edu):	sbc	cy, r5, cy
-	mul	r9, r4, cy
-	str	r9, [rp]
-	pop	{r4-r9}
-	bx	r14
-EPILOGUE()
-
-	.section .rodata
-ctz_tab:
-	.byte	8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
-	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
-	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
-	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
-	.byte	7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
-	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
-	.byte	6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
-	.byte	5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0
diff --git a/gmp/mpn/arm/gmp-mparam.h b/gmp/mpn/arm/gmp-mparam.h
index 87eec3a149..7afb06ac89 100644
--- a/gmp/mpn/arm/gmp-mparam.h
+++ b/gmp/mpn/arm/gmp-mparam.h
@@ -1,127 +1,75 @@
 /* gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2003, 2009, 2010 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 1193MHz ARM (gcc55.fsffrance.org) */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         56
-#define MOD_1U_TO_MOD_1_1_THRESHOLD         11
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     71
-#define USE_PREINV_DIVREM_1                  1  /* preinv always */
-#define DIVREM_2_THRESHOLD                   0  /* preinv always */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           41
-
-#define MUL_TOOM22_THRESHOLD                36
-#define MUL_TOOM33_THRESHOLD               125
-#define MUL_TOOM44_THRESHOLD               193
-#define MUL_TOOM6H_THRESHOLD               303
-#define MUL_TOOM8H_THRESHOLD               418
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     125
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     176
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     114
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     129
-
-#define SQR_BASECASE_THRESHOLD              12
-#define SQR_TOOM2_THRESHOLD                 78
-#define SQR_TOOM3_THRESHOLD                137
-#define SQR_TOOM4_THRESHOLD                212
-#define SQR_TOOM6_THRESHOLD                306
-#define SQR_TOOM8_THRESHOLD                422
-
-#define MULMOD_BNM1_THRESHOLD               20
-#define SQRMOD_BNM1_THRESHOLD               26
-
-#define MUL_FFT_MODF_THRESHOLD             436  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    436, 5}, {     27, 6}, {     28, 7}, {     15, 6}, \
-    {     32, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
-    {     39, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     41, 8}, {     23, 7}, {     49, 8}, \
-    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
-    {    256, 9}, {    512,10}, {   1024,11}, {   2048,12}, \
-    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 28
-#define MUL_FFT_THRESHOLD                 5760
-
-#define SQR_FFT_MODF_THRESHOLD             404  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    404, 5}, {     13, 4}, {     27, 5}, {     27, 6}, \
-    {     28, 7}, {     15, 6}, {     32, 7}, {     17, 6}, \
-    {     35, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     41, 8}, {     23, 7}, {     47, 8}, \
-    {     27, 9}, {     15, 8}, {     39, 9}, {    512,10}, \
-    {   1024,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 26
-#define SQR_FFT_THRESHOLD                 3776
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                 137
-#define MULLO_MUL_N_THRESHOLD            11479
-
-#define DC_DIV_QR_THRESHOLD                150
-#define DC_DIVAPPR_Q_THRESHOLD             494
-#define DC_BDIV_QR_THRESHOLD               148
-#define DC_BDIV_Q_THRESHOLD                345
-
-#define INV_MULMOD_BNM1_THRESHOLD           70
-#define INV_NEWTON_THRESHOLD               474
-#define INV_APPR_THRESHOLD                 478
-
-#define BINV_NEWTON_THRESHOLD              542
-#define REDC_1_TO_REDC_N_THRESHOLD         117
-
-#define MU_DIV_QR_THRESHOLD               2089
-#define MU_DIVAPPR_Q_THRESHOLD            2172
-#define MUPI_DIV_QR_THRESHOLD              225
-#define MU_BDIV_QR_THRESHOLD              1528
-#define MU_BDIV_Q_THRESHOLD               2089
-
-#define MATRIX22_STRASSEN_THRESHOLD         16
-#define HGCD_THRESHOLD                     197
-#define GCD_DC_THRESHOLD                   902
-#define GCDEXT_DC_THRESHOLD                650
-#define JACOBI_BASE_METHOD                   2
-
-#define GET_STR_DC_THRESHOLD                20
-#define GET_STR_PRECOMPUTE_THRESHOLD        39
-#define SET_STR_DC_THRESHOLD              1045
-#define SET_STR_PRECOMPUTE_THRESHOLD      2147
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 593MHz ARM (gcc50.fsffrance.org) */
+
+/* Generated by tuneup.c, 2009-03-05, gcc 4.3 */
+
+#define MUL_KARATSUBA_THRESHOLD          34
+#define MUL_TOOM3_THRESHOLD             125
+#define MUL_TOOM44_THRESHOLD            184
+
+#define SQR_BASECASE_THRESHOLD           15
+#define SQR_KARATSUBA_THRESHOLD          82
+#define SQR_TOOM3_THRESHOLD             147
+#define SQR_TOOM4_THRESHOLD             212
+
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD             128
+#define MULLOW_MUL_N_THRESHOLD         1095
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* preinv always */
+#define DIV_DC_THRESHOLD                130
+#define POWM_THRESHOLD                  200
+
+#define MATRIX22_STRASSEN_THRESHOLD      19
+#define HGCD_THRESHOLD                  110
+#define GCD_DC_THRESHOLD                734
+#define GCDEXT_DC_THRESHOLD             748
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           0  /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD         0  /* always */
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1_THRESHOLD                10
+#define MOD_1_2_THRESHOLD               996
+#define MOD_1_4_THRESHOLD               997
+#define USE_PREINV_DIVREM_1               1  /* preinv always */
+#define USE_PREINV_MOD_1                  1  /* preinv always */
+#define DIVREM_2_THRESHOLD                0  /* preinv always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             18
+#define GET_STR_PRECOMPUTE_THRESHOLD     35
+#define SET_STR_DC_THRESHOLD            321
+#define SET_STR_PRECOMPUTE_THRESHOLD   1057
+
+#define MUL_FFT_TABLE  { 400, 928, 1920, 4608, 14336, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD          416
+#define MUL_FFT_THRESHOLD              5888
+
+#define SQR_FFT_TABLE  { 432, 928, 1664, 4608, 10240, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD          376
+#define SQR_FFT_THRESHOLD              4352
diff --git a/gmp/mpn/arm/invert_limb.asm b/gmp/mpn/arm/invert_limb.asm
index d4c3afe2da..39d3bb2e14 100644
--- a/gmp/mpn/arm/invert_limb.asm
+++ b/gmp/mpn/arm/invert_limb.asm
@@ -1,93 +1,95 @@
 dnl  ARM mpn_invert_limb -- Invert a normalized limb.
 
-dnl  Copyright 2001, 2009, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-ASM_START()
+C INPUT PARAMETERS
+define(`d',`r0')	C number to be inverted
+
+
 PROLOGUE(mpn_invert_limb)
-	LEA(	r2, approx_tab-512)
-	mov	r3, r0, lsr #23
-	mov	r3, r3, asl #1
-	ldrh	r3, [r3, r2]
-	mov	r1, r3, asl #17
-	mul	r12, r3, r3
-	umull	r3, r2, r12, r0
-	sub	r1, r1, r2, asl #1
-	umull	r3, r2, r1, r1
-	umull	r12, r3, r0, r3
-	umull	r2, r12, r0, r2
-	adds	r2, r2, r3
-	adc	r12, r12, #0
-	rsb	r1, r12, r1
-	mvn	r2, r2, lsr #30
-	add	r2, r2, r1, asl #2
-	umull	r12, r3, r0, r2
-	adds	r1, r12, r0
-	adc	r3, r3, r0
-	rsb	r0, r3, r2
-	bx	lr
-EPILOGUE()
+	stmfd	sp!, {r4, lr}
+	mov	r3, d, lsr #23
+	sub	r3, r3, #256
+	add	r2, pc, #invtab-.-8
+	mov	r3, r3, lsl #1
+	ldrh	r1, [r2, r3]		C get initial approximation from table
+	mov	r2, r1, lsl #6		C start iteration 1
+	mul	ip, r2, r2
+	umull	lr, r4, ip, d
+	mov	r2, r4, lsl #1
+	rsb	r2, r2, r1, lsl #23	C iteration 1 complete
+	umull	ip, r3, r2, r2		C start iteration 2
+	umull	lr, r4, r3, d
+	umull	r3, r1, ip, d
+	adds	lr, lr, r1
+	addcs	r4, r4, #1
+	mov	r3, lr, lsr #30
+	orr	r4, r3, r4, lsl #2
+	mov	lr, lr, lsl #2
+	cmn	lr, #1
+	rsc	r2, r4, r2, lsl #2	C iteration 2 complete
+	umull	ip, r1, d, r2		C start adjustment step
+	add	r1, r1, d
+	cmn	r1, #1
+	beq	L(1)
+	adds	ip, ip, d
+	adc	r1, r1, #0
+	add	r2, r2, #1
+L(1):
+	adds	r3, ip, d
+	adcs	r1, r1, #0
+	moveq	r0, r2
+	addne	r0, r2, #1
+	ldmfd	sp!, {r4, pc}
 
-	.section .rodata
-	ALIGN(2)
-approx_tab:
-	.short    0xffc0,0xfec0,0xfdc0,0xfcc0,0xfbc0,0xfac0,0xfa00,0xf900
-	.short    0xf800,0xf700,0xf640,0xf540,0xf440,0xf380,0xf280,0xf180
-	.short    0xf0c0,0xefc0,0xef00,0xee00,0xed40,0xec40,0xeb80,0xeac0
-	.short    0xe9c0,0xe900,0xe840,0xe740,0xe680,0xe5c0,0xe500,0xe400
-	.short    0xe340,0xe280,0xe1c0,0xe100,0xe040,0xdf80,0xdec0,0xde00
-	.short    0xdd40,0xdc80,0xdbc0,0xdb00,0xda40,0xd980,0xd8c0,0xd800
-	.short    0xd740,0xd680,0xd600,0xd540,0xd480,0xd3c0,0xd340,0xd280
-	.short    0xd1c0,0xd140,0xd080,0xcfc0,0xcf40,0xce80,0xcdc0,0xcd40
-	.short    0xcc80,0xcc00,0xcb40,0xcac0,0xca00,0xc980,0xc8c0,0xc840
-	.short    0xc780,0xc700,0xc640,0xc5c0,0xc540,0xc480,0xc400,0xc380
-	.short    0xc2c0,0xc240,0xc1c0,0xc100,0xc080,0xc000,0xbf80,0xbec0
-	.short    0xbe40,0xbdc0,0xbd40,0xbc80,0xbc00,0xbb80,0xbb00,0xba80
-	.short    0xba00,0xb980,0xb900,0xb840,0xb7c0,0xb740,0xb6c0,0xb640
-	.short    0xb5c0,0xb540,0xb4c0,0xb440,0xb3c0,0xb340,0xb2c0,0xb240
-	.short    0xb1c0,0xb140,0xb0c0,0xb080,0xb000,0xaf80,0xaf00,0xae80
-	.short    0xae00,0xad80,0xad40,0xacc0,0xac40,0xabc0,0xab40,0xaac0
-	.short    0xaa80,0xaa00,0xa980,0xa900,0xa8c0,0xa840,0xa7c0,0xa740
-	.short    0xa700,0xa680,0xa600,0xa5c0,0xa540,0xa4c0,0xa480,0xa400
-	.short    0xa380,0xa340,0xa2c0,0xa240,0xa200,0xa180,0xa140,0xa0c0
-	.short    0xa080,0xa000,0x9f80,0x9f40,0x9ec0,0x9e80,0x9e00,0x9dc0
-	.short    0x9d40,0x9d00,0x9c80,0x9c40,0x9bc0,0x9b80,0x9b00,0x9ac0
-	.short    0x9a40,0x9a00,0x9980,0x9940,0x98c0,0x9880,0x9840,0x97c0
-	.short    0x9780,0x9700,0x96c0,0x9680,0x9600,0x95c0,0x9580,0x9500
-	.short    0x94c0,0x9440,0x9400,0x93c0,0x9340,0x9300,0x92c0,0x9240
-	.short    0x9200,0x91c0,0x9180,0x9100,0x90c0,0x9080,0x9000,0x8fc0
-	.short    0x8f80,0x8f40,0x8ec0,0x8e80,0x8e40,0x8e00,0x8d80,0x8d40
-	.short    0x8d00,0x8cc0,0x8c80,0x8c00,0x8bc0,0x8b80,0x8b40,0x8b00
-	.short    0x8a80,0x8a40,0x8a00,0x89c0,0x8980,0x8940,0x88c0,0x8880
-	.short    0x8840,0x8800,0x87c0,0x8780,0x8740,0x8700,0x8680,0x8640
-	.short    0x8600,0x85c0,0x8580,0x8540,0x8500,0x84c0,0x8480,0x8440
-	.short    0x8400,0x8380,0x8340,0x8300,0x82c0,0x8280,0x8240,0x8200
-	.short    0x81c0,0x8180,0x8140,0x8100,0x80c0,0x8080,0x8040,0x8000
-ASM_END()
+invtab:
+	.short	1023,1020,1016,1012,1008,1004,1000,996
+	.short	992,989,985,981,978,974,970,967
+	.short	963,960,956,953,949,946,942,939
+	.short	936,932,929,926,923,919,916,913
+	.short	910,907,903,900,897,894,891,888
+	.short	885,882,879,876,873,870,868,865
+	.short	862,859,856,853,851,848,845,842
+	.short	840,837,834,832,829,826,824,821
+	.short	819,816,814,811,809,806,804,801
+	.short	799,796,794,791,789,787,784,782
+	.short	780,777,775,773,771,768,766,764
+	.short	762,759,757,755,753,751,748,746
+	.short	744,742,740,738,736,734,732,730
+	.short	728,726,724,722,720,718,716,714
+	.short	712,710,708,706,704,702,700,699
+	.short	697,695,693,691,689,688,686,684
+	.short	682,680,679,677,675,673,672,670
+	.short	668,667,665,663,661,660,658,657
+	.short	655,653,652,650,648,647,645,644
+	.short	642,640,639,637,636,634,633,631
+	.short	630,628,627,625,624,622,621,619
+	.short	618,616,615,613,612,611,609,608
+	.short	606,605,604,602,601,599,598,597
+	.short	595,594,593,591,590,589,587,586
+	.short	585,583,582,581,579,578,577,576
+	.short	574,573,572,571,569,568,567,566
+	.short	564,563,562,561,560,558,557,556
+	.short	555,554,553,551,550,549,548,547
+	.short	546,544,543,542,541,540,539,538
+	.short	537,536,534,533,532,531,530,529
+	.short	528,527,526,525,524,523,522,521
+	.short	520,519,518,517,516,515,514,513
+EPILOGUE(mpn_invert_limb)
diff --git a/gmp/mpn/arm/logops_n.asm b/gmp/mpn/arm/logops_n.asm
deleted file mode 100644
index 5a61683fc2..0000000000
--- a/gmp/mpn/arm/logops_n.asm
+++ /dev/null
@@ -1,139 +0,0 @@
-dnl  ARM mpn_and_n, mpn_andn_n. mpn_nand_n, etc.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb             cycles/limb
-C          and andn ior xor         nand iorn nior xnor
-C StrongARM	 ?			 ?
-C XScale	 ?			 ?
-C Cortex-A7	 ?			 ?
-C Cortex-A8	 ?			 ?
-C Cortex-A9	2.5-2.72		2.75-3
-C Cortex-A15	2.25			2.75
-
-C TODO
-C  * It seems that 2.25 c/l and 2.75 c/l is possible for A9.
-C  * Debug popping issue, see comment below.
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`vp', `r2')
-define(`n',  `r3')
-
-define(`POSTOP')
-
-ifdef(`OPERATION_and_n',`
-  define(`func',    `mpn_and_n')
-  define(`LOGOP',   `and	$1, $2, $3')')
-ifdef(`OPERATION_andn_n',`
-  define(`func',    `mpn_andn_n')
-  define(`LOGOP',   `bic	$1, $2, $3')')
-ifdef(`OPERATION_nand_n',`
-  define(`func',    `mpn_nand_n')
-  define(`POSTOP',  `mvn	$1, $1')
-  define(`LOGOP',   `and	$1, $2, $3')')
-ifdef(`OPERATION_ior_n',`
-  define(`func',    `mpn_ior_n')
-  define(`LOGOP',   `orr	$1, $2, $3')')
-ifdef(`OPERATION_iorn_n',`
-  define(`func',    `mpn_iorn_n')
-  define(`POSTOP',  `mvn	$1, $1')
-  define(`LOGOP',   `bic	$1, $3, $2')')
-ifdef(`OPERATION_nior_n',`
-  define(`func',    `mpn_nior_n')
-  define(`POSTOP',  `mvn	$1, $1')
-  define(`LOGOP',   `orr	$1, $2, $3')')
-ifdef(`OPERATION_xor_n',`
-  define(`func',    `mpn_xor_n')
-  define(`LOGOP',   `eor	$1, $2, $3')')
-ifdef(`OPERATION_xnor_n',`
-  define(`func',    `mpn_xnor_n')
-  define(`POSTOP',  `mvn	$1, $1')
-  define(`LOGOP',   `eor	$1, $2, $3')')
-
-MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
-
-ASM_START()
-PROLOGUE(func)
-	push	{ r8, r9, r10 }
-	tst	n, #1
-	beq	L(skip1)
-	ldr	r10, [vp], #4
-	ldr	r12, [up], #4
-	LOGOP(	r12, r12, r10)
-	POSTOP(	r12)
-	str	r12, [rp], #4
-L(skip1):
-	tst	n, #2
-	beq	L(skip2)
-	ldmia	vp!, { r10, r12 }
-	ldmia	up!, { r8, r9 }
-	LOGOP(	r8, r8, r10)
-	LOGOP(	r9, r9, r12)
-	POSTOP(	r8)
-	POSTOP(	r9)
-	stmia	rp!, { r8, r9 }
-L(skip2):
-	bics	n, n, #3
-	beq	L(rtn)
-	push	{ r4, r5, r6, r7 }
-
-	ldmia	vp!, { r8, r9, r10, r12 }
-	b	L(mid)
-
-L(top):	ldmia	vp!, { r8, r9, r10, r12 }
-	POSTOP(	r4)
-	POSTOP(	r5)
-	POSTOP(	r6)
-	POSTOP(	r7)
-	stmia	rp!, { r4, r5, r6, r7 }
-L(mid):	sub	n, n, #4
-	ldmia	up!, { r4, r5, r6, r7 }
-	teq	n, #0
-	LOGOP(	r4, r4, r8)
-	LOGOP(	r5, r5, r9)
-	LOGOP(	r6, r6, r10)
-	LOGOP(	r7, r7, r12)
-	bne	L(top)
-
-	POSTOP(	r4)
-	POSTOP(	r5)
-	POSTOP(	r6)
-	POSTOP(	r7)
-	stmia	rp!, { r4, r5, r6, r7 }
-
-	pop	{ r4, r5, r6, r7 }	C popping r8-r10 here strangely fails
-
-L(rtn):	pop	{ r8, r9, r10 }
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/lshift.asm b/gmp/mpn/arm/lshift.asm
deleted file mode 100644
index 9f777eb4dd..0000000000
--- a/gmp/mpn/arm/lshift.asm
+++ /dev/null
@@ -1,88 +0,0 @@
-dnl  ARM mpn_lshift.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 3.5
-C Cortex-A15	 ?
-
-define(`rp',  `r0')
-define(`up',  `r1')
-define(`n',   `r2')
-define(`cnt', `r3')
-define(`tnc', `r12')
-
-ASM_START()
-PROLOGUE(mpn_lshift)
-	add	up, up, n, lsl #2
-	push	{r4, r6, r7, r8}
-	ldr	r4, [up, #-4]!
-	add	rp, rp, n, lsl #2
-	rsb	tnc, cnt, #32
-
-	mov	r7, r4, lsl cnt
-	tst	n, #1
-	beq	L(evn)			C n even
-
-L(odd):	subs	n, n, #2
-	bcc	L(1)			C n = 1
-	ldr	r8, [up, #-4]!
-	b	L(mid)
-
-L(evn):	ldr	r6, [up, #-4]!
-	subs	n, n, #2
-	beq	L(end)
-
-L(top):	ldr	r8, [up, #-4]!
-	orr	r7, r7, r6, lsr tnc
-	str	r7, [rp, #-4]!
-	mov	r7, r6, lsl cnt
-L(mid):	ldr	r6, [up, #-4]!
-	orr	r7, r7, r8, lsr tnc
-	str	r7, [rp, #-4]!
-	mov	r7, r8, lsl cnt
-	subs	n, n, #2
-	bgt	L(top)
-
-L(end):	orr	r7, r7, r6, lsr tnc
-	str	r7, [rp, #-4]!
-	mov	r7, r6, lsl cnt
-L(1):	str	r7, [rp, #-4]
-	mov	r0, r4, lsr tnc
-	pop	{r4, r6, r7, r8}
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/lshiftc.asm b/gmp/mpn/arm/lshiftc.asm
deleted file mode 100644
index 5f3d6e3f5b..0000000000
--- a/gmp/mpn/arm/lshiftc.asm
+++ /dev/null
@@ -1,95 +0,0 @@
-dnl  ARM mpn_lshiftc.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 4.0
-C Cortex-A15	 ?
-
-define(`rp',  `r0')
-define(`up',  `r1')
-define(`n',   `r2')
-define(`cnt', `r3')
-define(`tnc', `r12')
-
-ASM_START()
-PROLOGUE(mpn_lshiftc)
-	add	up, up, n, lsl #2
-	push	{r4, r6, r7, r8}
-	ldr	r4, [up, #-4]!
-	add	rp, rp, n, lsl #2
-	rsb	tnc, cnt, #32
-	mvn	r6, r4
-
-	mov	r7, r6, lsl cnt
-	tst	n, #1
-	beq	L(evn)			C n even
-
-L(odd):	subs	n, n, #2
-	bcc	L(1)			C n = 1
-	ldr	r8, [up, #-4]!
-	mvn	r8, r8
-	b	L(mid)
-
-L(evn):	ldr	r6, [up, #-4]!
-	mvn	r6, r6
-	subs	n, n, #2
-	beq	L(end)
-
-L(top):	ldr	r8, [up, #-4]!
-	orr	r7, r7, r6, lsr tnc
-	str	r7, [rp, #-4]!
-	mvn	r8, r8
-	mov	r7, r6, lsl cnt
-L(mid):	ldr	r6, [up, #-4]!
-	orr	r7, r7, r8, lsr tnc
-	str	r7, [rp, #-4]!
-	mvn	r6, r6
-	mov	r7, r8, lsl cnt
-	subs	n, n, #2
-	bgt	L(top)
-
-L(end):	orr	r7, r7, r6, lsr tnc
-	str	r7, [rp, #-4]!
-	mov	r7, r6, lsl cnt
-L(1):	mvn	r6, #0
-	orr	r7, r7, r6, lsr tnc
-	str	r7, [rp, #-4]
-	mov	r0, r4, lsr tnc
-	pop	{r4, r6, r7, r8}
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/mod_34lsub1.asm b/gmp/mpn/arm/mod_34lsub1.asm
deleted file mode 100644
index ba3c06d8db..0000000000
--- a/gmp/mpn/arm/mod_34lsub1.asm
+++ /dev/null
@@ -1,121 +0,0 @@
-dnl  ARM mpn_mod_34lsub1 -- remainder modulo 2^24-1.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 1.33
-C Cortex-A15	 1.33
-
-define(`ap',	r0)
-define(`n',	r1)
-
-C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
-
-C TODO
-C  * Write cleverer summation code.
-C  * Consider loading 6 64-bit aligned registers at a time, to approach 1 c/l.
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_mod_34lsub1)
-	push	{ r4, r5, r6, r7 }
-
-	subs	n, n, #3
-	mov	r7, #0
-	blt	L(le2)			C n <= 2
-
-	ldmia	ap!, { r2, r3, r12 }
-	subs	n, n, #3
-	blt	L(sum)			C n <= 5
-	cmn	r0, #0			C clear carry
-	sub	n, n, #3
-	b	L(mid)
-
-L(top):	adcs	r2, r2, r4
-	adcs	r3, r3, r5
-	adcs	r12, r12, r6
-L(mid):	ldmia	ap!, { r4, r5, r6 }
-	tst	n, n
-	sub	n, n, #3
-	bpl	L(top)
-
-	add	n, n, #3
-
-	adcs	r2, r2, r4
-	adcs	r3, r3, r5
-	adcs	r12, r12, r6
-	movcs	r7, #1			C r7 <= 1
-
-L(sum):	cmn	n, #2
-	movlo	r4, #0
-	ldrhs	r4, [ap], #4
-	movls	r5, #0
-	ldrhi	r5, [ap], #4
-
-	adds	r2, r2, r4
-	adcs	r3, r3, r5
-	adcs	r12, r12, #0
-	adc	r7, r7, #0		C r7 <= 2
-
-L(sum2):
-	bic	r0, r2, #0xff000000
-	add	r0, r0, r2, lsr #24
-	add	r0, r0, r7
-
-	mov	r7, r3, lsl #8
-	bic	r1, r7, #0xff000000
-	add	r0, r0, r1
-	add	r0, r0, r3, lsr #16
-
-	mov	r7, r12, lsl #16
-	bic	r1, r7, #0xff000000
-	add	r0, r0, r1
-	add	r0, r0, r12, lsr #8
-
-	pop	{ r4, r5, r6, r7 }
-	bx	lr
-
-L(le2):	cmn	n, #1
-	bne	L(1)
-	ldmia	ap!, { r2, r3 }
-	mov	r12, #0
-	b	L(sum2)
-L(1):	ldr	r2, [ap]
-	bic	r0, r2, #0xff000000
-	add	r0, r0, r2, lsr #24
-	pop	{ r4, r5, r6, r7 }
-	bx	lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/mode1o.asm b/gmp/mpn/arm/mode1o.asm
deleted file mode 100644
index 5e0f78fc8f..0000000000
--- a/gmp/mpn/arm/mode1o.asm
+++ /dev/null
@@ -1,92 +0,0 @@
-dnl  ARM mpn_modexact_1c_odd
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	10
-C Cortex-A15	 9
-
-C Architecture requirements:
-C v5	-
-C v5t	-
-C v5te	-
-C v6	-
-C v6t2	-
-C v7a	-
-
-define(`up', `r0')
-define(`n',  `r1')
-define(`d',  `r2')
-define(`cy', `r3')
-
-	.protected	binvert_limb_table
-ASM_START()
-PROLOGUE(mpn_modexact_1c_odd)
-	stmfd	sp!, {r4, r5}
-
-	LEA(	r4, binvert_limb_table)
-
-	ldr	r5, [up], #4		C up[0]
-
-	and	r12, d, #254
-	ldrb	r4, [r4, r12, lsr #1]
-	mul	r12, r4, r4
-	mul	r12, d, r12
-	rsb	r12, r12, r4, asl #1
-	mul	r4, r12, r12
-	mul	r4, d, r4
-	rsb	r4, r4, r12, asl #1	C r4 = inverse
-
-	subs	n, n, #1		C set carry as side-effect
-	beq	L(end)
-
-L(top):	sbcs	cy, r5, cy
-	ldr	r5, [up], #4
-	sub	n, n, #1
-	mul	r12, r4, cy
-	tst	n, n
-	umull	r12, cy, d, r12
-	bne	L(top)
-
-L(end):	sbcs	cy, r5, cy
-	mul	r12, r4, cy
-	umull	r12, r0, d, r12
-	addcc	r0, r0, #1
-
-	ldmfd	sp!, {r4, r5}
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/mul_1.asm b/gmp/mpn/arm/mul_1.asm
index f7bc1bc386..e867351881 100644
--- a/gmp/mpn/arm/mul_1.asm
+++ b/gmp/mpn/arm/mul_1.asm
@@ -2,43 +2,28 @@ dnl  ARM mpn_mul_1 -- Multiply a limb vector with a limb and store the result
 dnl  in a second limb vector.
 dnl  Contributed by Robert Harley.
 
-dnl  Copyright 1998, 2000, 2001, 2003, 2012 Free Software Foundation, Inc.
+dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C	     cycles/limb
-C StrongARM	6-8
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 4.75
-C Cortex-A15	 ?
+C            cycles/limb
+C StrongARM:     6-8  (dependent on vl value)
+C XScale:        ?-?
 
 C We should rewrite this along the lines of addmul_1.asm.  That should save a
 C cycle on StrongARM, and several cycles on XScale.
@@ -69,10 +54,10 @@ L(skip1):
 	stmia	rp!, { r8, r9 }
 L(skip2):
 	bics	n, n, #3
-	beq	L(rtn)
+	beq	L(return)
 	stmfd	sp!, { r6, r7 }
-
-L(top):	mov	r6, r12
+L(loop):
+	mov	r6, r12
 	ldmia	up!, { r8, r9, r12, lr }
 	ldr	r7, [rp, #12]			C cache allocate
 	mov	r7, #0
@@ -85,10 +70,9 @@ L(top):	mov	r6, r12
 	umlal	r9, r12, lr, vl
 	subs	n, n, #4
 	stmia	rp!, { r6, r7, r8, r9 }
-	bne	L(top)
-
+	bne	L(loop)
 	ldmfd	sp!, { r6, r7 }
-
-L(rtn):	mov	r0, r12
+L(return):
+	mov	r0, r12
 	ldmfd	sp!, { r8, r9, pc }
-EPILOGUE()
+EPILOGUE(mpn_mul_1)
diff --git a/gmp/mpn/arm/neon/README b/gmp/mpn/arm/neon/README
deleted file mode 100644
index 79e3b48ee6..0000000000
--- a/gmp/mpn/arm/neon/README
+++ /dev/null
@@ -1,2 +0,0 @@
-This directory contains Neon code which runs and is efficient on all
-ARM CPUs which support Neon.
diff --git a/gmp/mpn/arm/neon/hamdist.asm b/gmp/mpn/arm/neon/hamdist.asm
deleted file mode 100644
index 232089647d..0000000000
--- a/gmp/mpn/arm/neon/hamdist.asm
+++ /dev/null
@@ -1,194 +0,0 @@
-dnl  ARM Neon mpn_hamdist -- mpn bit hamming distance.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM:	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 1.89
-C Cortex-A15	 0.95
-
-C TODO
-C  * Explore using vldr and vldm.  Does it help on A9?  (These loads do
-C    64-bits-at-a-time, which will mess up in big-endian mode.  Except not for
-C    popcount. Except perhaps also for popcount for the edge loads.)
-C  * Arrange to align the pointer, if that helps performance.  Use the same
-C    read-and-mask trick we use on PCs, for simplicity and performance.  (Sorry
-C    valgrind!)
-C  * Explore if explicit align directives, e.g., "[ptr:128]" help.
-C  * See rth's gmp-devel 2013-02/03 messages about final summation tricks.
-
-C INPUT PARAMETERS
-define(`ap', r0)
-define(`bp', r1)
-define(`n',  r2)
-
-C We sum into 16 16-bit counters in q8,q9, but at the end we sum them and end
-C up with 8 16-bit counters.  Therefore, we can sum to 8(2^16-1) bits, or
-C (8*2^16-1)/32 = 0x3fff limbs.  We use a chunksize close to that, but which
-C can be represented as a 8-bit ARM constant.
-C
-define(`chunksize',0x3f80)
-
-ASM_START()
-PROLOGUE(mpn_hamdist)
-
-	cmp	n, #chunksize
-	bhi	L(gt16k)
-
-L(lt16k):
-	vmov.i64   q8, #0		C clear summation register
-	vmov.i64   q9, #0		C clear summation register
-
-	tst	   n, #1
-	beq	   L(xxx0)
-	vmov.i64   d0, #0
-	vmov.i64   d20, #0
-	sub	   n, n, #1
-	vld1.32   {d0[0]}, [ap]!	C load 1 limb
-	vld1.32   {d20[0]}, [bp]!	C load 1 limb
-	veor	   d0, d0, d20
-	vcnt.8	   d24, d0
-	vpadal.u8  d16, d24		C d16/q8 = 0; could just splat
-
-L(xxx0):tst	   n, #2
-	beq	   L(xx00)
-	sub	   n, n, #2
-	vld1.32    {d0}, [ap]!		C load 2 limbs
-	vld1.32    {d20}, [bp]!		C load 2 limbs
-	veor	   d0, d0, d20
-	vcnt.8	   d24, d0
-	vpadal.u8  d16, d24
-
-L(xx00):tst	   n, #4
-	beq	   L(x000)
-	sub	   n, n, #4
-	vld1.32    {q0}, [ap]!		C load 4 limbs
-	vld1.32    {q10}, [bp]!		C load 4 limbs
-	veor	   q0, q0, q10
-	vcnt.8	   q12, q0
-	vpadal.u8  q8, q12
-
-L(x000):tst	   n, #8
-	beq	   L(0000)
-
-	subs	   n, n, #8
-	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
-	vld1.32    {q10,q11}, [bp]!	C load 8 limbs
-	bls	   L(sum)
-
-L(gt8):	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
-	vld1.32    {q14,q15}, [bp]!	C load 8 limbs
-	veor	   q0, q0, q10
-	veor	   q1, q1, q11
-	sub	   n, n, #8
-	vcnt.8	   q12, q0
-	vcnt.8	   q13, q1
-	b	   L(mid)
-
-L(0000):subs	   n, n, #16
-	blo	   L(e0)
-
-	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
-	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
-	vld1.32    {q14,q15}, [bp]!	C load 8 limbs
-	vld1.32    {q10,q11}, [bp]!	C load 8 limbs
-	veor	   q2, q2, q14
-	veor	   q3, q3, q15
-	vcnt.8	   q12, q2
-	vcnt.8	   q13, q3
-	subs	   n, n, #16
-	blo	   L(end)
-
-L(top):	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
-	vld1.32    {q14,q15}, [bp]!	C load 8 limbs
-	veor	   q0, q0, q10
-	veor	   q1, q1, q11
-	vpadal.u8  q8, q12
-	vcnt.8	   q12, q0
-	vpadal.u8  q9, q13
-	vcnt.8	   q13, q1
-L(mid):	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
-	vld1.32    {q10,q11}, [bp]!	C load 8 limbs
-	veor	   q2, q2, q14
-	veor	   q3, q3, q15
-	subs	   n, n, #16
-	vpadal.u8  q8, q12
-	vcnt.8	   q12, q2
-	vpadal.u8  q9, q13
-	vcnt.8	   q13, q3
-	bhs	   L(top)
-
-L(end):	vpadal.u8  q8, q12
-	vpadal.u8  q9, q13
-L(sum):	veor	   q0, q0, q10
-	veor	   q1, q1, q11
-	vcnt.8	   q12, q0
-	vcnt.8	   q13, q1
-	vpadal.u8  q8, q12
-	vpadal.u8  q9, q13
-	vadd.i16   q8, q8, q9
-					C we have 8 16-bit counts
-L(e0):	vpaddl.u16 q8, q8		C we have 4 32-bit counts
-	vpaddl.u32 q8, q8		C we have 2 64-bit counts
-	vmov.32    r0, d16[0]
-	vmov.32    r1, d17[0]
-	add	   r0, r0, r1
-	bx	lr
-
-C Code for large count.  Splits operand and calls above code.
-define(`ap2', r5)
-define(`bp2', r6)
-L(gt16k):
-	push	{r4,r5,r6,r14}
-	mov	ap2, ap
-	mov	bp2, bp
-	mov	r3, n			C full count
-	mov	r4, #0			C total sum
-
-1:	mov	n, #chunksize		C count for this invocation
-	bl	L(lt16k)		C could jump deep inside code
-	add	ap2, ap2, #chunksize*4	C point at next chunk
-	add	bp2, bp2, #chunksize*4	C point at next chunk
-	add	r4, r4, r0
-	mov	ap, ap2			C put chunk pointer in place for call
-	mov	bp, bp2			C put chunk pointer in place for call
-	sub	r3, r3, #chunksize
-	cmp	r3, #chunksize
-	bhi	1b
-
-	mov	n, r3			C count for final invocation
-	bl	L(lt16k)
-	add	r0, r4, r0
-	pop	{r4,r5,r6,pc}
-EPILOGUE()
diff --git a/gmp/mpn/arm/neon/lorrshift.asm b/gmp/mpn/arm/neon/lorrshift.asm
deleted file mode 100644
index 3d6253fd49..0000000000
--- a/gmp/mpn/arm/neon/lorrshift.asm
+++ /dev/null
@@ -1,279 +0,0 @@
-dnl  ARM Neon mpn_lshift and mpn_rshift.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb     cycles/limb     cycles/limb      good
-C              aligned	      unaligned	      best seen	     for cpu?
-C StrongARM	 -		 -
-C XScale	 -		 -
-C Cortex-A7	 ?		 ?
-C Cortex-A8	 ?		 ?
-C Cortex-A9	 3		 3				Y
-C Cortex-A15	 1.5		 1.5				Y
-
-
-C We read 64 bits at a time at 32-bit aligned addresses, and except for the
-C first and last store, we write using 64-bit aligned addresses.  All shifting
-C is done on 64-bit words in 'extension' registers.
-C
-C It should be possible to read also using 64-bit alignment, by manipulating
-C the shift count for unaligned operands.  Not done, since it does not seem to
-C matter for A9 or A15.
-C
-C This will not work in big-endian mode.
-
-C TODO
-C  * Try using 128-bit operations.  Note that Neon lacks pure 128-bit shifts,
-C    which might make it tricky.
-C  * Clean up and simplify.
-C  * Consider sharing most of the code for lshift and rshift, since the feed-in code,
-C    the loop, and most of the wind-down code are identical.
-C  * Replace the basecase code with code using 'extension' registers.
-C  * Optimise.  It is not clear that this loop insn permutation is optimal for
-C    either A9 or A15.
-
-C INPUT PARAMETERS
-define(`rp',  `r0')
-define(`ap',  `r1')
-define(`n',   `r2')
-define(`cnt', `r3')
-
-ifdef(`OPERATION_lshift',`
-	define(`IFLSH', `$1')
-	define(`IFRSH', `')
-	define(`X',`0')
-	define(`Y',`1')
-	define(`func',`mpn_lshift')
-')
-ifdef(`OPERATION_rshift',`
-	define(`IFLSH', `')
-	define(`IFRSH', `$1')
-	define(`X',`1')
-	define(`Y',`0')
-	define(`func',`mpn_rshift')
-')
-
-MULFUNC_PROLOGUE(mpn_lshift mpn_rshift)
-
-ASM_START()
-	TEXT
-	ALIGN(64)
-PROLOGUE(func)
-IFLSH(`	mov	r12, n, lsl #2	')
-IFLSH(`	add	rp, rp, r12	')
-IFLSH(`	add	ap, ap, r12	')
-
-	cmp	n, #4			C SIMD code n limit
-	ble	L(base)
-
-ifdef(`OPERATION_lshift',`
-	vdup.32	d6, r3			C left shift count is positive
-	sub	r3, r3, #64		C right shift count is negative
-	vdup.32	d7, r3
-	mov	r12, #-8')		C lshift pointer update offset
-ifdef(`OPERATION_rshift',`
-	rsb	r3, r3, #0		C right shift count is negative
-	vdup.32	d6, r3
-	add	r3, r3, #64		C left shift count is positive
-	vdup.32	d7, r3
-	mov	r12, #8')		C rshift pointer update offset
-
-IFLSH(`	sub	ap, ap, #8	')
-	vld1.32	{d19}, [ap], r12	C load initial 2 limbs
-	vshl.u64 d18, d19, d7		C retval
-
-	tst	rp, #4			C is rp 64-bit aligned already?
-	beq	L(rp_aligned)		C yes, skip
-IFLSH(`	add	ap, ap, #4	')	C move back ap pointer
-IFRSH(`	sub	ap, ap, #4	')	C move back ap pointer
-	vshl.u64 d4, d19, d6
-	sub	n, n, #1		C first limb handled
-IFLSH(`	sub	 rp, rp, #4	')
-	vst1.32	 {d4[Y]}, [rp]IFRSH(!)	C store first limb, rp gets aligned
-	vld1.32	 {d19}, [ap], r12	C load ap[1] and ap[2]
-
-L(rp_aligned):
-IFLSH(`	sub	rp, rp, #8	')
-	subs	n, n, #6
-	blt	L(two_or_three_more)
-	tst	n, #2
-	beq	L(2)
-
-L(1):	vld1.32	 {d17}, [ap], r12
-	vshl.u64 d5, d19, d6
-	vld1.32	 {d16}, [ap], r12
-	vshl.u64 d0, d17, d7
-	vshl.u64 d4, d17, d6
-	sub	n, n, #2
-	b	 L(mid)
-
-L(2):	vld1.32	 {d16}, [ap], r12
-	vshl.u64 d4, d19, d6
-	vld1.32	 {d17}, [ap], r12
-	vshl.u64 d1, d16, d7
-	vshl.u64 d5, d16, d6
-	subs	n, n, #4
-	blt	L(end)
-
-L(top):	vld1.32	 {d16}, [ap], r12
-	vorr	 d2, d4, d1
-	vshl.u64 d0, d17, d7
-	vshl.u64 d4, d17, d6
-	vst1.32	 {d2}, [rp:64], r12
-L(mid):	vld1.32	 {d17}, [ap], r12
-	vorr	 d3, d5, d0
-	vshl.u64 d1, d16, d7
-	vshl.u64 d5, d16, d6
-	vst1.32	 {d3}, [rp:64], r12
-	subs	n, n, #4
-	bge	L(top)
-
-L(end):	tst	 n, #1
-	beq	 L(evn)
-
-	vorr	 d2, d4, d1
-	vst1.32	 {d2}, [rp:64], r12
-	b	 L(cj1)
-
-L(evn):	vorr	 d2, d4, d1
-	vshl.u64 d0, d17, d7
-	vshl.u64 d16, d17, d6
-	vst1.32	 {d2}, [rp:64], r12
-	vorr	 d2, d5, d0
-	b	 L(cj2)
-
-C Load last 2 - 3 limbs, store last 4 - 5 limbs
-L(two_or_three_more):
-	tst	n, #1
-	beq	L(l2)
-
-L(l3):	vshl.u64 d5, d19, d6
-	vld1.32	 {d17}, [ap], r12
-L(cj1):	veor	 d16, d16, d16
-IFLSH(`	add	 ap, ap, #4	')
-	vld1.32	 {d16[Y]}, [ap], r12
-	vshl.u64 d0, d17, d7
-	vshl.u64 d4, d17, d6
-	vorr	 d3, d5, d0
-	vshl.u64 d1, d16, d7
-	vshl.u64 d5, d16, d6
-	vst1.32	 {d3}, [rp:64], r12
-	vorr	 d2, d4, d1
-	vst1.32	 {d2}, [rp:64], r12
-IFLSH(`	add	 rp, rp, #4	')
-	vst1.32	 {d5[Y]}, [rp]
-	vmov.32	 r0, d18[X]
-	bx	lr
-
-L(l2):	vld1.32	 {d16}, [ap], r12
-	vshl.u64 d4, d19, d6
-	vshl.u64 d1, d16, d7
-	vshl.u64 d16, d16, d6
-	vorr	 d2, d4, d1
-L(cj2):	vst1.32	 {d2}, [rp:64], r12
-	vst1.32	 {d16}, [rp]
-	vmov.32	 r0, d18[X]
-	bx	lr
-
-
-define(`tnc', `r12')
-L(base):
-	push	{r4, r6, r7, r8}
-ifdef(`OPERATION_lshift',`
-	ldr	r4, [ap, #-4]!
-	rsb	tnc, cnt, #32
-
-	mov	r7, r4, lsl cnt
-	tst	n, #1
-	beq	L(ev)			C n even
-
-L(od):	subs	n, n, #2
-	bcc	L(ed1)			C n = 1
-	ldr	r8, [ap, #-4]!
-	b	L(md)			C n = 3
-
-L(ev):	ldr	r6, [ap, #-4]!
-	subs	n, n, #2
-	beq	L(ed)			C n = 3
-					C n = 4
-L(tp):	ldr	r8, [ap, #-4]!
-	orr	r7, r7, r6, lsr tnc
-	str	r7, [rp, #-4]!
-	mov	r7, r6, lsl cnt
-L(md):	ldr	r6, [ap, #-4]!
-	orr	r7, r7, r8, lsr tnc
-	str	r7, [rp, #-4]!
-	mov	r7, r8, lsl cnt
-
-L(ed):	orr	r7, r7, r6, lsr tnc
-	str	r7, [rp, #-4]!
-	mov	r7, r6, lsl cnt
-L(ed1):	str	r7, [rp, #-4]
-	mov	r0, r4, lsr tnc
-')
-ifdef(`OPERATION_rshift',`
-	ldr	r4, [ap]
-	rsb	tnc, cnt, #32
-
-	mov	r7, r4, lsr cnt
-	tst	n, #1
-	beq	L(ev)			C n even
-
-L(od):	subs	n, n, #2
-	bcc	L(ed1)			C n = 1
-	ldr	r8, [ap, #4]!
-	b	L(md)			C n = 3
-
-L(ev):	ldr	r6, [ap, #4]!
-	subs	n, n, #2
-	beq	L(ed)			C n = 2
-					C n = 4
-
-L(tp):	ldr	r8, [ap, #4]!
-	orr	r7, r7, r6, lsl tnc
-	str	r7, [rp], #4
-	mov	r7, r6, lsr cnt
-L(md):	ldr	r6, [ap, #4]!
-	orr	r7, r7, r8, lsl tnc
-	str	r7, [rp], #4
-	mov	r7, r8, lsr cnt
-
-L(ed):	orr	r7, r7, r6, lsl tnc
-	str	r7, [rp], #4
-	mov	r7, r6, lsr cnt
-L(ed1):	str	r7, [rp], #4
-	mov	r0, r4, lsl tnc
-')
-	pop	{r4, r6, r7, r8}
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/neon/lshiftc.asm b/gmp/mpn/arm/neon/lshiftc.asm
deleted file mode 100644
index 9e4096256d..0000000000
--- a/gmp/mpn/arm/neon/lshiftc.asm
+++ /dev/null
@@ -1,257 +0,0 @@
-dnl  ARM Neon mpn_lshiftc.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb     cycles/limb     cycles/limb      good
-C              aligned	      unaligned	      best seen	     for cpu?
-C StrongARM	 -		 -
-C XScale	 -		 -
-C Cortex-A7	 ?		 ?
-C Cortex-A8	 ?		 ?
-C Cortex-A9	 3.5		 3.5				Y
-C Cortex-A15	 1.75		 1.75				Y
-
-
-C We read 64 bits at a time at 32-bit aligned addresses, and except for the
-C first and last store, we write using 64-bit aligned addresses.  All shifting
-C is done on 64-bit words in 'extension' registers.
-C
-C It should be possible to read also using 64-bit alignment, by manipulating
-C the shift count for unaligned operands.  Not done, since it does not seem to
-C matter for A9 or A15.
-C
-C This will not work in big-endian mode.
-
-C TODO
-C  * Try using 128-bit operations.  Note that Neon lacks pure 128-bit shifts,
-C    which might make it tricky.
-C  * Clean up and simplify.
-C  * Consider sharing most of the code for lshift and rshift, since the feed-in
-C    code, the loop, and most of the wind-down code are identical.
-C  * Replace the basecase code with code using 'extension' registers.
-C  * Optimise.  It is not clear that this loop insn permutation is optimal for
-C    either A9 or A15.
-
-C INPUT PARAMETERS
-define(`rp',  `r0')
-define(`ap',  `r1')
-define(`n',   `r2')
-define(`cnt', `r3')
-
-	define(`IFLSH', `$1')
-	define(`IFRSH', `')
-	define(`X',`0')
-	define(`Y',`1')
-	define(`func',`mpn_lshiftc')
-define(`OPERATION_lshiftc',1)
-
-ASM_START()
-	TEXT
-	ALIGN(64)
-PROLOGUE(mpn_lshiftc)
-IFLSH(`	mov	r12, n, lsl #2	')
-IFLSH(`	add	rp, rp, r12	')
-IFLSH(`	add	ap, ap, r12	')
-
-	cmp	n, #4			C SIMD code n limit
-	ble	L(base)
-
-ifdef(`OPERATION_lshiftc',`
-	vdup.32	d6, r3			C left shift count is positive
-	sub	r3, r3, #64		C right shift count is negative
-	vdup.32	d7, r3
-	mov	r12, #-8')		C lshift pointer update offset
-ifdef(`OPERATION_rshift',`
-	rsb	r3, r3, #0		C right shift count is negative
-	vdup.32	d6, r3
-	add	r3, r3, #64		C left shift count is positive
-	vdup.32	d7, r3
-	mov	r12, #8')		C rshift pointer update offset
-
-IFLSH(`	sub	ap, ap, #8	')
-	vld1.32	{d19}, [ap], r12	C load initial 2 limbs
-	vshl.u64 d18, d19, d7		C retval
-
-	tst	rp, #4			C is rp 64-bit aligned already?
-	beq	L(rp_aligned)		C yes, skip
-	vmvn	 d19, d19
-IFLSH(`	add	ap, ap, #4	')	C move back ap pointer
-IFRSH(`	sub	ap, ap, #4	')	C move back ap pointer
-	vshl.u64 d4, d19, d6
-	sub	n, n, #1		C first limb handled
-IFLSH(`	sub	 rp, rp, #4	')
-	vst1.32	 {d4[Y]}, [rp]IFRSH(!)	C store first limb, rp gets aligned
-	vld1.32	 {d19}, [ap], r12	C load ap[1] and ap[2]
-
-L(rp_aligned):
-IFLSH(`	sub	rp, rp, #8	')
-	subs	n, n, #6
-	vmvn	 d19, d19
-	blt	L(two_or_three_more)
-	tst	n, #2
-	beq	L(2)
-
-L(1):	vld1.32	 {d17}, [ap], r12
-	vshl.u64 d5, d19, d6
-	vmvn	 d17, d17
-	vld1.32	 {d16}, [ap], r12
-	vshl.u64 d0, d17, d7
-	vshl.u64 d4, d17, d6
-	sub	n, n, #2
-	b	 L(mid)
-
-L(2):	vld1.32	 {d16}, [ap], r12
-	vshl.u64 d4, d19, d6
-	vmvn	 d16, d16
-	vld1.32	 {d17}, [ap], r12
-	vshl.u64 d1, d16, d7
-	vshl.u64 d5, d16, d6
-	subs	n, n, #4
-	blt	L(end)
-
-L(top):	vmvn	 d17, d17
-	vld1.32	 {d16}, [ap], r12
-	vorr	 d2, d4, d1
-	vshl.u64 d0, d17, d7
-	vshl.u64 d4, d17, d6
-	vst1.32	 {d2}, [rp:64], r12
-L(mid):	vmvn	 d16, d16
-	vld1.32	 {d17}, [ap], r12
-	vorr	 d3, d5, d0
-	vshl.u64 d1, d16, d7
-	vshl.u64 d5, d16, d6
-	vst1.32	 {d3}, [rp:64], r12
-	subs	n, n, #4
-	bge	L(top)
-
-L(end):	tst	 n, #1
-	beq	 L(evn)
-
-	vorr	 d2, d4, d1
-	vst1.32	 {d2}, [rp:64], r12
-	b	 L(cj1)
-
-L(evn):	vmvn	 d17, d17
-	vorr	 d2, d4, d1
-	vshl.u64 d0, d17, d7
-	vshl.u64 d4, d17, d6
-	vst1.32	 {d2}, [rp:64], r12
-	vmvn.u8	 d17, #0
-	vorr	 d2, d5, d0
-	vshl.u64 d0, d17, d7
-	vorr	 d3, d4, d0
-	b	 L(cj2)
-
-C Load last 2 - 3 limbs, store last 4 - 5 limbs
-L(two_or_three_more):
-	tst	n, #1
-	beq	L(l2)
-
-L(l3):	vshl.u64 d5, d19, d6
-	vld1.32	 {d17}, [ap], r12
-L(cj1):	vmov.u8	 d16, #0
-IFLSH(`	add	 ap, ap, #4	')
-	vmvn	 d17, d17
-	vld1.32	 {d16[Y]}, [ap], r12
-	vshl.u64 d0, d17, d7
-	vshl.u64 d4, d17, d6
-	vmvn	 d16, d16
-	vorr	 d3, d5, d0
-	vshl.u64 d1, d16, d7
-	vshl.u64 d5, d16, d6
-	vst1.32	 {d3}, [rp:64], r12
-	vorr	 d2, d4, d1
-	vst1.32	 {d2}, [rp:64], r12
-IFLSH(`	add	 rp, rp, #4	')
-	vst1.32	 {d5[Y]}, [rp]
-	vmov.32	 r0, d18[X]
-	bx	lr
-
-L(l2):	vld1.32	 {d16}, [ap], r12
-	vshl.u64 d4, d19, d6
-	vmvn	 d16, d16
-	vshl.u64 d1, d16, d7
-	vshl.u64 d5, d16, d6
-	vmvn.u8	 d17, #0
-	vorr	 d2, d4, d1
-	vshl.u64 d0, d17, d7
-	vorr	 d3, d5, d0
-L(cj2):	vst1.32	 {d2}, [rp:64], r12
-	vst1.32	 {d3}, [rp]
-	vmov.32	 r0, d18[X]
-	bx	lr
-
-
-define(`tnc', `r12')
-L(base):
-	push	{r4, r6, r7, r8}
-	ldr	r4, [ap, #-4]!
-	rsb	tnc, cnt, #32
-	mvn	r6, r4
-
-	mov	r7, r6, lsl cnt
-	tst	n, #1
-	beq	L(ev)			C n even
-
-L(od):	subs	n, n, #2
-	bcc	L(ed1)			C n = 1
-	ldr	r8, [ap, #-4]!
-	mvn	r8, r8
-	b	L(md)			C n = 3
-
-L(ev):	ldr	r6, [ap, #-4]!
-	mvn	r6, r6
-	subs	n, n, #2
-	beq	L(ed)			C n = 3
-					C n = 4
-L(tp):	ldr	r8, [ap, #-4]!
-	orr	r7, r7, r6, lsr tnc
-	str	r7, [rp, #-4]!
-	mvn	r8, r8
-	mov	r7, r6, lsl cnt
-L(md):	ldr	r6, [ap, #-4]!
-	orr	r7, r7, r8, lsr tnc
-	str	r7, [rp, #-4]!
-	mvn	r6, r6
-	mov	r7, r8, lsl cnt
-
-L(ed):	orr	r7, r7, r6, lsr tnc
-	str	r7, [rp, #-4]!
-	mov	r7, r6, lsl cnt
-L(ed1):	mvn	r6, #0
-	orr	r7, r7, r6, lsr tnc
-	str	r7, [rp, #-4]
-	mov	r0, r4, lsr tnc
-	pop	{r4, r6, r7, r8}
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/neon/popcount.asm b/gmp/mpn/arm/neon/popcount.asm
deleted file mode 100644
index 2f8f9afc8d..0000000000
--- a/gmp/mpn/arm/neon/popcount.asm
+++ /dev/null
@@ -1,166 +0,0 @@
-dnl  ARM Neon mpn_popcount -- mpn bit population count.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM:	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 1.125
-C Cortex-A15	 0.56
-
-C TODO
-C  * Explore using vldr and vldm.  Does it help on A9?  (These loads do
-C    64-bits-at-a-time, which will mess up in big-endian mode.  Except not for
-C    popcount. Except perhaps also for popcount for the edge loads.)
-C  * Arrange to align the pointer, if that helps performance.  Use the same
-C    read-and-mask trick we use on PCs, for simplicity and performance.  (Sorry
-C    valgrind!)
-C  * Explore if explicit align directives, e.g., "[ptr:128]" help.
-C  * See rth's gmp-devel 2013-02/03 messages about final summation tricks.
-
-C INPUT PARAMETERS
-define(`ap', r0)
-define(`n',  r1)
-
-C We sum into 16 16-bit counters in q8,q9, but at the end we sum them and end
-C up with 8 16-bit counters.  Therefore, we can sum to 8(2^16-1) bits, or
-C (8*2^16-1)/32 = 0x3fff limbs.  We use a chunksize close to that, but which
-C can be represented as a 8-bit ARM constant.
-C
-define(`chunksize',0x3f80)
-
-ASM_START()
-PROLOGUE(mpn_popcount)
-
-	cmp	n, #chunksize
-	bhi	L(gt16k)
-
-L(lt16k):
-	vmov.i64   q8, #0		C clear summation register
-	vmov.i64   q9, #0		C clear summation register
-
-	tst	   n, #1
-	beq	   L(xxx0)
-	vmov.i64   d0, #0
-	sub	   n, n, #1
-	vld1.32   {d0[0]}, [ap]!	C load 1 limb
-	vcnt.8	   d24, d0
-	vpadal.u8  d16, d24		C d16/q8 = 0; could just splat
-
-L(xxx0):tst	   n, #2
-	beq	   L(xx00)
-	sub	   n, n, #2
-	vld1.32    {d0}, [ap]!		C load 2 limbs
-	vcnt.8	   d24, d0
-	vpadal.u8  d16, d24
-
-L(xx00):tst	   n, #4
-	beq	   L(x000)
-	sub	   n, n, #4
-	vld1.32    {q0}, [ap]!		C load 4 limbs
-	vcnt.8	   q12, q0
-	vpadal.u8  q8, q12
-
-L(x000):tst	   n, #8
-	beq	   L(0000)
-
-	subs	   n, n, #8
-	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
-	bls	   L(sum)
-
-L(gt8):	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
-	sub	   n, n, #8
-	vcnt.8	   q12, q0
-	vcnt.8	   q13, q1
-	b	   L(mid)
-
-L(0000):subs	   n, n, #16
-	blo	   L(e0)
-
-	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
-	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
-	vcnt.8	   q12, q2
-	vcnt.8	   q13, q3
-	subs	   n, n, #16
-	blo	   L(end)
-
-L(top):	vld1.32    {q2,q3}, [ap]!	C load 8 limbs
-	vpadal.u8  q8, q12
-	vcnt.8	   q12, q0
-	vpadal.u8  q9, q13
-	vcnt.8	   q13, q1
-L(mid):	vld1.32    {q0,q1}, [ap]!	C load 8 limbs
-	subs	   n, n, #16
-	vpadal.u8  q8, q12
-	vcnt.8	   q12, q2
-	vpadal.u8  q9, q13
-	vcnt.8	   q13, q3
-	bhs	   L(top)
-
-L(end):	vpadal.u8  q8, q12
-	vpadal.u8  q9, q13
-L(sum):	vcnt.8	   q12, q0
-	vcnt.8	   q13, q1
-	vpadal.u8  q8, q12
-	vpadal.u8  q9, q13
-	vadd.i16   q8, q8, q9
-					C we have 8 16-bit counts
-L(e0):	vpaddl.u16 q8, q8		C we have 4 32-bit counts
-	vpaddl.u32 q8, q8		C we have 2 64-bit counts
-	vmov.32    r0, d16[0]
-	vmov.32    r1, d17[0]
-	add	   r0, r0, r1
-	bx	lr
-
-C Code for large count.  Splits operand and calls above code.
-define(`ap2', r2)			C caller-saves reg not used above
-L(gt16k):
-	push	{r4,r14}
-	mov	ap2, ap
-	mov	r3, n			C full count
-	mov	r4, #0			C total sum
-
-1:	mov	n, #chunksize		C count for this invocation
-	bl	L(lt16k)		C could jump deep inside code
-	add	ap2, ap2, #chunksize*4	C point at next chunk
-	add	r4, r4, r0
-	mov	ap, ap2			C put chunk pointer in place for call
-	sub	r3, r3, #chunksize
-	cmp	r3, #chunksize
-	bhi	1b
-
-	mov	n, r3			C count for final invocation
-	bl	L(lt16k)
-	add	r0, r4, r0
-	pop	{r4,pc}
-EPILOGUE()
diff --git a/gmp/mpn/arm/neon/sec_tabselect.asm b/gmp/mpn/arm/neon/sec_tabselect.asm
deleted file mode 100644
index 69fceb0063..0000000000
--- a/gmp/mpn/arm/neon/sec_tabselect.asm
+++ /dev/null
@@ -1,140 +0,0 @@
-dnl  ARM Neon mpn_sec_tabselect.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb
-C StrongARM	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 1.15
-C Cortex-A15	 0.65
-
-define(`rp',     `r0')
-define(`tp',     `r1')
-define(`n',      `r2')
-define(`nents',  `r3')
-C define(`which',  on stack)
-
-define(`i',      `r4')
-define(`j',      `r5')
-
-define(`maskq',  `q10')
-define(`maskd',  `d20')
-
-ASM_START()
-PROLOGUE(mpn_sec_tabselect)
-	push	{r4-r5}
-
-	add	  r4, sp, #8
-	vld1.32	  {d30[], d31[]}, [r4]	C 4 `which' copies
-	vmov.i32  q14, #1		C 4 copies of 1
-
-	subs	j, n, #8
-	bmi	L(outer_end)
-
-L(outer_top):
-	mov	  i, nents
-	mov	  r12, tp		C preserve tp
-	veor	  q13, q13, q13		C 4 counter copies
-	veor	  q2, q2, q2
-	veor	  q3, q3, q3
-	ALIGN(16)
-L(top):	vceq.i32  maskq, q13, q15	C compare idx copies to `which' copies
-	vld1.32	  {q0,q1}, [tp]
-	vadd.i32  q13, q13, q14
-	vbit	  q2, q0, maskq
-	vbit	  q3, q1, maskq
-	add	  tp, tp, n, lsl #2
-	subs	  i, i, #1
-	bne	  L(top)
-	vst1.32	  {q2,q3}, [rp]!
-	add	  tp, r12, #32		C restore tp, point to next slice
-	subs	  j, j, #8
-	bpl	  L(outer_top)
-L(outer_end):
-
-	tst	  n, #4
-	beq	  L(b0xx)
-L(b1xx):mov	  i, nents
-	mov	  r12, tp
-	veor	  q13, q13, q13
-	veor	  q2, q2, q2
-	ALIGN(16)
-L(tp4):	vceq.i32  maskq, q13, q15
-	vld1.32	  {q0}, [tp]
-	vadd.i32  q13, q13, q14
-	vbit	  q2, q0, maskq
-	add	  tp, tp, n, lsl #2
-	subs	  i, i, #1
-	bne	  L(tp4)
-	vst1.32	  {q2}, [rp]!
-	add	  tp, r12, #16
-
-L(b0xx):tst	  n, #2
-	beq	  L(b00x)
-L(b01x):mov	  i, nents
-	mov	  r12, tp
-	veor	  d26, d26, d26
-	veor	  d4, d4, d4
-	ALIGN(16)
-L(tp2):	vceq.i32  maskd, d26, d30
-	vld1.32	  {d0}, [tp]
-	vadd.i32  d26, d26, d28
-	vbit	  d4, d0, maskd
-	add	  tp, tp, n, lsl #2
-	subs	  i, i, #1
-	bne	  L(tp2)
-	vst1.32	  {d4}, [rp]!
-	add	  tp, r12, #8
-
-L(b00x):tst	  n, #1
-	beq	  L(b000)
-L(b001):mov	  i, nents
-	mov	  r12, tp
-	veor	  d26, d26, d26
-	veor	  d4, d4, d4
-	ALIGN(16)
-L(tp1):	vceq.i32  maskd, d26, d30
-	vld1.32	  {d0[0]}, [tp]
-	vadd.i32  d26, d26, d28
-	vbit	  d4, d0, maskd
-	add	  tp, tp, n, lsl #2
-	subs	  i, i, #1
-	bne	  L(tp1)
-	vst1.32	  {d4[0]}, [rp]
-
-L(b000):pop	{r4-r5}
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/rsh1aors_n.asm b/gmp/mpn/arm/rsh1aors_n.asm
deleted file mode 100644
index 95c1f79ad9..0000000000
--- a/gmp/mpn/arm/rsh1aors_n.asm
+++ /dev/null
@@ -1,124 +0,0 @@
-dnl  ARM mpn_rsh1add_n and mpn_rsh1sub_n.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	3.64-3.7
-C Cortex-A15	 2.5
-
-C TODO
-C  * Not optimised.
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`vp', `r2')
-define(`n',  `r3')
-
-ifdef(`OPERATION_rsh1add_n', `
-  define(`ADDSUB',	adds)
-  define(`ADDSUBC',	adcs)
-  define(`RSTCY',	`cmn	$1, $1')
-  define(`func',	mpn_rsh1add_n)
-  define(`func_nc',	mpn_rsh1add_nc)')
-ifdef(`OPERATION_rsh1sub_n', `
-  define(`ADDSUB',	subs)
-  define(`ADDSUBC',	sbcs)
-  define(`RSTCY',
-	`mvn	$2, #0x80000000
-	cmp	$2, $1')
-  define(`func',	mpn_rsh1sub_n)
-  define(`func_nc',	mpn_rsh1sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
-
-ASM_START()
-PROLOGUE(func)
-	push	{r4-r11}
-	ldr	r4, [up], #4
-	ldr	r8, [vp], #4
-	ADDSUB	r4, r4, r8
-	movs	r12, r7, rrx
-	and	r11, r4, #1	C return value
-	subs	n, n, #4
-	blo	L(end)
-
-L(top):	ldmia	up!, {r5,r6,r7}
-	ldmia	vp!, {r8,r9,r10}
-	cmn	r12, r12
-	ADDSUBC	r5, r5, r8
-	ADDSUBC	r6, r6, r9
-	ADDSUBC	r7, r7, r10
-	movs	r12, r7, rrx
-	movs	r6, r6, rrx
-	movs	r5, r5, rrx
-	movs	r4, r4, rrx
-	subs	n, n, #3
-	stmia	rp!, {r4,r5,r6}
-	mov	r4, r7
-	bhs	L(top)
-
-L(end):	cmn	n, #2
-	bls	L(e2)
-	ldm	up, {r5,r6}
-	ldm	vp, {r8,r9}
-	cmn	r12, r12
-	ADDSUBC	r5, r5, r8
-	ADDSUBC	r6, r6, r9
-	movs	r12, r6, rrx
-	movs	r5, r5, rrx
-	movs	r4, r4, rrx
-	stmia	rp!, {r4,r5}
-	mov	r4, r6
-	b	L(e1)
-
-L(e2):	bne	L(e1)
-	ldr	r5, [up, #0]
-	ldr	r8, [vp, #0]
-	cmn	r12, r12
-	ADDSUBC	r5, r5, r8
-	movs	r12, r5, rrx
-	movs	r4, r4, rrx
-	str	r4, [rp], #4
-	mov	r4, r5
-
-L(e1):	RSTCY(	r12, r1)
-	mov	r4, r4, rrx
-	str	r4, [rp, #0]
-	mov	r0, r11
-	pop	{r4-r11}
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/rshift.asm b/gmp/mpn/arm/rshift.asm
deleted file mode 100644
index 84728d038a..0000000000
--- a/gmp/mpn/arm/rshift.asm
+++ /dev/null
@@ -1,86 +0,0 @@
-dnl  ARM mpn_rshift.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 3.5
-C Cortex-A15	 ?
-
-define(`rp',  `r0')
-define(`up',  `r1')
-define(`n',   `r2')
-define(`cnt', `r3')
-define(`tnc', `r12')
-
-ASM_START()
-PROLOGUE(mpn_rshift)
-	push	{r4, r6, r7, r8}
-	ldr	r4, [up]
-	rsb	tnc, cnt, #32
-
-	mov	r7, r4, lsr cnt
-	tst	n, #1
-	beq	L(evn)			C n even
-
-L(odd):	subs	n, n, #2
-	bcc	L(1)			C n = 1
-	ldr	r8, [up, #4]!
-	b	L(mid)
-
-L(evn):	ldr	r6, [up, #4]!
-	subs	n, n, #2
-	beq	L(end)
-
-L(top):	ldr	r8, [up, #4]!
-	orr	r7, r7, r6, lsl tnc
-	str	r7, [rp], #4
-	mov	r7, r6, lsr cnt
-L(mid):	ldr	r6, [up, #4]!
-	orr	r7, r7, r8, lsl tnc
-	str	r7, [rp], #4
-	mov	r7, r8, lsr cnt
-	subs	n, n, #2
-	bgt	L(top)
-
-L(end):	orr	r7, r7, r6, lsl tnc
-	str	r7, [rp], #4
-	mov	r7, r6, lsr cnt
-L(1):	str	r7, [rp]
-	mov	r0, r4, lsl tnc
-	pop	{r4, r6, r7, r8}
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/sec_tabselect.asm b/gmp/mpn/arm/sec_tabselect.asm
deleted file mode 100644
index 8cf937a091..0000000000
--- a/gmp/mpn/arm/sec_tabselect.asm
+++ /dev/null
@@ -1,131 +0,0 @@
-dnl  ARM mpn_sec_tabselect
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 2.33
-C Cortex-A15	 2.2
-
-C TODO
-C  * Consider using special code for small nents, either swapping the inner and
-C    outer loops, or providing a few completely unrolling the inner loops.
-
-define(`rp',    `r0')
-define(`tp',    `r1')
-define(`n',     `r2')
-define(`nents', `r3')
-C      which  on stack
-
-define(`i',     `r11')
-define(`j',     `r12')
-define(`c',     `r14')
-define(`mask',  `r7')
-
-ASM_START()
-PROLOGUE(mpn_sec_tabselect)
-	push	{r4-r11, r14}
-
-	subs	j, n, #3
-	bmi	L(outer_end)
-L(outer_top):
-	ldr	c, [sp, #36]
-	mov	i, nents
-	push	{tp}
-
-	mov	r8, #0
-	mov	r9, #0
-	mov	r10, #0
-
-L(top):	subs	c, c, #1
-	ldm	tp, {r4,r5,r6}
-	sbc	mask, mask, mask
-	subs	i, i, #1
-	add	tp, tp, n, lsl #2
-	and	r4, r4, mask
-	and	r5, r5, mask
-	and	r6, r6, mask
-	orr	r8, r8, r4
-	orr	r9, r9, r5
-	orr	r10, r10, r6
-	bge	L(top)
-
-	stmia	rp!, {r8,r9,r10}
-	pop	{tp}
-	add	tp, tp, #12
-	subs	j, j, #3
-	bpl	L(outer_top)
-L(outer_end):
-
-	cmp	j, #-1
-	bne	L(n2)
-
-	ldr	c, [sp, #36]
-	mov	i, nents
-	mov	r8, #0
-	mov	r9, #0
-L(tp2):	subs	c, c, #1
-	sbc	mask, mask, mask
-	ldm	tp, {r4,r5}
-	subs	i, i, #1
-	add	tp, tp, n, lsl #2
-	and	r4, r4, mask
-	and	r5, r5, mask
-	orr	r8, r8, r4
-	orr	r9, r9, r5
-	bge	L(tp2)
-	stmia	rp, {r8,r9}
-	pop	{r4-r11, r14}
-	bx	lr
-
-L(n2):	cmp	j, #-2
-	bne	L(n1)
-
-	ldr	c, [sp, #36]
-	mov	i, nents
-	mov	r8, #0
-L(tp1):	subs	c, c, #1
-	sbc	mask, mask, mask
-	ldr	r4, [tp]
-	subs	i, i, #1
-	add	tp, tp, n, lsl #2
-	and	r4, r4, mask
-	orr	r8, r8, r4
-	bge	L(tp1)
-	str	r8, [rp]
-L(n1):	pop	{r4-r11, r14}
-	bx	lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/sub_n.asm b/gmp/mpn/arm/sub_n.asm
new file mode 100644
index 0000000000..7063be4f13
--- /dev/null
+++ b/gmp/mpn/arm/sub_n.asm
@@ -0,0 +1,71 @@
+dnl  ARM mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl  store difference in a third limb vector.
+dnl  Contributed by Robert Harley.
+
+dnl  Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This code runs at 5 cycles/limb.
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`vp',`r2')
+define(`n',`r3')
+
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+	stmfd	sp!, { r8, r9, lr }		C save r8, r9 and lr, all used as scratch below
+	subs	r12, r12, r12			C r12 = 0, carry set, i.e. no borrow pending
+	tst	n, #1				C odd limb count?
+	beq	L(skip1)
+	ldr	r12, [up], #4			C r12 = *up++
+	ldr	lr, [vp], #4			C lr = *vp++
+	subs	r12, r12, lr			C first limb difference, starts the borrow chain
+	str	r12, [rp], #4			C *rp++ = difference
+L(skip1):
+	tst	n, #2				C two more limbs before the 4-way loop? relies on C being left intact
+	beq	L(skip2)
+	ldmia	up!, { r8, r9 }			C two limbs from up
+	ldmia	vp!, { r12, lr }		C two limbs from vp
+	sbcs	r8, r8, r12			C subtract with borrow from the previous step
+	sbcs	r9, r9, lr
+	stmia	rp!, { r8, r9 }			C store two result limbs
+L(skip2):
+	bics	n, n, #3			C n = n with low two bits cleared, Z set if nothing remains
+	beq	L(return)
+	stmfd	sp!, { r4, r5, r6, r7 }		C r4-r7 are only needed, and only saved, for the loop
+L(sub_n_loop):
+	ldmia	up!, { r4, r5, r6, r7 }		C four limbs from up
+	ldmia	vp!, { r8, r9, r12, lr }	C four limbs from vp
+	sbcs	r4, r4, r8
+	ldr	r8, [rp, #12]			C cache allocate; r8 is dead here and the loaded value is never used
+	sbcs	r5, r5, r9
+	sbcs	r6, r6, r12
+	sbcs	r7, r7, lr
+	stmia	rp!, { r4, r5, r6, r7 }		C store four result limbs
+	sub	n, n, #4			C no s suffix, so the borrow in C survives
+	teq	n, #0				C loop test; relies on C being left intact for the next sbcs
+	bne	L(sub_n_loop)
+	ldmfd	sp!, { r4, r5, r6, r7 }		C restore the loop-only registers
+L(return):
+	sbc	r0, r0, r0			C r0 = 0 if carry set, -1 if a borrow is pending
+	and	r0, r0, #1			C return value: 1 on borrow out, else 0
+	ldmfd	sp!, { r8, r9, pc }		C restore r8, r9 and return by popping saved lr into pc
+EPILOGUE(mpn_sub_n)
diff --git a/gmp/mpn/arm/submul_1.asm b/gmp/mpn/arm/submul_1.asm
new file mode 100644
index 0000000000..c3654377d7
--- /dev/null
+++ b/gmp/mpn/arm/submul_1.asm
@@ -0,0 +1,107 @@
+dnl  ARM mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
+dnl  result from a second limb vector.
+
+dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C            cycles/limb
+C StrongARM:  7.75-9.75  (dependent on vl value)
+C XScale:        8-9     (dependent on vl value, estimated)
+
+define(`rp',`r0')
+define(`up',`r1')
+define(`n',`r2')
+define(`vl',`r3')
+define(`rl',`r12')
+define(`ul',`r6')
+define(`r',`lr')
+
+
+ASM_START()
+PROLOGUE(mpn_submul_1)
+	stmfd	sp!, { r4-r6, lr }		C save r4-r6 and lr; lr doubles as scratch r
+	subs	r4, r0, r0		C clear r4, set cy
+	tst	n, #1				C odd limb count?
+	beq	L(skip1)
+	ldr	ul, [up], #4			C ul = *up++
+	ldr	rl, [rp, #0]			C rl = rp[0]
+	umull	r5, r4, ul, vl			C r4:r5 = ul * vl, r5 low and r4 high
+	subs	r, rl, r5			C subtract low product limb, starts the borrow chain
+	str	r, [rp], #4			C *rp++ = result
+L(skip1):
+	tst	n, #2				C two more limbs before the 4-way loop? relies on C being left intact
+	beq	L(skip2)
+	ldr	ul, [up], #4
+	ldr	rl, [rp, #0]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl			C r5:r4 = ul * vl + previous high limb held in r4
+	ldr	ul, [up], #4
+	sbcs	r, rl, r4			C subtract low limb with borrow
+	ldr	rl, [rp, #4]			C fetch next rp limb before rp is advanced
+	mov	r4, #0
+	umlal	r5, r4, ul, vl			C roles swap: r4:r5 = ul * vl + previous high limb in r5
+	str	r, [rp], #4
+	sbcs	r, rl, r5
+	str	r, [rp], #4
+L(skip2):
+	bics	r, n, #3			C Z set if no group of four limbs remains; n itself is not modified
+	beq	L(return)
+
+	ldr	ul, [up], #4			C feed-in: first limb of the first group of four
+	ldr	rl, [rp, #0]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl			C r5:r4 = ul * vl + previous high limb held in r4
+	b	L(in)				C enter the loop past its first multiply step
+
+L(loop):
+	ldr	ul, [up], #4
+	sbcs	r, rl, r5			C finish the last limb of the previous group
+	ldr	rl, [rp, #4]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl
+	str	r, [rp], #4
+L(in):	ldr	ul, [up], #4
+	sbcs	r, rl, r4
+	ldr	rl, [rp, #4]
+	mov	r4, #0
+	umlal	r5, r4, ul, vl
+	str	r, [rp], #4
+	ldr	ul, [up], #4
+	sbcs	r, rl, r5
+	ldr	rl, [rp, #4]
+	mov	r5, #0
+	umlal	r4, r5, ul, vl
+	str	r, [rp], #4
+	ldr	ul, [up], #4
+	sbcs	r, rl, r4
+	ldr	rl, [rp, #4]
+	mov	r4, #0
+	umlal	r5, r4, ul, vl
+	str	r, [rp], #4
+	sub	n, n, #4			C no s suffix, so the borrow in C survives
+	bics	r, n, #3			C more groups of four? relies on C being left intact
+	bne	L(loop)
+
+	sbcs	r, rl, r5			C wind-down: last pending low limb
+	str	r, [rp], #4
+L(return):
+	sbc	r0, r0, r0			C r0 = 0 if carry set, -1 if a borrow is pending
+	sub	r0, r4, r0			C return r4, the last high product limb, plus the borrow
+	ldmfd	sp!, { r4-r6, pc }		C restore r4-r6 and return by popping saved lr into pc
+EPILOGUE(mpn_submul_1)
diff --git a/gmp/mpn/arm/udiv.asm b/gmp/mpn/arm/udiv.asm
index 8d441c74ed..9434a4f2b6 100644
--- a/gmp/mpn/arm/udiv.asm
+++ b/gmp/mpn/arm/udiv.asm
@@ -1,33 +1,22 @@
 dnl  ARM mpn_udiv_qrnnd -- divide a two limb dividend and a one limb divisor.
 dnl  Return quotient and store remainder through a supplied pointer.
 
-dnl  Copyright 2001, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -59,9 +48,9 @@ L(oop):	divstep(n1,n0,d)
 	teq	r12, #0
 	bne	L(oop)
 
-	str	n1, [rem_ptr]		C store remainder
+	str	n1, [ rem_ptr ]		C store remainder
 	adc	r0, n0, n0		C quotient: add last carry from divstep
-	bx	lr
+	mov	pc, lr
 
 L(_large_divisor):
 	stmfd	sp!, { r8, lr }
@@ -98,7 +87,7 @@ L(oop2):
 	addcs	n0, n0, #1		C adjust quotient
 
 L(_even_divisor):
-	str	n1, [rem_ptr]		C store remainder
+	str	n1, [ rem_ptr ]		C store remainder
 	mov	r0, n0			C quotient
 	ldmfd	sp!, { r8, pc }
 EPILOGUE(mpn_udiv_qrnnd)
diff --git a/gmp/mpn/arm/v5/gcd_1.asm b/gmp/mpn/arm/v5/gcd_1.asm
deleted file mode 100644
index 169d154bf0..0000000000
--- a/gmp/mpn/arm/v5/gcd_1.asm
+++ /dev/null
@@ -1,120 +0,0 @@
-dnl  ARM v5 mpn_gcd_1.
-
-dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for ARM by Torbjörn
-dnl  Granlund.
-
-dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/bit (approx)
-C StrongARM	 -
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 5.9
-C Cortex-A15	 ?
-C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
-
-C TODO
-C  * Optimise inner-loop better.
-
-C Threshold of when to call bmod when U is one limb.  Should be about
-C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
-define(`BMOD_THRES_LOG2', 6)
-
-C INPUT PARAMETERS
-define(`up',    `r0')
-define(`n',     `r1')
-define(`v0',    `r2')
-
-ifdef(`BMOD_1_TO_MOD_1_THRESHOLD',,
-  `define(`BMOD_1_TO_MOD_1_THRESHOLD',0xffffffff)')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_gcd_1)
-	push	{r4, r7, lr}
-	ldr	r3, [up]	C U low limb
-
-	orr	r3, r3, v0
-	rsb	r4, r3, #0
-	and	r4, r4, r3
-	clz	r4, r4		C min(ctz(u0),ctz(v0))
-	rsb	r4, r4, #31
-
-	rsb	r12, v0, #0
-	and	r12, r12, v0
-	clz	r12, r12
-	rsb	r12, r12, #31
-	mov	v0, v0, lsr r12
-
-	mov	r7, v0
-
-	cmp	n, #1
-	bne	L(nby1)
-
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
-	ldr	r3, [up]
-	cmp	v0, r3, lsr #BMOD_THRES_LOG2
-	bhi	L(red1)
-
-L(bmod):mov	r3, #0		C carry argument
-	bl	mpn_modexact_1c_odd
-	b	L(red0)
-
-L(nby1):cmp	n, #BMOD_1_TO_MOD_1_THRESHOLD
-	blo	L(bmod)
-
-	bl	mpn_mod_1
-
-L(red0):mov	r3, r0
-L(red1):rsbs	r12, r3, #0
-	and	r12, r12, r3
-	clz	r12, r12
-	rsb	r12, r12, #31
-	bne	L(mid)
-	b	L(end)
-
-	ALIGN(8)
-L(top):	rsb	r12, r12, #31
-	movcc	r3, r1		C if x-y < 0
-	movcc	r7, r0		C use x,y-x
-L(mid):	mov	r3, r3, lsr r12	C
-	mov	r0, r3		C
-	sub	r1, r7, r3	C
-	rsbs	r3, r7, r3	C
-	and	r12, r1, r3	C
-	clz	r12, r12	C
-	bne	L(top)		C
-
-L(end):	mov	r0, r7, lsl r4
-	pop	{r4, r7, pc}
-EPILOGUE()
diff --git a/gmp/mpn/arm/v5/mod_1_1.asm b/gmp/mpn/arm/v5/mod_1_1.asm
deleted file mode 100644
index 3cf0cd7763..0000000000
--- a/gmp/mpn/arm/v5/mod_1_1.asm
+++ /dev/null
@@ -1,129 +0,0 @@
-dnl  ARM mpn_mod_1_1p
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 -
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 7
-C Cortex-A15	 6
-
-define(`ap', `r0')
-define(`n',  `r1')
-define(`d',  `r2')
-define(`cps',`r3')
-
-ASM_START()
-PROLOGUE(mpn_mod_1_1p)
-	push	{r4-r10}
-	add	r0, r0, r1, asl #2
-	ldr	r5, [r0, #-4]!
-	ldr	r12, [r0, #-4]!
-	subs	r1, r1, #2
-	ble	L(4)
-	ldr	r8, [r3, #12]
-	mov	r4, r12
-	mov	r10, r5
-	umull	r7, r5, r10, r8
-	sub	r1, r1, #1
-	b	L(mid)
-
-L(top):	adds	r12, r6, r7
-	adcs	r10, r4, r5
-	sub	r1, r1, #1
-	mov	r6, #0
-	movcs	r6, r8
-	umull	r7, r5, r10, r8
-	adds	r4, r12, r6
-	subcs	r4, r4, r2
-L(mid):	ldr	r6, [r0, #-4]!
-	teq	r1, #0
-	bne	L(top)
-
-	adds	r12, r6, r7
-	adcs	r5, r4, r5
-	subcs	r5, r5, r2
-L(4):	ldr	r1, [r3, #4]
-	cmp	r1, #0
-	beq	L(7)
-	ldr	r4, [r3, #8]
-	umull	r0, r6, r5, r4
-	adds	r12, r0, r12
-	addcs	r6, r6, #1
-	rsb	r0, r1, #32
-	mov	r0, r12, lsr r0
-	orr	r5, r0, r6, asl r1
-	mov	r12, r12, asl r1
-	b	L(8)
-L(7):	cmp	r5, r2
-	subcs	r5, r5, r2
-L(8):	ldr	r0, [r3, #0]
-	umull	r4, r3, r5, r0
-	add	r5, r5, #1
-	adds	r0, r4, r12
-	adc	r5, r3, r5
-	mul	r5, r2, r5
-	sub	r12, r12, r5
-	cmp	r12, r0
-	addhi	r12, r12, r2
-	cmp	r2, r12
-	subls	r12, r12, r2
-	mov	r0, r12, lsr r1
-	pop	{r4-r10}
-	bx	r14
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1_1p_cps)
-	stmfd	sp!, {r4, r5, r6, r14}
-	mov	r5, r0
-	clz	r4, r1
-	mov	r0, r1, asl r4
-	rsb	r6, r0, #0
-	bl	mpn_invert_limb
-	str	r0, [r5, #0]
-	str	r4, [r5, #4]
-	cmp	r4, #0
-	beq	L(2)
-	rsb	r1, r4, #32
-	mov	r3, #1
-	mov	r3, r3, asl r4
-	orr	r3, r3, r0, lsr r1
-	mul	r3, r6, r3
-	mov	r4, r3, lsr r4
-	str	r4, [r5, #8]
-L(2):	mul	r0, r6, r0
-	str	r0, [r5, #12]
-	ldmfd	sp!, {r4, r5, r6, pc}
-EPILOGUE()
diff --git a/gmp/mpn/arm/v5/mod_1_2.asm b/gmp/mpn/arm/v5/mod_1_2.asm
deleted file mode 100644
index aa26ecb21c..0000000000
--- a/gmp/mpn/arm/v5/mod_1_2.asm
+++ /dev/null
@@ -1,156 +0,0 @@
-dnl  ARM mpn_mod_1s_2p
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 -
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 4.25
-C Cortex-A15	 3
-
-define(`ap', `r0')
-define(`n',  `r1')
-define(`d',  `r2')
-define(`cps',`r3')
-
-ASM_START()
-PROLOGUE(mpn_mod_1s_2p)
-	push	{r4-r10}
-	tst	n, #1
-	add	r7, r3, #8
-	ldmia	r7, {r7, r8, r12}	C load B1, B2, B3
-	add	ap, ap, n, lsl #2	C put ap at operand end
-	beq	L(evn)
-
-L(odd):	subs	n, n, #1
-	beq	L(1)
-	ldmdb	ap!, {r4,r6,r9}
-	mov	r10, #0
-	umlal	r4, r10, r6, r7
-	umlal	r4, r10, r9, r8
-	b	L(com)
-
-L(evn):	ldmdb	ap!, {r4,r10}
-L(com):	subs	n, n, #2
-	ble	L(end)
-	ldmdb	ap!, {r5,r6}
-	b	L(mid)
-
-L(top):	mov	r9, #0
-	umlal	r5, r9, r6, r7		C B1
-	umlal	r5, r9, r4, r8		C B2
-	ldmdb	ap!, {r4,r6}
-	umlal	r5, r9, r10, r12	C B3
-	ble	L(xit)
-	mov	r10, #0
-	umlal	r4, r10, r6, r7		C B1
-	umlal	r4, r10, r5, r8		C B2
-	ldmdb	ap!, {r5,r6}
-	umlal	r4, r10, r9, r12	C B3
-L(mid):	subs	n, n, #4
-	bge	L(top)
-
-	mov	r9, #0
-	umlal	r5, r9, r6, r7		C B1
-	umlal	r5, r9, r4, r8		C B2
-	umlal	r5, r9, r10, r12	C B3
-	mov	r4, r5
-
-L(end):	movge	   r9, r10		C executed iff coming via xit
-	ldr	r6, [r3, #4]		C cps[1] = cnt
-	mov	r5, #0
-	umlal	r4, r5, r9, r7
-	mov	r7, r5, lsl r6
-L(x):	rsb	r1, r6, #32
-	orr	r8, r7, r4, lsr r1
-	mov	r9, r4, lsl r6
-	ldr	r5, [r3, #0]
-	add	r0, r8, #1
-	umull	r12, r1, r8, r5
-	adds	r4, r12, r9
-	adc	r1, r1, r0
-	mul	r5, r2, r1
-	sub	r9, r9, r5
-	cmp	r9, r4
-	addhi	r9, r9, r2
-	cmp	r2, r9
-	subls	r9, r9, r2
-	mov	r0, r9, lsr r6
-	pop	{r4-r10}
-	bx	r14
-
-L(xit):	mov	r10, #0
-	umlal	r4, r10, r6, r7		C B1
-	umlal	r4, r10, r5, r8		C B2
-	umlal	r4, r10, r9, r12	C B3
-	b	L(end)
-
-L(1):	ldr	r6, [r3, #4]		C cps[1] = cnt
-	ldr	r4, [ap, #-4]		C ap[0]
-	mov	r7, #0
-	b	L(x)
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1s_2p_cps)
-	push	{r4-r8, r14}
-	clz	r4, r1
-	mov	r5, r1, lsl r4		C b <<= cnt
-	mov	r6, r0			C r6 = cps
-	mov	r0, r5
-	bl	mpn_invert_limb
-	rsb	r3, r4, #32
-	mov	r3, r0, lsr r3
-	mov	r2, #1
-	orr	r3, r3, r2, lsl r4
-	rsb	r1, r5, #0
-	mul	r2, r1, r3
-	umull	r3, r12, r2, r0
-	add	r12, r2, r12
-	mvn	r12, r12
-	mul	r1, r5, r12
-	cmp	r1, r3
-	addhi	r1, r1, r5
-	umull	r12, r7, r1, r0
-	add	r7, r1, r7
-	mvn	r7, r7
-	mul	r3, r5, r7
-	cmp	r3, r12
-	addhi	r3, r3, r5
-	mov	r5, r2, lsr r4
-	mov	r7, r1, lsr r4
-	mov	r8, r3, lsr r4
-	stmia	r6, {r0,r4,r5,r7,r8}	C fill cps
-	pop	{r4-r8, pc}
-EPILOGUE()
diff --git a/gmp/mpn/arm/v6/addmul_1.asm b/gmp/mpn/arm/v6/addmul_1.asm
deleted file mode 100644
index 57019e4b2b..0000000000
--- a/gmp/mpn/arm/v6/addmul_1.asm
+++ /dev/null
@@ -1,111 +0,0 @@
-dnl  ARM mpn_addmul_1.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM:	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 3.25
-C Cortex-A15	 4
-
-C TODO
-C  * Micro-optimise feed-in code.
-C  * Optimise for n=1,2 by delaying register saving.
-C  * Try using ldm/stm.
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n', `r2')
-define(`v0',`r3')
-
-ASM_START()
-PROLOGUE(mpn_addmul_1)
-	stmfd	sp!, { r4, r5, r6, r7 }
-
-	ands	r6, n, #3
-	mov	r12, #0
-	beq	L(fi0)
-	cmp	r6, #2
-	bcc	L(fi1)
-	beq	L(fi2)
-
-L(fi3):	ldr	r4, [up], #4
-	ldr	r6, [rp, #0]
-	ldr	r5, [up], #4
-	b	L(lo3)
-
-L(fi0):	ldr	r5, [up], #4
-	ldr	r7, [rp], #4
-	ldr	r4, [up], #4
-	b	L(lo0)
-
-L(fi1):	ldr	r4, [up], #4
-	ldr	r6, [rp], #8
-	subs	n, n, #1
-	beq	L(1)
-	ldr	r5, [up], #4
-	b	L(lo1)
-
-L(fi2):	ldr	r5, [up], #4
-	ldr	r7, [rp], #12
-	ldr	r4, [up], #4
-	b	L(lo2)
-
-	ALIGN(16)
-L(top):	ldr	r6, [rp, #-8]
-	ldr	r5, [up], #4
-	str	r7, [rp, #-12]
-L(lo1):	umaal	r6, r12, r4, v0
-	ldr	r7, [rp, #-4]
-	ldr	r4, [up], #4
-	str	r6, [rp, #-8]
-L(lo0):	umaal	r7, r12, r5, v0
-	ldr	r6, [rp, #0]
-	ldr	r5, [up], #4
-	str	r7, [rp, #-4]
-L(lo3):	umaal	r6, r12, r4, v0
-	ldr	r7, [rp, #4]
-	ldr	r4, [up], #4
-	str	r6, [rp], #16
-L(lo2):	umaal	r7, r12, r5, v0
-	subs	n, n, #4
-	bhi	L(top)
-
-	ldr	r6, [rp, #-8]
-	str	r7, [rp, #-12]
-L(1):	umaal	r6, r12, r4, v0
-	str	r6, [rp, #-8]
-	mov	r0, r12
-	ldmfd	sp!, { r4, r5, r6, r7 }
-	bx	lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v6/addmul_2.asm b/gmp/mpn/arm/v6/addmul_2.asm
deleted file mode 100644
index 69817ce340..0000000000
--- a/gmp/mpn/arm/v6/addmul_2.asm
+++ /dev/null
@@ -1,138 +0,0 @@
-dnl  ARM mpn_addmul_2.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM:	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 2.25
-C Cortex-A15	 2.5
-
-C This is believed to be optimal for A15 for any unrolling, and optimal for A9
-C for 4-way unrolling.  Using separate pointer update instructions is necessary
-C for optimal A9 speed.
-
-C TODO:
-C  * Start the first multiply or multiplies directly at function entry.
-
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n', `r2')
-define(`vp',`r3')
-
-define(`v0',`r6')
-define(`v1',`r7')
-define(`u0',`r3')
-define(`u1',`r9')
-
-define(`cya',`r8')
-define(`cyb',`r12')
-
-
-ASM_START()
-PROLOGUE(mpn_addmul_2)
-	push	{ r4, r5, r6, r7, r8, r9 }
-
-	ldm	vp, { v0, v1 }
-	mov	cya, #0
-	mov	cyb, #0
-
-	tst	n, #1
-	beq	L(evn)
-
-L(odd):	ldr	r5, [rp, #0]
-	ldr	u0, [up, #0]
-	ldr	r4, [rp, #4]
-	tst	n, #2
-	beq	L(fi1)
-L(fi3):	sub	up, up, #12
-	sub	rp, rp, #12
-	b	L(lo3)
-L(fi1):	sub	n, n, #1
-	sub	up, up, #4
-	sub	rp, rp, #4
-	b	L(lo1)
-
-L(evn):	ldr	r4, [rp, #0]
-	ldr	u1, [up, #0]
-	ldr	r5, [rp, #4]
-	tst	n, #2
-	bne	L(fi2)
-L(fi0):	sub	up, up, #8
-	sub	rp, rp, #8
-	b	L(lo0)
-L(fi2):	subs	n, n, #2
-	bls	L(end)
-
-	ALIGN(16)
-L(top):	ldr	u0, [up, #4]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #0]
-	ldr	r4, [rp, #8]
-	umaal	r5, cyb, u1, v1
-L(lo1):	ldr	u1, [up, #8]
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #4]
-	ldr	r5, [rp, #12]
-	umaal	r4, cyb, u0, v1
-L(lo0):	ldr	u0, [up, #12]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #8]
-	ldr	r4, [rp, #16]
-	umaal	r5, cyb, u1, v1
-L(lo3):	ldr	u1, [up, #16]
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #12]
-	ldr	r5, [rp, #20]
-	add	rp, rp, #16
-	umaal	r4, cyb, u0, v1
-	add	up, up, #16
-	subs	n, n, #4
-	bhi	L(top)
-
-L(end):	umaal	r4, cya, u1, v0
-	ldr	u0, [up, #4]
-	umaal	r5, cyb, u1, v1
-	str	r4, [rp, #0]
-	umaal	r5, cya, u0, v0
-	umaal	cya, cyb, u0, v1
-	str	r5, [rp, #4]
-	str	cya, [rp, #8]
-	mov	r0, cyb
-
-	pop	{ r4, r5, r6, r7, r8, r9 }
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v6/addmul_3.asm b/gmp/mpn/arm/v6/addmul_3.asm
deleted file mode 100644
index 046543020f..0000000000
--- a/gmp/mpn/arm/v6/addmul_3.asm
+++ /dev/null
@@ -1,187 +0,0 @@
-dnl  ARM mpn_addmul_3.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM:	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 2.125
-C Cortex-A15	 2
-
-C TODO
-C  * Use a fast path for n <= KARATSUBA_MUL_THRESHOLD using a jump table,
-C    avoiding the current multiply.
-C  * Start the first multiply or multiplies early.
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n', `r2')
-define(`vp',`r3')
-
-define(`v0',`r4')  define(`v1',`r5')  define(`v2',`r6')
-define(`u0',`r3')  define(`u1',`r14')
-define(`w0',`r7')  define(`w1',`r8')  define(`w2',`r9')
-define(`cy0',`r10')  define(`cy1',`r11') define(`cy2',`r12')
-
-
-ASM_START()
-PROLOGUE(mpn_addmul_3)
-	push	{ r4-r11, r14 }
-
-	ldr	w0, =0xaaaaaaab		C 3^{-1} mod 2^32
-	ldm	vp, { v0,v1,v2 }
-	mov	cy0, #0
-	mov	cy1, #0
-	mov	cy2, #0
-
-C Tricky n mod 6
-	mul	w0, w0, n		C n * 3^{-1} mod 2^32
-	and	w0, w0, #0xc0000001	C pseudo-CRT mod 3,2
-	sub	n, n, #3
-ifdef(`PIC',`
-	add	pc, pc, w0, ror $28
-	nop
-	b	L(b0)
-	b	L(b2)
-	b	L(b4)
-	.word	0xe7f000f0	C udf
-	b	L(b3)
-	b	L(b5)
-	b	L(b1)
-',`
-	ldr	pc, [pc, w0, ror $28]
-	nop
-	.word	L(b0), L(b2), L(b4), 0, L(b3), L(b5), L(b1)
-')
-
-L(b5):	add	up, up, #-8
-	ldr	w1, [rp, #0]
-	ldr	w2, [rp, #4]
-	ldr	u1, [up, #8]
-	b	L(lo5)
-
-L(b4):	add	rp, rp, #-4
-	add	up, up, #-12
-	ldr	w2, [rp, #4]
-	ldr	w0, [rp, #8]
-	ldr	u0, [up, #12]
-	b	L(lo4)
-
-L(b3):	add	rp, rp, #-8
-	add	up, up, #-16
-	ldr	w0, [rp, #8]
-	ldr	w1, [rp, #12]
-	ldr	u1, [up, #16]
-	b	L(lo3)
-
-L(b1):	add	rp, rp, #8
-	ldr	w2, [rp, #-8]
-	ldr	w0, [rp, #-4]
-	ldr	u1, [up, #0]
-	b	L(lo1)
-
-L(b0):	add	rp, rp, #4
-	add	up, up, #-4
-	ldr	w0, [rp, #-4]
-	ldr	w1, [rp, #0]
-	ldr	u0, [up, #4]
-	b	L(lo0)
-
-L(b2):	add	rp, rp, #12
-	add	up, up, #4
-	ldr	w1, [rp, #-12]
-	ldr	w2, [rp, #-8]
-	ldr	u0, [up, #-4]
-
-	ALIGN(16)
-L(top):	ldr	w0, [rp, #-4]
-	umaal	w1, cy0, u0, v0
-	ldr	u1, [up, #0]
-	umaal	w2, cy1, u0, v1
-	str	w1, [rp, #-12]
-	umaal	w0, cy2, u0, v2
-L(lo1):	ldr	w1, [rp, #0]
-	umaal	w2, cy0, u1, v0
-	ldr	u0, [up, #4]
-	umaal	w0, cy1, u1, v1
-	str	w2, [rp, #-8]
-	umaal	w1, cy2, u1, v2
-L(lo0):	ldr	w2, [rp, #4]
-	umaal	w0, cy0, u0, v0
-	ldr	u1, [up, #8]
-	umaal	w1, cy1, u0, v1
-	str	w0, [rp, #-4]
-	umaal	w2, cy2, u0, v2
-L(lo5):	ldr	w0, [rp, #8]
-	umaal	w1, cy0, u1, v0
-	ldr	u0, [up, #12]
-	umaal	w2, cy1, u1, v1
-	str	w1, [rp, #0]
-	umaal	w0, cy2, u1, v2
-L(lo4):	ldr	w1, [rp, #12]
-	umaal	w2, cy0, u0, v0
-	ldr	u1, [up, #16]
-	umaal	w0, cy1, u0, v1
-	str	w2, [rp, #4]
-	umaal	w1, cy2, u0, v2
-L(lo3):	ldr	w2, [rp, #16]
-	umaal	w0, cy0, u1, v0
-	ldr	u0, [up, #20]
-	umaal	w1, cy1, u1, v1
-	str	w0, [rp, #8]
-	umaal	w2, cy2, u1, v2
-L(lo2):	subs	n, n, #6
-	add	up, up, #24
-	add	rp, rp, #24
-	bge	L(top)
-
-L(end):	umaal	w1, cy0, u0, v0
-	ldr	u1, [up, #0]
-	umaal	w2, cy1, u0, v1
-	str	w1, [rp, #-12]
-	mov	w0, #0
-	umaal	w0, cy2, u0, v2
-	umaal	w2, cy0, u1, v0
-	umaal	w0, cy1, u1, v1
-	str	w2, [rp, #-8]
-	umaal	cy1, cy2, u1, v2
-	adds	w0, w0, cy0
-	str	w0, [rp, #-4]
-	adcs	w1, cy1, #0
-	str	w1, [rp, #0]
-	adc	r0, cy2, #0
-
-	pop	{ r4-r11, pc }
-EPILOGUE()
diff --git a/gmp/mpn/arm/v6/dive_1.asm b/gmp/mpn/arm/v6/dive_1.asm
deleted file mode 100644
index 92de81473f..0000000000
--- a/gmp/mpn/arm/v6/dive_1.asm
+++ /dev/null
@@ -1,149 +0,0 @@
-dnl  ARM v6 mpn_divexact_1
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C               cycles/limb       cycles/limb
-C               norm    unorm    modexact_1c_odd
-C StrongARM	 -	 -
-C XScale	 -	 -
-C Cortex-A7	 ?	 ?
-C Cortex-A8	 ?	 ?
-C Cortex-A9	 9	10		 9
-C Cortex-A15	 7	 7		 7
-
-C Architecture requirements:
-C v5	-
-C v5t	clz
-C v5te	-
-C v6	umaal
-C v6t2	-
-C v7a	-
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
-define(`d',  `r3')
-
-define(`cy',  `r7')
-define(`cnt', `r6')
-define(`tnc', `r10')
-
-ASM_START()
-PROLOGUE(mpn_divexact_1)
-	push	{r4,r5,r6,r7,r8,r9}
-
-	tst	d, #1
-
-	rsb	r4, d, #0
-	and	r4, r4, d
-	clz	r4, r4
-	rsb	cnt, r4, #31		C count_trailing_zeros
-	mov	d, d, lsr cnt
-
-C binvert limb
-	LEA(	r4, binvert_limb_table)
-	and	r12, d, #254
-	ldrb	r4, [r4, r12, lsr #1]
-	mul	r12, r4, r4
-	mul	r12, d, r12
-	rsb	r12, r12, r4, lsl #1
-	mul	r4, r12, r12
-	mul	r4, d, r4
-	rsb	r4, r4, r12, lsl #1	C r4 = inverse
-
-	ldr	r5, [up], #4		C up[0]
-	mov	cy, #0
-	rsb	r8, r4, #0		C r8 = -inverse
-	beq	L(unnorm)
-
-L(norm):
-	subs	n, n, #1
-	mul	r5, r5, r4
-	beq	L(end)
-
-	ALIGN(16)
-L(top):	ldr	r9, [up], #4
-	mov	r12, #0
-	str	r5, [rp], #4
-	umaal	r12, cy, r5, d
-	mul	r5, r9, r4
-	mla	r5, cy, r8, r5
-	subs	n, n, #1
-	bne	L(top)
-
-L(end):	str	r5, [rp]
-	pop	{r4,r5,r6,r7,r8,r9}
-	bx	r14
-
-L(unnorm):
-	push	{r10,r11}
-	rsb	tnc, cnt, #32
-	mov	r11, r5, lsr cnt
-	subs	n, n, #1
-	beq	L(edx)
-
-	ldr	r12, [up], #4
-	orr	r9, r11, r12, lsl tnc
-	mov	r11, r12, lsr cnt
-	mul	r5, r9, r4
-	subs	n, n, #1
-	beq	L(edu)
-
-	ALIGN(16)
-L(tpu):	ldr	r12, [up], #4
-	orr	r9, r11, r12, lsl tnc
-	mov	r11, r12, lsr cnt
-	mov	r12, #0
-	str	r5, [rp], #4
-	umaal	r12, cy, r5, d
-	mul	r5, r9, r4
-	mla	r5, cy, r8, r5
-	subs	n, n, #1
-	bne	L(tpu)
-
-L(edu):	str	r5, [rp], #4
-	mov	r12, #0
-	umaal	r12, cy, r5, d
-	mul	r5, r11, r4
-	mla	r5, cy, r8, r5
-	str	r5, [rp]
-	pop	{r10,r11}
-	pop	{r4,r5,r6,r7,r8,r9}
-	bx	r14
-
-L(edx):	mul	r5, r11, r4
-	str	r5, [rp]
-	pop	{r10,r11}
-	pop	{r4,r5,r6,r7,r8,r9}
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v6/gmp-mparam.h b/gmp/mpn/arm/v6/gmp-mparam.h
deleted file mode 100644
index c9c6851769..0000000000
--- a/gmp/mpn/arm/v6/gmp-mparam.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/* gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2003, 2009, 2010, 2012 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 700MHz ARM11 (raspberry pi) */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* preinv always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     29
-#define USE_PREINV_DIVREM_1                  1  /* preinv always */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           33
-
-#define MUL_TOOM22_THRESHOLD                36
-#define MUL_TOOM33_THRESHOLD               117
-#define MUL_TOOM44_THRESHOLD               462
-#define MUL_TOOM6H_THRESHOLD                 0  /* always */
-#define MUL_TOOM8H_THRESHOLD               620
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     130
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     573
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     209
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     209
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     305
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 50
-#define SQR_TOOM3_THRESHOLD                181
-#define SQR_TOOM4_THRESHOLD                686
-#define SQR_TOOM6_THRESHOLD                  0  /* always */
-#define SQR_TOOM8_THRESHOLD                915
-
-#define MULMID_TOOM42_THRESHOLD             72
-
-#define MULMOD_BNM1_THRESHOLD               25
-#define SQRMOD_BNM1_THRESHOLD               30
-
-#define MUL_FFT_MODF_THRESHOLD             476  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    476, 5}, {     21, 6}, {     11, 5}, {     25, 6}, \
-    {     13, 5}, {     27, 6}, {     25, 7}, {     13, 6}, \
-    {     28, 7}, {     15, 6}, {     32, 7}, {     17, 6}, \
-    {     35, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
-    {     47, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     43, 8}, {     23, 7}, {     51, 8}, \
-    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
-    {     43, 9}, {     23, 8}, {     55, 9}, {     31, 8}, \
-    {     71, 9}, {     39, 8}, {     83, 9}, {     47, 8}, \
-    {     95, 9}, {     55,10}, {     31, 9}, {     79,10}, \
-    {     47, 9}, {    103,11}, {     31,10}, {     63, 9}, \
-    {    135,10}, {     79, 9}, {    159,10}, {     95, 9}, \
-    {    191,10}, {    111,11}, {     63,10}, {    127, 9}, \
-    {    255,10}, {    143, 9}, {    287,10}, {    159,11}, \
-    {     95,10}, {    191, 9}, {    383,12}, {   4096,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 63
-#define MUL_FFT_THRESHOLD                 4736
-
-#define SQR_FFT_MODF_THRESHOLD             464  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    464, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     29, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
-    {     36, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
-    {     47, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     43, 8}, {     23, 7}, {     49, 8}, \
-    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
-    {     35, 7}, {     71, 8}, {     43, 9}, {     23, 8}, \
-    {     55, 9}, {     31, 8}, {     71, 9}, {     39, 8}, \
-    {     83, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
-    {     31, 9}, {     79,10}, {     47, 9}, {    103,11}, \
-    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
-    {    159,10}, {     95, 9}, {    191,10}, {    111,11}, \
-    {     63,10}, {    127, 9}, {    255,10}, {    143, 9}, \
-    {    287,10}, {    159,11}, {     95,10}, {    191, 9}, \
-    {    383,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 61
-#define SQR_FFT_THRESHOLD                 3776
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  67
-#define MULLO_MUL_N_THRESHOLD             8907
-
-#define DC_DIV_QR_THRESHOLD                 40
-#define DC_DIVAPPR_Q_THRESHOLD             156
-#define DC_BDIV_QR_THRESHOLD                71
-#define DC_BDIV_Q_THRESHOLD                208
-
-#define INV_MULMOD_BNM1_THRESHOLD           70
-#define INV_NEWTON_THRESHOLD               151
-#define INV_APPR_THRESHOLD                 150
-
-#define BINV_NEWTON_THRESHOLD              375
-#define REDC_1_TO_REDC_2_THRESHOLD           5
-#define REDC_2_TO_REDC_N_THRESHOLD         134
-
-#define MU_DIV_QR_THRESHOLD               2130
-#define MU_DIVAPPR_Q_THRESHOLD            2130
-#define MUPI_DIV_QR_THRESHOLD               80
-#define MU_BDIV_QR_THRESHOLD              1787
-#define MU_BDIV_Q_THRESHOLD               2130
-
-#define POWM_SEC_TABLE  7,32,460,1705
-
-#define MATRIX22_STRASSEN_THRESHOLD         19
-#define HGCD_THRESHOLD                      85
-#define HGCD_APPR_THRESHOLD                119
-#define HGCD_REDUCE_THRESHOLD             3389
-#define GCD_DC_THRESHOLD                   333
-#define GCDEXT_DC_THRESHOLD                309
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                21
-#define GET_STR_PRECOMPUTE_THRESHOLD        41
-#define SET_STR_DC_THRESHOLD               527
-#define SET_STR_PRECOMPUTE_THRESHOLD      1323
-
-#define FAC_DSC_THRESHOLD                  414
-#define FAC_ODD_THRESHOLD                  154
diff --git a/gmp/mpn/arm/v6/mode1o.asm b/gmp/mpn/arm/v6/mode1o.asm
deleted file mode 100644
index a2f77a6bf5..0000000000
--- a/gmp/mpn/arm/v6/mode1o.asm
+++ /dev/null
@@ -1,95 +0,0 @@
-dnl  ARM v6 mpn_modexact_1c_odd
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 9
-C Cortex-A15	 7
-
-C Architecture requirements:
-C v5	-
-C v5t	-
-C v5te	smulbb
-C v6	umaal
-C v6t2	-
-C v7a	-
-
-define(`up', `r0')
-define(`n',  `r1')
-define(`d',  `r2')
-define(`cy', `r3')
-
-	.protected	binvert_limb_table
-ASM_START()
-PROLOGUE(mpn_modexact_1c_odd)
-	stmfd	sp!, {r4, r5, r6, r7}
-
-	LEA(	r4, binvert_limb_table)
-
-	ldr	r6, [up], #4		C up[0]
-
-	and	r12, d, #254
-	ldrb	r4, [r4, r12, lsr #1]
-	smulbb	r12, r4, r4
-	mul	r12, d, r12
-	rsb	r12, r12, r4, asl #1
-	mul	r4, r12, r12
-	mul	r4, d, r4
-	rsb	r4, r4, r12, asl #1	C r4 = inverse
-
-	subs	n, n, #1
-	sub	r6, r6, cy
-	mul	r6, r6, r4
-	beq	L(end)
-
-	rsb	r5, r4, #0		C r5 = -inverse
-
-L(top):	ldr	r7, [up], #4
-	mov	r12, #0
-	umaal	r12, cy, r6, d
-	mul	r6, r7, r4
-	mla	r6, cy, r5, r6
-	subs	n, n, #1
-	bne	L(top)
-
-L(end):	mov	r12, #0
-	umaal	r12, cy, r6, d
-	mov	r0, cy
-
-	ldmfd	sp!, {r4, r5, r6, r7}
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v6/mul_1.asm b/gmp/mpn/arm/v6/mul_1.asm
deleted file mode 100644
index 0fcc0e46d9..0000000000
--- a/gmp/mpn/arm/v6/mul_1.asm
+++ /dev/null
@@ -1,114 +0,0 @@
-dnl  ARM mpn_mul_1.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM:	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 3.25
-C Cortex-A15	 4
-
-C TODO
-C  * Micro-optimise feed-in code.
-C  * Optimise for n=1,2 by delaying register saving.
-C  * Try using ldm/stm.
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n', `r2')
-define(`v0',`r3')
-
-ASM_START()
-PROLOGUE(mpn_mul_1)
-	stmfd	sp!, { r4, r5, r6, r7 }
-
-	ands	r6, n, #3
-	mov	r12, #0
-	beq	L(fi0)
-	cmp	r6, #2
-	bcc	L(fi1)
-	beq	L(fi2)
-
-L(fi3):	ldr	r4, [up], #4
-	mov	r6, #0
-	ldr	r5, [up], #4
-	b	L(lo3)
-
-L(fi0):	ldr	r5, [up], #4
-	add	rp, rp, #4
-	mov	r7, #0
-	ldr	r4, [up], #4
-	b	L(lo0)
-
-L(fi1):	ldr	r4, [up], #4
-	mov	r6, #0
-	add	rp, rp, #8
-	subs	n, n, #1
-	beq	L(1)
-	ldr	r5, [up], #4
-	b	L(lo1)
-
-L(fi2):	ldr	r5, [up], #4
-	add	rp, rp, #12
-	mov	r7, #0
-	ldr	r4, [up], #4
-	b	L(lo2)
-
-	ALIGN(16)
-L(top):	mov	r6, #0
-	ldr	r5, [up], #4
-	str	r7, [rp, #-12]
-L(lo1):	umaal	r6, r12, r4, v0
-	mov	r7, #0
-	ldr	r4, [up], #4
-	str	r6, [rp, #-8]
-L(lo0):	umaal	r7, r12, r5, v0
-	mov	r6, #0
-	ldr	r5, [up], #4
-	str	r7, [rp, #-4]
-L(lo3):	umaal	r6, r12, r4, v0
-	mov	r7, #0
-	ldr	r4, [up], #4
-	str	r6, [rp], #16
-L(lo2):	umaal	r7, r12, r5, v0
-	subs	n, n, #4
-	bhi	L(top)
-
-	mov	r6, #0
-	str	r7, [rp, #-12]
-L(1):	umaal	r6, r12, r4, v0
-	str	r6, [rp, #-8]
-	mov	r0, r12
-	ldmfd	sp!, { r4, r5, r6, r7 }
-	bx	lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v6/mul_2.asm b/gmp/mpn/arm/v6/mul_2.asm
deleted file mode 100644
index 1679542a3c..0000000000
--- a/gmp/mpn/arm/v6/mul_2.asm
+++ /dev/null
@@ -1,131 +0,0 @@
-dnl  ARM mpn_mul_2.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM:	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 2.25
-C Cortex-A15	 2.5
-
-C TODO
-C  * This is a trivial edit of the addmul_2 code.  Check for simplifications,
-C    and possible speedups to 2.0 c/l.
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n', `r2')
-define(`vp',`r3')
-
-define(`v0',`r6')
-define(`v1',`r7')
-define(`u0',`r3')
-define(`u1',`r9')
-
-define(`cya',`r8')
-define(`cyb',`r12')
-
-
-ASM_START()
-PROLOGUE(mpn_mul_2)
-	push	{ r4, r5, r6, r7, r8, r9 }
-
-	ldm	vp, { v0, v1 }
-	mov	cya, #0
-	mov	cyb, #0
-
-	tst	n, #1
-	beq	L(evn)
-L(odd):	mov	r5, #0
-	ldr	u0, [up, #0]
-	mov	r4, #0
-	tst	n, #2
-	beq	L(fi1)
-L(fi3):	sub	up, up, #12
-	sub	rp, rp, #16
-	b	L(lo3)
-L(fi1):	sub	n, n, #1
-	sub	up, up, #4
-	sub	rp, rp, #8
-	b	L(lo1)
-L(evn):	mov	r4, #0
-	ldr	u1, [up, #0]
-	mov	r5, #0
-	tst	n, #2
-	bne	L(fi2)
-L(fi0):	sub	up, up, #8
-	sub	rp, rp, #12
-	b	L(lo0)
-L(fi2):	subs	n, n, #2
-	sub	rp, rp, #4
-	bls	L(end)
-
-	ALIGN(16)
-L(top):	ldr	u0, [up, #4]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #4]
-	mov	r4, #0
-	umaal	r5, cyb, u1, v1
-L(lo1):	ldr	u1, [up, #8]
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #8]
-	mov	r5, #0
-	umaal	r4, cyb, u0, v1
-L(lo0):	ldr	u0, [up, #12]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #12]
-	mov	r4, #0
-	umaal	r5, cyb, u1, v1
-L(lo3):	ldr	u1, [up, #16]!
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #16]!
-	mov	r5, #0
-	umaal	r4, cyb, u0, v1
-	subs	n, n, #4
-	bhi	L(top)
-
-L(end):	umaal	r4, cya, u1, v0
-	ldr	u0, [up, #4]
-	umaal	r5, cyb, u1, v1
-	str	r4, [rp, #4]
-	umaal	r5, cya, u0, v0
-	umaal	cya, cyb, u0, v1
-	str	r5, [rp, #8]
-	str	cya, [rp, #12]
-	mov	r0, cyb
-
-	pop	{ r4, r5, r6, r7, r8, r9 }
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v6/popham.asm b/gmp/mpn/arm/v6/popham.asm
deleted file mode 100644
index 44c8f2361c..0000000000
--- a/gmp/mpn/arm/v6/popham.asm
+++ /dev/null
@@ -1,138 +0,0 @@
-dnl  ARM mpn_popcount and mpn_hamdist.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		     popcount	      hamdist
-C		    cycles/limb	    cycles/limb
-C StrongARM		 -
-C XScale		 -
-C Cortex-A7		 ?
-C Cortex-A8		 ?
-C Cortex-A9		 8.94		 9.47
-C Cortex-A15		 5.67		 6.44
-
-C Architecture requirements:
-C v5	-
-C v5t	-
-C v5te	ldrd strd
-C v6	usada8
-C v6t2	-
-C v7a	-
-
-ifdef(`OPERATION_popcount',`
-  define(`func',`mpn_popcount')
-  define(`ap',		`r0')
-  define(`n',		`r1')
-  define(`a0',		`r2')
-  define(`a1',		`r3')
-  define(`s',		`r5')
-  define(`b_01010101',	`r6')
-  define(`b_00110011',	`r7')
-  define(`b_00001111',	`r8')
-  define(`zero',	`r9')
-  define(`POPC',	`$1')
-  define(`HAMD',	`dnl')
-')
-ifdef(`OPERATION_hamdist',`
-  define(`func',`mpn_hamdist')
-  define(`ap',		`r0')
-  define(`bp',		`r1')
-  define(`n',		`r2')
-  define(`a0',		`r6')
-  define(`a1',		`r7')
-  define(`b0',		`r4')
-  define(`b1',		`r5')
-  define(`s',		`r11')
-  define(`b_01010101',	`r8')
-  define(`b_00110011',	`r9')
-  define(`b_00001111',	`r10')
-  define(`zero',	`r3')
-  define(`POPC',	`dnl')
-  define(`HAMD',	`$1')
-')
-
-
-ASM_START()
-PROLOGUE(func)
-POPC(`	push	{ r4-r9 }	')
-HAMD(`	push	{ r4-r11 }	')
-
-	ldr	b_01010101, =0x55555555
-	mov	r12, #0
-	ldr	b_00110011, =0x33333333
-	mov	zero, #0
-	ldr	b_00001111, =0x0f0f0f0f
-
-	tst	n, #1
-	beq	L(evn)
-
-L(odd):	ldr	a1, [ap], #4		C 1 x 32 1-bit accumulators, 0-1
-HAMD(`	ldr	b1, [bp], #4	')	C 1 x 32 1-bit accumulators, 0-1
-HAMD(`	eor	a1, a1, b1	')
-	and	r4, b_01010101, a1, lsr #1
-	sub	a1, a1, r4
-	and	r4, a1, b_00110011
-	bic	r5, a1, b_00110011
-	add	r5, r4, r5, lsr #2	C 8 4-bit accumulators, 0-4
-	subs	n, n, #1
-	b	L(mid)
-
-L(evn):	mov	s, #0
-
-L(top):	ldrd	a0, a1, [ap], #8	C 2 x 32 1-bit accumulators, 0-1
-HAMD(`	ldrd	b0, b1, [bp], #8')
-HAMD(`	eor	a0, a0, b0	')
-HAMD(`	eor	a1, a1, b1	')
-	subs	n, n, #2
-	usada8	r12, s, zero, r12
-	and	r4, b_01010101, a0, lsr #1
-	sub	a0, a0, r4
-	and	r4, b_01010101, a1, lsr #1
-	sub	a1, a1, r4
-	and	r4, a0, b_00110011
-	bic	r5, a0, b_00110011
-	add	a0, r4, r5, lsr #2	C 8 4-bit accumulators, 0-4
-	and	r4, a1, b_00110011
-	bic	r5, a1, b_00110011
-	add	a1, r4, r5, lsr #2	C 8 4-bit accumulators, 0-4
-	add	r5, a0, a1		C 8 4-bit accumulators, 0-8
-L(mid):	and	r4, r5, b_00001111
-	bic	r5, r5, b_00001111
-	add	s, r4, r5, lsr #4	C 4 8-bit accumulators
-	bne	L(top)
-
-	usada8	r0, s, zero, r12
-POPC(`	pop	{ r4-r9 }	')
-HAMD(`	pop	{ r4-r11 }	')
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v6/sqr_basecase.asm b/gmp/mpn/arm/v6/sqr_basecase.asm
deleted file mode 100644
index d52970aaa7..0000000000
--- a/gmp/mpn/arm/v6/sqr_basecase.asm
+++ /dev/null
@@ -1,518 +0,0 @@
-dnl  ARM v6 mpn_sqr_basecase.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C Code structure:
-C
-C
-C        m_2(0m4)        m_2(2m4)        m_2(1m4)        m_2(3m4)
-C           |               |               |               |
-C           |               |               |               |
-C           |               |               |               |
-C          \|/             \|/             \|/             \|/
-C              ____________                   ____________
-C             /            \                 /            \
-C            \|/            \               \|/            \
-C         am_2(3m4)       am_2(1m4)       am_2(0m4)       am_2(2m4)
-C            \            /|\                \            /|\
-C             \____________/                  \____________/
-C                       \                        /
-C                        \                      /
-C                         \                    /
-C                       tail(0m2)          tail(1m2)
-C                            \              /
-C                             \            /
-C                            sqr_diag_addlsh1
-
-C TODO
-C  * Further tweak counter and updates in outer loops.  (This could save
-C    perhaps 5n cycles).
-C  * Try to use fewer register.  Perhaps coalesce r9 branch target and n_saved.
-C    (This could save 2-3 cycles for n > 4.)
-C  * Optimise sqr_diag_addlsh1 loop.  (This could save O(n) cycles.)
-C  * Implement larger final corners (xit/tix).  Also stop loops earlier
-C    suppressing writes of upper-most rp[] values.  (This could save 10-20
-C    cycles for n > 4.)
-C  * Is the branch table really faster than discrete branches?
-
-define(`rp',      r0)
-define(`up',      r1)
-define(`n',       r2)
-
-define(`v0',      r3)
-define(`v1',      r6)
-define(`i',       r8)
-define(`n_saved', r14)
-define(`cya',     r11)
-define(`cyb',     r12)
-define(`u0',      r7)
-define(`u1',      r9)
-
-ASM_START()
-PROLOGUE(mpn_sqr_basecase)
-	and	r12, n, #3
-	cmp	n, #4
-	addgt	r12, r12, #4
-	add	pc, pc, r12, lsl #2
-	nop
-	b	L(4)
-	b	L(1)
-	b	L(2)
-	b	L(3)
-	b	L(0m4)
-	b	L(1m4)
-	b	L(2m4)
-	b	L(3m4)
-
-
-L(1m4):	push	{r4-r10,r11,r14}
-	mov	n_saved, n
-	sub	i, n, #4
-	sub	n, n, #2
-	add	r10, pc, #L(am2_2m4)-.-8
-	ldm	up, {v0,v1,u0}
-	sub	up, up, #4
-	mov	cyb, #0
-	mov	r5, #0
-	umull	r4, cya, v1, v0
-	str	r4, [rp], #-12
-	mov	r4, #0
-	b	L(ko0)
-
-L(3m4):	push	{r4-r10,r11,r14}
-	mov	n_saved, n
-	sub	i, n, #4
-	sub	n, n, #2
-	add	r10, pc, #L(am2_0m4)-.-8
-	ldm	up, {v0,v1,u0}
-	add	up, up, #4
-	mov	cyb, #0
-	mov	r5, #0
-	umull	r4, cya, v1, v0
-	str	r4, [rp], #-4
-	mov	r4, #0
-	b	L(ko2)
-
-L(2m4):	push	{r4-r10,r11,r14}
-	mov	n_saved, n
-	sub	i, n, #4
-	sub	n, n, #2
-	add	r10, pc, #L(am2_3m4)-.-8
-	ldm	up, {v0,v1,u1}
-	mov	cyb, #0
-	mov	r4, #0
-	umull	r5, cya, v1, v0
-	str	r5, [rp], #-8
-	mov	r5, #0
-	b	L(ko1)
-
-L(0m4):	push	{r4-r10,r11,r14}
-	mov	n_saved, n
-	sub	i, n, #4
-	sub	n, n, #2
-	add	r10, pc, #L(am2_1m4)-.-8
-	ldm	up, {v0,v1,u1}
-	mov	cyb, #0
-	mov	r4, #0
-	add	up, up, #8
-	umull	r5, cya, v1, v0
-	str	r5, [rp, #0]
-	mov	r5, #0
-
-L(top):	ldr	u0, [up, #4]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #4]
-	mov	r4, #0
-	umaal	r5, cyb, u1, v1
-L(ko2):	ldr	u1, [up, #8]
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #8]
-	mov	r5, #0
-	umaal	r4, cyb, u0, v1
-L(ko1):	ldr	u0, [up, #12]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #12]
-	mov	r4, #0
-	umaal	r5, cyb, u1, v1
-L(ko0):	ldr	u1, [up, #16]!
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #16]!
-	mov	r5, #0
-	umaal	r4, cyb, u0, v1
-	subs	i, i, #4
-	bhi	L(top)
-	bx	r10
-
-L(evnloop):
-	subs	i, n, #4
-	sub	n, n, #2
-	blt	L(tix)
-	ldm	up, {v0,v1,u0}
-	add	up, up, #4
-	mov	cya, #0
-	mov	cyb, #0
-	ldm	rp, {r4,r5}
-	sub	rp, rp, #4
-	umaal	r4, cya, v1, v0
-	str	r4, [rp, #4]
-	ldr	r4, [rp, #12]
-	b	L(lo2)
-L(ua2):	ldr	u0, [up, #4]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #4]
-	ldr	r4, [rp, #12]
-	umaal	r5, cyb, u1, v1
-L(lo2):	ldr	u1, [up, #8]
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #8]
-	ldr	r5, [rp, #16]
-	umaal	r4, cyb, u0, v1
-	ldr	u0, [up, #12]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #12]
-	ldr	r4, [rp, #20]
-	umaal	r5, cyb, u1, v1
-	ldr	u1, [up, #16]!
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #16]!
-	ldr	r5, [rp, #8]
-	umaal	r4, cyb, u0, v1
-	subs	i, i, #4
-	bhi	L(ua2)
-L(am2_0m4):
-	umaal	r4, cya, u1, v0
-	ldr	u0, [up, #4]
-	umaal	r5, cyb, u1, v1
-	str	r4, [rp, #4]
-	umaal	r5, cya, u0, v0
-	umaal	cya, cyb, u0, v1
-	str	r5, [rp, #8]
-	str	cya, [rp, #12]
-	str	cyb, [rp, #16]
-	sub	up, up, n, lsl #2
-	sub	rp, rp, n, lsl #2
-	add	up, up, #8
-	sub	i, n, #4
-	sub	n, n, #2
-	ldm	up, {v0,v1,u0}
-	sub	up, up, #4
-	mov	cya, #0
-	mov	cyb, #0
-	ldr	r4, [rp, #24]
-	ldr	r5, [rp, #28]
-	add	rp, rp, #12
-	umaal	r4, cya, v1, v0
-	str	r4, [rp, #12]
-	ldr	r4, [rp, #20]
-	b	L(lo0)
-L(ua0):	ldr	u0, [up, #4]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #4]
-	ldr	r4, [rp, #12]
-	umaal	r5, cyb, u1, v1
-	ldr	u1, [up, #8]
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #8]
-	ldr	r5, [rp, #16]
-	umaal	r4, cyb, u0, v1
-	ldr	u0, [up, #12]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #12]
-	ldr	r4, [rp, #20]
-	umaal	r5, cyb, u1, v1
-L(lo0):	ldr	u1, [up, #16]!
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #16]!
-	ldr	r5, [rp, #8]
-	umaal	r4, cyb, u0, v1
-	subs	i, i, #4
-	bhi	L(ua0)
-L(am2_2m4):
-	umaal	r4, cya, u1, v0
-	ldr	u0, [up, #4]
-	umaal	r5, cyb, u1, v1
-	str	r4, [rp, #4]
-	umaal	r5, cya, u0, v0
-	umaal	cya, cyb, u0, v1
-	str	r5, [rp, #8]
-	str	cya, [rp, #12]
-	str	cyb, [rp, #16]
-	sub	up, up, n, lsl #2
-	sub	rp, rp, n, lsl #2
-	add	up, up, #8
-	add	rp, rp, #24
-	b	L(evnloop)
-
-
-L(oddloop):
-	subs	i, n, #4
-	sub	n, n, #2
-	blt	L(xit)
-	ldm	up, {v0,v1,u1}
-	mov	cya, #0
-	mov	cyb, #0
-	sub	rp, rp, #8
-	ldr	r5, [rp, #8]
-	ldr	r4, [rp, #12]
-	umaal	r5, cya, v1, v0
-	str	r5, [rp, #8]
-	ldr	r5, [rp, #16]
-	b	L(lo1)
-L(ua1):	ldr	u0, [up, #4]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #4]
-	ldr	r4, [rp, #12]
-	umaal	r5, cyb, u1, v1
-	ldr	u1, [up, #8]
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #8]
-	ldr	r5, [rp, #16]
-	umaal	r4, cyb, u0, v1
-L(lo1):	ldr	u0, [up, #12]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #12]
-	ldr	r4, [rp, #20]
-	umaal	r5, cyb, u1, v1
-	ldr	u1, [up, #16]!
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #16]!
-	ldr	r5, [rp, #8]
-	umaal	r4, cyb, u0, v1
-	subs	i, i, #4
-	bhi	L(ua1)
-L(am2_3m4):
-	umaal	r4, cya, u1, v0
-	ldr	u0, [up, #4]
-	umaal	r5, cyb, u1, v1
-	str	r4, [rp, #4]
-	umaal	r5, cya, u0, v0
-	umaal	cya, cyb, u0, v1
-	str	r5, [rp, #8]
-	str	cya, [rp, #12]
-	str	cyb, [rp, #16]
-	sub	up, up, n, lsl #2
-	sub	rp, rp, n, lsl #2
-	add	up, up, #8
-	add	rp, rp, #24
-	subs	i, n, #4
-	sub	n, n, #2
-	ldm	up, {v0,v1,u1}
-	mov	cya, #0
-	mov	cyb, #0
-	ldr	r5, [rp, #0]
-	ldr	r4, [rp, #4]
-	add	up, up, #8
-	umaal	r5, cya, v1, v0
-	str	r5, [rp, #0]
-	ldr	r5, [rp, #8]
-	bls	L(e3)
-L(ua3):	ldr	u0, [up, #4]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #4]
-	ldr	r4, [rp, #12]
-	umaal	r5, cyb, u1, v1
-	ldr	u1, [up, #8]
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #8]
-	ldr	r5, [rp, #16]
-	umaal	r4, cyb, u0, v1
-	ldr	u0, [up, #12]
-	umaal	r4, cya, u1, v0
-	str	r4, [rp, #12]
-	ldr	r4, [rp, #20]
-	umaal	r5, cyb, u1, v1
-	ldr	u1, [up, #16]!
-	umaal	r5, cya, u0, v0
-	str	r5, [rp, #16]!
-	ldr	r5, [rp, #8]
-	umaal	r4, cyb, u0, v1
-	subs	i, i, #4
-	bhi	L(ua3)
-L(e3):
-L(am2_1m4):
-	umaal	r4, cya, u1, v0
-	ldr	u0, [up, #4]
-	umaal	r5, cyb, u1, v1
-	str	r4, [rp, #4]
-	umaal	r5, cya, u0, v0
-	umaal	cya, cyb, u0, v1
-	str	r5, [rp, #8]
-	str	cya, [rp, #12]
-	str	cyb, [rp, #16]
-	sub	up, up, n, lsl #2
-	sub	rp, rp, n, lsl #2
-	add	up, up, #8
-	add	rp, rp, #24
-	b	L(oddloop)
-
-L(xit):	ldm	up!, {v0,u0}
-	ldr	cya, [rp], #12
-	mov	cyb, #0
-	umaal	cya, cyb, u0, v0
-	b	L(sqr_diag_addlsh1)
-
-L(tix):	ldm	up!, {v0,v1,u0}
-	ldm	rp, {r4,r5}
-	mov	cya, #0
-	mov	cyb, #0
-	umaal	r4, cya, v1, v0
-	umaal	r5, cya, u0, v0
-	stm	rp, {r4,r5}
-	umaal	cya, cyb, u0, v1
-	add	rp, rp, #20
-C	b	L(sqr_diag_addlsh1)
-
-
-define(`w0',  r6)
-define(`w1',  r7)
-define(`w2',  r8)
-define(`rbx', r9)
-
-L(sqr_diag_addlsh1):
-	str	cya, [rp, #-12]
-	str	cyb, [rp, #-8]
-	sub	n, n_saved, #1
-	sub	up, up, n_saved, lsl #2
-	sub	rp, rp, n_saved, lsl #3
-	ldr	r3, [up], #4
-	umull	w1, r5, r3, r3
-	mov	w2, #0
-	mov	r10, #0
-C	cmn	r0, #0			C clear cy (already clear by luck)
-	b	L(lm)
-
-L(tsd):	adds	w0, w0, rbx
-	adcs	w1, w1, r4
-	str	w0, [rp, #0]
-L(lm):	ldr	w0, [rp, #4]
-	str	w1, [rp, #4]
-	ldr	w1, [rp, #8]!
-	add	rbx, r5, w2
-	adcs	w0, w0, w0
-	ldr	r3, [up], #4
-	adcs	w1, w1, w1
-	adc	w2, r10, r10
-	umull	r4, r5, r3, r3
-	subs	n, n, #1
-	bne	L(tsd)
-
-	adds	w0, w0, rbx
-	adcs	w1, w1, r4
-	adc	w2, r5, w2
-	stm	rp, {w0,w1,w2}
-
-	pop	{r4-r10,r11,pc}
-
-
-C Straight line code for n <= 4
-
-L(1):	ldr	r3, [up, #0]
-	umull	r1, r2, r3, r3
-	stm	rp, {r1,r2}
-	bx	r14
-
-L(2):	push	{r4-r5}
-	ldm	up, {r5,r12}
-	umull	r1, r2, r5, r5
-	umull	r3, r4, r12, r12
-	umull	r5, r12, r5, r12
-	adds	r5, r5, r5
-	adcs	r12, r12, r12
-	adc	r4, r4, #0
-	adds	r2, r2, r5
-	adcs	r3, r3, r12
-	adc	r4, r4, #0
-	stm	rp, {r1,r2,r3,r4}
-	pop	{r4-r5}
-	bx	r14
-
-L(3):	push	{r4-r11}
-	ldm	up, {r7,r8,r9}
-	umull	r1, r2, r7, r7
-	umull	r3, r4, r8, r8
-	umull	r5, r6, r9, r9
-	umull	r10, r11, r7, r8
-	mov	r12, #0
-	umlal	r11, r12, r7, r9
-	mov	r7, #0
-	umlal	r12, r7, r8, r9
-	adds	r10, r10, r10
-	adcs	r11, r11, r11
-	adcs	r12, r12, r12
-	adcs	r7, r7, r7
-	adc	r6, r6, #0
-	adds	r2, r2, r10
-	adcs	r3, r3, r11
-	adcs	r4, r4, r12
-	adcs	r5, r5, r7
-	adc	r6, r6, #0
-	stm	rp, {r1,r2,r3,r4,r5,r6}
-	pop	{r4-r11}
-	bx	r14
-
-L(4):	push	{r4-r11, r14}
-	ldm	up, {r9,r10,r11,r12}
-	umull	r1, r2, r9, r9
-	umull	r3, r4, r10, r10
-	umull	r5, r6, r11, r11
-	umull	r7, r8, r12, r12
-	stm	rp, {r1,r2,r3,r4,r5,r6,r7}
-	umull	r1, r2, r9, r10
-	mov	r3, #0
-	umlal	r2, r3, r9, r11
-	mov	r4, #0
-	umlal	r3, r4, r9, r12
-	mov	r5, #0
-	umlal	r3, r5, r10, r11
-	umaal	r4, r5, r10, r12
-	mov	r6, #0
-	umlal	r5, r6, r11, r12
-	adds	r1, r1, r1
-	adcs	r2, r2, r2
-	adcs	r3, r3, r3
-	adcs	r4, r4, r4
-	adcs	r5, r5, r5
-	adcs	r6, r6, r6
-	adc	r7, r8, #0
-	add	rp, rp, #4
-	ldm	rp, {r8,r9,r10,r11,r12,r14}
-	adds	r1, r1, r8
-	adcs	r2, r2, r9
-	adcs	r3, r3, r10
-	adcs	r4, r4, r11
-	adcs	r5, r5, r12
-	adcs	r6, r6, r14
-	adc	r7, r7, #0
-	stm	rp, {r1,r2,r3,r4,r5,r6,r7}
-	pop	{r4-r11, pc}
-EPILOGUE()
diff --git a/gmp/mpn/arm/v6/submul_1.asm b/gmp/mpn/arm/v6/submul_1.asm
deleted file mode 100644
index 8a21733a0a..0000000000
--- a/gmp/mpn/arm/v6/submul_1.asm
+++ /dev/null
@@ -1,125 +0,0 @@
-dnl  ARM mpn_submul_1.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM:	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 3.75
-C Cortex-A15	 4.0
-
-C This loop complements U on the fly,
-C   U' = B^n - 1 - U
-C and then uses that
-C   R - U*v = R + U'*v + v - B^n v
-
-C TODO
-C  * Micro-optimise feed-in code.
-C  * Optimise for n=1,2 by delaying register saving.
-C  * Try using ldm/stm.
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n', `r2')
-define(`v0',`r3')
-
-ASM_START()
-PROLOGUE(mpn_submul_1)
-	stmfd	sp!, { r4, r5, r6, r7 }
-
-	ands	r6, n, #3
-	mov	r12, v0
-	beq	L(fi0)
-	cmp	r6, #2
-	bcc	L(fi1)
-	beq	L(fi2)
-
-L(fi3):	ldr	r4, [up], #12
-	mvn	r4, r4
-	ldr	r6, [rp, #0]
-	ldr	r5, [up, #-8]
-	b	L(lo3)
-
-L(fi0):	ldr	r5, [up], #16
-	mvn	r5, r5
-	ldr	r7, [rp], #4
-	ldr	r4, [up, #-12]
-	b	L(lo0)
-
-L(fi1):	ldr	r4, [up], #4
-	mvn	r4, r4
-	ldr	r6, [rp], #8
-	subs	n, n, #1
-	beq	L(1)
-	ldr	r5, [up]
-	b	L(lo1)
-
-L(fi2):	ldr	r5, [up], #8
-	mvn	r5, r5
-	ldr	r7, [rp], #12
-	ldr	r4, [up, #-4]
-	b	L(lo2)
-
-	ALIGN(16)
-L(top):	ldr	r6, [rp, #-8]
-	ldr	r5, [up]
-	str	r7, [rp, #-12]
-L(lo1):	umaal	r6, r12, r4, v0
-	add	up, up, #16
-	mvn	r5, r5
-	ldr	r7, [rp, #-4]
-	ldr	r4, [up, #-12]
-	str	r6, [rp, #-8]
-L(lo0):	umaal	r7, r12, r5, v0
-	mvn	r4, r4
-	ldr	r6, [rp, #0]
-	ldr	r5, [up, #-8]
-	str	r7, [rp, #-4]
-L(lo3):	umaal	r6, r12, r4, v0
-	mvn	r5, r5
-	ldr	r7, [rp, #4]
-	ldr	r4, [up, #-4]
-	str	r6, [rp], #16
-L(lo2):	umaal	r7, r12, r5, v0
-	mvn	r4, r4
-	subs	n, n, #4
-	bhi	L(top)
-
-	ldr	r6, [rp, #-8]
-	str	r7, [rp, #-12]
-L(1):	umaal	r6, r12, r4, v0
-	str	r6, [rp, #-8]
-	sub	r0, v0, r12
-	ldmfd	sp!, { r4, r5, r6, r7 }
-	bx	lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v6t2/divrem_1.asm b/gmp/mpn/arm/v6t2/divrem_1.asm
deleted file mode 100644
index be24615acb..0000000000
--- a/gmp/mpn/arm/v6t2/divrem_1.asm
+++ /dev/null
@@ -1,212 +0,0 @@
-dnl  ARM v6t2 mpn_divrem_1 and mpn_preinv_divrem_1.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		norm	unorm	frac
-C StrongARM	 -	 -	 -
-C XScale	 -	 -	 -
-C Cortex-A7	 ?	 ?	 ?
-C Cortex-A8	 ?	 ?	 ?
-C Cortex-A9	13	14	13
-C Cortex-A15	11.4	11.8	11.1
-
-C TODO
-C  * Optimise inner-loops better, they could likely run a cycle or two faster.
-C  * Decrease register usage, streamline non-loop code.
-
-define(`qp_arg',  `r0')
-define(`fn',      `r1')
-define(`up_arg',  `r2')
-define(`n_arg',   `r3')
-define(`d_arg',   `0')
-define(`dinv_arg',`4')
-define(`cnt_arg', `8')
-
-define(`n',       `r9')
-define(`qp',      `r5')
-define(`up',      `r6')
-define(`cnt',     `r7')
-define(`tnc',     `r10')
-define(`dinv',    `r0')
-define(`d',       `r4')
-
-ASM_START()
-PROLOGUE(mpn_preinv_divrem_1)
-	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	ldr	d,    [sp, #9*4+d_arg]
-	ldr	cnt,  [sp, #9*4+cnt_arg]
-	str	r1, [sp, #9*4+d_arg]	C reuse d stack slot for fn
-	sub	n, r3, #1
-	add	r3, r1, n
-	cmp	d, #0
-	add	qp, qp_arg, r3, lsl #2	C put qp at Q[] end
-	add	up, up_arg, n, lsl #2	C put up at U[] end
-	ldr	dinv, [sp, #9*4+dinv_arg]
-	blt	L(nent)
-	b	L(uent)
-EPILOGUE()
-
-PROLOGUE(mpn_divrem_1)
-	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-	sub	n, r3, #1
-	ldr	d, [sp, #9*4+d_arg]	C d
-	str	r1, [sp, #9*4+d_arg]	C reuse d stack slot for fn
-	add	r3, r1, n
-	cmp	d, #0
-	add	qp, qp_arg, r3, lsl #2	C put qp at Q[] end
-	add	up, up_arg, n, lsl #2	C put up at U[] end
-	blt	L(normalised)
-
-L(unnorm):
-	clz	cnt, d
-	mov	r0, d, lsl cnt		C pass d << cnt
-	bl	mpn_invert_limb
-L(uent):
-	mov	d, d, lsl cnt		C d <<= cnt
-	cmp	n, #0
-	mov	r1, #0			C r
-	blt	L(frac)
-
-	ldr	r11, [up, #0]
-
-	rsb	tnc, cnt, #32
-	mov	r1, r11, lsr tnc
-	mov	r11, r11, lsl cnt
-	beq	L(uend)
-
-	ldr	r3, [up, #-4]!
-	orr	r2, r11, r3, lsr tnc
-	b	L(mid)
-
-L(utop):
-	mls	r1, d, r8, r11
-	mov	r11, r3, lsl cnt
-	ldr	r3, [up, #-4]!
-	cmp	r1, r2
-	addhi	r1, r1, d
-	subhi	r8, r8, #1
-	orr	r2, r11, r3, lsr tnc
-	cmp	r1, d
-	bcs	L(ufx)
-L(uok):	str	r8, [qp], #-4
-L(mid):	add	r8, r1, #1
-	mov	r11, r2
-	umlal	r2, r8, r1, dinv
-	subs	n, n, #1
-	bne	L(utop)
-
-	mls	r1, d, r8, r11
-	mov	r11, r3, lsl cnt
-	cmp	r1, r2
-	addhi	r1, r1, d
-	subhi	r8, r8, #1
-	cmp	r1, d
-	rsbcs	r1, d, r1
-	addcs	r8, r8, #1
-	str	r8, [qp], #-4
-
-L(uend):add	r8, r1, #1
-	mov	r2, r11
-	umlal	r2, r8, r1, dinv
-	mls	r1, d, r8, r11
-	cmp	r1, r2
-	addhi	r1, r1, d
-	subhi	r8, r8, #1
-	cmp	r1, d
-	rsbcs	r1, d, r1
-	addcs	r8, r8, #1
-	str	r8, [qp], #-4
-L(frac):
-	ldr	r2, [sp, #9*4+d_arg]	C fn
-	cmp	r2, #0
-	beq	L(fend)
-
-L(ftop):mov	r6, #0
-	add	r3, r1, #1
-	umlal	r6, r3, r1, dinv
-	mov	r8, #0
-	mls	r1, d, r3, r8
-	cmp	r1, r6
-	addhi	r1, r1, d
-	subhi	r3, r3, #1
-	subs	r2, r2, #1
-	str	r3, [qp], #-4
-	bne	L(ftop)
-
-L(fend):mov	r11, r1, lsr cnt
-L(rtn):	mov	r0, r11
-	ldmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-
-L(normalised):
-	mov	r0, d
-	bl	mpn_invert_limb
-L(nent):
-	cmp	n, #0
-	mov	r11, #0			C r
-	blt	L(nend)
-
-	ldr	r11, [up, #0]
-	cmp	r11, d
-	movlo	r2, #0			C hi q limb
-	movhs	r2, #1			C hi q limb
-	subhs	r11, r11, d
-
-	str	r2, [qp], #-4
-	cmp	n, #0
-	beq	L(nend)
-
-L(ntop):ldr	r1, [up, #-4]!
-	add	r12, r11, #1
-	umlal	r1, r12, r11, dinv
-	ldr	r3, [up, #0]
-	mls	r11, d, r12, r3
-	cmp	r11, r1
-	addhi	r11, r11, d
-	subhi	r12, r12, #1
-	cmp	d, r11
-	bls	L(nfx)
-L(nok):	str	r12, [qp], #-4
-	subs	n, n, #1
-	bne	L(ntop)
-
-L(nend):mov	r1, r11			C r
-	mov	cnt, #0			C shift cnt
-	b	L(frac)
-
-L(nfx):	add	r12, r12, #1
-	rsb	r11, d, r11
-	b	L(nok)
-L(ufx):	rsb	r1, d, r1
-	add	r8, r8, #1
-	b	L(uok)
-EPILOGUE()
diff --git a/gmp/mpn/arm/v6t2/gcd_1.asm b/gmp/mpn/arm/v6t2/gcd_1.asm
deleted file mode 100644
index 2063647963..0000000000
--- a/gmp/mpn/arm/v6t2/gcd_1.asm
+++ /dev/null
@@ -1,115 +0,0 @@
-dnl  ARM v6t2 mpn_gcd_1.
-
-dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for ARM by Torbjörn
-dnl  Granlund.
-
-dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/bit (approx)
-C StrongARM	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 5.3
-C Cortex-A15	 3.5
-C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
-
-C TODO
-C  * Optimise inner-loop better.
-C  * Push saving/restoring of callee-user regs into call code
-
-C Threshold of when to call bmod when U is one limb.  Should be about
-C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
-define(`BMOD_THRES_LOG2', 7)
-
-C INPUT PARAMETERS
-define(`up',    `r0')
-define(`n',     `r1')
-define(`v0',    `r2')
-
-ifdef(`BMOD_1_TO_MOD_1_THRESHOLD',,
-  `define(`BMOD_1_TO_MOD_1_THRESHOLD',0xffffffff)')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_gcd_1)
-	push	{r4, r7, lr}
-	ldr	r3, [up]	C U low limb
-
-	orr	r3, r3, v0
-	rbit	r4, r3
-	clz	r4, r4		C min(ctz(u0),ctz(v0))
-
-	rbit	r12, v0
-	clz	r12, r12
-	mov	v0, v0, lsr r12
-
-	mov	r7, v0
-
-	cmp	n, #1
-	bne	L(nby1)
-
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
-	ldr	r3, [up]
-	cmp	v0, r3, lsr #BMOD_THRES_LOG2
-	bhi	L(red1)
-
-L(bmod):mov	r3, #0		C carry argument
-	bl	mpn_modexact_1c_odd
-	b	L(red0)
-
-L(nby1):cmp	n, #BMOD_1_TO_MOD_1_THRESHOLD
-	blo	L(bmod)
-
-	bl	mpn_mod_1
-
-L(red0):mov	r3, r0
-L(red1):cmp	r3, #0
-	rbit	r12, r3
-	clz	r12, r12
-	bne	L(mid)
-	b	L(end)
-
-	ALIGN(8)
-L(top):	movcs	r3, r1		C if x-y < 0
-	movcs	r7, r0		C use x,y-x
-L(mid):	mov	r3, r3, lsr r12	C
-	mov	r0, r3		C
-	subs	r1, r7, r3	C
-	rsb	r3, r7, r3	C
-	rbit	r12, r1
-	clz	r12, r12	C
-	bne	L(top)		C
-
-L(end):	mov	r0, r7, lsl r4
-	pop	{r4, r7, pc}
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/addmul_1.asm b/gmp/mpn/arm/v7a/cora15/addmul_1.asm
deleted file mode 100644
index c2277b32b2..0000000000
--- a/gmp/mpn/arm/v7a/cora15/addmul_1.asm
+++ /dev/null
@@ -1,145 +0,0 @@
-dnl  ARM mpn_addmul_1 optimised for A15.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb		best
-C StrongARM:     -
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 6			3.25
-C Cortex-A15	 2			this
-
-C This code uses umlal for adding in the rp[] data, keeping the recurrency path
-C separate from any multiply instructions.  It performs well on A15, at umlal's
-C bandwidth.
-C
-C An A9 variant should perhaps stick to 3-way unrolling, and use ldm and stm
-C for all loads and stores.  Alternatively, it could do 2-way or 4-way, but
-C then alignment aware code will be necessary (adding O(1) bookkeeping
-C overhead).
-C
-C We don't use r12 due to ldrd and strd limitations.
-
-C Architecture requirements:
-C v5	-
-C v5t	-
-C v5te	ldrd strd
-C v6	-
-C v6t2	-
-C v7a	-
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
-define(`v0', `r3')
-
-define(`w0', `r10') define(`w1', `r11')
-define(`u0', `r8')  define(`u1', `r9')
-
-ASM_START()
-PROLOGUE(mpn_addmul_1)
-	push	{ r4-r11 }
-
-	ands	r6, n, #3
-	sub	n, n, #3
-	beq	L(b00)
-	cmp	r6, #2
-	bcc	L(b01)
-	beq	L(b10)
-
-L(b11):	mov	r6, #0
-	cmn	r13, #0			C carry clear
-	ldr	u1, [up], #-4
-	ldr	w1, [rp], #-4
-	mov	r7, #0
-	b	L(mid)
-
-L(b00):	ldrd	u0, u1, [up]
-	ldrd	w0, w1, [rp]
-	mov	r6, #0
-	umlal	w0, r6, u0, v0
-	cmn	r13, #0			C carry clear
-	mov	r7, #0
-	str	w0, [rp]
-	b	L(mid)
-
-L(b10):	ldrd	u0, u1, [up], #8
-	ldrd	w0, w1, [rp]
-	mov	r4, #0
-	umlal	w0, r4, u0, v0
-	cmn	r13, #0			C carry clear
-	mov	r5, #0
-	str	w0, [rp], #8
-	umlal	w1, r5, u1, v0
-	tst	n, n
-	bmi	L(end)
-	b	L(top)
-
-L(b01):	mov	r4, #0
-	ldr	u1, [up], #4
-	ldr	w1, [rp], #4
-	mov	r5, #0
-	umlal	w1, r5, u1, v0
-	tst	n, n
-	bmi	L(end)
-
-	ALIGN(16)
-L(top):	ldrd	u0, u1, [up, #0]
-	adcs	r4, r4, w1
-	ldrd	w0, w1, [rp, #0]
-	mov	r6, #0
-	umlal	w0, r6, u0, v0		C 1 2
-	adcs	r5, r5, w0
-	mov	r7, #0
-	strd	r4, r5, [rp, #-4]
-L(mid):	umlal	w1, r7, u1, v0		C 2 3
-	ldrd	u0, u1, [up, #8]
-	adcs	r6, r6, w1
-	ldrd	w0, w1, [rp, #8]
-	mov	r4, #0
-	umlal	w0, r4, u0, v0		C 3 4
-	adcs	r7, r7, w0
-	mov	r5, #0
-	strd	r6, r7, [rp, #4]
-	umlal	w1, r5, u1, v0		C 0 1
-	sub	n, n, #4
-	add	up, up, #16
-	add	rp, rp, #16
-	tst	n, n
-	bpl	L(top)
-
-L(end):	adcs	r4, r4, w1
-	str	r4, [rp, #-4]
-	adc	r0, r5, #0
-	pop	{ r4-r11 }
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/aors_n.asm b/gmp/mpn/arm/v7a/cora15/aors_n.asm
deleted file mode 100644
index dc3f83992e..0000000000
--- a/gmp/mpn/arm/v7a/cora15/aors_n.asm
+++ /dev/null
@@ -1,162 +0,0 @@
-dnl  ARM mpn_add_n/mpn_sub_n optimised for A15.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb		best
-C StrongARM:     -
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 3.55			2.5
-C Cortex-A15	 1.27			this
-
-C This was a major improvement compared to the code we had before, but it might
-C not be the best 8-way code possible.  We've tried some permutations of auto-
-C increments and separate pointer updates, but they all ran at the same speed
-C on A15.
-
-C Architecture requirements:
-C v5	-
-C v5t	-
-C v5te	ldrd strd
-C v6	-
-C v6t2	-
-C v7a	-
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`vp', `r2')
-define(`n',  `r3')
-
-ifdef(`OPERATION_add_n', `
-  define(`ADDSUBC',	adcs)
-  define(`IFADD',	`$1')
-  define(`SETCY',	`cmp	$1, #1')
-  define(`RETVAL',	`adc	r0, n, #0')
-  define(`RETVAL2',	`adc	r0, n, #1')
-  define(`func',	mpn_add_n)
-  define(`func_nc',	mpn_add_nc)')
-ifdef(`OPERATION_sub_n', `
-  define(`ADDSUBC',	sbcs)
-  define(`IFADD',	`')
-  define(`SETCY',	`rsbs	$1, $1, #0')
-  define(`RETVAL',	`sbc	r0, r0, r0
-			and	r0, r0, #1')
-  define(`RETVAL2',	`RETVAL')
-  define(`func',	mpn_sub_n)
-  define(`func_nc',	mpn_sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-ASM_START()
-PROLOGUE(func_nc)
-	ldr	r12, [sp]
-	b	L(ent)
-EPILOGUE()
-PROLOGUE(func)
-	mov	r12, #0
-L(ent):	push	{ r4-r9 }
-
-	ands	r6, n, #3
-	mov	n, n, lsr #2
-	beq	L(b00)
-	cmp	r6, #2
-	bcc	L(b01)
-	beq	L(b10)
-
-L(b11):	ldr	r5, [up], #4
-	ldr	r7, [vp], #4
-	SETCY(	r12)
-	ADDSUBC	r9, r5, r7
-	ldrd	r4, r5, [up, #0]
-	ldrd	r6, r7, [vp, #0]
-	str	r9, [rp], #-4
-	b	L(lo)
-
-L(b00):	ldrd	r4, r5, [up], #-8
-	ldrd	r6, r7, [vp], #-8
-	SETCY(	r12)
-	sub	rp, rp, #16
-	b	L(mid)
-
-L(b01):	ldr	r5, [up], #-4
-	ldr	r7, [vp], #-4
-	SETCY(	r12)
-	ADDSUBC	r9, r5, r7
-	str	r9, [rp], #-12
-	tst	n, n
-	beq	L(wd1)
-L(gt1):	ldrd	r4, r5, [up, #8]
-	ldrd	r6, r7, [vp, #8]
-	b	L(mid)
-
-L(b10):	ldrd	r4, r5, [up]
-	ldrd	r6, r7, [vp]
-	SETCY(	r12)
-	sub	rp, rp, #8
-	b	L(lo)
-
-	ALIGN(16)
-L(top):	ldrd	r4, r5, [up, #8]
-	ldrd	r6, r7, [vp, #8]
-	strd	r8, r9, [rp, #8]
-L(mid):	ADDSUBC	r8, r4, r6
-	ADDSUBC	r9, r5, r7
-	ldrd	r4, r5, [up, #16]
-	ldrd	r6, r7, [vp, #16]
-	strd	r8, r9, [rp, #16]
-	ADDSUBC	r8, r4, r6
-	ADDSUBC	r9, r5, r7
-	sub	n, n, #2
-	tst	n, n
-	bmi	L(dne)
-	ldrd	r4, r5, [up, #24]
-	ldrd	r6, r7, [vp, #24]
-	strd	r8, r9, [rp, #24]
-	ADDSUBC	r8, r4, r6
-	ADDSUBC	r9, r5, r7
-	ldrd	r4, r5, [up, #32]!
-	ldrd	r6, r7, [vp, #32]!
-	strd	r8, r9, [rp, #32]!
-L(lo):	ADDSUBC	r8, r4, r6
-	ADDSUBC	r9, r5, r7
-	tst	n, n
-	bne	L(top)
-
-L(end):	strd	r8, r9, [rp, #8]
-L(wd1):	RETVAL
-	pop	{ r4-r9 }
-	bx	r14
-L(dne):	strd	r8, r9, [rp, #24]
-	RETVAL2
-	pop	{ r4-r9 }
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/cnd_aors_n.asm b/gmp/mpn/arm/v7a/cora15/cnd_aors_n.asm
deleted file mode 100644
index b9e5cd3f79..0000000000
--- a/gmp/mpn/arm/v7a/cora15/cnd_aors_n.asm
+++ /dev/null
@@ -1,158 +0,0 @@
-dnl  ARM mpn_cnd_add_n/mpn_cnd_sub_n optimised for A15.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb		best
-C StrongARM:     -
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 3.75			 3
-C Cortex-A15	 1.78			this
-
-C This code does not run as well as one could have hoped, since 1.5 c/l seems
-C realistic for this insn mix.
-
-C Architecture requirements:
-C v5	-
-C v5t	-
-C v5te	ldrd strd
-C v6	-
-C v6t2	-
-C v7a	-
-
-define(`cnd',`r0')
-define(`rp', `r1')
-define(`up', `r2')
-define(`vp', `r3')
-define(`n',  `r12')
-
-ifdef(`OPERATION_cnd_add_n', `
-  define(`ADDSUB',	adds)
-  define(`ADDSUBC',	adcs)
-  define(`IFADD',	`$1')
-  define(`INITCY',      `cmn	r0, #0')
-  define(`RETVAL',	`adc	r0, n, #0')
-  define(`RETVAL2',	`adc	r0, n, #1')
-  define(`func',	mpn_cnd_add_n)
-  define(`func_nc',	mpn_add_nc)')
-ifdef(`OPERATION_cnd_sub_n', `
-  define(`ADDSUB',	subs)
-  define(`ADDSUBC',	sbcs)
-  define(`IFADD',	`')
-  define(`INITCY',      `cmp	r0, #0')
-  define(`RETVAL',	`sbc	r0, r0, r0
-			and	r0, r0, #1')
-  define(`RETVAL2',	`RETVAL')
-  define(`func',	mpn_cnd_sub_n)
-  define(`func_nc',	mpn_sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
-
-ASM_START()
-PROLOGUE(func)
-	ldr	n, [sp]
-	push	{ r4-r9 }
-
-	cmp	cnd, #1
-	sbc	cnd, cnd, cnd		C conditionally set to 0xffffffff
-
-	ands	r6, n, #3
-	mov	n, n, lsr #2
-	beq	L(b00)
-	cmp	r6, #2
-	bcc	L(b01)
-	beq	L(b10)
-
-L(b11):	ldr	r5, [up], #4
-	ldr	r7, [vp], #4
-	bic	r7, r7, cnd
-	ADDSUB	r9, r5, r7
-	ldrd	r4, r5, [up, #0]
-	ldrd	r6, r7, [vp, #0]
-	bic	r6, r6, cnd
-	bic	r7, r7, cnd
-	str	r9, [rp], #-4
-	b	L(lo)
-
-L(b00):	ldrd	r4, r5, [up], #-8
-	ldrd	r6, r7, [vp], #-8
-	bic	r6, r6, cnd
-	bic	r7, r7, cnd
-	INITCY
-	sub	rp, rp, #16
-	b	L(mid)
-
-L(b01):	ldr	r5, [up], #-4
-	ldr	r7, [vp], #-4
-	bic	r7, r7, cnd
-	ADDSUB	r9, r5, r7
-	str	r9, [rp], #-12
-	tst	n, n
-	beq	L(wd1)
-L(gt1):	ldrd	r4, r5, [up, #8]
-	ldrd	r6, r7, [vp, #8]
-	bic	r6, r6, cnd
-	bic	r7, r7, cnd
-	b	L(mid)
-
-L(b10):	ldrd	r4, r5, [up]
-	ldrd	r6, r7, [vp]
-	bic	r6, r6, cnd
-	bic	r7, r7, cnd
-	INITCY
-	sub	rp, rp, #8
-	b	L(lo)
-
-	ALIGN(16)
-L(top):	ldrd	r6, r7, [vp, #8]
-	ldrd	r4, r5, [up, #8]
-	bic	r6, r6, cnd
-	bic	r7, r7, cnd
-	strd	r8, r9, [rp, #8]
-L(mid):	ADDSUBC	r8, r4, r6
-	ADDSUBC	r9, r5, r7
-	ldrd	r6, r7, [vp, #16]!
-	ldrd	r4, r5, [up, #16]!
-	bic	r6, r6, cnd
-	bic	r7, r7, cnd
-	sub	n, n, #1
-	strd	r8, r9, [rp, #16]!
-L(lo):	ADDSUBC	r8, r4, r6
-	ADDSUBC	r9, r5, r7
-	tst	n, n
-	bne	L(top)
-
-L(end):	strd	r8, r9, [rp, #8]
-L(wd1):	RETVAL
-	pop	{ r4-r9 }
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/com.asm b/gmp/mpn/arm/v7a/cora15/com.asm
deleted file mode 100644
index a258afe934..0000000000
--- a/gmp/mpn/arm/v7a/cora15/com.asm
+++ /dev/null
@@ -1,180 +0,0 @@
-dnl  ARM mpn_com optimised for A15.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	2.5
-C Cortex-A15	1.0
-
-C This is great A15 core register code, but it is a bit large.
-C We use FEEDIN_VARIANT 1 to save some space, but use 8-way unrolling.
-
-C Architecture requirements:
-C v5	-
-C v5t	-
-C v5te	ldrd strd
-C v6	-
-C v6t2	-
-C v7a	-
-
-define(`FEEDIN_VARIANT', 1)	C alternatives: 0 1 2
-define(`UNROLL', 4x2)		C alternatives: 4 4x2
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
-
-ASM_START()
-PROLOGUE(mpn_com)
-	push	{ r4-r5,r8-r9 }
-
-ifelse(FEEDIN_VARIANT,0,`
-	ands	r12, n, #3
-	mov	n, n, lsr #2
-	beq	L(b00a)
-	tst	r12, #1
-	beq	L(bx0)
-	ldr	r5, [up], #4
-	mvn	r9, r5
-	str	r9, [rp], #4
-	tst	r12, #2
-	beq	L(b00)
-L(bx0):	ldrd	r4, r5, [up, #0]
-	sub	rp, rp, #8
-	b	L(lo)
-L(b00):	tst	n, n
-	beq	L(wd1)
-L(b00a):ldrd	r4, r5, [up], #-8
-	sub	rp, rp, #16
-	b	L(mid)
-')
-ifelse(FEEDIN_VARIANT,1,`
-	and	r12, n, #3
-	mov	n, n, lsr #2
-	tst	r12, #1
-	beq	L(bx0)
-	ldr	r5, [up], #4
-	mvn	r9, r5
-	str	r9, [rp], #4
-L(bx0):	tst	r12, #2
-	beq	L(b00)
-	ldrd	r4, r5, [up, #0]
-	sub	rp, rp, #8
-	b	L(lo)
-L(b00):	tst	n, n
-	beq	L(wd1)
-	ldrd	r4, r5, [up], #-8
-	sub	rp, rp, #16
-	b	L(mid)
-')
-ifelse(FEEDIN_VARIANT,2,`
-	ands	r12, n, #3
-	mov	n, n, lsr #2
-	beq	L(b00)
-	cmp	r12, #2
-	bcc	L(b01)
-	beq	L(b10)
-
-L(b11):	ldr	r5, [up], #4
-	mvn	r9, r5
-	ldrd	r4, r5, [up, #0]
-	str	r9, [rp], #-4
-	b	L(lo)
-
-L(b00):	ldrd	r4, r5, [up], #-8
-	sub	rp, rp, #16
-	b	L(mid)
-
-L(b01):	ldr	r5, [up], #-4
-	mvn	r9, r5
-	str	r9, [rp], #-12
-	tst	n, n
-	beq	L(wd1)
-L(gt1):	ldrd	r4, r5, [up, #8]
-	b	L(mid)
-
-L(b10):	ldrd	r4, r5, [up]
-	sub	rp, rp, #8
-	b	L(lo)
-')
-	ALIGN(16)
-ifelse(UNROLL,4,`
-L(top):	ldrd	r4, r5, [up, #8]
-	strd	r8, r9, [rp, #8]
-L(mid):	mvn	r8, r4
-	mvn	r9, r5
-	ldrd	r4, r5, [up, #16]!
-	strd	r8, r9, [rp, #16]!
-	sub	n, n, #1
-L(lo):	mvn	r8, r4
-	mvn	r9, r5
-	tst	n, n
-	bne	L(top)
-')
-ifelse(UNROLL,4x2,`
-L(top):	ldrd	r4, r5, [up, #8]
-	strd	r8, r9, [rp, #8]
-L(mid):	mvn	r8, r4
-	mvn	r9, r5
-	ldrd	r4, r5, [up, #16]
-	strd	r8, r9, [rp, #16]
-	mvn	r8, r4
-	mvn	r9, r5
-	sub	n, n, #2
-	tst	n, n
-	bmi	L(dne)
-	ldrd	r4, r5, [up, #24]
-	strd	r8, r9, [rp, #24]
-	mvn	r8, r4
-	mvn	r9, r5
-	ldrd	r4, r5, [up, #32]!
-	strd	r8, r9, [rp, #32]!
-L(lo):	mvn	r8, r4
-	mvn	r9, r5
-	tst	n, n
-	bne	L(top)
-')
-
-L(end):	strd	r8, r9, [rp, #8]
-L(wd1):	pop	{ r4-r5,r8-r9 }
-	bx	r14
-ifelse(UNROLL,4x2,`
-L(dne):	strd	r8, r9, [rp, #24]
-	pop	{ r4-r5,r8-r9 }
-	bx	r14
-')
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/gmp-mparam.h b/gmp/mpn/arm/v7a/cora15/gmp-mparam.h
deleted file mode 100644
index 2a06532b3e..0000000000
--- a/gmp/mpn/arm/v7a/cora15/gmp-mparam.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/* gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2003, 2009, 2010, 2012-2014 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 1700MHz Cortex-A15 with Neon (in spite of file position) */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.6 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         9
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* never */
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           15
-
-#define MUL_TOOM22_THRESHOLD                23
-#define MUL_TOOM33_THRESHOLD                90
-#define MUL_TOOM44_THRESHOLD               262
-#define MUL_TOOM6H_THRESHOLD               351
-#define MUL_TOOM8H_THRESHOLD               557
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      90
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     160
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      89
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     169
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     130
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 43
-#define SQR_TOOM3_THRESHOLD                138
-#define SQR_TOOM4_THRESHOLD                363
-#define SQR_TOOM6_THRESHOLD                517
-#define SQR_TOOM8_THRESHOLD                725
-
-#define MULMID_TOOM42_THRESHOLD             52
-
-#define MULMOD_BNM1_THRESHOLD               17
-#define SQRMOD_BNM1_THRESHOLD               23
-
-#define MUL_FFT_MODF_THRESHOLD             550  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    550, 5}, {     25, 6}, {     27, 7}, {     15, 6}, \
-    {     31, 7}, {     19, 6}, {     39, 7}, {     25, 6}, \
-    {     51, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
-    {     19, 7}, {     41, 8}, {     23, 7}, {     51, 8}, \
-    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
-    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
-    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
-    {     47, 8}, {     99, 9}, {     55,10}, {     31, 9}, \
-    {     79,10}, {     47, 9}, {    103,11}, {     31,10}, \
-    {     63, 9}, {    135,10}, {     79, 9}, {    159,10}, \
-    {     95, 9}, {    191,10}, {    111,11}, {     63,10}, \
-    {    159,11}, {     95,10}, {    191, 9}, {    383,10}, \
-    {    207,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543,11}, {    159,10}, \
-    {    319, 9}, {    639,10}, {    335, 9}, {    671,10}, \
-    {    351,11}, {    191,10}, {    383, 9}, {    767,10}, \
-    {    399, 9}, {    799,10}, {    415,11}, {    223,12}, \
-    {    127,11}, {    255,10}, {    543,11}, {    287,10}, \
-    {    607,11}, {    319,10}, {    671,11}, {    351,12}, \
-    {    191,11}, {    383,10}, {    799,11}, {    415,10}, \
-    {    831,13}, {    127,12}, {    255,11}, {    543,10}, \
-    {   1087,11}, {    607,12}, {    319,11}, {    671,10}, \
-    {   1343,11}, {    735,12}, {    383,11}, {    799,10}, \
-    {   1599,11}, {    831,12}, {    447,11}, {    895,13}, \
-    {    255,12}, {    511,11}, {   1023,12}, {    575,11}, \
-    {   1151,12}, {    639,11}, {   1279,12}, {    703,13}, \
-    {    383,12}, {    767,11}, {   1599,12}, {    831,11}, \
-    {   1663,12}, {    895,13}, {    511,12}, {   1087,13}, \
-    {    639,12}, {   1407,13}, {    767,12}, {   1599,13}, \
-    {    895,14}, {    511,13}, {   1023,12}, {   2111,13}, \
-    {   1151,12}, {   2431,13}, {   1279,14}, {    767,13}, \
-    {   1535,12}, {   3071,15}, {    511,14}, {   1023,13}, \
-    {   2175,14}, {   1279,13}, {   2559,12}, {   5119,13}, \
-    {   2815,12}, {   5631,13}, {   2943,14}, {  16384,15}, \
-    {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 137
-#define MUL_FFT_THRESHOLD                 5760
-
-#define SQR_FFT_MODF_THRESHOLD             525  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    525, 5}, {     25, 6}, {     27, 7}, {     15, 6}, \
-    {     32, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
-    {     39, 7}, {     25, 6}, {     51, 7}, {     27, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     51, 8}, {     27, 7}, {     55, 9}, \
-    {     15, 8}, {     31, 7}, {     63, 8}, {     39, 9}, \
-    {     23, 8}, {     55,10}, {     15, 9}, {     31, 8}, \
-    {     67, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
-    {     95, 9}, {     55,10}, {     31, 9}, {     79,10}, \
-    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
-    {    135,10}, {     79, 9}, {    159,10}, {     95, 9}, \
-    {    191,10}, {    111,11}, {     63,10}, {    143, 9}, \
-    {    287,10}, {    159,11}, {     95,10}, {    191, 9}, \
-    {    383,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543,10}, {    287,11}, \
-    {    159,10}, {    335, 9}, {    671,10}, {    351,11}, \
-    {    191,10}, {    383, 9}, {    767,10}, {    399, 9}, \
-    {    799,10}, {    415,11}, {    223,12}, {    127,11}, \
-    {    255,10}, {    543,11}, {    287,10}, {    607,11}, \
-    {    319,10}, {    671,11}, {    351,12}, {    191,11}, \
-    {    383,10}, {    799,11}, {    415,10}, {    831,13}, \
-    {    127,12}, {    255,11}, {    543,10}, {   1087,11}, \
-    {    607,12}, {    319,11}, {    671,10}, {   1343,11}, \
-    {    735,12}, {    383,11}, {    799,10}, {   1599,11}, \
-    {    831,12}, {    447,11}, {    895,12}, {    511,11}, \
-    {   1023,12}, {    575,11}, {   1151,12}, {    639,11}, \
-    {   1343,12}, {    703,13}, {    383,12}, {    767,11}, \
-    {   1599,12}, {    831,11}, {   1663,12}, {    895,13}, \
-    {    511,12}, {   1087,13}, {    639,12}, {   1407,13}, \
-    {    767,12}, {   1727,13}, {    895,14}, {    511,13}, \
-    {   1023,12}, {   2047,13}, {   1151,12}, {   2431,13}, \
-    {   1279,14}, {    767,13}, {   1535,12}, {   3071,15}, \
-    {    511,14}, {   1023,13}, {   2047,12}, {   4095,13}, \
-    {   2175,14}, {   1279,13}, {   2559,12}, {   5119,13}, \
-    {   2687,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 139
-#define SQR_FFT_THRESHOLD                 4736
-
-#define MULLO_BASECASE_THRESHOLD             9
-#define MULLO_DC_THRESHOLD                  39
-#define MULLO_MUL_N_THRESHOLD            11278
-
-#define DC_DIV_QR_THRESHOLD                 54
-#define DC_DIVAPPR_Q_THRESHOLD             296
-#define DC_BDIV_QR_THRESHOLD                52
-#define DC_BDIV_Q_THRESHOLD                300
-
-#define INV_MULMOD_BNM1_THRESHOLD           44
-#define INV_NEWTON_THRESHOLD               294
-#define INV_APPR_THRESHOLD                 294
-
-#define BINV_NEWTON_THRESHOLD              375
-#define REDC_1_TO_REDC_2_THRESHOLD         102
-#define REDC_2_TO_REDC_N_THRESHOLD           0  /* always */
-
-#define MU_DIV_QR_THRESHOLD               1718
-#define MU_DIVAPPR_Q_THRESHOLD            1718
-#define MUPI_DIV_QR_THRESHOLD              108
-#define MU_BDIV_QR_THRESHOLD              1528
-#define MU_BDIV_Q_THRESHOLD               1718
-
-#define POWM_SEC_TABLE  3,32,70,416,1464
-
-#define MATRIX22_STRASSEN_THRESHOLD         22
-#define HGCD_THRESHOLD                     152
-#define HGCD_APPR_THRESHOLD                230
-#define HGCD_REDUCE_THRESHOLD             3259
-#define GCD_DC_THRESHOLD                   702
-#define GCDEXT_DC_THRESHOLD                538
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                18
-#define GET_STR_PRECOMPUTE_THRESHOLD        32
-#define SET_STR_DC_THRESHOLD               119
-#define SET_STR_PRECOMPUTE_THRESHOLD      1063
-
-#define FAC_DSC_THRESHOLD                  262
-#define FAC_ODD_THRESHOLD                   26
diff --git a/gmp/mpn/arm/v7a/cora15/logops_n.asm b/gmp/mpn/arm/v7a/cora15/logops_n.asm
deleted file mode 100644
index 06026143e1..0000000000
--- a/gmp/mpn/arm/v7a/cora15/logops_n.asm
+++ /dev/null
@@ -1,253 +0,0 @@
-dnl  ARM mpn_and_n, mpn_andn_n. mpn_nand_n, etc, optimised for A15.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb             cycles/limb
-C          and andn ior xor         nand iorn nior xnor
-C StrongARM	 ?			 ?
-C XScale	 ?			 ?
-C Cortex-A7	 ?			 ?
-C Cortex-A8	 ?			 ?
-C Cortex-A9	3.5			3.56
-C Cortex-A15	1.27			1.64
-
-C This is great A15 core register code, but it is a bit large.
-C We use FEEDIN_VARIANT 1 to save some space, but use 8-way unrolling.
-
-C Architecture requirements:
-C v5	-
-C v5t	-
-C v5te	ldrd strd
-C v6	-
-C v6t2	-
-C v7a	-
-
-define(`FEEDIN_VARIANT', 1)	C alternatives: 0 1 2
-define(`UNROLL', 4x2)		C alternatives: 4 4x2
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`vp', `r2')
-define(`n',  `r3')
-
-define(`POSTOP')
-
-ifdef(`OPERATION_and_n',`
-  define(`func',    `mpn_and_n')
-  define(`LOGOP',   `and	$1, $2, $3')')
-ifdef(`OPERATION_andn_n',`
-  define(`func',    `mpn_andn_n')
-  define(`LOGOP',   `bic	$1, $2, $3')')
-ifdef(`OPERATION_nand_n',`
-  define(`func',    `mpn_nand_n')
-  define(`POSTOP',  `mvn	$1, $1')
-  define(`LOGOP',   `and	$1, $2, $3')')
-ifdef(`OPERATION_ior_n',`
-  define(`func',    `mpn_ior_n')
-  define(`LOGOP',   `orr	$1, $2, $3')')
-ifdef(`OPERATION_iorn_n',`
-  define(`func',    `mpn_iorn_n')
-  define(`POSTOP',  `mvn	$1, $1')
-  define(`LOGOP',   `bic	$1, $3, $2')')
-ifdef(`OPERATION_nior_n',`
-  define(`func',    `mpn_nior_n')
-  define(`POSTOP',  `mvn	$1, $1')
-  define(`LOGOP',   `orr	$1, $2, $3')')
-ifdef(`OPERATION_xor_n',`
-  define(`func',    `mpn_xor_n')
-  define(`LOGOP',   `eor	$1, $2, $3')')
-ifdef(`OPERATION_xnor_n',`
-  define(`func',    `mpn_xnor_n')
-  define(`POSTOP',  `mvn	$1, $1')
-  define(`LOGOP',   `eor	$1, $2, $3')')
-
-MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
-
-ASM_START()
-PROLOGUE(func)
-	push	{ r4-r9 }
-
-ifelse(FEEDIN_VARIANT,0,`
-	ands	r6, n, #3
-	mov	n, n, lsr #2
-	beq	L(b00a)
-	tst	r6, #1
-	beq	L(bx0)
-	ldr	r5, [up], #4
-	ldr	r7, [vp], #4
-	LOGOP(	r9, r5, r7)
-	POSTOP(	r9)
-	str	r9, [rp], #4
-	tst	r6, #2
-	beq	L(b00)
-L(bx0):	ldrd	r4, r5, [up, #0]
-	ldrd	r6, r7, [vp, #0]
-	sub	rp, rp, #8
-	b	L(lo)
-L(b00):	tst	n, n
-	beq	L(wd1)
-L(b00a):ldrd	r4, r5, [up], #-8
-	ldrd	r6, r7, [vp], #-8
-	sub	rp, rp, #16
-	b	L(mid)
-')
-ifelse(FEEDIN_VARIANT,1,`
-	and	r6, n, #3
-	mov	n, n, lsr #2
-	tst	r6, #1
-	beq	L(bx0)
-	ldr	r5, [up], #4
-	ldr	r7, [vp], #4
-	LOGOP(	r9, r5, r7)
-	POSTOP(	r9)
-	str	r9, [rp], #4
-L(bx0):	tst	r6, #2
-	beq	L(b00)
-	ldrd	r4, r5, [up, #0]
-	ldrd	r6, r7, [vp, #0]
-	sub	rp, rp, #8
-	b	L(lo)
-L(b00):	tst	n, n
-	beq	L(wd1)
-	ldrd	r4, r5, [up], #-8
-	ldrd	r6, r7, [vp], #-8
-	sub	rp, rp, #16
-	b	L(mid)
-')
-ifelse(FEEDIN_VARIANT,2,`
-	ands	r6, n, #3
-	mov	n, n, lsr #2
-	beq	L(b00)
-	cmp	r6, #2
-	bcc	L(b01)
-	beq	L(b10)
-
-L(b11):	ldr	r5, [up], #4
-	ldr	r7, [vp], #4
-	LOGOP(	r9, r5, r7)
-	ldrd	r4, r5, [up, #0]
-	ldrd	r6, r7, [vp, #0]
-	POSTOP(	r9)
-	str	r9, [rp], #-4
-	b	L(lo)
-
-L(b00):	ldrd	r4, r5, [up], #-8
-	ldrd	r6, r7, [vp], #-8
-	sub	rp, rp, #16
-	b	L(mid)
-
-L(b01):	ldr	r5, [up], #-4
-	ldr	r7, [vp], #-4
-	LOGOP(	r9, r5, r7)
-	POSTOP(	r9)
-	str	r9, [rp], #-12
-	tst	n, n
-	beq	L(wd1)
-L(gt1):	ldrd	r4, r5, [up, #8]
-	ldrd	r6, r7, [vp, #8]
-	b	L(mid)
-
-L(b10):	ldrd	r4, r5, [up]
-	ldrd	r6, r7, [vp]
-	sub	rp, rp, #8
-	b	L(lo)
-')
-	ALIGN(16)
-ifelse(UNROLL,4,`
-L(top):	ldrd	r4, r5, [up, #8]
-	ldrd	r6, r7, [vp, #8]
-	POSTOP(	r8)
-	POSTOP(	r9)
-	strd	r8, r9, [rp, #8]
-L(mid):	LOGOP(	r8, r4, r6)
-	LOGOP(	r9, r5, r7)
-	ldrd	r4, r5, [up, #16]!
-	ldrd	r6, r7, [vp, #16]!
-	POSTOP(	r8)
-	POSTOP(	r9)
-	strd	r8, r9, [rp, #16]!
-	sub	n, n, #1
-L(lo):	LOGOP(	r8, r4, r6)
-	LOGOP(	r9, r5, r7)
-	tst	n, n
-	bne	L(top)
-')
-ifelse(UNROLL,4x2,`
-L(top):	ldrd	r4, r5, [up, #8]
-	ldrd	r6, r7, [vp, #8]
-	POSTOP(	r8)
-	POSTOP(	r9)
-	strd	r8, r9, [rp, #8]
-L(mid):	LOGOP(	r8, r4, r6)
-	LOGOP(	r9, r5, r7)
-	ldrd	r4, r5, [up, #16]
-	ldrd	r6, r7, [vp, #16]
-	POSTOP(	r8)
-	POSTOP(	r9)
-	strd	r8, r9, [rp, #16]
-	LOGOP(	r8, r4, r6)
-	LOGOP(	r9, r5, r7)
-	sub	n, n, #2
-	tst	n, n
-	bmi	L(dne)
-	ldrd	r4, r5, [up, #24]
-	ldrd	r6, r7, [vp, #24]
-	POSTOP(	r8)
-	POSTOP(	r9)
-	strd	r8, r9, [rp, #24]
-	LOGOP(	r8, r4, r6)
-	LOGOP(	r9, r5, r7)
-	ldrd	r4, r5, [up, #32]!
-	ldrd	r6, r7, [vp, #32]!
-	POSTOP(	r8)
-	POSTOP(	r9)
-	strd	r8, r9, [rp, #32]!
-L(lo):	LOGOP(	r8, r4, r6)
-	LOGOP(	r9, r5, r7)
-	tst	n, n
-	bne	L(top)
-')
-
-L(end):	POSTOP(	r8)
-	POSTOP(	r9)
-	strd	r8, r9, [rp, #8]
-L(wd1):	pop	{ r4-r9 }
-	bx	r14
-ifelse(UNROLL,4x2,`
-L(dne):	POSTOP(	r8)
-	POSTOP(	r9)
-	strd	r8, r9, [rp, #24]
-	pop	{ r4-r9 }
-	bx	r14
-')
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/mul_1.asm b/gmp/mpn/arm/v7a/cora15/mul_1.asm
deleted file mode 100644
index 766ba5c57f..0000000000
--- a/gmp/mpn/arm/v7a/cora15/mul_1.asm
+++ /dev/null
@@ -1,104 +0,0 @@
-dnl  ARM mpn_mul_1 optimised for A15.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb		best
-C StrongARM:	 -
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 5.25			3.25
-C Cortex-A15	 2.25			this
-
-
-C This runs well on A15 but very poorly on A9.  By scheduling loads and adds
-C it is possible to get good A9 performance as well, but at the cost of using
-C many more (callee-saves) registers.
-
-C This is armv5 code, optimized for the armv7a cpu A15.  Its location in the
-C GMP file structure might be misleading.
-
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
-define(`v0', `r3')
-
-ASM_START()
-PROLOGUE(mpn_mul_1c)
-	ldr	r12, [sp]
-	b	L(ent)
-EPILOGUE()
-PROLOGUE(mpn_mul_1)
-	mov	r12, #0
-L(ent):	push	{r4-r7}
-
-	ldr	r6, [up], #4
-	tst	n, #1
-	beq	L(bx0)
-
-L(bx1):	umull	r4, r7, r6, v0
-	adds	r4, r4, r12
-	tst	n, #2
-	beq	L(lo1)
-	b	L(lo3)
-
-L(bx0):	umull	r4, r5, r6, v0
-	adds	r4, r4, r12
-	tst	n, #2
-	beq	L(lo0)
-	b	L(lo2)
-
-L(top):	ldr	r6, [up], #4
-	str	r4, [rp], #4
-	umull	r4, r5, r6, v0
-	adds	r4, r4, r7
-L(lo0):	ldr	r6, [up], #4
-	str	r4, [rp], #4
-	umull	r4, r7, r6, v0
-	adcs	r4, r4, r5
-L(lo3):	ldr	r6, [up], #4
-	str	r4, [rp], #4
-	umull	r4, r5, r6, v0
-	adcs	r4, r4, r7
-L(lo2):	ldr	r6, [up], #4
-	str	r4, [rp], #4
-	umull	r4, r7, r6, v0
-	adcs	r4, r4, r5
-L(lo1):	adc	r7, r7, #0
-	subs	n, n, #4
-	bgt	L(top)
-
-	str	r4, [rp]
-	mov	r0, r7
-	pop	{r4-r7}
-	bx	lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm b/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm
deleted file mode 100644
index d8cfe3f78f..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm
+++ /dev/null
@@ -1,43 +0,0 @@
-dnl  ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH,		1)
-
-ifdef(`OPERATION_addlsh1_n',`define(`DO_add')')
-ifdef(`OPERATION_sublsh1_n',`define(`DO_sub')')
-ifdef(`OPERATION_rsblsh1_n',`define(`DO_rsb')')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n mpn_rsblsh1_n)
-
-include_mpn(`arm/v7a/cora15/neon/aorsorrlshC_n.asm')
diff --git a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm b/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm
deleted file mode 100644
index b48204d926..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm
+++ /dev/null
@@ -1,43 +0,0 @@
-dnl  ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH,		2)
-
-ifdef(`OPERATION_addlsh2_n',`define(`DO_add')')
-ifdef(`OPERATION_sublsh2_n',`define(`DO_sub')')
-ifdef(`OPERATION_rsblsh2_n',`define(`DO_rsb')')
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n mpn_rsblsh2_n)
-
-include_mpn(`arm/v7a/cora15/neon/aorsorrlshC_n.asm')
diff --git a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm b/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm
deleted file mode 100644
index 16c34a2699..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm
+++ /dev/null
@@ -1,144 +0,0 @@
-dnl  ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-C	     cycles/limb
-C StrongARM	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 5.25
-C Cortex-A15	 2.25
-
-C TODO
-C  * Consider using 4-way feed-in code.
-C  * This is ad-hoc scheduled, perhaps unnecessarily so for A15, and perhaps
-C    insufficiently for A7 and A8.
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`vp', `r2')
-define(`n',  `r3')
-
-ifdef(`DO_add', `
-  define(`ADCSBCS',	`adcs	$1, $2, $3')
-  define(`CLRCY',	`cmn	r13, #1')
-  define(`RETVAL',	`adc	r0, $1, #0')
-  define(`func',	mpn_addlsh`'LSH`'_n)')
-ifdef(`DO_sub', `
-  define(`ADCSBCS',	`sbcs	$1, $2, $3')
-  define(`CLRCY',	`cmp	r13, #0')
-  define(`RETVAL',	`sbc	$2, $2, $2
-			cmn	$2, #1
-			adc	 r0, $1, #0')
-  define(`func',	mpn_sublsh`'LSH`'_n)')
-ifdef(`DO_rsb', `
-  define(`ADCSBCS',	`sbcs	$1, $3, $2')
-  define(`CLRCY',	`cmp	r13, #0')
-  define(`RETVAL',	`sbc	r0, $1, #0')
-  define(`func',	mpn_rsblsh`'LSH`'_n)')
-
-
-ASM_START()
-PROLOGUE(func)
-	push	 {r4-r10}
-	vmov.i8	 d0, #0			C could feed carry through here
-	CLRCY
-	tst	n, #1
-	beq	L(bb0)
-
-L(bb1):	vld1.32	 {d3[0]}, [vp]!
-	vsli.u32 d0, d3, #LSH
-	ldr	 r12, [up], #4
-	vmov.32	 r5, d0[0]
-	vshr.u32 d0, d3, #32-LSH
-	ADCSBCS( r12, r12, r5)
-	str	 r12, [rp], #4
-	bics	 n, n, #1
-	beq	 L(rtn)
-
-L(bb0):	tst	n, #2
-	beq	L(b00)
-
-L(b10):	vld1.32	 {d3}, [vp]!
-	vsli.u64 d0, d3, #LSH
-	ldmia	 up!, {r10,r12}
-	vmov	 r4, r5, d0
-	vshr.u64 d0, d3, #64-LSH
-	ADCSBCS( r10, r10, r4)
-	ADCSBCS( r12, r12, r5)
-	stmia	 rp!, {r10,r12}
-	bics	 n, n, #2
-	beq	 L(rtn)
-
-L(b00):	vld1.32	 {d2}, [vp]!
-	vsli.u64 d0, d2, #LSH
-	vshr.u64 d1, d2, #64-LSH
-	vld1.32	 {d3}, [vp]!
-	vsli.u64 d1, d3, #LSH
-	vmov	 r6, r7, d0
-	vshr.u64 d0, d3, #64-LSH
-	sub	 n, n, #4
-	tst	 n, n
-	beq	 L(end)
-
-	ALIGN(16)
-L(top):	ldmia	 up!, {r8,r9,r10,r12}
-	vld1.32	 {d2}, [vp]!
-	vsli.u64 d0, d2, #LSH
-	vmov	 r4, r5, d1
-	vshr.u64 d1, d2, #64-LSH
-	ADCSBCS( r8, r8, r6)
-	ADCSBCS( r9, r9, r7)
-	vld1.32	 {d3}, [vp]!
-	vsli.u64 d1, d3, #LSH
-	vmov	 r6, r7, d0
-	vshr.u64 d0, d3, #64-LSH
-	ADCSBCS( r10, r10, r4)
-	ADCSBCS( r12, r12, r5)
-	stmia	 rp!, {r8,r9,r10,r12}
-	sub	 n, n, #4
-	tst	 n, n
-	bne	 L(top)
-
-L(end):	ldmia	 up!, {r8,r9,r10,r12}
-	vmov	 r4, r5, d1
-	ADCSBCS( r8, r8, r6)
-	ADCSBCS( r9, r9, r7)
-	ADCSBCS( r10, r10, r4)
-	ADCSBCS( r12, r12, r5)
-	stmia	 rp!, {r8,r9,r10,r12}
-L(rtn):	vmov.32	 r0, d0[0]
-	RETVAL(	 r0, r1)
-	pop	 {r4-r10}
-	bx	 r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/com.asm b/gmp/mpn/arm/v7a/cora15/neon/com.asm
deleted file mode 100644
index 9e7a629287..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/com.asm
+++ /dev/null
@@ -1,97 +0,0 @@
-dnl  ARM Neon mpn_com optimised for A15.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C StrongARM	 ?
-C XScale	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 2.1
-C Cortex-A15	 0.65
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
-
-ASM_START()
-PROLOGUE(mpn_com)
-	cmp		n, #7
-	ble		L(bc)
-
-C Perform a few initial operation until rp is 128-bit aligned
-	tst		rp, #4
-	beq		L(al1)
-	vld1.32		{d0[0]}, [up]!
-	sub		n, n, #1
-	vmvn		d0, d0
-	vst1.32		{d0[0]}, [rp]!
-L(al1):	tst		rp, #8
-	beq		L(al2)
-	vld1.32		{d0}, [up]!
-	sub		n, n, #2
-	vmvn		d0, d0
-	vst1.32		{d0}, [rp:64]!
-L(al2):	vld1.32		{q2}, [up]!
-	subs		n, n, #12
-	blt		L(end)
-
-	ALIGN(16)
-L(top):	vld1.32		{q0}, [up]!
-	vmvn		q2, q2
-	subs		n, n, #8
-	vst1.32		{q2}, [rp:128]!
-	vld1.32		{q2}, [up]!
-	vmvn		q0, q0
-	vst1.32		{q0}, [rp:128]!
-	bge	L(top)
-
-L(end):	vmvn		q2, q2
-	vst1.32		{q2}, [rp:128]!
-
-C Handle last 0-7 limbs.  Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-L(bc):	tst		n, #4
-	beq		L(tl1)
-	vld1.32		{q0}, [up]!
-	vmvn		q0, q0
-	vst1.32		{q0}, [rp]!
-L(tl1):	tst		n, #2
-	beq		L(tl2)
-	vld1.32		{d0}, [up]!
-	vmvn		d0, d0
-	vst1.32		{d0}, [rp]!
-L(tl2):	tst		n, #1
-	beq		L(tl3)
-	vld1.32		{d0[0]}, [up]
-	vmvn		d0, d0
-	vst1.32		{d0[0]}, [rp]
-L(tl3):	bx		lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/copyd.asm b/gmp/mpn/arm/v7a/cora15/neon/copyd.asm
deleted file mode 100644
index 98fe535def..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/copyd.asm
+++ /dev/null
@@ -1,110 +0,0 @@
-dnl  ARM Neon mpn_copyd optimised for A15.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 1.75		slower than core register code
-C Cortex-A15	 0.52
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
-
-ASM_START()
-PROLOGUE(mpn_copyd)
-	add	rp, rp, n, lsl #2
-	add	up, up, n, lsl #2
-
-	cmp	n, #7
-	ble	L(bc)
-
-C Copy until rp is 128-bit aligned
-	tst	rp, #4
-	beq	L(al1)
-	sub	up, up, #4
-	vld1.32	{d22[0]}, [up]
-	sub	n, n, #1
-	sub	rp, rp, #4
-	vst1.32	{d22[0]}, [rp]
-L(al1):	tst	rp, #8
-	beq	L(al2)
-	sub	up, up, #8
-	vld1.32	{d22}, [up]
-	sub	n, n, #2
-	sub	rp, rp, #8
-	vst1.32	{d22}, [rp:64]
-L(al2):	sub	up, up, #16
-	vld1.32	{d26-d27}, [up]
-	subs	n, n, #12
-	sub	rp, rp, #16			C offset rp for loop
-	blt	L(end)
-
-	sub	up, up, #16			C offset up for loop
-	mov	r12, #-16
-
-	ALIGN(16)
-L(top):	vld1.32	{d22-d23}, [up], r12
-	vst1.32	{d26-d27}, [rp:128], r12
-	vld1.32	{d26-d27}, [up], r12
-	vst1.32	{d22-d23}, [rp:128], r12
-	subs	n, n, #8
-	bge	L(top)
-
-	add	up, up, #16			C undo up offset
-						C rp offset undoing folded
-L(end):	vst1.32	{d26-d27}, [rp:128]
-
-C Copy last 0-7 limbs.  Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-L(bc):	tst	n, #4
-	beq	L(tl1)
-	sub	up, up, #16
-	vld1.32	{d22-d23}, [up]
-	sub	rp, rp, #16
-	vst1.32	{d22-d23}, [rp]
-L(tl1):	tst	n, #2
-	beq	L(tl2)
-	sub	up, up, #8
-	vld1.32	{d22}, [up]
-	sub	rp, rp, #8
-	vst1.32	{d22}, [rp]
-L(tl2):	tst	n, #1
-	beq	L(tl3)
-	sub	up, up, #4
-	vld1.32	{d22[0]}, [up]
-	sub	rp, rp, #4
-	vst1.32	{d22[0]}, [rp]
-L(tl3):	bx	lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/copyi.asm b/gmp/mpn/arm/v7a/cora15/neon/copyi.asm
deleted file mode 100644
index 2e05afe5e8..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/copyi.asm
+++ /dev/null
@@ -1,90 +0,0 @@
-dnl  ARM Neon mpn_copyi optimised for A15.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 1.75		slower than core register code
-C Cortex-A15	 0.52
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
-
-ASM_START()
-PROLOGUE(mpn_copyi)
-	cmp	n, #7
-	ble	L(bc)
-
-C Copy until rp is 128-bit aligned
-	tst	rp, #4
-	beq	L(al1)
-	vld1.32	{d22[0]}, [up]!
-	sub	n, n, #1
-	vst1.32	{d22[0]}, [rp]!
-L(al1):	tst	rp, #8
-	beq	L(al2)
-	vld1.32	{d22}, [up]!
-	sub	n, n, #2
-	vst1.32	{d22}, [rp:64]!
-L(al2):	vld1.32	{d26-d27}, [up]!
-	subs	n, n, #12
-	blt	L(end)
-
-	ALIGN(16)
-L(top):	vld1.32	{d22-d23}, [up]!
-	vst1.32	{d26-d27}, [rp:128]!
-	vld1.32	{d26-d27}, [up]!
-	vst1.32	{d22-d23}, [rp:128]!
-	subs	n, n, #8
-	bge	L(top)
-
-L(end):	vst1.32	{d26-d27}, [rp:128]!
-
-C Copy last 0-7 limbs.  Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-L(bc):	tst	n, #4
-	beq	L(tl1)
-	vld1.32	{d22-d23}, [up]!
-	vst1.32	{d22-d23}, [rp]!
-L(tl1):	tst	n, #2
-	beq	L(tl2)
-	vld1.32	{d22}, [up]!
-	vst1.32	{d22}, [rp]!
-L(tl2):	tst	n, #1
-	beq	L(tl3)
-	vld1.32	{d22[0]}, [up]
-	vst1.32	{d22[0]}, [rp]
-L(tl3):	bx	lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm b/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm
deleted file mode 100644
index 2c11d6debd..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm
+++ /dev/null
@@ -1,177 +0,0 @@
-dnl  ARM Neon mpn_rsh1add_n, mpn_rsh1sub_n.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 -
-C XScale	 -
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	4-5
-C Cortex-A15	 2.5
-
-C TODO
-C  * Try to make this smaller, its size (384 bytes) is excessive.
-C  * Try to reach 2.25 c/l on A15, to match the addlsh_1 family.
-C  * This is ad-hoc scheduled, perhaps unnecessarily so for A15, and perhaps
-C    insufficiently for A7 and A8.
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`vp', `r2')
-define(`n',  `r3')
-
-ifdef(`OPERATION_rsh1add_n', `
-  define(`ADDSUBS',	`adds	$1, $2, $3')
-  define(`ADCSBCS',	`adcs	$1, $2, $3')
-  define(`IFADD',	`$1')
-  define(`IFSUB',	`')
-  define(`func',	mpn_rsh1add_n)')
-ifdef(`OPERATION_rsh1sub_n', `
-  define(`ADDSUBS',	`subs	$1, $2, $3')
-  define(`ADCSBCS',	`sbcs	$1, $2, $3')
-  define(`IFADD',	`')
-  define(`IFSUB',	`$1')
-  define(`func',	mpn_rsh1sub_n)')
-
-MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
-
-ASM_START()
-PROLOGUE(func)
-	push	 {r4-r10}
-
-	ands	r4, n, #3
-	beq	L(b00)
-	cmp	r4, #2
-	blo	L(b01)
-	beq	L(b10)
-
-L(b11):	ldmia	 up!, {r9,r10,r12}
-	ldmia	 vp!, {r5,r6,r7}
-	ADDSUBS( r9, r9, r5)
-	vmov	 d4, r9, r9
-	ADCSBCS( r10, r10, r6)
-	ADCSBCS( r12, r12, r7)
-	vshr.u64 d3, d4, #1
-	vmov	 d1, r10, r12
-	vsli.u64 d3, d1, #31
-	vshr.u64 d2, d1, #1
-	vst1.32	 d3[0], [rp]!
-	bics	 n, n, #3
-	beq	 L(wd2)
-L(gt3):	ldmia	 up!, {r8,r9,r10,r12}
-	ldmia	 vp!, {r4,r5,r6,r7}
-	b	 L(mi0)
-
-L(b10):	ldmia	 up!, {r10,r12}
-	ldmia	 vp!, {r6,r7}
-	ADDSUBS( r10, r10, r6)
-	ADCSBCS( r12, r12, r7)
-	vmov	 d4, r10, r12
-	bics	 n, n, #2
-	vshr.u64 d2, d4, #1
-	beq	 L(wd2)
-L(gt2):	ldmia	 up!, {r8,r9,r10,r12}
-	ldmia	 vp!, {r4,r5,r6,r7}
-	b	 L(mi0)
-
-L(b01):	ldr	 r12, [up], #4
-	ldr	 r7, [vp], #4
-	ADDSUBS( r12, r12, r7)
-	vmov	 d4, r12, r12
-	bics	 n, n, #1
-	bne	 L(gt1)
-	mov	 r5, r12, lsr #1
-IFADD(`	adc	 r1, n, #0')
-IFSUB(`	adc	 r1, n, #1')
-	bfi	 r5, r1, #31, #1
-	str	 r5, [rp]
-	and	 r0, r12, #1
-	pop	 {r4-r10}
-	bx	 r14
-L(gt1):	ldmia	 up!, {r8,r9,r10,r12}
-	ldmia	 vp!, {r4,r5,r6,r7}
-	vshr.u64 d2, d4, #1
-	ADCSBCS( r8, r8, r4)
-	ADCSBCS( r9, r9, r5)
-	vmov	 d0, r8, r9
-	ADCSBCS( r10, r10, r6)
-	ADCSBCS( r12, r12, r7)
-	vsli.u64 d2, d0, #31
-	vshr.u64 d3, d0, #1
-	vst1.32	 d2[0], [rp]!
-	b	 L(mi1)
-
-L(b00):	ldmia	 up!, {r8,r9,r10,r12}
-	ldmia	 vp!, {r4,r5,r6,r7}
-	ADDSUBS( r8, r8, r4)
-	ADCSBCS( r9, r9, r5)
-	vmov	 d4, r8, r9
-	ADCSBCS( r10, r10, r6)
-	ADCSBCS( r12, r12, r7)
-	vshr.u64 d3, d4, #1
-	b	 L(mi1)
-
-	ALIGN(16)
-L(top):	ldmia	 up!, {r8,r9,r10,r12}
-	ldmia	 vp!, {r4,r5,r6,r7}
-	vsli.u64 d3, d1, #63
-	vshr.u64 d2, d1, #1
-	vst1.32	 d3, [rp]!
-L(mi0):	ADCSBCS( r8, r8, r4)
-	ADCSBCS( r9, r9, r5)
-	vmov	 d0, r8, r9
-	ADCSBCS( r10, r10, r6)
-	ADCSBCS( r12, r12, r7)
-	vsli.u64 d2, d0, #63
-	vshr.u64 d3, d0, #1
-	vst1.32	 d2, [rp]!
-L(mi1):	vmov	 d1, r10, r12
-	sub	 n, n, #4
-	tst	 n, n
-	bne	 L(top)
-
-L(end):	vsli.u64 d3, d1, #63
-	vshr.u64 d2, d1, #1
-	vst1.32	 d3, [rp]!
-L(wd2):	vmov	 r4, r5, d2
-IFADD(`	adc	 r1, n, #0')
-IFSUB(`	adc	 r1, n, #1')
-	bfi	 r5, r1, #31, #1
-	stm	 rp, {r4,r5}
-
-L(rtn):	vmov.32	 r0, d4[0]
-	and	 r0, r0, #1
-	pop	 {r4-r10}
-	bx	 r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/submul_1.asm b/gmp/mpn/arm/v7a/cora15/submul_1.asm
deleted file mode 100644
index ed7bfe820b..0000000000
--- a/gmp/mpn/arm/v7a/cora15/submul_1.asm
+++ /dev/null
@@ -1,159 +0,0 @@
-dnl  ARM mpn_submul_1 optimised for A15.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb		best
-C StrongARM:     -
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 5.75			3.75
-C Cortex-A15	 2.32			this
-
-C This code uses umlal and umaal for adding in the rp[] data, keeping the
-C recurrency path separate from any multiply instructions.  It performs well on
-C A15, but not quite at the multiply bandwidth like the corresponding addmul_1
-C code.
-C
-C We don't use r12 due to ldrd and strd limitations.
-C
-C This loop complements U on the fly,
-C   U' = B^n - 1 - U
-C and then uses that
-C   R - U*v = R + U'*v + v - B^n v
-
-C Architecture requirements:
-C v5	-
-C v5t	-
-C v5te	ldrd strd
-C v6	umaal
-C v6t2	-
-C v7a	-
-
-define(`rp', `r0')
-define(`up', `r1')
-define(`n',  `r2')
-define(`v0', `r3')
-
-define(`w0', `r10') define(`w1', `r11')
-define(`u0', `r8')  define(`u1', `r9')
-
-ASM_START()
-PROLOGUE(mpn_submul_1)
-	sub	sp, sp, #32
-	strd	r10, r11, [sp, #24]
-	strd	r8, r9, [sp, #16]
-	strd	r6, r7, [sp, #8]
-	strd	r4, r5, [sp, #0]
-C	push	{ r4-r11 }
-
-	ands	r6, n, #3
-	sub	n, n, #3
-	beq	L(b00)
-	cmp	r6, #2
-	bcc	L(b01)
-	beq	L(b10)
-
-L(b11):	mov	r6, #0
-	ldr	u1, [up], #-4
-	ldr	w1, [rp], #-16
-	mvn	u1, u1
-	adds	r7, v0, #0
-	b	L(mid)
-
-L(b00):	ldrd	u0, u1, [up]
-	ldrd	w0, w1, [rp], #-12
-	mvn	u0, u0
-	mvn	u1, u1
-	mov	r6, v0
-	umaal	w0, r6, u0, v0
-	cmn	r13, #0			C carry clear
-	mov	r7, #0
-	str	w0, [rp, #12]
-	b	L(mid)
-
-L(b10):	ldrd	u0, u1, [up], #8
-	ldrd	w0, w1, [rp]
-	mvn	u0, u0
-	mvn	u1, u1
-	mov	r4, v0
-	umaal	w0, r4, u0, v0
-	mov	r5, #0
-	str	w0, [rp], #-4
-	umlal	w1, r5, u1, v0
-	adds	n, n, #0
-	bmi	L(end)
-	b	L(top)
-
-L(b01):	ldr	u1, [up], #4
-	ldr	w1, [rp], #-8
-	mvn	u1, u1
-	mov	r5, v0
-	mov	r4, #0
-	umaal	w1, r5, u1, v0
-	tst	n, n
-	bmi	L(end)
-
-C	ALIGN(16)
-L(top):	ldrd	u0, u1, [up, #0]
-	adcs	r4, r4, w1
-	mvn	u0, u0
-	ldrd	w0, w1, [rp, #12]
-	mvn	u1, u1
-	mov	r6, #0
-	umlal	w0, r6, u0, v0		C 1 2
-	adcs	r5, r5, w0
-	mov	r7, #0
-	strd	r4, r5, [rp, #8]
-L(mid):	umaal	w1, r7, u1, v0		C 2 3
-	ldrd	u0, u1, [up, #8]
-	add	up, up, #16
-	adcs	r6, r6, w1
-	mvn	u0, u0
-	ldrd	w0, w1, [rp, #20]
-	mvn	u1, u1
-	mov	r4, #0
-	umlal	w0, r4, u0, v0		C 3 4
-	adcs	r7, r7, w0
-	mov	r5, #0
-	strd	r6, r7, [rp, #16]!
-	sub	n, n, #4
-	umlal	w1, r5, u1, v0		C 0 1
-	tst	n, n
-	bpl	L(top)
-
-L(end):	adcs	r4, r4, w1
-	str	r4, [rp, #8]
-	adc	r0, r5, #0
-	sub	r0, v0, r0
-	pop	{ r4-r11 }
-	bx	r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora9/gmp-mparam.h b/gmp/mpn/arm/v7a/cora9/gmp-mparam.h
deleted file mode 100644
index 9660257820..0000000000
--- a/gmp/mpn/arm/v7a/cora9/gmp-mparam.h
+++ /dev/null
@@ -1,209 +0,0 @@
-/* gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2003, 2009, 2010, 2012-2014 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 1000MHz Cortex-A9 */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.6 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD     MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     12
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* never */
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           20
-
-#define MUL_TOOM22_THRESHOLD                45
-#define MUL_TOOM33_THRESHOLD               129
-#define MUL_TOOM44_THRESHOLD               387
-#define MUL_TOOM6H_THRESHOLD               517
-#define MUL_TOOM8H_THRESHOLD               774
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     137
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     222
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     137
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     235
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     208
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 54
-#define SQR_TOOM3_THRESHOLD                181
-#define SQR_TOOM4_THRESHOLD                490
-#define SQR_TOOM6_THRESHOLD                656
-#define SQR_TOOM8_THRESHOLD                  0  /* always */
-
-#define MULMID_TOOM42_THRESHOLD             64
-
-#define MULMOD_BNM1_THRESHOLD               26
-#define SQRMOD_BNM1_THRESHOLD               28
-
-#define MUL_FFT_MODF_THRESHOLD             624  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    624, 5}, {     28, 6}, {     15, 5}, {     34, 6}, \
-    {     18, 5}, {     37, 6}, {     28, 7}, {     15, 6}, \
-    {     36, 7}, {     19, 6}, {     40, 7}, {     21, 6}, \
-    {     43, 7}, {     23, 6}, {     47, 7}, {     25, 6}, \
-    {     51, 7}, {     27, 6}, {     55, 7}, {     29, 8}, \
-    {     15, 7}, {     31, 6}, {     63, 7}, {     37, 8}, \
-    {     19, 7}, {     43, 8}, {     23, 7}, {     51, 8}, \
-    {     27, 7}, {     57, 9}, {     15, 8}, {     31, 7}, \
-    {     65, 8}, {     35, 7}, {     71, 8}, {     43, 9}, \
-    {     23, 8}, {     55,10}, {     15, 9}, {     31, 8}, \
-    {     71, 9}, {     39, 8}, {     83, 9}, {     47, 8}, \
-    {     99, 9}, {     55,10}, {     31, 9}, {     79,10}, \
-    {     47, 9}, {    103,11}, {     31,10}, {     63, 9}, \
-    {    135,10}, {     79, 9}, {    167,10}, {     95, 9}, \
-    {    191,10}, {    111,11}, {     63,10}, {    159,11}, \
-    {     95,10}, {    191, 9}, {    383,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    271,11}, \
-    {    159,10}, {    319, 9}, {    639,10}, {    335, 9}, \
-    {    671,10}, {    351,11}, {    191,10}, {    383, 9}, \
-    {    767,10}, {    399, 9}, {    799,10}, {    415,11}, \
-    {    223,12}, {    127,11}, {    255,10}, {    511, 9}, \
-    {   1023,10}, {    543,11}, {    287,10}, {    575, 9}, \
-    {   1151,11}, {    319,10}, {    671,11}, {    351,12}, \
-    {    191,11}, {    383,10}, {    799,11}, {    415,10}, \
-    {    831,13}, {    127,12}, {    255,11}, {    511,10}, \
-    {   1023,11}, {    607,12}, {    319,11}, {    735,12}, \
-    {    383,11}, {    863,12}, {    447,11}, {    959,13}, \
-    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
-    {   1215,12}, {    639,11}, {   1279,12}, {    703,13}, \
-    {    383,12}, {    767,11}, {   1535,12}, {    831,11}, \
-    {   1663,12}, {    959,14}, {    255,13}, {    511,12}, \
-    {   1023,11}, {   2047,12}, {   1215,13}, {    639,12}, \
-    {   1407,13}, {    767,12}, {   1663,13}, {    895,12}, \
-    {   1791,14}, {    511,13}, {   1023,12}, {   2111,13}, \
-    {   1151,12}, {   2431,13}, {   1279,12}, {   2559,13}, \
-    {   1407,14}, {    767,13}, {   1535,12}, {   3071,13}, \
-    {   1663,12}, {   3455,13}, {   1791,15}, {    511,14}, \
-    {   1023,13}, {   2047,12}, {   4095,13}, {   2175,12}, \
-    {   4351,13}, {   2431,14}, {   1279,13}, {   2559,12}, \
-    {   5119,13}, {   2815,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 160
-#define MUL_FFT_THRESHOLD                 6784
-
-#define SQR_FFT_MODF_THRESHOLD             560  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    560, 5}, {     19, 4}, {     39, 5}, {     21, 4}, \
-    {     43, 5}, {     29, 6}, {     15, 5}, {     33, 6}, \
-    {     17, 5}, {     35, 6}, {     36, 7}, {     19, 6}, \
-    {     40, 7}, {     21, 6}, {     43, 7}, {     23, 6}, \
-    {     47, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     43, 8}, {     23, 7}, {     49, 8}, \
-    {     27, 7}, {     55, 9}, {     15, 8}, {     31, 7}, \
-    {     65, 8}, {     35, 7}, {     71, 8}, {     43, 9}, \
-    {     23, 8}, {     55, 9}, {     31, 8}, {     71, 9}, \
-    {     39, 8}, {     83, 9}, {     47, 8}, {     95, 9}, \
-    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
-    {    103,11}, {     31,10}, {     63, 9}, {    135,10}, \
-    {     79, 9}, {    159,10}, {     95, 9}, {    191,10}, \
-    {    111,11}, {     63,10}, {    159,11}, {     95,10}, \
-    {    191, 9}, {    383,10}, {    207,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511, 8}, {   1023, 9}, \
-    {    543,10}, {    287,11}, {    159,10}, {    319, 9}, \
-    {    639,10}, {    335, 9}, {    671,10}, {    351,11}, \
-    {    191,10}, {    383, 9}, {    767,10}, {    399, 9}, \
-    {    799,10}, {    415, 9}, {    831,11}, {    223,12}, \
-    {    127,11}, {    255,10}, {    511, 9}, {   1023,10}, \
-    {    543,11}, {    287,10}, {    575, 9}, {   1151,10}, \
-    {    607,11}, {    319,10}, {    671,11}, {    351,10}, \
-    {    703,12}, {    191,11}, {    383,10}, {    799,11}, \
-    {    415,10}, {    831,13}, {    127,11}, {    511,10}, \
-    {   1023,11}, {    543,10}, {   1087,11}, {    575,10}, \
-    {   1151,11}, {    607,12}, {    319,11}, {    671,10}, \
-    {   1343,11}, {    735,12}, {    383,11}, {    863,12}, \
-    {    447,11}, {    959,12}, {    511,11}, {   1087,12}, \
-    {    575,11}, {   1215,12}, {    639,11}, {   1343,12}, \
-    {    703,11}, {   1407,13}, {    383,12}, {    767,11}, \
-    {   1599,12}, {    831,11}, {   1663,12}, {    895,11}, \
-    {   1791,12}, {    959,13}, {    511,12}, {   1023,11}, \
-    {   2047,12}, {   1215,13}, {    639,12}, {   1407,13}, \
-    {    767,12}, {   1663,13}, {    895,12}, {   1791,14}, \
-    {    511,13}, {   1023,12}, {   2111,13}, {   1151,12}, \
-    {   2431,13}, {   1279,12}, {   2559,13}, {   1407,14}, \
-    {    767,13}, {   1535,12}, {   3071,13}, {   1663,12}, \
-    {   3455,13}, {   1791,15}, {    511,14}, {   1023,13}, \
-    {   2047,12}, {   4095,13}, {   2175,12}, {   4351,13}, \
-    {   2431,14}, {   1279,13}, {   2559,12}, {   5119,13}, \
-    {   2815,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 167
-#define SQR_FFT_THRESHOLD                 5312
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  38
-#define MULLO_MUL_N_THRESHOLD            13463
-
-#define DC_DIV_QR_THRESHOLD                 42
-#define DC_DIVAPPR_Q_THRESHOLD             100
-#define DC_BDIV_QR_THRESHOLD                43
-#define DC_BDIV_Q_THRESHOLD                104
-
-#define INV_MULMOD_BNM1_THRESHOLD           98
-#define INV_NEWTON_THRESHOLD               138
-#define INV_APPR_THRESHOLD                 133
-
-#define BINV_NEWTON_THRESHOLD              333
-#define REDC_1_TO_REDC_2_THRESHOLD           2
-#define REDC_2_TO_REDC_N_THRESHOLD         142
-
-#define MU_DIV_QR_THRESHOLD               2350
-#define MU_DIVAPPR_Q_THRESHOLD            2259
-#define MUPI_DIV_QR_THRESHOLD               70
-#define MU_BDIV_QR_THRESHOLD              2089
-#define MU_BDIV_Q_THRESHOLD               2172
-
-#define POWM_SEC_TABLE  37,48,81,615,1925
-
-#define MATRIX22_STRASSEN_THRESHOLD         22
-#define HGCD_THRESHOLD                      64
-#define HGCD_APPR_THRESHOLD                 50
-#define HGCD_REDUCE_THRESHOLD             4284
-#define GCD_DC_THRESHOLD                   416
-#define GCDEXT_DC_THRESHOLD                298
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                18
-#define GET_STR_PRECOMPUTE_THRESHOLD        33
-#define SET_STR_DC_THRESHOLD               140
-#define SET_STR_PRECOMPUTE_THRESHOLD       748
-
-#define FAC_DSC_THRESHOLD                  309
-#define FAC_ODD_THRESHOLD                   29
diff --git a/gmp/mpn/arm64/aors_n.asm b/gmp/mpn/arm64/aors_n.asm
deleted file mode 100644
index a880cd35cf..0000000000
--- a/gmp/mpn/arm64/aors_n.asm
+++ /dev/null
@@ -1,98 +0,0 @@
-dnl  ARM64 mpn_add_n and mpn_sub_n
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C Cortex-A53	 ?
-C Cortex-A57	 ?
-
-changecom(@&*$)
-
-define(`rp', `x0')
-define(`up', `x1')
-define(`vp', `x2')
-define(`n',  `x3')
-
-ifdef(`OPERATION_add_n', `
-  define(`ADDSUBC',	adcs)
-  define(`CLRCY',	`cmn	xzr, xzr')
-  define(`SETCY',	`cmp	$1, #1')
-  define(`RETVAL',	`adc	x0, xzr, xzr')
-  define(`func',	mpn_add_n)
-  define(`func_nc',	mpn_add_nc)')
-ifdef(`OPERATION_sub_n', `
-  define(`ADDSUBC',	sbcs)
-  define(`CLRCY',	`cmp	xzr, xzr')
-  define(`SETCY',	`subs	$1, xzr, $1')
-  define(`RETVAL',	`sbc	x0, xzr, xzr
-			and	x0, x0, #1')
-  define(`func',	mpn_sub_n)
-  define(`func_nc',	mpn_sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-ASM_START()
-PROLOGUE(func_nc)
-	SETCY(	x4)
-	b	L(ent)
-EPILOGUE()
-PROLOGUE(func)
-	CLRCY
-L(ent):	tbz	n, #0, L(b0)
-
-	ldr	x4, [up],#8
-	ldr	x6, [vp],#8
-	sub	n, n, #1
-	ADDSUBC	x8, x4, x6
-	str	x8, [rp],#8
-	cbz	n, L(rt)
-
-L(b0):	ldp	x4, x5, [up],#16
-	ldp	x6, x7, [vp],#16
-	sub	n, n, #2
-	ADDSUBC	x8, x4, x6
-	ADDSUBC	x9, x5, x7
-	cbz	n, L(end)
-
-L(top):	ldp	x4, x5, [up],#16
-	ldp	x6, x7, [vp],#16
-	sub	n, n, #2
-	stp	x8, x9, [rp],#16
-	ADDSUBC	x8, x4, x6
-	ADDSUBC	x9, x5, x7
-	cbnz	n, L(top)
-
-L(end):	stp	x8, x9, [rp]
-L(rt):	RETVAL
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/arm64/aorsmul_1.asm b/gmp/mpn/arm64/aorsmul_1.asm
deleted file mode 100644
index bf765a7f77..0000000000
--- a/gmp/mpn/arm64/aorsmul_1.asm
+++ /dev/null
@@ -1,122 +0,0 @@
-dnl  ARM64 mpn_submul_1
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C Cortex-A53	 ?
-C Cortex-A57	 ?
-
-changecom(@&*$)
-
-define(`rp', `x0')
-define(`up', `x1')
-define(`n',  `x2')
-define(`v0', `x3')
-
-ifdef(`OPERATION_addmul_1', `
-  define(`ADDSUB',	adds)
-  define(`ADDSUBC',	adcs)
-  define(`COND',	`cc')
-  define(`func',	mpn_addmul_1)')
-ifdef(`OPERATION_submul_1', `
-  define(`ADDSUB',	subs)
-  define(`ADDSUBC',	sbcs)
-  define(`COND',	`cs')
-  define(`func',	mpn_submul_1)')
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-PROLOGUE(func)
-	mov	x15, #0
-
-	tbz	n, #0, L(1)
-
-	ldr	x4, [up],#8
-	mul	x8, x4, v0
-	umulh	x12, x4, v0
-	adds	x8, x8, x15
-	ldr	x4, [rp,#0]
-	ADDSUB	x8, x4, x8
-	csinc	x15, x12, x12, COND
-	str	x8, [rp],#8
-
-L(1):	tbz	n, #1, L(2)
-
-	ldp	x4, x5, [up],#16
-	mul	x8, x4, v0
-	umulh	x12, x4, v0
-	mul	x9, x5, v0
-	umulh	x13, x5, v0
-	adds	x8, x8, x15
-	adcs	x9, x9, x12
-	ldp	x4, x5, [rp,#0]
-	adc	x15, x13, xzr
-	sub	n, n, #1
-	ADDSUB	x8, x4, x8
-	ADDSUBC	x9, x5, x9
-	csinc	x15, x15, x15, COND
-	stp	x8, x9, [rp],#16
-
-L(2):	lsr	n, n, 2
-	cbz	n, L(end)
-
-L(top):	ldp	x4, x5, [up],#16
-	ldp	x6, x7, [up],#16
-	mul	x8, x4, v0
-	umulh	x12, x4, v0
-	mul	x9, x5, v0
-	umulh	x13, x5, v0
-	adds	x8, x8, x15
-	mul	x10, x6, v0
-	umulh	x14, x6, v0
-	adcs	x9, x9, x12
-	mul	x11, x7, v0
-	umulh	x15, x7, v0
-	adcs	x10, x10, x13
-	ldp	x4, x5, [rp,#0]
-	adcs	x11, x11, x14
-	ldp	x6, x7, [rp,#16]
-	adc	x15, x15, xzr
-	sub	n, n, #1
-	ADDSUB	x8, x4, x8
-	ADDSUBC	x9, x5, x9
-	ADDSUBC	x10, x6, x10
-	ADDSUBC	x11, x7, x11
-	stp	x8, x9, [rp],#16
-	csinc	x15, x15, x15, COND
-	stp	x10, x11, [rp],#16
-	cbnz	n, L(top)
-
-L(end):	mov	x0, x15
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/arm64/cnd_aors_n.asm b/gmp/mpn/arm64/cnd_aors_n.asm
deleted file mode 100644
index e7836500d5..0000000000
--- a/gmp/mpn/arm64/cnd_aors_n.asm
+++ /dev/null
@@ -1,99 +0,0 @@
-dnl  ARM64 mpn_cnd_add_n, mpn_cnd_sub_n
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C Cortex-A53	 ?
-C Cortex-A57	 ?
-
-changecom(@&*$)
-
-define(`cnd',	`x0')
-define(`rp',	`x1')
-define(`up',	`x2')
-define(`vp',	`x3')
-define(`n',	`x4')
-
-ifdef(`OPERATION_cnd_add_n', `
-  define(`ADDSUBC',      adcs)
-  define(`CLRCY',	`cmn	xzr, xzr')
-  define(`RETVAL',	`adc	x0, xzr, xzr')
-  define(func,		mpn_cnd_add_n)')
-ifdef(`OPERATION_cnd_sub_n', `
-  define(`ADDSUBC',      sbcs)
-  define(`CLRCY',	`cmp	xzr, xzr')
-  define(`RETVAL',	`sbc	x0, xzr, xzr
-			and	x0, x0, #1')
-  define(func,		mpn_cnd_sub_n)')
-
-MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
-
-ASM_START()
-PROLOGUE(func)
-	cmp	cnd, #1
-	sbc	cnd, cnd, cnd
-
-	CLRCY				C really only needed for n = 0 (mod 4)
-
-	tbz	n, #0, L(1)
-	ldr	x10, [up], #8
-	ldr	x12, [vp], #8
-	bic	x6, x12, cnd
-	ADDSUBC	x8, x10, x6
-	sub	n, n, #1
-	str	x8, [rp], #8
-	cbz	n, L(rt)
-
-L(1):	ldp	x10, x11, [up], #16
-	ldp	x12, x13, [vp], #16
-	sub	n, n, #2
-	cbz	n, L(end)
-
-L(top):	bic	x6, x12, cnd
-	bic	x7, x13, cnd
-	ldp	x12, x13, [vp], #16
-	ADDSUBC	x8, x10, x6
-	ADDSUBC	x9, x11, x7
-	ldp	x10, x11, [up], #16
-	sub	n, n, #2
-	stp	x8, x9, [rp], #16
-	cbnz	n, L(top)
-
-L(end):	bic	x6, x12, cnd
-	bic	x7, x13, cnd
-	ADDSUBC	x8, x10, x6
-	ADDSUBC	x9, x11, x7
-	stp	x8, x9, [rp]
-L(rt):	RETVAL
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/arm64/copyd.asm b/gmp/mpn/arm64/copyd.asm
deleted file mode 100644
index bb477716e5..0000000000
--- a/gmp/mpn/arm64/copyd.asm
+++ /dev/null
@@ -1,93 +0,0 @@
-dnl  ARM64 mpn_copyd.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C Cortex-A53	 ?
-C Cortex-A57	 ?
-
-changecom(@&*$)
-
-define(`rp', `x0')
-define(`up', `x1')
-define(`n',  `x2')
-
-ASM_START()
-PROLOGUE(mpn_copyd)
-	add	rp, rp, n, lsl #3
-	add	up, up, n, lsl #3
-
-	cmp	n, #3
-	b.le	L(bc)
-
-C Copy until rp is 128-bit aligned
-	tbz	rp, #3, L(al2)
-	sub	up, up, #8
-	ld1	{v22.1d}, [up]
-	sub	n, n, #1
-	sub	rp, rp, #8
-	st1	{v22.1d}, [rp]
-
-L(al2):	sub	up, up, #16
-	ld1	{v26.2d}, [up]
-	subs	n, n, #6
-	sub	rp, rp, #16			C offset rp for loop
-	b.lt	L(end)
-
-	sub	up, up, #16			C offset up for loop
-	mov	x12, #-16
-
-	ALIGN(16)
-L(top):	ld1	{v22.2d}, [up], x12
-	st1	{v26.2d}, [rp], x12
-	ld1	{v26.2d}, [up], x12
-	st1	{v22.2d}, [rp], x12
-	subs	n, n, #4
-	b.ge	L(top)
-
-	add	up, up, #16			C undo up offset
-
-L(end):	st1	{v26.2d}, [rp]
-
-C Copy last 0-3 limbs.  Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-L(bc):	tbz	n, #1, L(tl1)
-	sub	up, up, #16
-	ld1	{v22.2d}, [up]
-	sub	rp, rp, #16
-	st1	{v22.2d}, [rp]
-L(tl1):	tbz	n, #0, L(tl2)
-	sub	up, up, #8
-	ld1	{v22.1d}, [up]
-	sub	rp, rp, #8
-	st1	{v22.1d}, [rp]
-L(tl2):	ret
-EPILOGUE()
diff --git a/gmp/mpn/arm64/copyi.asm b/gmp/mpn/arm64/copyi.asm
deleted file mode 100644
index 8f7dbd4f52..0000000000
--- a/gmp/mpn/arm64/copyi.asm
+++ /dev/null
@@ -1,77 +0,0 @@
-dnl  ARM64 mpn_copyi.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C Cortex-A53	 ?
-C Cortex-A57	 ?
-
-changecom(@&*$)
-
-define(`rp', `x0')
-define(`up', `x1')
-define(`n',  `x2')
-
-ASM_START()
-PROLOGUE(mpn_copyi)
-	cmp	n, #3
-	b.le	L(bc)
-
-C Copy until rp is 128-bit aligned
-	tbz	rp, #3, L(al2)
-	ld1	{v22.1d}, [up], #8
-	sub	n, n, #1
-	st1	{v22.1d}, [rp], #8
-
-L(al2):	ld1	{v26.2d}, [up], #16
-	subs	n, n, #6
-	b.lt	L(end)
-
-	ALIGN(16)
-L(top):	ld1	{v22.2d}, [up], #16
-	st1	{v26.2d}, [rp], #16
-	ld1	{v26.2d}, [up], #16
-	st1	{v22.2d}, [rp], #16
-	subs	n, n, #4
-	b.ge	L(top)
-
-L(end):	st1	{v26.2d}, [rp], #16
-
-C Copy last 0-3 limbs.  Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-L(bc):	tbz	n, #1, L(tl1)
-	ld1	{v22.2d}, [up], #16
-	st1	{v22.2d}, [rp], #16
-L(tl1):	tbz	n, #0, L(tl2)
-	ld1	{v22.1d}, [up]
-	st1	{v22.1d}, [rp]
-L(tl2):	ret
-EPILOGUE()
diff --git a/gmp/mpn/arm64/gcd_1.asm b/gmp/mpn/arm64/gcd_1.asm
deleted file mode 100644
index d231dbcbb9..0000000000
--- a/gmp/mpn/arm64/gcd_1.asm
+++ /dev/null
@@ -1,125 +0,0 @@
-dnl  ARM v6t2 mpn_gcd_1.
-
-dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for ARM by Torbjorn
-dnl  Granlund.
-
-dnl  Copyright 2000-2002, 2005, 2009, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-changecom(@&*$)
-
-C	     cycles/bit (approx)
-C Cortex-A53	 ?
-C Cortex-A57	 ?
-
-C TODO
-C  * Optimise inner-loop better.
-C  * Push saving/restoring of callee-user regs into call code
-
-C Threshold of when to call bmod when U is one limb.  Should be about
-C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
-define(`BMOD_THRES_LOG2', 7)
-
-C INPUT PARAMETERS
-define(`up',    `x0')
-define(`n',     `x1')
-define(`v0',    `x2')
-
-ifdef(`BMOD_1_TO_MOD_1_THRESHOLD',,
-  `define(`BMOD_1_TO_MOD_1_THRESHOLD',30)')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_gcd_1)
-	stp	x29, x30, [sp,#-32]!
-	ldr	x3, [up]		C U low limb
-	stp     x19, x20, [sp,#16]
-
-	orr	x3, x3, v0
-	rbit	x4, x3
-	clz	x20, x4			C min(ctz(u0),ctz(v0))
-
-	rbit	x12, v0
-	clz	x12, x12
-	lsr	v0, v0, x12
-
-	mov	x19, v0
-
-	cmp	n, #1
-	bne	L(nby1)
-
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
-	ldr	x3, [up]
-	cmp	v0, x3, lsr #BMOD_THRES_LOG2
-	bhi	L(red1)
-
-L(bmod):mov	x3, #0			C carry argument
-	bl	mpn_modexact_1c_odd
-	b	L(red0)
-
-L(nby1):cmp	n, #BMOD_1_TO_MOD_1_THRESHOLD
-	blo	L(bmod)
-
-	bl	mpn_mod_1
-
-L(red0):mov	x3, x0
-L(red1):cmp	x3, #0
-	rbit	x12, x3
-	clz	x12, x12
-	bne	L(mid)
-	b	L(end)
-
-	ALIGN(8)
-L(top):
-ifelse(1,1,`
-C This shorter variant makes full use of armv8 insns
-	csneg	x3, x1, x1, cs		C if x-y < 0
-	csel	x19, x4, x19, cs	C use x,y-x
-L(mid):	lsr	x4, x3, x12		C
-	subs	x1, x19, x4		C
-',`
-C This variant is akin to the 32-bit v6t2 code
-	csel	x3, x1, x3, cs		C if x-y < 0
-	csel	x19, x0, x19, cs	C use x,y-x
-L(mid):	lsr	x3, x3, x12		C
-	mov	x0, x3			C
-	subs	x1, x19, x3		C
-	sub	x3, x3, x19		C
-')
-	rbit	x12, x1
-	clz	x12, x12		C
-	bne	L(top)			C
-
-L(end):	lsl	x0, x19, x20
-	ldp     x19, x20, [sp,#16]
-	ldp	x29, x30, [sp],#32
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/arm64/invert_limb.asm b/gmp/mpn/arm64/invert_limb.asm
deleted file mode 100644
index 2302d047e5..0000000000
--- a/gmp/mpn/arm64/invert_limb.asm
+++ /dev/null
@@ -1,83 +0,0 @@
-dnl  ARM64 mpn_invert_limb -- Invert a normalized limb.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C Cortex-A53     ?
-C Cortex-A57     ?
-
-C Compiler generated, mildly edited.  Could surely be further optimised.
-
-ASM_START()
-PROLOGUE(mpn_invert_limb)
-	lsr	x2, x0, 54
-	adrp	x1, approx_tab
-	and	x2, x2, #0x1fe
-	add	x1, x1, :lo12:approx_tab
-	ldrh	w3, [x1,x2]
-	lsr	x4, x0, 24
-	add	x4, x4, 1
-	ubfiz	x2, x3, 11, 16
-	umull	x3, w3, w3
-	mul	x3, x3, x4
-	sub	x2, x2, #1
-	sub	x2, x2, x3, lsr 40
-	lsl	x3, x2, 60
-	mul	x1, x2, x2
-	msub	x1, x1, x4, x3
-	lsl	x2, x2, 13
-	add	x1, x2, x1, lsr 47
-	and	x2, x0, 1
-	neg	x3, x2
-	and	x3, x3, x1, lsr 1
-	add	x2, x2, x0, lsr 1
-	msub	x2, x1, x2, x3
-	umulh	x2, x2, x1
-	lsl	x1, x1, 31
-	add	x1, x1, x2, lsr 1
-	mul	x3, x1, x0
-	umulh	x2, x1, x0
-	adds	x4, x3, x0
-	adc	x0, x2, x0
-	sub	x0, x1, x0
-	ret
-EPILOGUE()
-
-	RODATA
-	ALIGN(2)
-	TYPE(   approx_tab, object)
-	SIZE(   approx_tab, 512)
-approx_tab:
-forloop(i,256,512-1,dnl
-`	.hword	eval(0x7fd00/i)
-')dnl
diff --git a/gmp/mpn/arm64/logops_n.asm b/gmp/mpn/arm64/logops_n.asm
deleted file mode 100644
index 0f75700cfd..0000000000
--- a/gmp/mpn/arm64/logops_n.asm
+++ /dev/null
@@ -1,106 +0,0 @@
-dnl  ARM64 mpn_and_n, mpn_andn_n. mpn_nand_n, etc.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C Cortex-A53	 ?
-C Cortex-A57	 ?
-
-changecom(@&*$)
-
-define(`rp', `x0')
-define(`up', `x1')
-define(`vp', `x2')
-define(`n',  `x3')
-
-define(`POSTOP', `dnl')
-
-ifdef(`OPERATION_and_n',`
-  define(`func',    `mpn_and_n')
-  define(`LOGOP',   `and	$1, $2, $3')')
-ifdef(`OPERATION_andn_n',`
-  define(`func',    `mpn_andn_n')
-  define(`LOGOP',   `bic	$1, $2, $3')')
-ifdef(`OPERATION_nand_n',`
-  define(`func',    `mpn_nand_n')
-  define(`POSTOP',  `mvn	$1, $1')
-  define(`LOGOP',   `and	$1, $2, $3')')
-ifdef(`OPERATION_ior_n',`
-  define(`func',    `mpn_ior_n')
-  define(`LOGOP',   `orr	$1, $2, $3')')
-ifdef(`OPERATION_iorn_n',`
-  define(`func',    `mpn_iorn_n')
-  define(`LOGOP',   `orn	$1, $2, $3')')
-ifdef(`OPERATION_nior_n',`
-  define(`func',    `mpn_nior_n')
-  define(`POSTOP',  `mvn	$1, $1')
-  define(`LOGOP',   `orr	$1, $2, $3')')
-ifdef(`OPERATION_xor_n',`
-  define(`func',    `mpn_xor_n')
-  define(`LOGOP',   `eor	$1, $2, $3')')
-ifdef(`OPERATION_xnor_n',`
-  define(`func',    `mpn_xnor_n')
-  define(`LOGOP',   `eon	$1, $2, $3')')
-
-MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
-
-ASM_START()
-PROLOGUE(func)
-	tbz	n, #0, L(b0)
-
-	ldr	x4, [up],#8
-	ldr	x6, [vp],#8
-	sub	n, n, #1
-	LOGOP(	x8, x4, x6)
-	POSTOP(	x8)
-	str	x8, [rp],#8
-	cbz	n, L(rtn)
-
-L(b0):	ldp	x4, x5, [up],#16
-	ldp	x6, x7, [vp],#16
-	sub	n, n, #2
-	b	L(mid)
-
-L(top):	ldp	x4, x5, [up],#16
-	ldp	x6, x7, [vp],#16
-	sub	n, n, #2
-	stp	x8, x9, [rp],#16
-L(mid):	LOGOP(	x8, x4, x6)
-	LOGOP(	x9, x5, x7)
-	POSTOP(	x8)
-	POSTOP(	x9)
-	cbnz	n, L(top)
-
-	stp	x8, x9, [rp],#16
-L(rtn):	ret
-EPILOGUE()
diff --git a/gmp/mpn/arm64/mul_1.asm b/gmp/mpn/arm64/mul_1.asm
deleted file mode 100644
index c0c2570f0d..0000000000
--- a/gmp/mpn/arm64/mul_1.asm
+++ /dev/null
@@ -1,98 +0,0 @@
-dnl  ARM64 mpn_mul_1
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C Cortex-A53	 ?
-C Cortex-A57	 ?
-
-define(`rp', `x0')
-define(`up', `x1')
-define(`n',  `x2')
-define(`v0', `x3')
-
-ASM_START()
-PROLOGUE(mpn_mul_1)
-	ldr	x12, [up], #8
-	and	x6, n, #3
-	and	n, n, #-4
-	cbz	x6, L(fi0)
-	cmp	x6, #2
-	bcc	L(fi1)
-	beq	L(fi2)
-
-L(fi3):	mul	x8, x12, v0
-	umulh	x13, x12, v0
-	cmn	xzr, xzr
-	b	L(L3)
-L(fi2):	mul	x7, x12, v0
-	umulh	x5, x12, v0
-	cmn	xzr, xzr
-	b	L(L2)
-L(fi0):	mul	x9, x12, v0
-	umulh	x5, x12, v0
-	sub	n, n, #4
-	cmn	xzr, xzr
-	b	L(L0)
-L(fi1):	mul	x10, x12, v0
-	umulh	x13, x12, v0
-	cmn	xzr, xzr
-	cbz	n, L(end)
-
-L(top):	sub	n, n, #4
-	ldr	x12, [up], #8
-	mul	x6, x12, v0
-	umulh	x5, x12, v0
-	str	x10, [rp], #8
-	adcs	x9, x6, x13
-L(L0):	ldr	x12, [up], #8
-	mul	x6, x12, v0
-	umulh	x13, x12, v0
-	str	x9, [rp] ,#8
-	adcs	x8, x6, x5
-L(L3):	ldr	x12, [up], #8
-	mul	x6, x12, v0
-	umulh	x5, x12, v0
-	str	x8, [rp], #8
-	adcs	x7, x6, x13
-L(L2):	ldr	x12, [up], #8
-	mul	x6, x12, v0
-	umulh	x13, x12, v0
-	str	x7, [rp], #8
-	adcs	x10, x6, x5
-	cbnz	n, L(top)
-
-L(end):	str	x10, [rp]
-	adc	x0, x13, xzr
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/asm-defs.m4 b/gmp/mpn/asm-defs.m4
index e573cc4ca8..ee9626dd57 100644
--- a/gmp/mpn/asm-defs.m4
+++ b/gmp/mpn/asm-defs.m4
@@ -2,33 +2,23 @@ divert(-1)
 dnl
 dnl  m4 macros for gmp assembly code, shared by all CPUs.
 
-dnl  Copyright 1999-2006, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
+dnl  Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  These macros are designed for use with any m4 and have been used on
@@ -59,7 +49,7 @@ dnl
 dnl  But note that when a quoted string is being read, a # isn't special, so
 dnl  apostrophes in comments in quoted strings must be avoided or they'll be
 dnl  interpreted as a closing quote mark.  But when the quoted text is
-dnl  re-read # will still act like a normal comment, suppressing macro
+dnl  re-read # will still act like a normal comment, suppressing macro
 dnl  expansion.
 dnl
 dnl  For example,
@@ -229,7 +219,7 @@ undefine(`m4_dollarhash_1_if_noparen_test')
 
 dnl  Usage: m4wrap_prepend(string)
 dnl
-dnl  Prepend the given string to what will be expanded under m4wrap at the
+dnl  Prepend the given string to what will be expanded under m4wrap at the
 dnl  end of input.
 dnl
 dnl  This macro exists to work around variations in m4wrap() behaviour in
@@ -877,7 +867,7 @@ ifelse(eval($'`#>1 || m4_length('m4_doublequote($`'1)`)!=0),1,($'`@))')')
 dnl  Called: deflit_emptyargcheck(macroname,$#,`$1')
 define(deflit_emptyargcheck,
 `ifelse(eval($2==1 && !m4_dollarhash_1_if_noparen_p && m4_length(`$3')==0),1,
-`m4_error(`dont use a deflit as $1() because it loses the brackets (see deflit in asm-defs.m4 for more information)
+`m4_error(`dont use a deflit as $1() because it loses the brackets (see deflit in asm-incl.m4 for more information)
 ')')')
 
 
@@ -1064,18 +1054,6 @@ dnl  aors_n
 m4_not_for_expansion(`OPERATION_add_n')
 m4_not_for_expansion(`OPERATION_sub_n')
 
-dnl  aors_err1_n
-m4_not_for_expansion(`OPERATION_add_err1_n')
-m4_not_for_expansion(`OPERATION_sub_err1_n')
-
-dnl  aors_err2_n
-m4_not_for_expansion(`OPERATION_add_err2_n')
-m4_not_for_expansion(`OPERATION_sub_err2_n')
-
-dnl  aors_err3_n
-m4_not_for_expansion(`OPERATION_add_err3_n')
-m4_not_for_expansion(`OPERATION_sub_err3_n')
-
 dnl  aorsmul_1
 m4_not_for_expansion(`OPERATION_addmul_1')
 m4_not_for_expansion(`OPERATION_submul_1')
@@ -1101,12 +1079,6 @@ m4_not_for_expansion(`OPERATION_rshift')
 dnl  aorslsh1_n
 m4_not_for_expansion(`OPERATION_addlsh1_n')
 m4_not_for_expansion(`OPERATION_sublsh1_n')
-m4_not_for_expansion(`OPERATION_rsblsh1_n')
-
-dnl  aorslsh2_n
-m4_not_for_expansion(`OPERATION_addlsh2_n')
-m4_not_for_expansion(`OPERATION_sublsh2_n')
-m4_not_for_expansion(`OPERATION_rsblsh2_n')
 
 dnl  rsh1aors_n
 m4_not_for_expansion(`OPERATION_rsh1add_n')
@@ -1119,7 +1091,7 @@ dnl  Check that `symbol' is defined.  If it isn't, issue an error and
 dnl  terminate immediately.  The error message explains that the symbol
 dnl  should be in config.m4, copied from gmp-mparam.h.
 dnl
-dnl  Termination is immediate since missing say SQR_TOOM2_THRESHOLD can
+dnl  Termination is immediate since missing say SQR_KARATSUBA_THRESHOLD can
 dnl  lead to infinite loops and endless error messages.
 
 define(m4_config_gmp_mparam,
@@ -1243,10 +1215,10 @@ dnl  definitions.  If COUNT is redefined, the LOG2, MASK and BYTES follow
 dnl  the new definition automatically.
 dnl
 dnl  LOG2 is the log base 2 of COUNT.  MASK is COUNT-1, which can be used as
-dnl  a bit mask.  BYTES is GMP_LIMB_BYTES*COUNT, the number of bytes
+dnl  a bit mask.  BYTES is BYTES_PER_MP_LIMB*COUNT, the number of bytes
 dnl  processed in each unrolled loop.
 dnl
-dnl  GMP_LIMB_BYTES is defined in a CPU specific m4 include file.  It
+dnl  BYTES_PER_MP_LIMB is defined in a CPU specific m4 include file.  It
 dnl  exists only so the BYTES definitions here can be common to all CPUs.
 dnl  In the actual code for a given CPU, an explicit 4 or 8 may as well be
 dnl  used because the code is only for a particular CPU, it doesn't need to
@@ -1283,8 +1255,8 @@ m4_assert_defined(`UNROLL_COUNT')
 
 deflit(UNROLL_BYTES,
 m4_assert_defined(`UNROLL_COUNT')
-m4_assert_defined(`GMP_LIMB_BYTES')
-`eval(UNROLL_COUNT * GMP_LIMB_BYTES)')
+m4_assert_defined(`BYTES_PER_MP_LIMB')
+`eval(UNROLL_COUNT * BYTES_PER_MP_LIMB)')
 
 deflit(CHUNK_LOG2,
 m4_assert_defined(`CHUNK_COUNT')
@@ -1296,8 +1268,8 @@ m4_assert_defined(`CHUNK_COUNT')
 
 deflit(CHUNK_BYTES,
 m4_assert_defined(`CHUNK_COUNT')
-m4_assert_defined(`GMP_LIMB_BYTES')
-`eval(CHUNK_COUNT * GMP_LIMB_BYTES)')
+m4_assert_defined(`BYTES_PER_MP_LIMB')
+`eval(CHUNK_COUNT * BYTES_PER_MP_LIMB)')
 
 
 dnl  Usage: MPN(name)
@@ -1324,65 +1296,31 @@ dnl  function that might be implemented in assembler is here.
 
 define(define_mpn,
 m4_assert_numargs(1)
-`deflit(`mpn_$1',`MPN(`$1')')')
+`define(`mpn_$1',`MPN(`$1')')')
 
 define_mpn(add)
 define_mpn(add_1)
-define_mpn(add_err1_n)
-define_mpn(add_err2_n)
-define_mpn(add_err3_n)
 define_mpn(add_n)
 define_mpn(add_nc)
 define_mpn(addlsh1_n)
-define_mpn(addlsh1_nc)
-define_mpn(addlsh2_n)
-define_mpn(addlsh2_nc)
-define_mpn(addlsh_n)
-define_mpn(addlsh_nc)
-define_mpn(addlsh1_n_ip1)
-define_mpn(addlsh1_nc_ip1)
-define_mpn(addlsh2_n_ip1)
-define_mpn(addlsh2_nc_ip1)
-define_mpn(addlsh_n_ip1)
-define_mpn(addlsh_nc_ip1)
-define_mpn(addlsh1_n_ip2)
-define_mpn(addlsh1_nc_ip2)
-define_mpn(addlsh2_n_ip2)
-define_mpn(addlsh2_nc_ip2)
-define_mpn(addlsh_n_ip2)
-define_mpn(addlsh_nc_ip2)
 define_mpn(addmul_1)
 define_mpn(addmul_1c)
 define_mpn(addmul_2)
 define_mpn(addmul_3)
 define_mpn(addmul_4)
-define_mpn(addmul_5)
-define_mpn(addmul_6)
-define_mpn(addmul_7)
-define_mpn(addmul_8)
-define_mpn(addmul_2s)
-define_mpn(add_n_sub_n)
-define_mpn(add_n_sub_nc)
+define_mpn(addsub_n)
+define_mpn(addsub_nc)
 define_mpn(addaddmul_1msb0)
 define_mpn(and_n)
 define_mpn(andn_n)
-define_mpn(bdiv_q_1)
-define_mpn(pi1_bdiv_q_1)
 define_mpn(bdiv_dbm1c)
+define_mpn(bdivmod)
 define_mpn(cmp)
-define_mpn(cnd_add_n)
-define_mpn(cnd_sub_n)
-define_mpn(com)
+define_mpn(com_n)
 define_mpn(copyd)
 define_mpn(copyi)
 define_mpn(count_leading_zeros)
 define_mpn(count_trailing_zeros)
-define_mpn(div_qr_1n_pi1)
-define_mpn(div_qr_2)
-define_mpn(div_qr_2n_pi1)
-define_mpn(div_qr_2u_pi1)
-define_mpn(div_qr_2n_pi2)
-define_mpn(div_qr_2u_pi2)
 define_mpn(divexact_1)
 define_mpn(divexact_by3c)
 define_mpn(divrem)
@@ -1398,19 +1336,16 @@ define_mpn(gcdext)
 define_mpn(get_str)
 define_mpn(hamdist)
 define_mpn(invert_limb)
-define_mpn(invert_limb_table)
 define_mpn(ior_n)
 define_mpn(iorn_n)
+define_mpn(kara_mul_n)
+define_mpn(kara_sqr_n)
 define_mpn(lshift)
 define_mpn(lshiftc)
-define_mpn(mod_1_1p)
-define_mpn(mod_1_1p_cps)
-define_mpn(mod_1s_2p)
-define_mpn(mod_1s_2p_cps)
-define_mpn(mod_1s_3p)
-define_mpn(mod_1s_3p_cps)
-define_mpn(mod_1s_4p)
-define_mpn(mod_1s_4p_cps)
+define_mpn(mod_1_1)
+define_mpn(mod_1_2)
+define_mpn(mod_1_3)
+define_mpn(mod_1_4)
 define_mpn(mod_1)
 define_mpn(mod_1c)
 define_mpn(mod_34lsub1)
@@ -1422,18 +1357,14 @@ define_mpn(mul_1c)
 define_mpn(mul_2)
 define_mpn(mul_3)
 define_mpn(mul_4)
-define_mpn(mul_5)
-define_mpn(mul_6)
 define_mpn(mul_basecase)
 define_mpn(mul_n)
-define_mpn(mullo_basecase)
-define_mpn(mulmid_basecase)
 define_mpn(perfect_square_p)
 define_mpn(popcount)
 define_mpn(preinv_divrem_1)
 define_mpn(preinv_mod_1)
 define_mpn(nand_n)
-define_mpn(neg)
+define_mpn(neg_n)
 define_mpn(nior_n)
 define_mpn(powm)
 define_mpn(powlo)
@@ -1441,16 +1372,8 @@ define_mpn(random)
 define_mpn(random2)
 define_mpn(redc_1)
 define_mpn(redc_2)
-define_mpn(rsblsh1_n)
-define_mpn(rsblsh1_nc)
-define_mpn(rsblsh2_n)
-define_mpn(rsblsh2_nc)
-define_mpn(rsblsh_n)
-define_mpn(rsblsh_nc)
 define_mpn(rsh1add_n)
-define_mpn(rsh1add_nc)
 define_mpn(rsh1sub_n)
-define_mpn(rsh1sub_nc)
 define_mpn(rshift)
 define_mpn(rshiftc)
 define_mpn(scan0)
@@ -1458,31 +1381,17 @@ define_mpn(scan1)
 define_mpn(set_str)
 define_mpn(sqr_basecase)
 define_mpn(sqr_diagonal)
-define_mpn(sqr_diag_addlsh1)
 define_mpn(sub_n)
 define_mpn(sublsh1_n)
-define_mpn(sublsh1_nc)
-define_mpn(sublsh1_n_ip1)
-define_mpn(sublsh1_nc_ip1)
-define_mpn(sublsh2_n)
-define_mpn(sublsh2_nc)
-define_mpn(sublsh2_n_ip1)
-define_mpn(sublsh2_nc_ip1)
-define_mpn(sublsh_n)
-define_mpn(sublsh_nc)
-define_mpn(sublsh_n_ip1)
-define_mpn(sublsh_nc_ip1)
 define_mpn(sqrtrem)
 define_mpn(sub)
 define_mpn(sub_1)
-define_mpn(sub_err1_n)
-define_mpn(sub_err2_n)
-define_mpn(sub_err3_n)
 define_mpn(sub_n)
 define_mpn(sub_nc)
 define_mpn(submul_1)
 define_mpn(submul_1c)
-define_mpn(sec_tabselect)
+define_mpn(toom3_mul_n)
+define_mpn(toom3_sqr_n)
 define_mpn(umul_ppmm)
 define_mpn(umul_ppmm_r)
 define_mpn(udiv_qrnnd)
@@ -1724,22 +1633,6 @@ m4_assert_numargs(1)
 )
 
 
-dnl  Usage: ABI_SUPPORT(abi)
-dnl
-dnl  A dummy macro which is grepped for by ./configure to know what ABIs
-dnl  are supported in an asm file.
-dnl
-dnl  If multiple non-standard ABIs are supported, several ABI_SUPPORT
-dnl  declarations should be used:
-dnl
-dnl         ABI_SUPPORT(FOOABI)
-dnl         ABI_SUPPORT(BARABI)
-
-define(ABI_SUPPORT,
-m4_assert_numargs(1)
-)
-
-
 dnl  Usage: GMP_NUMB_MASK
 dnl
 dnl  A bit mask for the number part of a limb.  Eg. with 6 bit nails in a
@@ -1751,11 +1644,4 @@ m4_assert_defined(`GMP_NUMB_BITS')
 `m4_hex_lowmask(GMP_NUMB_BITS)')
 
 
-dnl  Usage: m4append(`variable',`value-to-append')
-
-define(`m4append',
-`define(`$1',  defn(`$1')`$2')
-'
-)
-
 divert`'dnl
diff --git a/gmp/mpn/clipper/add_n.s b/gmp/mpn/clipper/add_n.s
new file mode 100644
index 0000000000..225b95042c
--- /dev/null
+++ b/gmp/mpn/clipper/add_n.s
@@ -0,0 +1,46 @@
+; Clipper __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+	.align 16
+.globl ___gmpn_add_n
+___gmpn_add_n:			; uses r0 as store pointer and r1 as load pointer on entry (presumably the first two arguments -- confirm against the Clipper ABI)
+	subq	$8,sp		; make room on the stack to preserve r6
+	storw	r6,(sp)		; save r6; it is reused below as the carry-save register
+	loadw	12(sp),r2	; r2 = second source pointer, fetched from the stack
+	loadw	16(sp),r3	; r3 = limb count, fetched from the stack
+	loadq	$0,r6		; clear carry-save register
+
+.Loop:	loadw	(r1),r4		; r4 = next limb of the first source
+	loadw	(r2),r5		; r5 = next limb of the second source
+	addwc	r6,r6		; restore carry from r6
+	addwc	r5,r4		; r4 = r4 + r5 + carry
+	storw	r4,(r0)		; write the sum limb to the destination
+	subwc	r6,r6		; save carry in r6
+	addq	$4,r0		; advance destination pointer by one 4-byte limb
+	addq	$4,r1		; advance first source pointer
+	addq	$4,r2		; advance second source pointer
+	subq	$1,r3		; one limb fewer to process
+	brne	.Loop		; repeat until the count reaches zero
+
+	negw	r6,r0		; r0 = negated carry-save value (presumably the 0/1 carry-out return value -- confirm)
+	loadw	(sp),r6		; restore the caller's r6
+	addq	$8,sp		; release the stack slot
+	ret	sp		; return to caller
diff --git a/gmp/mpn/clipper/mul_1.s b/gmp/mpn/clipper/mul_1.s
new file mode 100644
index 0000000000..058a317617
--- /dev/null
+++ b/gmp/mpn/clipper/mul_1.s
@@ -0,0 +1,45 @@
+; Clipper __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+; the result in a second limb vector.
+
+; Copyright 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+	.align	16
+.globl	___gmpn_mul_1
+___gmpn_mul_1:			; uses r0 as store pointer and r1 as load pointer on entry (presumably the first two arguments -- confirm against the Clipper ABI)
+	subq	$8,sp		; make room on the stack to preserve r6
+	storw	r6,(sp)		; save r6; it is reused below as the carry limb
+	loadw	12(sp),r2	; r2 = limb count, fetched from the stack
+	loadw	16(sp),r3	; r3 = multiplier limb, fetched from the stack
+	loadq	$0,r6		; clear carry limb
+
+.Loop:	loadw	(r1),r4		; r4 = next source limb
+	mulwux	r3,r4		; multiply by r3; low product limb in r4, high limb apparently in r5 (see uses below -- confirm)
+	addw	r6,r4		; add old carry limb into low product limb
+	loadq	$0,r6		; reset r6 before forming the new carry limb
+	addwc	r5,r6		; propagate cy into high product limb
+	storw	r4,(r0)		; write the low product limb to the destination
+	addq	$4,r0		; advance destination pointer by one 4-byte limb
+	addq	$4,r1		; advance source pointer
+	subq	$1,r2		; one limb fewer to process
+	brne	.Loop		; repeat until the count reaches zero
+
+	movw	r6,r0		; r0 = final carry limb (the return value)
+	loadw	0(sp),r6	; restore the caller's r6
+	addq	$8,sp		; release the stack slot
+	ret	sp		; return to caller
diff --git a/gmp/mpn/clipper/sub_n.s b/gmp/mpn/clipper/sub_n.s
new file mode 100644
index 0000000000..58c2cb3342
--- /dev/null
+++ b/gmp/mpn/clipper/sub_n.s
@@ -0,0 +1,46 @@
+; Clipper __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright 1995, 2000 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+	.align 16
+.globl ___gmpn_sub_n
+___gmpn_sub_n:			; uses r0 as store pointer and r1 as load pointer on entry (presumably the first two arguments -- confirm against the Clipper ABI)
+	subq	$8,sp		; make room on the stack to preserve r6
+	storw	r6,(sp)		; save r6; it is reused below as the carry-save register
+	loadw	12(sp),r2	; r2 = subtrahend pointer, fetched from the stack
+	loadw	16(sp),r3	; r3 = limb count, fetched from the stack
+	loadq	$0,r6		; clear carry-save register
+
+.Loop:	loadw	(r1),r4		; r4 = next minuend limb
+	loadw	(r2),r5		; r5 = next subtrahend limb
+	addwc	r6,r6		; restore carry from r6
+	subwc	r5,r4		; r4 = r4 - r5 - carry (the carry flag acts as the borrow)
+	storw	r4,(r0)		; write the difference limb to the destination
+	subwc	r6,r6		; save carry in r6
+	addq	$4,r0		; advance destination pointer by one 4-byte limb
+	addq	$4,r1		; advance minuend pointer
+	addq	$4,r2		; advance subtrahend pointer
+	subq	$1,r3		; one limb fewer to process
+	brne	.Loop		; repeat until the count reaches zero
+
+	negw	r6,r0		; r0 = negated carry-save value (presumably the 0/1 borrow-out return value -- confirm)
+	loadw	(sp),r6		; restore the caller's r6
+	addq	$8,sp		; release the stack slot
+	ret	sp		; return to caller
diff --git a/gmp/mpn/cpp-ccas b/gmp/mpn/cpp-ccas
index 25f7cdcbeb..fd62f902d1 100755
--- a/gmp/mpn/cpp-ccas
+++ b/gmp/mpn/cpp-ccas
@@ -4,31 +4,20 @@
 
 # Copyright 2001 Free Software Foundation, Inc.
 #
-#  This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or
-#
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 # Usage: cpp-cc --cpp=CPP CC ... file.S ...
diff --git a/gmp/mpn/cray/README b/gmp/mpn/cray/README
index 3a347d2805..ab3b032706 100644
--- a/gmp/mpn/cray/README
+++ b/gmp/mpn/cray/README
@@ -1,30 +1,19 @@
-Copyright 2000-2002 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
@@ -45,7 +34,7 @@ systems with cfp floating point, the main obstacle is the forming of
 128-bit products.  For IEEE systems, adding, and in particular
 computing carry is the main issue.  There are no vectorizing
 unsigned-less-than instructions, and the sequence that implement that
-operation is very long.
+operation is very long.
 
 Shifting is the only operation that is simple to make fast.  All Cray
 systems have a bitblt instructions (Vi Vj,Vj<Ak and Vi Vj,Vj>Ak) that
@@ -118,4 +107,4 @@ down to 2.5 cycles/limb and mpn_addmul_1 times to 4 cycles/limb.  By
 storing even fewer bits per limb, perhaps 56, it would be possible to
 write a mul_mul_basecase that would run at effectively 1 cycle/limb.
 (Use VM here to better handle the romb-shaped multiply area, perhaps
-rounding operand sizes up to the next power of 2.)
+rounding operand sizes up to the next power of 2.)
diff --git a/gmp/mpn/cray/add_n.c b/gmp/mpn/cray/add_n.c
index 65b53bf87a..e4f8a0da9b 100644
--- a/gmp/mpn/cray/add_n.c
+++ b/gmp/mpn/cray/add_n.c
@@ -6,28 +6,17 @@ Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* This code runs at 4 cycles/limb.  It may be possible to bring it down
    to 3 cycles/limb.  */
diff --git a/gmp/mpn/cray/cfp/addmul_1.c b/gmp/mpn/cray/cfp/addmul_1.c
index e1d52e4a5f..c981b3d3a8 100644
--- a/gmp/mpn/cray/cfp/addmul_1.c
+++ b/gmp/mpn/cray/cfp/addmul_1.c
@@ -5,28 +5,17 @@ Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 #include "gmp.h"
diff --git a/gmp/mpn/cray/cfp/mul_1.c b/gmp/mpn/cray/cfp/mul_1.c
index 611a9d2532..5038e93fef 100644
--- a/gmp/mpn/cray/cfp/mul_1.c
+++ b/gmp/mpn/cray/cfp/mul_1.c
@@ -5,28 +5,17 @@ Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 #include "gmp.h"
diff --git a/gmp/mpn/cray/cfp/mulwwc90.s b/gmp/mpn/cray/cfp/mulwwc90.s
index 71d2285fd7..3234913c10 100644
--- a/gmp/mpn/cray/cfp/mulwwc90.s
+++ b/gmp/mpn/cray/cfp/mulwwc90.s
@@ -1,33 +1,22 @@
-*    Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
+*     Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
 
-*    Copyright 1996, 2000 Free Software Foundation, Inc.
-*    This file is generated from mulww.f in this same directory.
+*     Copyright 1996, 2000 Free Software Foundation, Inc.
+*     This file is generated from mulww.f in this same directory.
 
-*  This file is part of the GNU MP Library.
-*
-*  The GNU MP Library is free software; you can redistribute it and/or modify
-*  it under the terms of either:
-*
-*    * the GNU Lesser General Public License as published by the Free
-*      Software Foundation; either version 3 of the License, or (at your
-*      option) any later version.
-*
-*  or
-*
-*    * the GNU General Public License as published by the Free Software
-*      Foundation; either version 2 of the License, or (at your option) any
-*      later version.
-*
-*  or both in parallel, as here.
-*
-*  The GNU MP Library is distributed in the hope that it will be useful, but
-*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-*  for more details.
-*
-*  You should have received copies of the GNU General Public License and the
-*  GNU Lesser General Public License along with the GNU MP Library.  If not,
-*  see https://www.gnu.org/licenses/.
+*     This file is part of the GNU MP Library.
+
+*     The GNU MP Library is free software; you can redistribute it and/or
+*     modify it under the terms of the GNU Lesser General Public License as
+*     published by the Free Software Foundation; either version 3 of the
+*     License, or (at your option) any later version.
+
+*     The GNU MP Library is distributed in the hope that it will be useful,
+*     but WITHOUT ANY WARRANTY; without even the implied warranty of
+*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+*     Lesser General Public License for more details.
+
+*     You should have received a copy of the GNU Lesser General Public License
+*     along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
             IDENT           GMPN_MULWW
 **********************************************
diff --git a/gmp/mpn/cray/cfp/mulwwj90.s b/gmp/mpn/cray/cfp/mulwwj90.s
index 1c2c7cddbe..94d391c2f9 100644
--- a/gmp/mpn/cray/cfp/mulwwj90.s
+++ b/gmp/mpn/cray/cfp/mulwwj90.s
@@ -1,33 +1,22 @@
-*    Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
+*     Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
 
-*    Copyright 1996, 2000 Free Software Foundation, Inc.
-*    This file is generated from mulww.f in this same directory.
+*     Copyright 1996, 2000 Free Software Foundation, Inc.
+*     This file is generated from mulww.f in this same directory.
 
-*  This file is part of the GNU MP Library.
-*
-*  The GNU MP Library is free software; you can redistribute it and/or modify
-*  it under the terms of either:
-*
-*    * the GNU Lesser General Public License as published by the Free
-*      Software Foundation; either version 3 of the License, or (at your
-*      option) any later version.
-*
-*  or
-*
-*    * the GNU General Public License as published by the Free Software
-*      Foundation; either version 2 of the License, or (at your option) any
-*      later version.
-*
-*  or both in parallel, as here.
-*
-*  The GNU MP Library is distributed in the hope that it will be useful, but
-*  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-*  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-*  for more details.
-*
-*  You should have received copies of the GNU General Public License and the
-*  GNU Lesser General Public License along with the GNU MP Library.  If not,
-*  see https://www.gnu.org/licenses/.
+*     This file is part of the GNU MP Library.
+
+*     The GNU MP Library is free software; you can redistribute it and/or
+*     modify it under the terms of the GNU Lesser General Public License as
+*     published by the Free Software Foundation; either version 3 of the
+*     License, or (at your option) any later version.
+
+*     The GNU MP Library is distributed in the hope that it will be useful,
+*     but WITHOUT ANY WARRANTY; without even the implied warranty of
+*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+*     Lesser General Public License for more details.
+
+*     You should have received a copy of the GNU Lesser General Public License
+*     along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
             IDENT           GMPN_MULWW
 **********************************************
diff --git a/gmp/mpn/cray/cfp/submul_1.c b/gmp/mpn/cray/cfp/submul_1.c
index b44c97df45..0507d0ef2e 100644
--- a/gmp/mpn/cray/cfp/submul_1.c
+++ b/gmp/mpn/cray/cfp/submul_1.c
@@ -5,28 +5,17 @@ Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 #include "gmp.h"
diff --git a/gmp/mpn/cray/gmp-mparam.h b/gmp/mpn/cray/gmp-mparam.h
index ea8c25b32e..72dcb627da 100644
--- a/gmp/mpn/cray/gmp-mparam.h
+++ b/gmp/mpn/cray/gmp-mparam.h
@@ -1,35 +1,25 @@
 /* Cray T90 CFP gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1996, 2000-2004 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
 
 #if 0
 #define UMUL_TIME	 519
@@ -40,11 +30,11 @@ see https://www.gnu.org/licenses/.  */
 
 /* Generated by tuneup.c, 2004-02-07, system compiler */
 
-#define MUL_TOOM22_THRESHOLD             71
-#define MUL_TOOM33_THRESHOLD            131
+#define MUL_KARATSUBA_THRESHOLD          71
+#define MUL_TOOM3_THRESHOLD             131
 
 #define SQR_BASECASE_THRESHOLD           32
-#define SQR_TOOM2_THRESHOLD             199
+#define SQR_KARATSUBA_THRESHOLD         199
 #define SQR_TOOM3_THRESHOLD             363
 
 #define DIV_SB_PREINV_THRESHOLD           0  /* (preinv always) */
diff --git a/gmp/mpn/cray/hamdist.c b/gmp/mpn/cray/hamdist.c
index 8eb9ba018c..d80b4d6324 100644
--- a/gmp/mpn/cray/hamdist.c
+++ b/gmp/mpn/cray/hamdist.c
@@ -5,28 +5,17 @@ Copyright 2000 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include <intrinsics.h>
 #include "gmp.h"
diff --git a/gmp/mpn/cray/ieee/addmul_1.c b/gmp/mpn/cray/ieee/addmul_1.c
index 6318b7c9c2..158a79cba8 100644
--- a/gmp/mpn/cray/ieee/addmul_1.c
+++ b/gmp/mpn/cray/ieee/addmul_1.c
@@ -1,33 +1,22 @@
 /* Cray PVP/IEEE mpn_addmul_1 -- multiply a limb vector with a limb and add the
    result to a second limb vector.
 
-Copyright 2000-2002 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* This code runs at just under 9 cycles/limb on a T90.  That is not perfect,
    mainly due to vector register shortage in the main loop.  Assembly code
diff --git a/gmp/mpn/cray/ieee/gmp-mparam.h b/gmp/mpn/cray/ieee/gmp-mparam.h
index 1fdc286574..03d655c814 100644
--- a/gmp/mpn/cray/ieee/gmp-mparam.h
+++ b/gmp/mpn/cray/ieee/gmp-mparam.h
@@ -1,44 +1,33 @@
 /* Cray T90 IEEE gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1996, 2000-2002, 2004 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
 
 /* Generated by tuneup.c, 2004-02-07, system compiler */
 
-#define MUL_TOOM22_THRESHOLD            130
-#define MUL_TOOM33_THRESHOLD            260
+#define MUL_KARATSUBA_THRESHOLD         130
+#define MUL_TOOM3_THRESHOLD             260
 
 #define SQR_BASECASE_THRESHOLD            9  /* karatsuba */
-#define SQR_TOOM2_THRESHOLD               0  /* never sqr_basecase */
+#define SQR_KARATSUBA_THRESHOLD           0  /* never sqr_basecase */
 #define SQR_TOOM3_THRESHOLD              34
 
 #define DIV_SB_PREINV_THRESHOLD           0  /* preinv always */
diff --git a/gmp/mpn/cray/ieee/invert_limb.c b/gmp/mpn/cray/ieee/invert_limb.c
index f951a6e138..e3484a9153 100644
--- a/gmp/mpn/cray/ieee/invert_limb.c
+++ b/gmp/mpn/cray/ieee/invert_limb.c
@@ -5,28 +5,17 @@ Copyright 1991, 2000, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation; either version 3 of the License, or (at
+your option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -84,14 +73,14 @@ mpn_invert_limb (mp_limb_t d)
   mp_limb_t xh, xl;
   mp_limb_t zh, zl;
 
-#if GMP_LIMB_BITS == 32
+#if BITS_PER_MP_LIMB == 32
   z = approx_tab[(d >> 23) - 0x100] << 6;	/* z < 2^16 */
 
   z2l = z * z;					/* z2l < 2^32 */
   umul_ppmm (th, tl, z2l, d);
   z = (z << 17) - (th << 1);
 #endif
-#if GMP_LIMB_BITS == 64
+#if BITS_PER_MP_LIMB == 64
   z = approx_tab[(d >> 55) - 0x100] << 6;	/* z < 2^16 */
 
   z2l = z * z;					/* z2l < 2^32 */
@@ -108,7 +97,7 @@ mpn_invert_limb (mp_limb_t d)
   umul_ppmm (xh, xl, z2l, d);
   tl += xh;
   th += tl < xh;
-  th = (th << 2) | (tl >> GMP_LIMB_BITS - 2);
+  th = (th << 2) | (tl >> BITS_PER_MP_LIMB - 2);
   tl = tl << 2;
   sub_ddmmss (zh, zl, z << 2, 0, th, tl);
 
diff --git a/gmp/mpn/cray/ieee/mul_1.c b/gmp/mpn/cray/ieee/mul_1.c
index dad09fa8cf..4dc2fd9dec 100644
--- a/gmp/mpn/cray/ieee/mul_1.c
+++ b/gmp/mpn/cray/ieee/mul_1.c
@@ -6,28 +6,17 @@ Copyright 2000, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* This code runs at 5 cycles/limb on a T90.  That would probably
    be hard to improve upon, even with assembly code.  */
diff --git a/gmp/mpn/cray/ieee/mul_basecase.c b/gmp/mpn/cray/ieee/mul_basecase.c
index 6dc845dd99..ea32db312e 100644
--- a/gmp/mpn/cray/ieee/mul_basecase.c
+++ b/gmp/mpn/cray/ieee/mul_basecase.c
@@ -5,28 +5,17 @@ Copyright 2000, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* The most critical loop of this code runs at about 5 cycles/limb on a T90.
    That is not perfect, mainly due to vector register shortage.  */
diff --git a/gmp/mpn/cray/ieee/sqr_basecase.c b/gmp/mpn/cray/ieee/sqr_basecase.c
index 840d3dd260..92a9a0e14f 100644
--- a/gmp/mpn/cray/ieee/sqr_basecase.c
+++ b/gmp/mpn/cray/ieee/sqr_basecase.c
@@ -5,28 +5,17 @@ Copyright 2000, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* This is just mpn_mul_basecase with trivial modifications.  */
 
diff --git a/gmp/mpn/cray/ieee/submul_1.c b/gmp/mpn/cray/ieee/submul_1.c
index 27a1939019..4d7a6b47cd 100644
--- a/gmp/mpn/cray/ieee/submul_1.c
+++ b/gmp/mpn/cray/ieee/submul_1.c
@@ -1,33 +1,22 @@
 /* Cray PVP/IEEE mpn_submul_1 -- multiply a limb vector with a limb and
    subtract the result from a second limb vector.
 
-Copyright 2000-2002 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* This code runs at just under 9 cycles/limb on a T90.  That is not perfect,
    mainly due to vector register shortage in the main loop.  Assembly code
diff --git a/gmp/mpn/cray/lshift.c b/gmp/mpn/cray/lshift.c
index 074f38041a..64302e9d39 100644
--- a/gmp/mpn/cray/lshift.c
+++ b/gmp/mpn/cray/lshift.c
@@ -5,28 +5,17 @@ Copyright (C) 2000 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include <intrinsics.h>
 #include "gmp.h"
@@ -40,7 +29,7 @@ mpn_lshift (mp_ptr wp, mp_srcptr up, mp_size_t n, unsigned int cnt)
   mp_limb_t retval;
 
   sh_1 = cnt;
-  sh_2 = GMP_LIMB_BITS - sh_1;
+  sh_2 = BITS_PER_MP_LIMB - sh_1;
   retval = up[n - 1] >> sh_2;
 
 #pragma _CRI ivdep
diff --git a/gmp/mpn/cray/mulww.f b/gmp/mpn/cray/mulww.f
index 9bddf05bc9..e0bf96e441 100644
--- a/gmp/mpn/cray/mulww.f
+++ b/gmp/mpn/cray/mulww.f
@@ -1,36 +1,25 @@
-c    Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
-
-c    Copyright 1996, 2000 Free Software Foundation, Inc.
-
-c    This file is part of the GNU MP Library.
-c  
-c    The GNU MP Library is free software; you can redistribute it and/or modify
-c    it under the terms of either:
-c  
-c      * the GNU Lesser General Public License as published by the Free
-c        Software Foundation; either version 3 of the License, or (at your
-c        option) any later version.
-c  
-c    or
-c  
-c      * the GNU General Public License as published by the Free Software
-c        Foundation; either version 2 of the License, or (at your option) any
-c        later version.
-c  
-c    or both in parallel, as here.
-c  
-c    The GNU MP Library is distributed in the hope that it will be useful, but
-c    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-c    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-c    for more details.
-c  
-c    You should have received copies of the GNU General Public License and the
-c    GNU Lesser General Public License along with the GNU MP Library.  If not,
-c    see https://www.gnu.org/licenses/.
-
-c    p1[] = hi(a[]*s); the upper limbs of each product
-c    p0[] = low(a[]*s); the corresponding lower limbs
-c    n is number of limbs in the vectors
+c     Helper for mpn_mul_1, mpn_addmul_1, and mpn_submul_1 for Cray PVP.
+
+c     Copyright 1996, 2000 Free Software Foundation, Inc.
+
+c     This file is part of the GNU MP Library.
+
+c     The GNU MP Library is free software; you can redistribute it and/or
+c     modify it under the terms of the GNU Lesser General Public License as
+c     published by the Free Software Foundation; either version 3 of the
+c     License, or (at your option) any later version.
+
+c     The GNU MP Library is distributed in the hope that it will be useful,
+c     but WITHOUT ANY WARRANTY; without even the implied warranty of
+c     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+c     Lesser General Public License for more details.
+
+c     You should have received a copy of the GNU Lesser General Public License
+c     along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+c     p1[] = hi(a[]*s); the upper limbs of each product
+c     p0[] = low(a[]*s); the corresponding lower limbs
+c     n is number of limbs in the vectors
 
       subroutine gmpn_mulww(p1,p0,a,n,s)
       integer*8 p1(0:*),p0(0:*),a(0:*),s
diff --git a/gmp/mpn/cray/popcount.c b/gmp/mpn/cray/popcount.c
index 48ddab875e..3abdce85cc 100644
--- a/gmp/mpn/cray/popcount.c
+++ b/gmp/mpn/cray/popcount.c
@@ -5,28 +5,17 @@ Copyright 2000 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include <intrinsics.h>
 #include "gmp.h"
diff --git a/gmp/mpn/cray/rshift.c b/gmp/mpn/cray/rshift.c
index 424bede9db..6280d2ca7e 100644
--- a/gmp/mpn/cray/rshift.c
+++ b/gmp/mpn/cray/rshift.c
@@ -5,28 +5,17 @@ Copyright (C) 2000 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include <intrinsics.h>
 #include "gmp.h"
@@ -40,7 +29,7 @@ mpn_rshift (mp_ptr wp, mp_srcptr up, mp_size_t n, unsigned int cnt)
   mp_limb_t retval;
 
   sh_1 = cnt;
-  sh_2 = GMP_LIMB_BITS - sh_1;
+  sh_2 = BITS_PER_MP_LIMB - sh_1;
   retval = up[0] << sh_2;
 
 #pragma _CRI ivdep
diff --git a/gmp/mpn/cray/sub_n.c b/gmp/mpn/cray/sub_n.c
index 0cc9ad1e04..90a5f1b1e8 100644
--- a/gmp/mpn/cray/sub_n.c
+++ b/gmp/mpn/cray/sub_n.c
@@ -6,28 +6,17 @@ Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* This code runs at 4 cycles/limb.  It may be possible to bring it down
    to 3 cycles/limb.  */
diff --git a/gmp/mpn/generic/add.c b/gmp/mpn/generic/add.c
index 559f26133c..8065ccf3c2 100644
--- a/gmp/mpn/generic/add.c
+++ b/gmp/mpn/generic/add.c
@@ -5,28 +5,17 @@ Copyright 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define __GMP_FORCE_mpn_add 1
 
diff --git a/gmp/mpn/generic/add_1.c b/gmp/mpn/generic/add_1.c
index ca2d866852..2d3fa76c2e 100644
--- a/gmp/mpn/generic/add_1.c
+++ b/gmp/mpn/generic/add_1.c
@@ -5,28 +5,17 @@ Copyright 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define __GMP_FORCE_mpn_add_1 1
 
diff --git a/gmp/mpn/generic/add_err1_n.c b/gmp/mpn/generic/add_err1_n.c
deleted file mode 100644
index b8cb75f6e8..0000000000
--- a/gmp/mpn/generic/add_err1_n.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/* mpn_add_err1_n -- add_n with one error term
-
-   Contributed by David Harvey.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
-  Computes:
-
-  (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
-  return value is carry out.
-
-  (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
-  Computes c[1]*yp[n-1] + ... + c[n]*yp[0], stores two-limb result at ep.
-
-  Requires n >= 1.
-
-  None of the outputs may overlap each other or any of the inputs, except
-  that {rp,n} may be equal to {up,n} or {vp,n}.
-*/
-mp_limb_t
-mpn_add_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
-		mp_ptr ep, mp_srcptr yp,
-                mp_size_t n, mp_limb_t cy)
-{
-  mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;
-
-  ASSERT (n >= 1);
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
-
-  yp += n - 1;
-  el = eh = 0;
-
-  do
-    {
-      yl = *yp--;
-      ul = *up++;
-      vl = *vp++;
-
-      /* ordinary add_n */
-      ADDC_LIMB (cy1, sl, ul, vl);
-      ADDC_LIMB (cy2, rl, sl, cy);
-      cy = cy1 | cy2;
-      *rp++ = rl;
-
-      /* update (eh:el) */
-      zl = (-cy) & yl;
-      el += zl;
-      eh += el < zl;
-    }
-  while (--n);
-
-#if GMP_NAIL_BITS != 0
-  eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);
-  el &= GMP_NUMB_MASK;
-#endif
-
-  ep[0] = el;
-  ep[1] = eh;
-
-  return cy;
-}
diff --git a/gmp/mpn/generic/add_err2_n.c b/gmp/mpn/generic/add_err2_n.c
deleted file mode 100644
index 4b0242a32d..0000000000
--- a/gmp/mpn/generic/add_err2_n.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/* mpn_add_err2_n -- add_n with two error terms
-
-   Contributed by David Harvey.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
-  Computes:
-
-  (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
-  return value is carry out.
-
-  (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
-  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
-           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
-  stores two-limb results at {ep,2} and {ep+2,2} respectively.
-
-  Requires n >= 1.
-
-  None of the outputs may overlap each other or any of the inputs, except
-  that {rp,n} may be equal to {up,n} or {vp,n}.
-*/
-mp_limb_t
-mpn_add_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
-                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
-                mp_size_t n, mp_limb_t cy)
-{
-  mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;
-
-  ASSERT (n >= 1);
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
-
-  yp1 += n - 1;
-  yp2 += n - 1;
-  el1 = eh1 = 0;
-  el2 = eh2 = 0;
-
-  do
-    {
-      yl1 = *yp1--;
-      yl2 = *yp2--;
-      ul = *up++;
-      vl = *vp++;
-
-      /* ordinary add_n */
-      ADDC_LIMB (cy1, sl, ul, vl);
-      ADDC_LIMB (cy2, rl, sl, cy);
-      cy = cy1 | cy2;
-      *rp++ = rl;
-
-      /* update (eh1:el1) */
-      zl1 = (-cy) & yl1;
-      el1 += zl1;
-      eh1 += el1 < zl1;
-
-      /* update (eh2:el2) */
-      zl2 = (-cy) & yl2;
-      el2 += zl2;
-      eh2 += el2 < zl2;
-    }
-  while (--n);
-
-#if GMP_NAIL_BITS != 0
-  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
-  el1 &= GMP_NUMB_MASK;
-  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
-  el2 &= GMP_NUMB_MASK;
-#endif
-
-  ep[0] = el1;
-  ep[1] = eh1;
-  ep[2] = el2;
-  ep[3] = eh2;
-
-  return cy;
-}
diff --git a/gmp/mpn/generic/add_err3_n.c b/gmp/mpn/generic/add_err3_n.c
deleted file mode 100644
index 28cd7facf9..0000000000
--- a/gmp/mpn/generic/add_err3_n.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/* mpn_add_err3_n -- add_n with three error terms
-
-   Contributed by David Harvey.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
-  Computes:
-
-  (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
-  return value is carry out.
-
-  (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
-  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
-           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
-           c[1]*yp3[n-1] + ... + c[n]*yp3[0],
-  stores two-limb results at {ep,2}, {ep+2,2} and {ep+4,2} respectively.
-
-  Requires n >= 1.
-
-  None of the outputs may overlap each other or any of the inputs, except
-  that {rp,n} may be equal to {up,n} or {vp,n}.
-*/
-mp_limb_t
-mpn_add_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
-                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
-                mp_size_t n, mp_limb_t cy)
-{
-  mp_limb_t el1, eh1, el2, eh2, el3, eh3, ul, vl, yl1, yl2, yl3, zl1, zl2, zl3, rl, sl, cy1, cy2;
-
-  ASSERT (n >= 1);
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
-
-  yp1 += n - 1;
-  yp2 += n - 1;
-  yp3 += n - 1;
-  el1 = eh1 = 0;
-  el2 = eh2 = 0;
-  el3 = eh3 = 0;
-
-  do
-    {
-      yl1 = *yp1--;
-      yl2 = *yp2--;
-      yl3 = *yp3--;
-      ul = *up++;
-      vl = *vp++;
-
-      /* ordinary add_n */
-      ADDC_LIMB (cy1, sl, ul, vl);
-      ADDC_LIMB (cy2, rl, sl, cy);
-      cy = cy1 | cy2;
-      *rp++ = rl;
-
-      /* update (eh1:el1) */
-      zl1 = (-cy) & yl1;
-      el1 += zl1;
-      eh1 += el1 < zl1;
-
-      /* update (eh2:el2) */
-      zl2 = (-cy) & yl2;
-      el2 += zl2;
-      eh2 += el2 < zl2;
-
-      /* update (eh3:el3) */
-      zl3 = (-cy) & yl3;
-      el3 += zl3;
-      eh3 += el3 < zl3;
-    }
-  while (--n);
-
-#if GMP_NAIL_BITS != 0
-  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
-  el1 &= GMP_NUMB_MASK;
-  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
-  el2 &= GMP_NUMB_MASK;
-  eh3 = (eh3 << GMP_NAIL_BITS) + (el3 >> GMP_NUMB_BITS);
-  el3 &= GMP_NUMB_MASK;
-#endif
-
-  ep[0] = el1;
-  ep[1] = eh1;
-  ep[2] = el2;
-  ep[3] = eh2;
-  ep[4] = el3;
-  ep[5] = eh3;
-
-  return cy;
-}
diff --git a/gmp/mpn/generic/add_n.c b/gmp/mpn/generic/add_n.c
index 1a07670900..5006e27780 100644
--- a/gmp/mpn/generic/add_n.c
+++ b/gmp/mpn/generic/add_n.c
@@ -1,32 +1,21 @@
 /* mpn_add_n -- Add equal length limb vectors.
 
-Copyright 1992-1994, 1996, 2000, 2002, 2009 Free Software Foundation, Inc.
+Copyright 1992, 1993, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -40,8 +29,8 @@ mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
   mp_limb_t ul, vl, sl, rl, cy, cy1, cy2;
 
   ASSERT (n >= 1);
-  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
-  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
 
   cy = 0;
   do
@@ -70,8 +59,8 @@ mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
   mp_limb_t ul, vl, rl, cy;
 
   ASSERT (n >= 1);
-  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
-  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
 
   cy = 0;
   do
diff --git a/gmp/mpn/generic/addmul_1.c b/gmp/mpn/generic/addmul_1.c
index d76b4ad135..861e1bc830 100644
--- a/gmp/mpn/generic/addmul_1.c
+++ b/gmp/mpn/generic/addmul_1.c
@@ -3,33 +3,23 @@
    pointed to by RP.  Return the most significant limb of the product,
    adjusted for carry-out from the addition.
 
-Copyright 1992-1994, 1996, 2000, 2002, 2004 Free Software Foundation, Inc.
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2004 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/mpn/generic/add_n_sub_n.c b/gmp/mpn/generic/addsub_n.c
index 012eb3e33a..452cf7b211 100644
--- a/gmp/mpn/generic/add_n_sub_n.c
+++ b/gmp/mpn/generic/addsub_n.c
@@ -1,36 +1,25 @@
-/* mpn_add_n_sub_n -- Add and Subtract two limb vectors of equal, non-zero length.
+/* mpn_addsub_n -- Add and Subtract two limb vectors of equal, non-zero length.
 
    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 1999-2001, 2006 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2006 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -39,16 +28,16 @@ see https://www.gnu.org/licenses/.  */
 #define L1_CACHE_SIZE 8192	/* only 68040 has less than this */
 #endif
 
-#define PART_SIZE (L1_CACHE_SIZE / GMP_LIMB_BYTES / 6)
+#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
 
 
-/* mpn_add_n_sub_n.
+/* mpn_addsub_n.
    r1[] = s1[] + s2[]
    r2[] = s1[] - s2[]
    All operands have n limbs.
    In-place operations allowed.  */
 mp_limb_t
-mpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
+mpn_addsub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
 {
   mp_limb_t acyn, acyo;		/* carry for add */
   mp_limb_t scyn, scyo;		/* carry for subtract */
@@ -153,19 +142,19 @@ main (int argc, char **argv)
 
   n = strtol (argv[1], 0, 0);
 
-  r1p = malloc (n * GMP_LIMB_BYTES);
-  r2p = malloc (n * GMP_LIMB_BYTES);
-  s1p = malloc (n * GMP_LIMB_BYTES);
-  s2p = malloc (n * GMP_LIMB_BYTES);
+  r1p = malloc (n * BYTES_PER_MP_LIMB);
+  r2p = malloc (n * BYTES_PER_MP_LIMB);
+  s1p = malloc (n * BYTES_PER_MP_LIMB);
+  s2p = malloc (n * BYTES_PER_MP_LIMB);
   TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
   printf ("              separate add and sub: %.3f\n", t);
-  TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,s2p,n));
+  TIME (t,mpn_addsub_n(r1p,r2p,s1p,s2p,n));
   printf ("combined addsub separate variables: %.3f\n", t);
-  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
+  TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
   printf ("        combined addsub r1 overlap: %.3f\n", t);
-  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
+  TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
   printf ("        combined addsub r2 overlap: %.3f\n", t);
-  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,r2p,n));
+  TIME (t,mpn_addsub_n(r1p,r2p,r1p,r2p,n));
   printf ("          combined addsub in-place: %.3f\n", t);
 
   return 0;
diff --git a/gmp/mpn/generic/bdiv_dbm1c.c b/gmp/mpn/generic/bdiv_dbm1c.c
index 22c3cfd2c8..23cb6f1c9e 100644
--- a/gmp/mpn/generic/bdiv_dbm1c.c
+++ b/gmp/mpn/generic/bdiv_dbm1c.c
@@ -10,28 +10,17 @@ Copyright 2008, 2009 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/mpn/generic/bdiv_q.c b/gmp/mpn/generic/bdiv_q.c
deleted file mode 100644
index 1fc1bb7c09..0000000000
--- a/gmp/mpn/generic/bdiv_q.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/* mpn_bdiv_q -- Hensel division with precomputed inverse, returning quotient.
-
-   Contributed to the GNU project by Torbjorn Granlund.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Computes Q = N / D mod B^n. */
-
-void
-mpn_bdiv_q (mp_ptr qp,
-	    mp_srcptr np, mp_size_t nn,
-	    mp_srcptr dp, mp_size_t dn,
-	    mp_ptr tp)
-{
-  mp_limb_t di;
-
-  if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
-    {
-      MPN_COPY (tp, np, nn);
-      binvert_limb (di, dp[0]);  di = -di;
-      mpn_sbpi1_bdiv_q (qp, tp, nn, dp, dn, di);
-    }
-  else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
-    {
-      MPN_COPY (tp, np, nn);
-      binvert_limb (di, dp[0]);  di = -di;
-      mpn_dcpi1_bdiv_q (qp, tp, nn, dp, dn, di);
-    }
-  else
-    {
-      mpn_mu_bdiv_q (qp, np, nn, dp, dn, tp);
-    }
-  return;
-}
-
-mp_size_t
-mpn_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
-{
-  if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
-    return nn;
-  else
-    return mpn_mu_bdiv_q_itch (nn, dn);
-}
diff --git a/gmp/mpn/generic/bdiv_q_1.c b/gmp/mpn/generic/bdiv_q_1.c
deleted file mode 100644
index 74b247d5a9..0000000000
--- a/gmp/mpn/generic/bdiv_q_1.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/* mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by 1-limb
-   divisor, returning quotient only.
-
-   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
-   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
-   FUTURE GNU MP RELEASES.
-
-Copyright 2000-2003, 2005, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-mp_limb_t
-mpn_pi1_bdiv_q_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t d,
-		  mp_limb_t di, int shift)
-{
-  mp_size_t  i;
-  mp_limb_t  c, h, l, u, u_next, dummy;
-
-  ASSERT (n >= 1);
-  ASSERT (d != 0);
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
-  ASSERT_MPN (up, n);
-  ASSERT_LIMB (d);
-
-  d <<= GMP_NAIL_BITS;
-
-  if (shift != 0)
-    {
-      c = 0;
-
-      u = up[0];
-      rp--;
-      for (i = 1; i < n; i++)
-	{
-	  u_next = up[i];
-	  u = ((u >> shift) | (u_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
-
-	  SUBC_LIMB (c, l, u, c);
-
-	  l = (l * di) & GMP_NUMB_MASK;
-	  rp[i] = l;
-
-	  umul_ppmm (h, dummy, l, d);
-	  c += h;
-	  u = u_next;
-	}
-
-      u = u >> shift;
-      l = u - c;
-      l = (l * di) & GMP_NUMB_MASK;
-      rp[i] = l;
-    }
-  else
-    {
-      u = up[0];
-      l = (u * di) & GMP_NUMB_MASK;
-      rp[0] = l;
-      c = 0;
-
-      for (i = 1; i < n; i++)
-	{
-	  umul_ppmm (h, dummy, l, d);
-	  c += h;
-
-	  u = up[i];
-	  SUBC_LIMB (c, l, u, c);
-
-	  l = (l * di) & GMP_NUMB_MASK;
-	  rp[i] = l;
-	}
-    }
-
-  return c;
-}
-
-mp_limb_t
-mpn_bdiv_q_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t d)
-{
-  mp_limb_t di;
-  int shift;
-
-  ASSERT (n >= 1);
-  ASSERT (d != 0);
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
-  ASSERT_MPN (up, n);
-  ASSERT_LIMB (d);
-
-  if ((d & 1) == 0)
-    {
-      count_trailing_zeros (shift, d);
-      d >>= shift;
-    }
-  else
-    shift = 0;
-
-  binvert_limb (di, d);
-  return mpn_pi1_bdiv_q_1 (rp, up, n, d, di, shift);
-}
diff --git a/gmp/mpn/generic/bdiv_qr.c b/gmp/mpn/generic/bdiv_qr.c
deleted file mode 100644
index 6a5eedbbc2..0000000000
--- a/gmp/mpn/generic/bdiv_qr.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/* mpn_bdiv_qr -- Hensel division with precomputed inverse, returning quotient
-   and remainder.
-
-   Contributed to the GNU project by Torbjorn Granlund.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2006, 2007, 2009, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Computes Q = N / D mod B^n,
-	    R = N - QD.  */
-
-mp_limb_t
-mpn_bdiv_qr (mp_ptr qp, mp_ptr rp,
-	     mp_srcptr np, mp_size_t nn,
-	     mp_srcptr dp, mp_size_t dn,
-	     mp_ptr tp)
-{
-  mp_limb_t di;
-  mp_limb_t rh;
-
-  ASSERT (nn > dn);
-  if (BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD) ||
-      BELOW_THRESHOLD (nn - dn, DC_BDIV_QR_THRESHOLD))
-    {
-      MPN_COPY (tp, np, nn);
-      binvert_limb (di, dp[0]);  di = -di;
-      rh = mpn_sbpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
-      MPN_COPY (rp, tp + nn - dn, dn);
-    }
-  else if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
-    {
-      MPN_COPY (tp, np, nn);
-      binvert_limb (di, dp[0]);  di = -di;
-      rh = mpn_dcpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
-      MPN_COPY (rp, tp + nn - dn, dn);
-    }
-  else
-    {
-      rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, tp);
-    }
-
-  return rh;
-}
-
-mp_size_t
-mpn_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
-{
-  if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
-    return nn;
-  else
-    return  mpn_mu_bdiv_qr_itch (nn, dn);
-}
diff --git a/gmp/mpn/generic/bdivmod.c b/gmp/mpn/generic/bdivmod.c
new file mode 100644
index 0000000000..783b594082
--- /dev/null
+++ b/gmp/mpn/generic/bdivmod.c
@@ -0,0 +1,124 @@
+/* mpn/bdivmod.c: mpn_bdivmod for computing U/V mod 2^d.
+
+Copyright 1991, 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+/* q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d).
+
+   Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and
+   returns the high d%BITS_PER_MP_LIMB bits of Q as the result.
+
+   Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up.  Since the
+   low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows
+   the limb vectors at qp to overwrite the low limbs at up, provided qp <= up.
+
+   Preconditions:
+   1.  V is odd.
+   2.  usize * BITS_PER_MP_LIMB >= d.
+   3.  If Q and U overlap, qp <= up.
+
+   Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
+
+   Funding for this work has been partially provided by Conselho Nacional
+   de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
+   301314194-2, and was done while I was a visiting researcher in the Instituto
+   de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
+
+   References:
+       T. Jebelean, An algorithm for exact division, Journal of Symbolic
+       Computation, v. 15, 1993, pp. 169-180.
+
+       K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
+       Mathematical Software, v. 21 (March), 1995, pp. 111-122.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+mp_limb_t
+mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize,
+	     mp_srcptr vp, mp_size_t vsize, unsigned long int d)
+{
+  mp_limb_t v_inv;  /* inverse of vp[0], set by binvert_limb below */
+
+  ASSERT (usize >= 1);
+  ASSERT (vsize >= 1);
+  ASSERT (usize * GMP_NUMB_BITS >= d);  /* precondition 2: U holds >= d bits */
+  ASSERT (! MPN_OVERLAP_P (up, usize, vp, vsize));
+  ASSERT (! MPN_OVERLAP_P (qp, d/GMP_NUMB_BITS, vp, vsize));
+  ASSERT (MPN_SAME_OR_INCR2_P (qp, d/GMP_NUMB_BITS, up, usize));  /* precondition 3 */
+  ASSERT_MPN (up, usize);
+  ASSERT_MPN (vp, vsize);
+
+  /* 1/V mod 2^GMP_NUMB_BITS.  The inverse exists because V is odd.  */
+  binvert_limb (v_inv, vp[0]);
+
+  /* Fast code for two cases previously used by the accel part of mpn_gcd.
+     (Could probably remove this now it's inlined there.) */
+  if (usize == 2 && vsize == 2 &&
+      (d == GMP_NUMB_BITS || d == 2*GMP_NUMB_BITS))
+    {
+      mp_limb_t hi, lo;  /* lo is written by umul_ppmm but never read */
+      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;  /* first quotient limb */
+      umul_ppmm (hi, lo, q, vp[0] << GMP_NAIL_BITS);
+      up[0] = 0;  /* q was chosen so that q*vp[0] cancels up[0] */
+      up[1] -= hi + q*vp[1];  /* high limb of U - q*V, taken modulo the limb */
+      qp[0] = q;
+      if (d == 2*GMP_NUMB_BITS)
+        {
+          q = (up[1] * v_inv) & GMP_NUMB_MASK;  /* second quotient limb */
+          up[1] = 0;
+          qp[1] = q;
+        }
+      return 0;  /* d is a whole number of limbs here, so no partial bits */
+    }
+
+  /* Main loop: one full quotient limb per pass, cancelling the low limb of U.  */
+  while (d >= GMP_NUMB_BITS)
+    {
+      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;
+      mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);  /* b = borrow */
+      if (usize > vsize)
+	mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);  /* propagate b */
+      d -= GMP_NUMB_BITS;
+      up += 1, usize -= 1;  /* step past the limb that was just cancelled */
+      *qp++ = q;
+    }
+
+  if (d)  /* 0 < d < GMP_NUMB_BITS: a partial quotient limb remains */
+    {
+      mp_limb_t b;
+      mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1);  /* low d bits */
+      if (q <= 1)
+	{
+	  if (q == 0)
+	    return 0;  /* nothing to subtract from U */
+	  else
+	    b = mpn_sub_n (up, up, vp, MIN (usize, vsize));  /* q == 1: U -= V */
+	}
+      else
+	b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
+
+      if (usize > vsize)
+	mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
+      return q;  /* partial limb is returned, not stored at qp */
+    }
+
+  return 0;  /* d was a multiple of GMP_NUMB_BITS: no partial limb */
+}
diff --git a/gmp/mpn/generic/binvert.c b/gmp/mpn/generic/binvert.c
index be27ea552e..24d4dcdb6f 100644
--- a/gmp/mpn/generic/binvert.c
+++ b/gmp/mpn/generic/binvert.c
@@ -1,38 +1,28 @@
-/* Compute {up,n}^(-1) mod B^n.
+/* Compute {up,n}^(-1) mod 2^(n*GMP_NUMB_BITS).
 
    Contributed to the GNU project by Torbjorn Granlund.
 
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
 
-Copyright (C) 2004-2007, 2009, 2012 Free Software Foundation, Inc.
+Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -43,6 +33,14 @@ see https://www.gnu.org/licenses/.  */
   r[k+1] = r[k] + r[k] - r[k]*(u*r[k])
 */
 
+/* This is intended for constant THRESHOLDs only, where the compiler can
+   completely fold the result.  */
+#define LOG2C(n) \
+ (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
+  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
+  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
+  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
+
 #if TUNE_PROGRAM_BUILD
 #define NPOWS \
  ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
@@ -54,9 +52,12 @@ see https://www.gnu.org/licenses/.  */
 mp_size_t
 mpn_binvert_itch (mp_size_t n)
 {
-  mp_size_t itch_local = mpn_mulmod_bnm1_next_size (n);
-  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, n, (n + 1) >> 1);
-  return itch_local + itch_out;
+#if WANT_FFT
+  if (ABOVE_THRESHOLD (n, 2 * MUL_FFT_MODF_THRESHOLD))
+    return mpn_fft_next_size (n, mpn_fft_best_k (n, 0));
+  else
+#endif
+    return 3 * (n - (n >> 1));
 }
 
 void
@@ -75,28 +76,42 @@ mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
 
   xp = scratch;
 
-  /* Compute a base value of rn limbs.  */
+  /* Compute a base value using a low-overhead O(n^2) algorithm.  FIXME: We
+     should call some divide-and-conquer lsb division function here for an
+     operand subrange.  */
   MPN_ZERO (xp, rn);
   xp[0] = 1;
   binvert_limb (di, up[0]);
   if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD))
-    mpn_sbpi1_bdiv_q (rp, xp, rn, up, rn, -di);
+    mpn_sb_bdiv_q (rp, xp, rn, up, rn, -di);
   else
-    mpn_dcpi1_bdiv_q (rp, xp, rn, up, rn, -di);
+    mpn_dc_bdiv_q (rp, xp, rn, up, rn, -di);
 
   /* Use Newton iterations to get the desired precision.  */
   for (; rn < n; rn = newrn)
     {
-      mp_size_t m;
       newrn = *--sizp;
 
-      /* X <- UR. */
-      m = mpn_mulmod_bnm1_next_size (newrn);
-      mpn_mulmod_bnm1 (xp, m, up, newrn, rp, rn, xp + m);
-      mpn_sub_1 (xp + m, xp, rn - (m - newrn), 1);
-
-      /* R = R(X/B^rn) */
-      mpn_mullo_n (rp + rn, rp, xp + rn, newrn - rn);
-      mpn_neg (rp + rn, rp + rn, newrn - rn);
+#if WANT_FFT
+      if (ABOVE_THRESHOLD (newrn, 2 * MUL_FFT_MODF_THRESHOLD))
+	{
+	  int k;
+	  mp_size_t m, i;
+
+	  k = mpn_fft_best_k (newrn, 0);
+	  m = mpn_fft_next_size (newrn, k);
+	  mpn_mul_fft (xp, m, up, newrn, rp, rn, k);
+	  for (i = rn - 1; i >= 0; i--)
+	    if (xp[i] > (i == 0))
+	      {
+		mpn_add_1 (xp + rn, xp + rn, newrn - rn, 1);
+		break;
+	      }
+	}
+      else
+#endif
+	mpn_mul (xp, up, newrn, rp, rn);
+      mpn_mullow_n (rp + rn, rp, xp + rn, newrn - rn);
+      mpn_neg_n (rp + rn, rp + rn, newrn - rn);
     }
 }
diff --git a/gmp/mpn/generic/broot.c b/gmp/mpn/generic/broot.c
deleted file mode 100644
index 6974ac8b9e..0000000000
--- a/gmp/mpn/generic/broot.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/* mpn_broot -- Compute hensel sqrt
-
-   Contributed to the GNU project by Niels Möller
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
-   typical use will have e small. */
-static mp_limb_t
-powlimb (mp_limb_t a, mp_limb_t e)
-{
-  mp_limb_t r = 1;
-  mp_limb_t s = a;
-
-  for (r = 1, s = a; e > 0; e >>= 1, s *= s)
-    if (e & 1)
-      r *= s;
-
-  return r;
-}
-
-/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
-
-   Iterates
-
-     r' <-- r - r * (a^{k-1} r^k - 1) / n
-
-   If
-
-     a^{k-1} r^k = 1 (mod 2^m),
-
-   then
-
-     a^{k-1} r'^k = 1 (mod 2^{2m}),
-
-   Compute the update term as
-
-     r' = r - (a^{k-1} r^{k+1} - r) / k
-
-   where we still have cancellation of low limbs.
-
- */
-void
-mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
-{
-  mp_size_t sizes[GMP_LIMB_BITS * 2];
-  mp_ptr akm1, tp, rnp, ep;
-  mp_limb_t a0, r0, km1, kp1h, kinv;
-  mp_size_t rn;
-  unsigned i;
-
-  TMP_DECL;
-
-  ASSERT (n > 0);
-  ASSERT (ap[0] & 1);
-  ASSERT (k & 1);
-  ASSERT (k >= 3);
-
-  TMP_MARK;
-
-  akm1 = TMP_ALLOC_LIMBS (4*n);
-  tp = akm1 + n;
-
-  km1 = k-1;
-  /* FIXME: Could arrange the iteration so we don't need to compute
-     this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
-     that we can use wraparound also for a*r, since the low half is
-     unchanged from the previous iteration. Or possibly mulmid. Also,
-     a r = a^{1/k}, so we get that value too, for free? */
-  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
-
-  a0 = ap[0];
-  binvert_limb (kinv, k);
-
-  /* 4 bits: a^{1/k - 1} (mod 16):
-
-	a % 8
-	1 3 5 7
-   k%4 +-------
-     1 |1 1 1 1
-     3 |1 9 9 1
-  */
-  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
-  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
-  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
-  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
-#if GMP_NUMB_BITS > 32
-  {
-    unsigned prec = 32;
-    do
-      {
-	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
-	prec *= 2;
-      }
-    while (prec < GMP_NUMB_BITS);
-  }
-#endif
-
-  rp[0] = r0;
-  if (n == 1)
-    {
-      TMP_FREE;
-      return;
-    }
-
-  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
-  kp1h = k/2 + 1;
-
-  /* FIXME: Special case for two limb iteration. */
-  rnp = TMP_ALLOC_LIMBS (2*n + 1);
-  ep = rnp + n;
-
-  /* FIXME: Possible to this on the fly with some bit fiddling. */
-  for (i = 0; n > 1; n = (n + 1)/2)
-    sizes[i++] = n;
-
-  rn = 1;
-
-  while (i-- > 0)
-    {
-      /* Compute x^{k+1}. */
-      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
-			       final iteration. */
-      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
-
-      /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
-
-      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
-      ASSERT (mpn_cmp (ep, rp, rn) == 0);
-
-      ASSERT (sizes[i] <= 2*rn);
-      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
-      mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
-      rn = sizes[i];
-    }
-  TMP_FREE;
-}
-
-/* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
-void
-mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
-{
-  mp_ptr tp;
-  TMP_DECL;
-
-  ASSERT (n > 0);
-  ASSERT (ap[0] & 1);
-  ASSERT (k & 1);
-
-  if (k == 1)
-    {
-      MPN_COPY (rp, ap, n);
-      return;
-    }
-
-  TMP_MARK;
-  tp = TMP_ALLOC_LIMBS (n);
-
-  mpn_broot_invm1 (tp, ap, n, k);
-  mpn_mullo_n (rp, tp, ap, n);
-
-  TMP_FREE;
-}
diff --git a/gmp/mpn/generic/brootinv.c b/gmp/mpn/generic/brootinv.c
deleted file mode 100644
index b96c97f1d3..0000000000
--- a/gmp/mpn/generic/brootinv.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b).
-
-   Contributed to the GNU project by Martin Boij (as part of perfpow.c).
-
-Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
-   typical use will have e small. */
-static mp_limb_t
-powlimb (mp_limb_t a, mp_limb_t e)
-{
-  mp_limb_t r;
-
-  for (r = 1; e > 0; e >>= 1, a *= a)
-    if (e & 1)
-      r *= a;
-
-  return r;
-}
-
-/* Compute r such that r^k * y = 1 (mod B^n).
-
-   Iterates
-     r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b)
-   using Hensel lifting, each time doubling the number of known bits in r.
-
-   Works just for odd k.  Else the Hensel lifting degenerates.
-
-   FIXME:
-
-     (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows).
-
-     (2) Rewrite iteration as
-	   r' <-- r - k^{-1} r (r^k y - 1)
-	 and take advantage of the zero low part of r^k y - 1.
-
-     (3) Use wrap-around trick.
-
-     (4) Use a small table to get starting value.
-
-   Scratch need: 5*bn, where bn = ceil (bnb / GMP_NUMB_BITS).
-*/
-
-void
-mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
-{
-  mp_ptr tp2, tp3;
-  mp_limb_t kinv, k2, r0, y0;
-  mp_size_t order[GMP_LIMB_BITS + 1];
-  int i, d;
-
-  ASSERT (bn > 0);
-  ASSERT ((k & 1) != 0);
-
-  tp2 = tp + bn;
-  tp3 = tp + 2 * bn;
-  k2 = k + 1;
-
-  binvert_limb (kinv, k);
-
-  /* 4-bit initial approximation:
-
-   y%16 | 1  3  5  7  9 11 13 15,
-    k%4 +-------------------------+k2%4
-     1  | 1 11 13  7  9  3  5 15  |  2
-     3  | 1  3  5  7  9 11 13 15  |  0
-
-  */
-  y0 = yp[0];
-
-  r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 2) & 8);		/* 4 bits */
-  r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7f));		/* 8 bits */
-  r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7fff));	/* 16 bits */
-#if GMP_NUMB_BITS > 16
-  {
-    unsigned prec = 16;
-    do
-      {
-	r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2));
-	prec *= 2;
-      }
-    while (prec < GMP_NUMB_BITS);
-  }
-#endif
-
-  rp[0] = r0;
-  if (bn == 1)
-    return;
-
-  /* This initialization doesn't matter for the result (any garbage is
-     cancelled in the iteration), but proper initialization makes
-     valgrind happier. */
-  MPN_ZERO (rp+1, bn-1);
-
-  d = 0;
-  for (; bn > 1; bn = (bn + 1) >> 1)
-    order[d++] = bn;
-
-  for (i = d - 1; i >= 0; i--)
-    {
-      bn = order[i];
-
-      mpn_mul_1 (tp, rp, bn, k2);
-
-      mpn_powlo (tp2, rp, &k2, 1, bn, tp3);
-      mpn_mullo_n (rp, yp, tp2, bn);
-
-      mpn_sub_n (tp2, tp, rp, bn);
-      mpn_pi1_bdiv_q_1 (rp, tp2, bn, k, kinv, 0);
-    }
-}
diff --git a/gmp/mpn/generic/bsqrt.c b/gmp/mpn/generic/bsqrt.c
deleted file mode 100644
index 18ba26f440..0000000000
--- a/gmp/mpn/generic/bsqrt.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* mpn_bsqrt, a^{1/2} (mod 2^n).
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-void
-mpn_bsqrt (mp_ptr rp, mp_srcptr ap, mp_bitcnt_t nb, mp_ptr tp)
-{
-  mp_ptr sp;
-  mp_size_t n;
-
-  ASSERT (nb > 0);
-
-  n = nb / GMP_NUMB_BITS;
-  sp = tp + n;
-
-  mpn_bsqrtinv (sp, ap, nb, tp);
-  mpn_mullo_n (rp, sp, ap, n);
-}
diff --git a/gmp/mpn/generic/bsqrtinv.c b/gmp/mpn/generic/bsqrtinv.c
deleted file mode 100644
index 33df6a3c15..0000000000
--- a/gmp/mpn/generic/bsqrtinv.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/* mpn_bsqrtinv, compute r such that r^2 * y = 1 (mod 2^{b+1}).
-
-   Contributed to the GNU project by Martin Boij (as part of perfpow.c).
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Compute r such that r^2 * y = 1 (mod 2^{b+1}).
-   Return non-zero if such an integer r exists.
-
-   Iterates
-     r' <-- (3r - r^3 y) / 2
-   using Hensel lifting.  Since we divide by two, the Hensel lifting is
-   somewhat degenerates.  Therefore, we lift from 2^b to 2^{b+1}-1.
-
-   FIXME:
-     (1) Simplify to do precision book-keeping in limbs rather than bits.
-
-     (2) Rewrite iteration as
-	   r' <-- r - r (r^2 y - 1) / 2
-	 and take advantage of zero low part of r^2 y - 1.
-
-     (3) Use wrap-around trick.
-
-     (4) Use a small table to get starting value.
-*/
-int
-mpn_bsqrtinv (mp_ptr rp, mp_srcptr yp, mp_bitcnt_t bnb, mp_ptr tp)
-{
-  mp_ptr tp2, tp3;
-  mp_limb_t k;
-  mp_size_t bn, order[GMP_LIMB_BITS + 1];
-  int i, d;
-
-  ASSERT (bnb > 0);
-
-  bn = 1 + bnb / GMP_LIMB_BITS;
-
-  tp2 = tp + bn;
-  tp3 = tp + 2 * bn;
-  k = 3;
-
-  rp[0] = 1;
-  if (bnb == 1)
-    {
-      if ((yp[0] & 3) != 1)
-	return 0;
-    }
-  else
-    {
-      if ((yp[0] & 7) != 1)
-	return 0;
-
-      d = 0;
-      for (; bnb != 2; bnb = (bnb + 2) >> 1)
-	order[d++] = bnb;
-
-      for (i = d - 1; i >= 0; i--)
-	{
-	  bnb = order[i];
-	  bn = 1 + bnb / GMP_LIMB_BITS;
-
-	  mpn_mul_1 (tp, rp, bn, k);
-
-	  mpn_powlo (tp2, rp, &k, 1, bn, tp3);
-	  mpn_mullo_n (rp, yp, tp2, bn);
-
-#if HAVE_NATIVE_mpn_rsh1sub_n
-	  mpn_rsh1sub_n (rp, tp, rp, bn);
-#else
-	  mpn_sub_n (tp2, tp, rp, bn);
-	  mpn_rshift (rp, tp2, bn, 1);
-#endif
-	}
-    }
-  return 1;
-}
diff --git a/gmp/mpn/generic/cmp.c b/gmp/mpn/generic/cmp.c
index 18c7b42844..d352076599 100644
--- a/gmp/mpn/generic/cmp.c
+++ b/gmp/mpn/generic/cmp.c
@@ -5,28 +5,17 @@ Copyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define __GMP_FORCE_mpn_cmp 1
 
diff --git a/gmp/mpn/generic/cnd_add_n.c b/gmp/mpn/generic/cnd_add_n.c
deleted file mode 100644
index 443f9858da..0000000000
--- a/gmp/mpn/generic/cnd_add_n.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* mpn_cnd_add_n -- Compute R = U + V if CND != 0 or R = U if CND == 0.
-   Both cases should take the same time and perform the exact same memory
-   accesses, since this function is intended to be used where side-channel
-   attack resilience is relevant.
-
-Copyright 1992-1994, 1996, 2000, 2002, 2008, 2009, 2011, 2013 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-mp_limb_t
-mpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
-{
-  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
-
-  ASSERT (n >= 1);
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
-
-  mask = -(mp_limb_t) (cnd != 0);
-  cy = 0;
-  do
-    {
-      ul = *up++;
-      vl = *vp++ & mask;
-#if GMP_NAIL_BITS == 0
-      sl = ul + vl;
-      cy1 = sl < ul;
-      rl = sl + cy;
-      cy2 = rl < sl;
-      cy = cy1 | cy2;
-      *rp++ = rl;
-#else
-      rl = ul + vl;
-      rl += cy;
-      cy = rl >> GMP_NUMB_BITS;
-      *rp++ = rl & GMP_NUMB_MASK;
-#endif
-    }
-  while (--n != 0);
-
-  return cy;
-}
diff --git a/gmp/mpn/generic/cnd_sub_n.c b/gmp/mpn/generic/cnd_sub_n.c
deleted file mode 100644
index bd8e029a36..0000000000
--- a/gmp/mpn/generic/cnd_sub_n.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* mpn_cnd_sub_n -- Compute R = U - V if CND != 0 or R = U if CND == 0.
-   Both cases should take the same time and perform the exact same memory
-   accesses, since this function is intended to be used where side-channel
-   attack resilience is relevant.
-
-Copyright 1992-1994, 1996, 2000, 2002, 2008, 2009, 2011, 2013 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-mp_limb_t
-mpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
-{
-  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
-
-  ASSERT (n >= 1);
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
-
-  mask = -(mp_limb_t) (cnd != 0);
-  cy = 0;
-  do
-    {
-      ul = *up++;
-      vl = *vp++ & mask;
-#if GMP_NAIL_BITS == 0
-      sl = ul - vl;
-      cy1 = sl > ul;
-      rl = sl - cy;
-      cy2 = rl > sl;
-      cy = cy1 | cy2;
-      *rp++ = rl;
-#else
-      rl = ul - vl;
-      rl -= cy;
-      cy = rl >> (GMP_LIMB_BITS - 1);
-      *rp++ = rl & GMP_NUMB_MASK;
-#endif
-    }
-  while (--n != 0);
-
-  return cy;
-}
diff --git a/gmp/mpn/generic/com.c b/gmp/mpn/generic/com.c
deleted file mode 100644
index cd8551df5b..0000000000
--- a/gmp/mpn/generic/com.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/* mpn_com - complement an mpn.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef mpn_com
-#define mpn_com __MPN(com)
-
-void
-mpn_com (mp_ptr rp, mp_srcptr up, mp_size_t n)
-{
-  mp_limb_t ul;
-  do {
-      ul = *up++;
-      *rp++ = ~ul & GMP_NUMB_MASK;
-  } while (--n != 0);
-}
diff --git a/gmp/mpn/generic/comb_tables.c b/gmp/mpn/generic/comb_tables.c
deleted file mode 100644
index 41bcb5f879..0000000000
--- a/gmp/mpn/generic/comb_tables.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* Const tables shared among combinatoric functions.
-
-   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
-   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Entry i contains (i!/2^t) where t is chosen such that the parenthesis
-   is an odd integer. */
-const mp_limb_t __gmp_oddfac_table[] = { ONE_LIMB_ODD_FACTORIAL_TABLE, ONE_LIMB_ODD_FACTORIAL_EXTTABLE };
-
-/* Entry i contains ((2i+1)!!/2^t) where t is chosen such that the parenthesis
-   is an odd integer. */
-const mp_limb_t __gmp_odd2fac_table[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE };
-
-/* Entry i contains 2i-popc(2i). */
-const unsigned char __gmp_fac2cnt_table[] = { TABLE_2N_MINUS_POPC_2N };
-
-const mp_limb_t __gmp_limbroots_table[] = { NTH_ROOT_NUMB_MASK_TABLE };
diff --git a/gmp/mpn/generic/copyd.c b/gmp/mpn/generic/copyd.c
deleted file mode 100644
index ba3380a82b..0000000000
--- a/gmp/mpn/generic/copyd.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* mpn_copyd
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_copyd (mp_ptr rp, mp_srcptr up, mp_size_t n)
-{
-  mp_size_t i;
-
-  for (i = n - 1; i >= 0; i--)
-    rp[i] = up[i];
-}
diff --git a/gmp/mpn/generic/copyi.c b/gmp/mpn/generic/copyi.c
deleted file mode 100644
index 0c39b4534b..0000000000
--- a/gmp/mpn/generic/copyi.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/* mpn_copyi
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_copyi (mp_ptr rp, mp_srcptr up, mp_size_t n)
-{
-  mp_size_t i;
-
-  up += n;
-  rp += n;
-  for (i = -n; i != 0; i++)
-    rp[i] = up[i];
-}
diff --git a/gmp/mpn/generic/dc_bdiv_q.c b/gmp/mpn/generic/dc_bdiv_q.c
new file mode 100644
index 0000000000..9a43d18b56
--- /dev/null
+++ b/gmp/mpn/generic/dc_bdiv_q.c
@@ -0,0 +1,137 @@
+/* mpn_dc_bdiv_q -- divide-and-conquer Hensel division with precomputed
+   inverse, returning quotient.
+
+   Contributed to the GNU project by Niels Möller and Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Computes Q = N / D mod B^n, destroys N. */
+
+mp_size_t
+mpn_dc_bdiv_q_n_itch (mp_size_t n)
+{
+  /* NOTE: Depends on the mullow_n interface.  */
+  return n;
+}
+
+void
+mpn_dc_bdiv_q_n (mp_ptr qp,
+		 mp_ptr np, mp_srcptr dp, mp_size_t n,
+		 mp_limb_t dinv, mp_ptr tp)
+{
+  /* Q = N / D mod B^n for n-limb {np,n} and {dp,n}.  np is overwritten and
+     tp is used as temporary space.  */
+  while (ABOVE_THRESHOLD (n, DC_BDIV_Q_THRESHOLD))
+    {
+      mp_size_t l = n >> 1;	/* floor(n/2); limb counts are mp_size_t */
+      mp_size_t h = n - l;	/* ceil(n/2) */
+      mp_limb_t cy;
+
+      cy = mpn_dc_bdiv_qr_n (qp, np, dp, l, dinv, tp);
+
+      mpn_mullow_n (tp, qp, dp + h, l);
+      mpn_sub_n (np + h, np + h, tp, l);
+
+      if (l < h)		/* n odd: dp[l] was used by neither call above */
+	{
+	  cy += mpn_submul_1 (np + l, qp, l, dp[l]);
+	  np[n - 1] -= cy;
+	}
+      qp += l;
+      np += l;
+      n -= l;
+    }
+  mpn_sb_bdiv_q (qp, np, n, dp, n, dinv);
+}
+
+void
+mpn_dc_bdiv_q (mp_ptr qp,
+	       mp_ptr np, mp_size_t nn,
+	       mp_srcptr dp, mp_size_t dn,
+	       mp_limb_t dinv)
+{
+  /* Computes Q = N / D mod B^nn; N = {np,nn} is overwritten, D = {dp,dn}.  */
+  mp_size_t qn;
+  mp_limb_t cy;
+  mp_ptr tp;
+  TMP_DECL;
+
+  TMP_MARK;
+  tp = TMP_SALLOC_LIMBS (dn);
+  qn = nn;
+
+  if (qn > dn)
+    {
+      /* Reduce qn mod dn in a super-efficient manner.  */
+      do
+	qn -= dn;
+      while (qn > dn);
+
+      /* Perform the typically smaller block first.  */
+      if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+	cy = mpn_sb_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+      else
+	cy = mpn_dc_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+      if (qn != dn)
+	{
+	  /* Apply the dn - qn divisor limbs the first block left out.  */
+	  if (qn > dn - qn)
+	    mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+	  else
+	    mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+	  mpn_incr_u (tp + qn, cy);
+	  mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
+	  cy = 0;
+	}
+
+      np += qn;
+      qp += qn;
+      qn = nn - qn;
+      /* qn limbs remain at np, so just qn - dn of them lie above np + dn.  */
+      while (qn > dn)
+	{
+	  mpn_sub_1 (np + dn, np + dn, qn - dn, cy);
+	  cy = mpn_dc_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
+	  qp += dn;
+	  np += dn;
+	  qn -= dn;
+	}
+      /* The last cy belongs at np + dn, past the limbs that remain.  */
+      mpn_dc_bdiv_q_n (qp, np, dp, dn, dinv, tp);
+      TMP_FREE;
+      return;
+    }
+
+  /* nn <= dn: only the low qn limbs of each operand are passed on.  */
+  if (BELOW_THRESHOLD (qn, DC_BDIV_Q_THRESHOLD))
+    mpn_sb_bdiv_q (qp, np, qn, dp, qn, dinv);
+  else
+    mpn_dc_bdiv_q_n (qp, np, dp, qn, dinv, tp);
+
+  TMP_FREE;
+}
diff --git a/gmp/mpn/generic/dcpi1_bdiv_qr.c b/gmp/mpn/generic/dc_bdiv_qr.c
index 8a251f8d9d..8b59bbd860 100644
--- a/gmp/mpn/generic/dcpi1_bdiv_qr.c
+++ b/gmp/mpn/generic/dc_bdiv_qr.c
@@ -1,39 +1,29 @@
-/* mpn_dcpi1_bdiv_qr -- divide-and-conquer Hensel division with precomputed
+/* mpn_dc_bdiv_qr -- divide-and-conquer Hensel division with precomputed
    inverse, returning quotient and remainder.
 
-   Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+   Contributed to the GNU project by Niels Möller and Torbjörn Granlund.
 
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
 
-Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2006, 2007 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -53,14 +43,14 @@ see https://www.gnu.org/licenses/.  */
    d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
 
 mp_size_t
-mpn_dcpi1_bdiv_qr_n_itch (mp_size_t n)
+mpn_dc_bdiv_qr_n_itch (mp_size_t n)
 {
   return n;
 }
 
 mp_limb_t
-mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
-		     mp_limb_t dinv, mp_ptr tp)
+mpn_dc_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+		  mp_limb_t dinv, mp_ptr tp)
 {
   mp_size_t lo, hi;
   mp_limb_t cy;
@@ -70,9 +60,9 @@ mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
   hi = n - lo;			/* ceil(n/2) */
 
   if (BELOW_THRESHOLD (lo, DC_BDIV_QR_THRESHOLD))
-    cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv);
+    cy = mpn_sb_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv);
   else
-    cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
+    cy = mpn_dc_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
 
   mpn_mul (tp, dp + lo, hi, qp, lo);
 
@@ -80,9 +70,9 @@ mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
   rh = mpn_sub (np + lo, np + lo, n + hi, tp, n);
 
   if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
-    cy = mpn_sbpi1_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv);
+    cy = mpn_sb_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv);
   else
-    cy = mpn_dcpi1_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp);
+    cy = mpn_dc_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp);
 
   mpn_mul (tp, qp + lo, hi, dp + hi, lo);
 
@@ -93,8 +83,8 @@ mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
 }
 
 mp_limb_t
-mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
-		   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+mpn_dc_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn,
+		mp_limb_t dinv)
 {
   mp_size_t qn;
   mp_limb_t rr, cy;
@@ -103,10 +93,6 @@ mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
 
   TMP_MARK;
 
-  ASSERT (dn >= 2);		/* to adhere to mpn_sbpi1_div_qr's limits */
-  ASSERT (nn - dn >= 1);	/* to adhere to mpn_sbpi1_div_qr's limits */
-  ASSERT (dp[0] & 1);
-
   tp = TMP_SALLOC_LIMBS (dn);
 
   qn = nn - dn;
@@ -120,9 +106,9 @@ mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
 
       /* Perform the typically smaller block first.  */
       if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
-	cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+	cy = mpn_sb_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
       else
-	cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+	cy = mpn_dc_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
 
       rr = 0;
       if (qn != dn)
@@ -144,7 +130,7 @@ mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
       do
 	{
 	  rr += mpn_sub_1 (np + dn, np + dn, qn, cy);
-	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
+	  cy = mpn_dc_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
 	  qp += dn;
 	  np += dn;
 	  qn -= dn;
@@ -155,9 +141,9 @@ mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
     }
 
   if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
-    cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+    cy = mpn_sb_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
   else
-    cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+    cy = mpn_dc_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
 
   rr = 0;
   if (qn != dn)
diff --git a/gmp/mpn/generic/dc_div_q.c b/gmp/mpn/generic/dc_div_q.c
new file mode 100644
index 0000000000..276ae4fba6
--- /dev/null
+++ b/gmp/mpn/generic/dc_div_q.c
@@ -0,0 +1,57 @@
+/* mpn_dc_div_q -- divide-and-conquer division, returning exact quotient only.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Exact quotient of {np,nn} by {dp,dn}: nn - dn limbs stored at qp, the high
+   quotient limb returned.  {np,nn} may be written only by the exact fallback.  */
+mp_limb_t
+mpn_dc_div_q (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+{
+  mp_ptr tp, wp;
+  mp_limb_t qh;
+  mp_size_t qn;
+  TMP_DECL;
+
+  TMP_MARK;
+  /* Divide a copy of N shifted up one limb, to get a guard quotient limb.  */
+  tp = TMP_SALLOC_LIMBS (nn + 1);
+  MPN_COPY (tp + 1, np, nn);
+  tp[0] = 0;
+  qn = nn - dn;
+  wp = TMP_SALLOC_LIMBS (qn + 1);
+  qh = mpn_dc_divappr_q (wp, tp, nn + 1, dp, dn);
+
+  if (wp[0] == 0)
+    /* FIXME: Should multiply and subtract here, not recompute from scratch.  */
+    qh = mpn_dc_div_qr (qp, np, nn, dp, dn);
+  else
+    MPN_COPY (qp, wp + 1, qn);
+  TMP_FREE;			/* was missing: pairs with TMP_MARK above */
+  return qh;
+}
diff --git a/gmp/mpn/generic/dc_div_qr.c b/gmp/mpn/generic/dc_div_qr.c
new file mode 100644
index 0000000000..41a46f1516
--- /dev/null
+++ b/gmp/mpn/generic/dc_div_qr.c
@@ -0,0 +1,203 @@
+/* mpn_dc_div_qr -- recursive divide-and-conquer division for arbitrary size
+   operands.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+mp_limb_t
+mpn_dc_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+		 mp_srcptr dip, mp_ptr tp)
+{
+  mp_size_t lo, hi;
+  mp_limb_t cy, qh, ql;
+  /* Divide {np,2n} by {dp,n} in two halves: n quotient limbs at qp, qh returned.  */
+  lo = n >> 1;			/* floor(n/2) */
+  hi = n - lo;			/* ceil(n/2) */
+
+  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
+    qh = mpn_sb_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dip);
+  else
+    qh = mpn_dc_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dip, tp);
+  /* High half used only the top hi divisor limbs; now subtract Qhi * Dlo.  */
+  mpn_mul (tp, qp + lo, hi, dp, lo);
+
+  cy = mpn_sub_n (np + lo, np + lo, tp, n);
+  if (qh != 0)
+    cy += mpn_sub_n (np + n, np + n, dp, lo);
+  /* Each pending borrow: decrement the high quotient half and add D back.  */
+  while (cy != 0)
+    {
+      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
+      cy -= mpn_add_n (np + lo, np + lo, dp, n);
+    }
+  /* Same scheme for the low lo quotient limbs, against the top lo divisor limbs.  */
+  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
+    ql = mpn_sb_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dip);
+  else
+    ql = mpn_dc_div_qr_n (qp, np + hi, dp + hi, lo, dip, tp);
+
+  mpn_mul (tp, dp, hi, qp, lo);
+
+  cy = mpn_sub_n (np, np, tp, n);
+  if (ql != 0)
+    cy += mpn_sub_n (np + lo, np + lo, dp, hi);
+  /* Here the borrow out of mpn_sub_1 is ignored; only qh is reported.  */
+  while (cy != 0)
+    {
+      mpn_sub_1 (qp, qp, lo, 1);
+      cy -= mpn_add_n (np, np, dp, n);
+    }
+
+  return qh;
+}
+
+mp_limb_t
+mpn_preinv_dc_div_qr (mp_ptr qp,
+		      mp_ptr np, mp_size_t nn,
+		      mp_srcptr dp, mp_size_t dn,
+		      mp_srcptr dip)
+{
+  mp_size_t qn;
+  mp_limb_t qh, cy;
+  mp_ptr tp;
+  TMP_DECL;
+  /* Divide {np,nn} by {dp,dn}; dip goes to the helpers; qh is returned.  */
+  TMP_MARK;
+
+  tp = TMP_SALLOC_LIMBS (dn);	/* dn limbs of product scratch */
+  /* Advance qp, np and dp past their high ends; blocks are developed downwards.  */
+  qn = nn - dn;
+  qp += qn;
+  np += nn;
+  dp += dn;
+
+  if (qn > dn)
+    {
+      /* Reduce qn mod dn without division, optimizing small operations.  */
+      do
+	qn -= dn;
+      while (qn > dn);
+
+      qp -= qn;			/* point at low limb of next quotient block */
+      np -= qn;			/* point in the middle of partial remainder */
+
+      /* Perform the typically smaller block first.  */
+      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+	qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dip);
+      else
+	qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dip, tp);
+
+      if (qn != dn)
+	{
+	  if (qn > dn - qn)
+	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+	  else
+	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+	  /* Subtract Q times the low dn - qn divisor limbs left out above.  */
+	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+	  if (qh != 0)
+	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+	  /* Each pending borrow: decrement the quotient block, add D back.  */
+	  while (cy != 0)
+	    {
+	      qh -= mpn_sub_1 (qp, qp, qn, 1);
+	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+	    }
+	}
+      /* What is left is a whole number of dn-limb blocks; do them in turn.  */
+      qn = nn - dn - qn;
+      do
+	{
+	  qp -= dn;
+	  np -= dn;
+	  mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dip, tp);	/* result unused */
+	  qn -= dn;
+	}
+      while (qn > 0);
+    }
+  else
+    {
+      if (qn == 0)		/* no quotient limbs: qh = (N >= D), then N -= D */
+	{
+	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
+	  if (qh)
+	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
+	  TMP_FREE;
+	  return qh;
+	}
+
+      qp -= qn;			/* point at low limb of next quotient block */
+      np -= qn;			/* point in the middle of partial remainder */
+      /* Single block of qn <= dn limbs; same steps as the first block above.  */
+      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+	qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dip);
+      else
+	qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dip, tp);
+
+      if (qn != dn)
+	{
+	  if (qn > dn - qn)
+	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+	  else
+	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+	  if (qh != 0)
+	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+	  while (cy != 0)
+	    {
+	      qh -= mpn_sub_1 (qp, qp, qn, 1);
+	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+	    }
+	}
+    }
+
+  TMP_FREE;
+  return qh;
+}
+
+mp_limb_t
+mpn_dc_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+{
+  mp_limb_t top2[2], inv[2];
+
+  ASSERT (dn >= 2);
+
+  /* top2 = (two high limbs of D) + 1; a carry out selects an all-zero dip.  */
+  if (mpn_add_1 (top2, dp + dn - 2, 2, 1) == 0)
+    {
+      mp_limb_t ws[10];		/* FIXME */
+      mpn_invert (inv, top2, 2, ws);
+    }
+  else
+    inv[0] = inv[1] = 0;
+
+  /* Hand over to the preinv variant, which does the actual division.  */
+  return mpn_preinv_dc_div_qr (qp, np, nn, dp, dn, inv);
+}
diff --git a/gmp/mpn/generic/dc_divappr_q.c b/gmp/mpn/generic/dc_divappr_q.c
new file mode 100644
index 0000000000..4474872388
--- /dev/null
+++ b/gmp/mpn/generic/dc_divappr_q.c
@@ -0,0 +1,196 @@
+/* mpn_dc_divappr_q -- divide-and-conquer division, returning only approximate
+   quotient.  The quotient returned is either correct, or unity too large.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+mp_limb_t
+mpn_dc_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+		    mp_srcptr dip, mp_ptr tp)
+{
+  mp_size_t lo, hi;
+  mp_limb_t cy, qh, ql;
+  /* High hi limbs via the exact qr helpers, low lo limbs approximately.  */
+  lo = n >> 1;			/* floor(n/2) */
+  hi = n - lo;			/* ceil(n/2) */
+
+  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
+    qh = mpn_sb_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dip);
+  else
+    qh = mpn_dc_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dip, tp);
+  /* High half used only the top hi divisor limbs; now subtract Qhi * Dlo.  */
+  mpn_mul (tp, qp + lo, hi, dp, lo);
+
+  cy = mpn_sub_n (np + lo, np + lo, tp, n);
+  if (qh != 0)
+    cy += mpn_sub_n (np + n, np + n, dp, lo);
+  /* Each pending borrow: decrement the high quotient half and add D back.  */
+  while (cy != 0)
+    {
+      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
+      cy -= mpn_add_n (np + lo, np + lo, dp, n);
+    }
+  /* Low half: approximate quotient only, no multiply-and-subtract afterwards.  */
+  if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
+    ql = mpn_sb_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dip);
+  else
+    ql = mpn_dc_divappr_q_n (qp, np + hi, dp + hi, lo, dip, tp);
+  /* A nonzero ql cannot be stored; set the low half to all ones instead.  */
+  if (UNLIKELY (ql != 0))
+    {
+      mp_size_t i;
+      for (i = 0; i < lo; i++)
+	qp[i] = GMP_NUMB_MASK;
+    }
+
+  return qh;
+}
+
+mp_limb_t
+mpn_preinv_dc_divappr_q (mp_ptr qp,
+			 mp_ptr np, mp_size_t nn,
+			 mp_srcptr dp, mp_size_t dn,
+			 mp_srcptr dip)
+{
+  mp_size_t qn;
+  mp_limb_t qh, cy, qsave;
+  mp_ptr tp;
+  TMP_DECL;
+  /* Approximate {np,nn} / {dp,dn}; the high quotient limb is returned.  */
+  TMP_MARK;
+
+  tp = TMP_SALLOC_LIMBS (dn+1);
+  /* Advance qp, np and dp past their high ends; blocks are developed downwards.  */
+  qn = nn - dn;
+  qp += qn;
+  np += nn;
+  dp += dn;
+
+  if (qn > dn)
+    {
+      qn++;			/* pretend we'll need an extra limb */
+      /* Reduce qn mod dn without division, optimizing small operations.  */
+      do
+	qn -= dn;
+      while (qn > dn);
+
+      qp -= qn;			/* point at low limb of next quotient block */
+      np -= qn;			/* point in the middle of partial remainder */
+
+      /* Perform the typically smaller block first.  */
+      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+	qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dip);
+      else
+	qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dip, tp);
+
+      if (qn != dn)
+	{
+	  if (qn > dn - qn)
+	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+	  else
+	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+	  /* Subtract Q times the low dn - qn divisor limbs left out above.  */
+	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+	  if (qh != 0)
+	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+	  /* Each pending borrow: decrement the quotient block, add D back.  */
+	  while (cy != 0)
+	    {
+	      qh -= mpn_sub_1 (qp, qp, qn, 1);
+	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+	    }
+	}
+      /* Middle blocks use exact division; only the last block is approximate.  */
+      qn = nn - dn - qn + 1;
+      while (qn > dn)
+	{
+	  qp -= dn;
+	  np -= dn;
+	  mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dip, tp);
+	  qn -= dn;
+	}
+
+      /* Since we pretended we'd need an extra quotient limb before, we now
+	 have made sure the code above left just dn-1=qn quotient limbs to
+	 develop.  Develop that plus a guard limb. */
+      qn--;
+      qp -= qn;
+      np -= dn;
+      qsave = qp[qn];		/* limb of the block above, overwritten below */
+      mpn_dc_divappr_q_n (qp, np - dn, dp - dn, dn, dip, tp);
+      MPN_COPY_INCR (qp, qp + 1, qn);	/* shift down, dropping the guard limb */
+      qp[qn] = qsave;
+    }
+  else
+    {
+      if (qn == 0)		/* no quotient limbs: qh = (N >= D), then N -= D */
+	{
+	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
+	  if (qh)
+	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
+	  TMP_FREE;
+	  return qh;
+	}
+
+      qp -= qn;			/* point at low limb of next quotient block */
+      np -= qn;			/* point in the middle of partial remainder */
+      /* Single block of qn <= dn quotient limbs.  */
+      if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
+	 /* Full precision.  Optimal?  */
+	qh = mpn_sb_divappr_q (qp, np - dn, nn, dp - dn, dn, dip);
+      else
+	{
+	  /* Put quotient in tp, use qp as temporary, since qp lacks a limb.  */
+	  qh = mpn_dc_divappr_q_n (tp, np - qn - 2, dp - (qn + 1), qn + 1, dip, qp);
+	  MPN_COPY (qp, tp + 1, qn);
+	}
+    }
+
+  TMP_FREE;
+  return qh;
+}
+
+mp_limb_t
+mpn_dc_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+{
+  mp_limb_t top2[2], inv[2];
+
+  ASSERT (dn >= 2);
+
+  /* top2 = (two high limbs of D) + 1; a carry out selects an all-zero dip.  */
+  if (mpn_add_1 (top2, dp + dn - 2, 2, 1) == 0)
+    {
+      mp_limb_t ws[10];		/* FIXME */
+      mpn_invert (inv, top2, 2, ws);
+    }
+  else
+    inv[0] = inv[1] = 0;
+
+  /* Hand over to the preinv variant, which develops the quotient.  */
+  return mpn_preinv_dc_divappr_q (qp, np, nn, dp, dn, inv);
+}
diff --git a/gmp/mpn/generic/dc_divrem_n.c b/gmp/mpn/generic/dc_divrem_n.c
new file mode 100644
index 0000000000..61ddde72c3
--- /dev/null
+++ b/gmp/mpn/generic/dc_divrem_n.c
@@ -0,0 +1,121 @@
+/* mpn_dc_divrem_n and auxiliary routines.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
+   INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+   IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
+   FUTURE GNU MP RELEASE.
+
+
+Copyright 2000, 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
+Contributed by Paul Zimmermann.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+[1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler,
+    Technical report MPI-I-98-1-022, October 1998.
+    http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz
+*/
+
+static mp_limb_t mpn_dc_div_3_by_2
+  __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch));
+static mp_limb_t mpn_dc_div_2_by_1
+  __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch));
+
+/* mpn_dc_divrem_n -- divide (np,2n) by (dp,n) following the algorithm on
+   page 8 of [1].  The quotient goes to (qp,n) and the remainder replaces
+   the low part of the numerator, (np,n).  The return value is the top
+   quotient limb, 0 or 1.  The divisor's most significant bit must be set.  */
+
+mp_limb_t
+mpn_dc_divrem_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
+{
+  mp_ptr ws;			/* n limbs handed to the recursive worker */
+  mp_limb_t qhl;
+  TMP_DECL;
+
+  TMP_MARK;
+  ws = TMP_ALLOC_LIMBS (n);
+
+  qhl = mpn_dc_div_2_by_1 (qp, np, dp, n, ws);
+  TMP_FREE;
+  return qhl;
+}
+
+static mp_limb_t
+mpn_dc_div_2_by_1 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+  mp_limb_t qhl, cc;
+  mp_size_t n2 = n/2;
+  /* Divides (np,2n) by (dp,n); quotient to (qp,n), its high limb is returned.  */
+  if (n % 2 != 0)		/* odd n: work with the top n-1 divisor limbs first */
+    {
+      mp_ptr qp1 = qp + 1;	/* qp[0] is passed to the final schoolbook call */
+      qhl = mpn_dc_div_3_by_2 (qp1 + n2, np + 2 + n2, dp + 1, n2, scratch);
+      qhl += mpn_add_1 (qp1 + n2, qp1 + n2, n2,
+			mpn_dc_div_3_by_2 (qp1, np + 2, dp + 1, n2, scratch));
+      /* Correct for the low divisor limb dp[0], which the calls above left out.  */
+      cc = mpn_submul_1 (np + 1, qp1, n - 1, dp[0]);
+      cc = mpn_sub_1 (np + n, np + n, 1, cc);
+      if (qhl != 0)
+	cc += mpn_sub_1 (np + n, np + n, 1, dp[0]);
+      while (cc != 0)		/* borrow pending: decrement quotient, add D back */
+	{
+	  qhl -= mpn_sub_1 (qp1, qp1, n - 1, (mp_limb_t) 1);
+	  cc -= mpn_add_n (np + 1, np + 1, dp, n);
+	}
+      qhl += mpn_add_1 (qp1, qp1, n - 1,
+			mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
+    }
+  else				/* even n: two 3-by-2 steps, high half first */
+    {
+      qhl = mpn_dc_div_3_by_2 (qp + n2, np + n2, dp, n2, scratch);
+      qhl += mpn_add_1 (qp + n2, qp + n2, n2,
+			mpn_dc_div_3_by_2 (qp, np, dp, n2, scratch));
+    }
+  return qhl;
+}
+
+
+/* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n),
+   the remainder in (np, 2n).  Returns the high quotient limb (qhl).
+   scratch receives the 2n-limb product of (qp, n) and (dp, n).  */
+static mp_limb_t
+mpn_dc_div_3_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+  mp_size_t twon = n + n;
+  mp_limb_t qhl, cc;
+  /* Quotient from the top 2n numerator limbs and the top n divisor limbs.  */
+  if (n < DIV_DC_THRESHOLD)
+    qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
+  else
+    qhl = mpn_dc_div_2_by_1 (qp, np + n, dp + n, n, scratch);
+  /* Subtract the quotient times the low n divisor limbs from (np, 2n).  */
+  mpn_mul_n (scratch, qp, dp, n);
+  cc = mpn_sub_n (np, np, scratch, twon);
+  /* A nonzero qhl contributes one more copy of the low divisor half.  */
+  if (qhl != 0)
+    cc += mpn_sub_n (np + n, np + n, dp, n);
+  while (cc != 0)		/* borrow pending: decrement quotient, add D back */
+    {
+      qhl -= mpn_sub_1 (qp, qp, n, (mp_limb_t) 1);
+      cc -= mpn_add_n (np, np, dp, twon);
+    }
+  return qhl;
+}
diff --git a/gmp/mpn/generic/dcpi1_bdiv_q.c b/gmp/mpn/generic/dcpi1_bdiv_q.c
deleted file mode 100644
index a7b86c96d4..0000000000
--- a/gmp/mpn/generic/dcpi1_bdiv_q.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/* mpn_dcpi1_bdiv_q -- divide-and-conquer Hensel division with precomputed
-   inverse, returning quotient.
-
-   Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2006, 2007, 2009-2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-mp_size_t
-mpn_dcpi1_bdiv_q_n_itch (mp_size_t n)
-{
-  /* NOTE: Depends on mullo_n interface */
-  return n;
-}
-
-/* Computes Q = N / D mod B^n, destroys N.
-
-   N = {np,n}
-   D = {dp,n}
-*/
-
-void
-mpn_dcpi1_bdiv_q_n (mp_ptr qp,
-		    mp_ptr np, mp_srcptr dp, mp_size_t n,
-		    mp_limb_t dinv, mp_ptr tp)
-{
-  while (ABOVE_THRESHOLD (n, DC_BDIV_Q_THRESHOLD))
-    {
-      mp_size_t lo, hi;
-      mp_limb_t cy;
-
-      lo = n >> 1;			/* floor(n/2) */
-      hi = n - lo;			/* ceil(n/2) */
-
-      cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
-
-      mpn_mullo_n (tp, qp, dp + hi, lo);
-      mpn_sub_n (np + hi, np + hi, tp, lo);
-
-      if (lo < hi)
-	{
-	  cy += mpn_submul_1 (np + lo, qp, lo, dp[lo]);
-	  np[n - 1] -= cy;
-	}
-      qp += lo;
-      np += lo;
-      n -= lo;
-    }
-  mpn_sbpi1_bdiv_q (qp, np, n, dp, n, dinv);
-}
-
-/* Computes Q = N / D mod B^nn, destroys N.
-
-   N = {np,nn}
-   D = {dp,dn}
-*/
-
-void
-mpn_dcpi1_bdiv_q (mp_ptr qp,
-		  mp_ptr np, mp_size_t nn,
-		  mp_srcptr dp, mp_size_t dn,
-		  mp_limb_t dinv)
-{
-  mp_size_t qn;
-  mp_limb_t cy;
-  mp_ptr tp;
-  TMP_DECL;
-
-  TMP_MARK;
-
-  ASSERT (dn >= 2);
-  ASSERT (nn - dn >= 0);
-  ASSERT (dp[0] & 1);
-
-  tp = TMP_SALLOC_LIMBS (dn);
-
-  qn = nn;
-
-  if (qn > dn)
-    {
-      /* Reduce qn mod dn in a super-efficient manner.  */
-      do
-	qn -= dn;
-      while (qn > dn);
-
-      /* Perform the typically smaller block first.  */
-      if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
-	cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
-      else
-	cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
-
-      if (qn != dn)
-	{
-	  if (qn > dn - qn)
-	    mpn_mul (tp, qp, qn, dp + qn, dn - qn);
-	  else
-	    mpn_mul (tp, dp + qn, dn - qn, qp, qn);
-	  mpn_incr_u (tp + qn, cy);
-
-	  mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
-	  cy = 0;
-	}
-
-      np += qn;
-      qp += qn;
-
-      qn = nn - qn;
-      while (qn > dn)
-	{
-	  mpn_sub_1 (np + dn, np + dn, qn - dn, cy);
-	  cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
-	  qp += dn;
-	  np += dn;
-	  qn -= dn;
-	}
-      mpn_dcpi1_bdiv_q_n (qp, np, dp, dn, dinv, tp);
-    }
-  else
-    {
-      if (BELOW_THRESHOLD (qn, DC_BDIV_Q_THRESHOLD))
-	mpn_sbpi1_bdiv_q (qp, np, qn, dp, qn, dinv);
-      else
-	mpn_dcpi1_bdiv_q_n (qp, np, dp, qn, dinv, tp);
-    }
-
-  TMP_FREE;
-}
diff --git a/gmp/mpn/generic/dcpi1_div_q.c b/gmp/mpn/generic/dcpi1_div_q.c
deleted file mode 100644
index 32d74c31a9..0000000000
--- a/gmp/mpn/generic/dcpi1_div_q.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/* mpn_dc_div_q -- divide-and-conquer division, returning exact quotient
-   only.
-
-   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-mp_limb_t
-mpn_dcpi1_div_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
-		 mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
-{
-  mp_ptr tp, wp;
-  mp_limb_t qh;
-  mp_size_t qn;
-  TMP_DECL;
-
-  TMP_MARK;
-
-  ASSERT (dn >= 6);
-  ASSERT (nn - dn >= 3);
-  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
-
-  tp = TMP_SALLOC_LIMBS (nn + 1);
-  MPN_COPY (tp + 1, np, nn);
-  tp[0] = 0;
-
-  qn = nn - dn;
-  wp = TMP_SALLOC_LIMBS (qn + 1);
-
-  qh = mpn_dcpi1_divappr_q (wp, tp, nn + 1, dp, dn, dinv);
-
-  if (wp[0] == 0)
-    {
-      mp_limb_t cy;
-
-      if (qn > dn)
-	mpn_mul (tp, wp + 1, qn, dp, dn);
-      else
-	mpn_mul (tp, dp, dn, wp + 1, qn);
-
-      cy = (qh != 0) ? mpn_add_n (tp + qn, tp + qn, dp, dn) : 0;
-
-      if (cy || mpn_cmp (tp, np, nn) > 0) /* At most is wrong by one, no cycle. */
-	qh -= mpn_sub_1 (qp, wp + 1, qn, 1);
-      else /* Same as below */
-	MPN_COPY (qp, wp + 1, qn);
-    }
-  else
-    MPN_COPY (qp, wp + 1, qn);
-
-  TMP_FREE;
-  return qh;
-}
diff --git a/gmp/mpn/generic/dcpi1_div_qr.c b/gmp/mpn/generic/dcpi1_div_qr.c
deleted file mode 100644
index 4d80c7b769..0000000000
--- a/gmp/mpn/generic/dcpi1_div_qr.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary
-   size operands.
-
-   Contributed to the GNU project by Torbjorn Granlund.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-mp_limb_t
-mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
-		    gmp_pi1_t *dinv, mp_ptr tp)
-{
-  mp_size_t lo, hi;
-  mp_limb_t cy, qh, ql;
-
-  lo = n >> 1;			/* floor(n/2) */
-  hi = n - lo;			/* ceil(n/2) */
-
-  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
-    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
-  else
-    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
-
-  mpn_mul (tp, qp + lo, hi, dp, lo);
-
-  cy = mpn_sub_n (np + lo, np + lo, tp, n);
-  if (qh != 0)
-    cy += mpn_sub_n (np + n, np + n, dp, lo);
-
-  while (cy != 0)
-    {
-      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
-      cy -= mpn_add_n (np + lo, np + lo, dp, n);
-    }
-
-  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
-    ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
-  else
-    ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);
-
-  mpn_mul (tp, dp, hi, qp, lo);
-
-  cy = mpn_sub_n (np, np, tp, n);
-  if (ql != 0)
-    cy += mpn_sub_n (np + lo, np + lo, dp, hi);
-
-  while (cy != 0)
-    {
-      mpn_sub_1 (qp, qp, lo, 1);
-      cy -= mpn_add_n (np, np, dp, n);
-    }
-
-  return qh;
-}
-
-mp_limb_t
-mpn_dcpi1_div_qr (mp_ptr qp,
-		  mp_ptr np, mp_size_t nn,
-		  mp_srcptr dp, mp_size_t dn,
-		  gmp_pi1_t *dinv)
-{
-  mp_size_t qn;
-  mp_limb_t qh, cy;
-  mp_ptr tp;
-  TMP_DECL;
-
-  TMP_MARK;
-
-  ASSERT (dn >= 6);		/* to adhere to mpn_sbpi1_div_qr's limits */
-  ASSERT (nn - dn >= 3);	/* to adhere to mpn_sbpi1_div_qr's limits */
-  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
-
-  tp = TMP_SALLOC_LIMBS (dn);
-
-  qn = nn - dn;
-  qp += qn;
-  np += nn;
-  dp += dn;
-
-  if (qn > dn)
-    {
-      /* Reduce qn mod dn without division, optimizing small operations.  */
-      do
-	qn -= dn;
-      while (qn > dn);
-
-      qp -= qn;			/* point at low limb of next quotient block */
-      np -= qn;			/* point in the middle of partial remainder */
-
-      /* Perform the typically smaller block first.  */
-      if (qn == 1)
-	{
-	  mp_limb_t q, n2, n1, n0, d1, d0;
-
-	  /* Handle qh up front, for simplicity. */
-	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
-	  if (qh)
-	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
-
-	  /* A single iteration of schoolbook: One 3/2 division,
-	     followed by the bignum update and adjustment. */
-	  n2 = np[0];
-	  n1 = np[-1];
-	  n0 = np[-2];
-	  d1 = dp[-1];
-	  d0 = dp[-2];
-
-	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
-
-	  if (UNLIKELY (n2 == d1) && n1 == d0)
-	    {
-	      q = GMP_NUMB_MASK;
-	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
-	      ASSERT (cy == n2);
-	    }
-	  else
-	    {
-	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
-
-	      if (dn > 2)
-		{
-		  mp_limb_t cy, cy1;
-		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
-
-		  cy1 = n0 < cy;
-		  n0 = (n0 - cy) & GMP_NUMB_MASK;
-		  cy = n1 < cy1;
-		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
-		  np[-2] = n0;
-
-		  if (UNLIKELY (cy != 0))
-		    {
-		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
-		      qh -= (q == 0);
-		      q = (q - 1) & GMP_NUMB_MASK;
-		    }
-		}
-	      else
-		np[-2] = n0;
-
-	      np[-1] = n1;
-	    }
-	  qp[0] = q;
-	}
-      else
-	{
-	  /* Do a 2qn / qn division */
-	  if (qn == 2)
-	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
-	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
-	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
-	  else
-	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
-
-	  if (qn != dn)
-	    {
-	      if (qn > dn - qn)
-		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
-	      else
-		mpn_mul (tp, dp - dn, dn - qn, qp, qn);
-
-	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
-	      if (qh != 0)
-		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
-
-	      while (cy != 0)
-		{
-		  qh -= mpn_sub_1 (qp, qp, qn, 1);
-		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
-		}
-	    }
-	}
-
-      qn = nn - dn - qn;
-      do
-	{
-	  qp -= dn;
-	  np -= dn;
-	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
-	  qn -= dn;
-	}
-      while (qn > 0);
-    }
-  else
-    {
-      qp -= qn;			/* point at low limb of next quotient block */
-      np -= qn;			/* point in the middle of partial remainder */
-
-      if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
-	qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
-      else
-	qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
-
-      if (qn != dn)
-	{
-	  if (qn > dn - qn)
-	    mpn_mul (tp, qp, qn, dp - dn, dn - qn);
-	  else
-	    mpn_mul (tp, dp - dn, dn - qn, qp, qn);
-
-	  cy = mpn_sub_n (np - dn, np - dn, tp, dn);
-	  if (qh != 0)
-	    cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
-
-	  while (cy != 0)
-	    {
-	      qh -= mpn_sub_1 (qp, qp, qn, 1);
-	      cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
-	    }
-	}
-    }
-
-  TMP_FREE;
-  return qh;
-}
diff --git a/gmp/mpn/generic/dcpi1_divappr_q.c b/gmp/mpn/generic/dcpi1_divappr_q.c
deleted file mode 100644
index c7b03c7f49..0000000000
--- a/gmp/mpn/generic/dcpi1_divappr_q.c
+++ /dev/null
@@ -1,257 +0,0 @@
-/* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate
-   quotient.  The quotient returned is either correct, or one too large.
-
-   Contributed to the GNU project by Torbjorn Granlund.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-mp_limb_t
-mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
-		       gmp_pi1_t *dinv, mp_ptr tp)
-{
-  mp_size_t lo, hi;
-  mp_limb_t cy, qh, ql;
-
-  lo = n >> 1;			/* floor(n/2) */
-  hi = n - lo;			/* ceil(n/2) */
-
-  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
-    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
-  else
-    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
-
-  mpn_mul (tp, qp + lo, hi, dp, lo);
-
-  cy = mpn_sub_n (np + lo, np + lo, tp, n);
-  if (qh != 0)
-    cy += mpn_sub_n (np + n, np + n, dp, lo);
-
-  while (cy != 0)
-    {
-      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
-      cy -= mpn_add_n (np + lo, np + lo, dp, n);
-    }
-
-  if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
-    ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
-  else
-    ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp);
-
-  if (UNLIKELY (ql != 0))
-    {
-      mp_size_t i;
-      for (i = 0; i < lo; i++)
-	qp[i] = GMP_NUMB_MASK;
-    }
-
-  return qh;
-}
-
-mp_limb_t
-mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
-		     mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
-{
-  mp_size_t qn;
-  mp_limb_t qh, cy, qsave;
-  mp_ptr tp;
-  TMP_DECL;
-
-  TMP_MARK;
-
-  ASSERT (dn >= 6);
-  ASSERT (nn > dn);
-  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
-
-  qn = nn - dn;
-  qp += qn;
-  np += nn;
-  dp += dn;
-
-  if (qn >= dn)
-    {
-      qn++;			/* pretend we'll need an extra limb */
-      /* Reduce qn mod dn without division, optimizing small operations.  */
-      do
-	qn -= dn;
-      while (qn > dn);
-
-      qp -= qn;			/* point at low limb of next quotient block */
-      np -= qn;			/* point in the middle of partial remainder */
-
-      tp = TMP_SALLOC_LIMBS (dn);
-
-      /* Perform the typically smaller block first.  */
-      if (qn == 1)
-	{
-	  mp_limb_t q, n2, n1, n0, d1, d0;
-
-	  /* Handle qh up front, for simplicity. */
-	  qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
-	  if (qh)
-	    ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
-
-	  /* A single iteration of schoolbook: One 3/2 division,
-	     followed by the bignum update and adjustment. */
-	  n2 = np[0];
-	  n1 = np[-1];
-	  n0 = np[-2];
-	  d1 = dp[-1];
-	  d0 = dp[-2];
-
-	  ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
-
-	  if (UNLIKELY (n2 == d1) && n1 == d0)
-	    {
-	      q = GMP_NUMB_MASK;
-	      cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
-	      ASSERT (cy == n2);
-	    }
-	  else
-	    {
-	      udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
-
-	      if (dn > 2)
-		{
-		  mp_limb_t cy, cy1;
-		  cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
-
-		  cy1 = n0 < cy;
-		  n0 = (n0 - cy) & GMP_NUMB_MASK;
-		  cy = n1 < cy1;
-		  n1 = (n1 - cy1) & GMP_NUMB_MASK;
-		  np[-2] = n0;
-
-		  if (UNLIKELY (cy != 0))
-		    {
-		      n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
-		      qh -= (q == 0);
-		      q = (q - 1) & GMP_NUMB_MASK;
-		    }
-		}
-	      else
-		np[-2] = n0;
-
-	      np[-1] = n1;
-	    }
-	  qp[0] = q;
-	}
-      else
-	{
-	  if (qn == 2)
-	    qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
-	  else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
-	    qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
-	  else
-	    qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
-
-	  if (qn != dn)
-	    {
-	      if (qn > dn - qn)
-		mpn_mul (tp, qp, qn, dp - dn, dn - qn);
-	      else
-		mpn_mul (tp, dp - dn, dn - qn, qp, qn);
-
-	      cy = mpn_sub_n (np - dn, np - dn, tp, dn);
-	      if (qh != 0)
-		cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
-
-	      while (cy != 0)
-		{
-		  qh -= mpn_sub_1 (qp, qp, qn, 1);
-		  cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
-		}
-	    }
-	}
-      qn = nn - dn - qn + 1;
-      while (qn > dn)
-	{
-	  qp -= dn;
-	  np -= dn;
-	  mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
-	  qn -= dn;
-	}
-
-      /* Since we pretended we'd need an extra quotient limb before, we now
-	 have made sure the code above left just dn-1=qn quotient limbs to
-	 develop.  Develop that plus a guard limb. */
-      qn--;
-      qp -= qn;
-      np -= dn;
-      qsave = qp[qn];
-      mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
-      MPN_COPY_INCR (qp, qp + 1, qn);
-      qp[qn] = qsave;
-    }
-  else    /* (qn < dn) */
-    {
-      mp_ptr q2p;
-#if 0				/* not possible since we demand nn > dn */
-      if (qn == 0)
-	{
-	  qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
-	  if (qh)
-	    mpn_sub_n (np - dn, np - dn, dp - dn, dn);
-	  TMP_FREE;
-	  return qh;
-	}
-#endif
-
-      qp -= qn;			/* point at low limb of next quotient block */
-      np -= qn;			/* point in the middle of partial remainder */
-
-      q2p = TMP_SALLOC_LIMBS (qn + 1);
-      /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on
-	 callers not to be silly?  */
-      if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
-	{
-	  qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
-				    dp - (qn + 1), qn + 1, dinv->inv32);
-	}
-      else
-	{
-	  /* It is tempting to use qp for recursive scratch and put quotient in
-	     tp, but the recursive scratch needs one limb too many.  */
-	  tp = TMP_SALLOC_LIMBS (qn + 1);
-	  qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
-	}
-      MPN_COPY (qp, q2p + 1, qn);
-    }
-
-  TMP_FREE;
-  return qh;
-}
diff --git a/gmp/mpn/generic/div_q.c b/gmp/mpn/generic/div_q.c
deleted file mode 100644
index aabcef0825..0000000000
--- a/gmp/mpn/generic/div_q.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/* mpn_div_q -- division for arbitrary size operands.
-
-   Contributed to the GNU project by Torbjorn Granlund.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2009, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-/* Compute Q = N/D with truncation.
-     N = {np,nn}
-     D = {dp,dn}
-     Q = {qp,nn-dn+1}
-     T = {scratch,nn+1} is scratch space
-   N and D are both untouched by the computation.
-   N and T may overlap; pass the same space if N is irrelevant after the call,
-   but note that tp needs an extra limb.
-
-   Operand requirements:
-     N >= D > 0
-     dp[dn-1] != 0
-     No overlap between the N, D, and Q areas.
-
-   This division function does not clobber its input operands, since it is
-   intended to support average-O(qn) division, and for that to be effective, it
-   cannot put requirements on callers to copy a O(nn) operand.
-
-   If a caller does not care about the value of {np,nn+1} after calling this
-   function, it should pass np also for the scratch argument.  This function
-   will then save some time and space by avoiding allocation and copying.
-   (FIXME: Is this a good design?  We only really save any copying for
-   already-normalised divisors, which should be rare.  It also prevents us from
-   reasonably asking for all scratch space we need.)
-
-   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
-   the most significant quotient limb?  Look at the 4 main code blocks below
-   (consisting of an outer if-else where each arm contains an if-else). It is
-   tricky for the first code block, since the mpn_*_div_q calls will typically
-   generate all nn-dn+1 and return 0 or 1.  I don't see how to fix that unless
-   we generate the most significant quotient limb here, before calling
-   mpn_*_div_q, or put the quotient in a temporary area.  Since this is a
-   critical division case (the SB sub-case in particular) copying is not a good
-   idea.
-
-   It might make sense to split the if-else parts of the (qn + FUDGE
-   >= dn) blocks into separate functions, since we could promise quite
-   different things to callers in these two cases.  The 'then' case
-   benefits from np=scratch, and it could perhaps even tolerate qp=np,
-   saving some headache for many callers.
-
-   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU size
-   operands, we do not reuse the huge scratch for adjustments.  This can be a
-   serious waste of memory for the largest operands.
-*/
-
-/* FUDGE determines when to try getting an approximate quotient from the upper
-   parts of the dividend and divisor, then adjust.  N.B. FUDGE must be >= 2
-   for the code to be correct.  */
-#define FUDGE 5			/* FIXME: tune this */
-
-#define DC_DIV_Q_THRESHOLD      DC_DIVAPPR_Q_THRESHOLD
-#define MU_DIV_Q_THRESHOLD      MU_DIVAPPR_Q_THRESHOLD
-#define MUPI_DIV_Q_THRESHOLD  MUPI_DIVAPPR_Q_THRESHOLD
-#ifndef MUPI_DIVAPPR_Q_THRESHOLD
-#define MUPI_DIVAPPR_Q_THRESHOLD  MUPI_DIV_QR_THRESHOLD
-#endif
-
-void
-mpn_div_q (mp_ptr qp,
-	   mp_srcptr np, mp_size_t nn,
-	   mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
-{
-  mp_ptr new_dp, new_np, tp, rp;
-  mp_limb_t cy, dh, qh;
-  mp_size_t new_nn, qn;
-  gmp_pi1_t dinv;
-  int cnt;
-  TMP_DECL;
-  TMP_MARK;
-
-  ASSERT (nn >= dn);
-  ASSERT (dn > 0);
-  ASSERT (dp[dn - 1] != 0);
-  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
-  ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
-  ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));
-
-  ASSERT_ALWAYS (FUDGE >= 2);
-
-  if (dn == 1)
-    {
-      mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
-      return;
-    }
-
-  qn = nn - dn + 1;		/* Quotient size, high limb might be zero */
-
-  if (qn + FUDGE >= dn)
-    {
-      /* |________________________|
-                          |_______|  */
-      new_np = scratch;
-
-      dh = dp[dn - 1];
-      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
-	{
-	  count_leading_zeros (cnt, dh);
-
-	  cy = mpn_lshift (new_np, np, nn, cnt);
-	  new_np[nn] = cy;
-	  new_nn = nn + (cy != 0);
-
-	  new_dp = TMP_ALLOC_LIMBS (dn);
-	  mpn_lshift (new_dp, dp, dn, cnt);
-
-	  if (dn == 2)
-	    {
-	      qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
-	    }
-	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
-		   BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
-	    {
-	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
-	      qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
-	    }
-	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
-		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
-		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
-		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
-	    {
-	      invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
-	      qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
-	    }
-	  else
-	    {
-	      mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
-	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
-	      qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
-	    }
-	  if (cy == 0)
-	    qp[qn - 1] = qh;
-	  else if (UNLIKELY (qh != 0))
-	    {
-	      /* This happens only when the quotient is close to B^n and
-		 mpn_*_divappr_q returned B^n.  */
-	      mp_size_t i, n;
-	      n = new_nn - dn;
-	      for (i = 0; i < n; i++)
-		qp[i] = GMP_NUMB_MAX;
-	      qh = 0;		/* currently ignored */
-	    }
-	}
-      else  /* divisor is already normalised */
-	{
-	  if (new_np != np)
-	    MPN_COPY (new_np, np, nn);
-
-	  if (dn == 2)
-	    {
-	      qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
-	    }
-	  else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
-		   BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
-	    {
-	      invert_pi1 (dinv, dh, dp[dn - 2]);
-	      qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
-	    }
-	  else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) ||   /* fast condition */
-		   BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
-		   (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
-		   + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn)   /* ...condition */
-	    {
-	      invert_pi1 (dinv, dh, dp[dn - 2]);
-	      qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
-	    }
-	  else
-	    {
-	      mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
-	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
-	      qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
-	    }
-	  qp[nn - dn] = qh;
-	}
-    }
-  else
-    {
-      /* |________________________|
-                |_________________|  */
-      tp = TMP_ALLOC_LIMBS (qn + 1);
-
-      new_np = scratch;
-      new_nn = 2 * qn + 1;
-      if (new_np == np)
-	/* We need {np,nn} to remain untouched until the final adjustment, so
-	   we need to allocate separate space for new_np.  */
-	new_np = TMP_ALLOC_LIMBS (new_nn + 1);
-
-
-      dh = dp[dn - 1];
-      if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
-	{
-	  count_leading_zeros (cnt, dh);
-
-	  cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
-	  new_np[new_nn] = cy;
-
-	  new_nn += (cy != 0);
-
-	  new_dp = TMP_ALLOC_LIMBS (qn + 1);
-	  mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
-	  new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
-
-	  if (qn + 1 == 2)
-	    {
-	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
-	    }
-	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
-	    {
-	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
-	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
-	    }
-	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
-	    {
-	      invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
-	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
-	    }
-	  else
-	    {
-	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
-	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
-	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
-	    }
-	  if (cy == 0)
-	    tp[qn] = qh;
-	  else if (UNLIKELY (qh != 0))
-	    {
-	      /* This happens only when the quotient is close to B^n and
-		 mpn_*_divappr_q returned B^n.  */
-	      mp_size_t i, n;
-	      n = new_nn - (qn + 1);
-	      for (i = 0; i < n; i++)
-		tp[i] = GMP_NUMB_MAX;
-	      qh = 0;		/* currently ignored */
-	    }
-	}
-      else  /* divisor is already normalised */
-	{
-	  MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless of MU will be used */
-
-	  new_dp = (mp_ptr) dp + dn - (qn + 1);
-
-	  if (qn == 2 - 1)
-	    {
-	      qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
-	    }
-	  else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
-	    {
-	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
-	      qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
-	    }
-	  else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
-	    {
-	      invert_pi1 (dinv, dh, new_dp[qn - 1]);
-	      qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
-	    }
-	  else
-	    {
-	      mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
-	      mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
-	      qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
-	    }
-	  tp[qn] = qh;
-	}
-
-      MPN_COPY (qp, tp + 1, qn);
-      if (tp[0] <= 4)
-        {
-	  mp_size_t rn;
-
-          rp = TMP_ALLOC_LIMBS (dn + qn);
-          mpn_mul (rp, dp, dn, tp + 1, qn);
-	  rn = dn + qn;
-	  rn -= rp[rn - 1] == 0;
-
-          if (rn > nn || mpn_cmp (np, rp, nn) < 0)
-            mpn_decr_u (qp, 1);
-        }
-    }
-
-  TMP_FREE;
-}
diff --git a/gmp/mpn/generic/div_qr_1.c b/gmp/mpn/generic/div_qr_1.c
deleted file mode 100644
index 09401ac535..0000000000
--- a/gmp/mpn/generic/div_qr_1.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/* mpn_div_qr_1 -- mpn by limb division.
-
-   Contributed to the GNU project by Niels Möller and Torbjörn Granlund
-
-Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003, 2013 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#ifndef DIV_QR_1_NORM_THRESHOLD
-#define DIV_QR_1_NORM_THRESHOLD 3
-#endif
-#ifndef DIV_QR_1_UNNORM_THRESHOLD
-#define DIV_QR_1_UNNORM_THRESHOLD 3
-#endif
-
-#if GMP_NAIL_BITS > 0
-#error Nail bits not supported
-#endif
-
-/* Divides {up, n} by d. Writes the n-1 low quotient limbs at {qp,
- * n-1}, and the high quote limb at *qh. Returns remainder. */
-mp_limb_t
-mpn_div_qr_1 (mp_ptr qp, mp_limb_t *qh, mp_srcptr up, mp_size_t n,
-	      mp_limb_t d)
-{
-  unsigned cnt;
-  mp_limb_t uh;
-
-  ASSERT (n > 0);
-  ASSERT (d > 0);
-
-  if (d & GMP_NUMB_HIGHBIT)
-    {
-      /* Normalized case */
-      mp_limb_t dinv, q;
-
-      uh = up[--n];
-
-      q = (uh >= d);
-      *qh = q;
-      uh -= (-q) & d;
-
-      if (BELOW_THRESHOLD (n, DIV_QR_1_NORM_THRESHOLD))
-	{
-	  cnt = 0;
-	plain:
-	  while (n > 0)
-	    {
-	      mp_limb_t ul = up[--n];
-	      udiv_qrnnd (qp[n], uh, uh, ul, d);
-	    }
-	  return uh >> cnt;
-	}
-      invert_limb (dinv, d);
-      return mpn_div_qr_1n_pi1 (qp, up, n, uh, d, dinv);
-    }
-  else
-    {
-      /* Unnormalized case */
-      mp_limb_t dinv, ul;
-
-      if (! UDIV_NEEDS_NORMALIZATION
-	  && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
-	{
-	  uh = up[--n];
-	  udiv_qrnnd (*qh, uh, CNST_LIMB(0), uh, d);
-	  cnt = 0;
-	  goto plain;
-	}
-
-      count_leading_zeros (cnt, d);
-      d <<= cnt;
-
-#if HAVE_NATIVE_div_qr_1u_pi1
-      /* FIXME: Call loop doing on-the-fly normalization */
-#endif
-
-      /* Shift up front, use qp area for shifted copy. A bit messy,
-	 since we have only n-1 limbs available, and shift the high
-	 limb manually. */
-      uh = up[--n];
-      ul = (uh << cnt) | mpn_lshift (qp, up, n, cnt);
-      uh >>= (GMP_LIMB_BITS - cnt);
-
-      if (UDIV_NEEDS_NORMALIZATION
-	  && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
-	{
-	  udiv_qrnnd (*qh, uh, uh, ul, d);
-	  up = qp;
-	  goto plain;
-	}
-      invert_limb (dinv, d);
-
-      udiv_qrnnd_preinv (*qh, uh, uh, ul, d, dinv);
-      return mpn_div_qr_1n_pi1 (qp, qp, n, uh, d, dinv) >> cnt;
-    }
-}
diff --git a/gmp/mpn/generic/div_qr_1n_pi1.c b/gmp/mpn/generic/div_qr_1n_pi1.c
deleted file mode 100644
index 229ee091a4..0000000000
--- a/gmp/mpn/generic/div_qr_1n_pi1.c
+++ /dev/null
@@ -1,277 +0,0 @@
-/* mpn_div_qr_1n_pi1
-
-   Contributed to the GNU project by Niels Möller
-
-   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#if GMP_NAIL_BITS > 0
-#error Nail bits not supported
-#endif
-
-#ifndef DIV_QR_1N_METHOD
-#define DIV_QR_1N_METHOD 2
-#endif
-
-/* FIXME: Duplicated in mod_1_1.c. Move to gmp-impl.h */
-
-/* add_mssaaaa (m, s1, s0, a1, a0, b1, b0)
-
-   As defined by the generic C version at the end of this group: store
-   the two-limb sum <a1,a0> + <b1,b0> in <s1,s0> (wrapping), and set m to
-   minus the carry out of the high limb, i.e. all ones if the addition
-   overflowed two limbs and zero otherwise.  The inline-asm definitions
-   that follow are per-CPU versions selected under __GNUC__; the C
-   version is used only when none of them was defined. */
-
-#if defined (__GNUC__)
-
-#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
-  __asm__ (  "add	%6, %k2\n\t"					\
-	     "adc	%4, %k1\n\t"					\
-	     "sbb	%k0, %k0"					\
-	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
-	   : "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
-	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
-  __asm__ (  "add	%6, %q2\n\t"					\
-	     "adc	%4, %q1\n\t"					\
-	     "sbb	%q0, %q0"					\
-	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
-	   : "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
-	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
-#endif
-
-#if defined (__sparc__) && W_TYPE_SIZE == 32
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
-  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
-	     "addxcc	%r3, %4, %1\n\t"				\
-	     "subx	%%g0, %%g0, %0"					\
-	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
-	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
-	 __CLOBBER_CC)
-#endif
-
-#if defined (__sparc__) && W_TYPE_SIZE == 64
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
-  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
-	     "addccc	%r7, %8, %%g0\n\t"				\
-	     "addccc	%r3, %4, %1\n\t"				\
-	     "clr	%0\n\t"						\
-	     "movcs	%%xcc, -1, %0"					\
-	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
-	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),		\
-	     "rJ" ((al) >> 32), "rI" ((bl) >> 32)			\
-	 __CLOBBER_CC)
-#if __VIS__ >= 0x300
-#undef add_mssaaaa
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
-  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
-	     "addxccc	%r3, %4, %1\n\t"				\
-	     "clr	%0\n\t"						\
-	     "movcs	%%xcc, -1, %0"					\
-	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
-	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
-	 __CLOBBER_CC)
-#endif
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
-/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
-   processor running in 32-bit mode, since the carry flag then gets the 32-bit
-   carry.  */
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
-  __asm__ (  "add%I6c	%2, %5, %6\n\t"					\
-	     "adde	%1, %3, %4\n\t"					\
-	     "subfe	%0, %0, %0\n\t"					\
-	     "nor	%0, %0, %0"					\
-	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
-	   : "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0))
-#endif
-
-#if defined (__s390x__) && W_TYPE_SIZE == 64
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
-  __asm__ (  "algr	%2, %6\n\t"					\
-	     "alcgr	%1, %4\n\t"					\
-	     "lghi	%0, 0\n\t"					\
-	     "alcgr	%0, %0\n\t"					\
-	     "lcgr	%0, %0"						\
-	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
-	   : "1"  ((UDItype)(a1)), "r" ((UDItype)(b1)),			\
-	     "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
-#endif
-
-#if defined (__arm__) && W_TYPE_SIZE == 32
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
-  __asm__ (  "adds	%2, %5, %6\n\t"					\
-	     "adcs	%1, %3, %4\n\t"					\
-	     "movcc	%0, #0\n\t"					\
-	     "movcs	%0, #-1"					\
-	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
-	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
-#endif
-#endif /* defined (__GNUC__) */
-
-/* Generic C version.  __c0 is the carry out of the low limb, __c1 the
-   carry out of the high-limb add before __c0 is folded in; at most one
-   of __c1 and (__s1 < __c0) can be set, so m ends up 0 or all ones. */
-#ifndef add_mssaaaa
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
-  do {									\
-    UWtype __s0, __s1, __c0, __c1;					\
-    __s0 = (a0) + (b0);							\
-    __s1 = (a1) + (b1);							\
-    __c0 = __s0 < (a0);							\
-    __c1 = __s1 < (a1);							\
-    (s0) = __s0;							\
-    __s1 = __s1 + __c0;							\
-    (s1) = __s1;							\
-    (m) = - (__c1 + (__s1 < __c0));					\
-  } while (0)
-#endif
-
-#if DIV_QR_1N_METHOD == 1
-
-/* Divide the (n+1)-limb number uh B^n + {up, n} by d, using the
-   precomputed inverse dinv.  The n quotient limbs go to {qp, n} and the
-   remainder is returned.  uh < d is required. */
-mp_limb_t
-mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t uh,
-		   mp_limb_t d, mp_limb_t dinv)
-{
-  mp_limb_t limb, quot;
-
-  ASSERT (n > 0);
-  ASSERT (uh < d);
-  ASSERT (d & GMP_NUMB_HIGHBIT);
-  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, up, n));
-
-  /* Work from the most significant limb down; uh holds the running
-     remainder.  The exit test comes after the step, so one limb is
-     always processed. */
-  for (;;)
-    {
-      n--;
-      limb = up[n];
-      udiv_qrnnd_preinv (quot, uh, uh, limb, d, dinv);
-      qp[n] = quot;
-      if (n <= 0)
-	break;
-    }
-
-  return uh;
-}
-
-#elif DIV_QR_1N_METHOD == 2
-
-/* Variant of mpn_div_qr_1n_pi1 compiled when DIV_QR_1N_METHOD == 2.
-   Takes the high limb u1 (asserted < d), the n low limbs {up, n}, the
-   divisor d (asserted to have its high bit set) and dinv; stores n
-   quotient limbs at {qp, n} and returns the remainder limb.  The high
-   limb of up is read before the first store to qp so that qp == up is
-   allowed. */
-mp_limb_t
-mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
-		   mp_limb_t d, mp_limb_t dinv)
-{
-  mp_limb_t B2;
-  mp_limb_t u0, u2;
-  mp_limb_t q0, q1;
-  mp_limb_t p0, p1;
-  mp_limb_t t;
-  mp_size_t j;
-
-  ASSERT (d & GMP_LIMB_HIGHBIT);
-  ASSERT (n > 0);
-  ASSERT (u1 < d);
-
-  /* A single limb needs only one ordinary preinv division step. */
-  if (n == 1)
-    {
-      udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
-      return u1;
-    }
-
-  /* FIXME: Could be precomputed */
-  /* NOTE(review): presumably B2 is B^2 mod d for limb base B, given
-     the usual definition of dinv -- confirm. */
-  B2 = -d*dinv;
-
-  umul_ppmm (q1, q0, dinv, u1);
-  umul_ppmm (p1, p0, B2, u1);
-  q1 += u1;
-  ASSERT (q1 >= u1);
-  u0 = up[n-1];	/* Early read, to allow qp == up. */
-  qp[n-1] = q1;
-
-  /* u2 is the carry mask (0 or all ones) out of the two-limb add. */
-  add_mssaaaa (u2, u1, u0, u0, up[n-2], p1, p0);
-
-  /* FIXME: Keep q1 in a variable between iterations, to reduce number
-     of memory accesses. */
-  for (j = n-2; j-- > 0; )
-    {
-      mp_limb_t q2, cy;
-
-      /* Additions for the q update:
-       *	+-------+
-       *        |u1 * v |
-       *        +---+---+
-       *        | u1|
-       *    +---+---+
-       *    | 1 | v |  (conditional on u2)
-       *    +---+---+
-       *        | 1 |  (conditional on u0 + u2 B2 carry)
-       *        +---+
-       * +      | q0|
-       *   -+---+---+---+
-       *    | q2| q1| q0|
-       *    +---+---+---+
-      */
-      umul_ppmm (p1, t, u1, dinv);
-      add_ssaaaa (q2, q1, -u2, u2 & dinv, CNST_LIMB(0), u1);
-      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), p1);
-      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), q0);
-      q0 = t;
-
-      umul_ppmm (p1, p0, u1, B2);
-      ADDC_LIMB (cy, u0, u0, u2 & B2);
-      u0 -= (-cy) & d;
-
-      /* Final q update */
-      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), cy);
-      qp[j+1] = q1;
-      /* q2 goes into the quotient limbs already stored above qp[j+1]. */
-      MPN_INCR_U (qp+j+2, n-j-2, q2);
-
-      add_mssaaaa (u2, u1, u0, u0, up[j], p1, p0);
-    }
-
-  /* Fold the pending carry mask u2, and then a possible u1 >= d, into
-     q1 while reducing u1, ahead of the last division step. */
-  q1 = (u2 > 0);
-  u1 -= (-q1) & d;
-
-  t = (u1 >= d);
-  q1 += t;
-  u1 -= (-t) & d;
-
-  udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
-  add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);
-
-  MPN_INCR_U (qp+1, n-1, q1);
-
-  qp[0] = q0;
-  return u0;
-}
-
-#else
-#error Unknown DIV_QR_1N_METHOD
-#endif
diff --git a/gmp/mpn/generic/div_qr_1n_pi2.c b/gmp/mpn/generic/div_qr_1n_pi2.c
deleted file mode 100644
index 7ea3410cb6..0000000000
--- a/gmp/mpn/generic/div_qr_1n_pi2.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/* mpn_div_qr_1n_pi2.
-
-   THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS
-   ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-/* ISSUES:
-
-   * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?
-
-   * Are there any problems with generating n quotient limbs in the q area?  It
-     surely simplifies things.
-
-   * Not yet adequately tested.
-*/
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Define some longlong.h-style macros, but for wider operations.
-   * add_sssaaaa is like longlong.h's add_ssaaaa but propagating
-     carry-out into an additional sum operand.  s2 is both input and
-     output: the generic C version below adds the carry out of
-     <a1,a0> + <b1,b0> to the value s2 already holds, and the asm
-     versions list s2 among their inputs as well.
-*/
-#if defined (__GNUC__)  && ! defined (__INTEL_COMPILER)
-
-#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
-  __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0"		\
-	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
-	   : "0"  ((USItype)(s2)),					\
-	     "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
-	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
-#endif
-
-#if defined (__amd64__) && W_TYPE_SIZE == 64
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
-  __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0"		\
-	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
-	   : "0"  ((UDItype)(s2)),					\
-	     "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
-	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
-/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
-   processor running in 32-bit mode, since the carry flag then gets the 32-bit
-   carry.  */
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
-  __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0"	\
-	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
-	   : "r"  (s2), "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0))
-#endif
-
-#endif /* __GNUC__ */
-
-/* Generic C version, used when no asm version was defined above. */
-#ifndef add_sssaaaa
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
-  do {									\
-    UWtype __s0, __s1, __c0, __c1;					\
-    __s0 = (a0) + (b0);							\
-    __s1 = (a1) + (b1);							\
-    __c0 = __s0 < (a0);							\
-    __c1 = __s1 < (a1);							\
-    (s0) = __s0;							\
-    __s1 = __s1 + __c0;							\
-    (s1) = __s1;							\
-    (s2) += __c1 + (__s1 < __c0);					\
-  } while (0)
-#endif
-
-/* Precomputed per-divisor data handed to mpn_div_qr_1n_pi2. */
-struct precomp_div_1_pi2
-{
-  mp_limb_t dip[2];	/* inverse limbs: dip[1] is read as di1, dip[0] as di0 */
-  mp_limb_t d;		/* the divisor; asserted to have its high bit set */
-  int norm_cnt;		/* not read by mpn_div_qr_1n_pi2 */
-};
-
-/* Divides {up, un} by the normalized single-limb divisor pd->d (high bit
-   set, asserted) using the inverse limbs pd->dip[].  un quotient limbs
-   are stored at {qp, un}: the top one, qp[un-1], is 0 or 1, and the rest
-   are produced two per loop iteration, with a single-limb step at the
-   end when un is even.  Returns the remainder.  Requires un >= 2. */
-mp_limb_t
-mpn_div_qr_1n_pi2 (mp_ptr qp,
-		   mp_srcptr up, mp_size_t un,
-		   struct precomp_div_1_pi2 *pd)
-{
-  mp_limb_t most_significant_q_limb;
-  mp_size_t i;
-  mp_limb_t r, u2, u1, u0;
-  mp_limb_t d0, di1, di0;
-  mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
-  mp_limb_t cnd;
-
-  ASSERT (un >= 2);
-  ASSERT ((pd->d & GMP_NUMB_HIGHBIT) != 0);
-  ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
-  ASSERT_MPN (up, un);
-
-/* q3, q2, q1 name the limbs of the quotient estimate; they alias the
-   temporaries that hold those limbs when the additions are done. */
-#define q3 q3a
-#define q2 q2b
-#define q1 q1b
-
-  /* Point up (and qp below) at a three-limb window whose top element,
-     index 2, is the most significant limb. */
-  up += un - 3;
-  r = up[2];
-  d0 = pd->d;
-
-  /* The top quotient limb is 0 or 1; d0 is subtracted from r only when
-     it is 1. */
-  most_significant_q_limb = (r >= d0);
-  r -= d0 & -most_significant_q_limb;
-
-  qp += un - 3;
-  qp[2] = most_significant_q_limb;
-
-  di1 = pd->dip[1];
-  di0 = pd->dip[0];
-
-  for (i = un - 3; i >= 0; i -= 2)
-    {
-      u2 = r;
-      u1 = up[1];
-      u0 = up[0];
-
-      /* Dividend in {r,u1,u0} */
-
-      umul_ppmm (q1d,q0d, u1, di0);
-      umul_ppmm (q2b,q1b, u1, di1);
-      q2b++;				/* cannot spill */
-      add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
-
-      umul_ppmm (q2c,q1c, u2,  di0);
-      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
-      umul_ppmm (q3a,q2a, u2, di1);
-
-      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
-
-      q3 += r;
-
-      /* Candidate remainder, computed modulo the limb size. */
-      r = u0 - q2 * d0;
-
-      /* When r >= q1, add d0 back and take one off <q3,q2>. */
-      cnd = (r >= q1);
-      r += d0 & -cnd;
-      sub_ddmmss (q3,q2,  q3,q2,  0,cnd);
-
-      /* Rarely needed second adjustment, in the other direction. */
-      if (UNLIKELY (r >= d0))
-	{
-	  r -= d0;
-	  add_ssaaaa (q3,q2,  q3,q2,  0,1);
-	}
-
-      qp[0] = q2;
-      qp[1] = q3;
-
-      up -= 2;
-      qp -= 2;
-    }
-
-  /* un even: one dividend limb is left below the pairs handled above.
-     Finish with an ordinary preinv step using the high inverse limb. */
-  if ((un & 1) == 0)
-    {
-      u2 = r;
-      u1 = up[1];
-
-      udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
-      qp[1] = q3;
-    }
-
-  return r;
-
-#undef q3
-#undef q2
-#undef q1
-}
diff --git a/gmp/mpn/generic/div_qr_1u_pi2.c b/gmp/mpn/generic/div_qr_1u_pi2.c
deleted file mode 100644
index 83d66ef29e..0000000000
--- a/gmp/mpn/generic/div_qr_1u_pi2.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/* mpn_div_qr_1u_pi2.
-
-   THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS
-   ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-/* ISSUES:
-
-   * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?
-
-   * Are there any problems with generating n quotient limbs in the q area?  It
-     surely simplifies things.
-
-   * Not yet adequately tested.
-*/
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Define some longlong.h-style macros, but for wider operations.
-   * add_sssaaaa is like longlong.h's add_ssaaaa but propagating
-     carry-out into an additional sum operand.  s2 is both input and
-     output: the generic C version below adds the carry out of
-     <a1,a0> + <b1,b0> to the value s2 already holds, and the asm
-     versions list s2 among their inputs as well.
-*/
-#if defined (__GNUC__)  && ! defined (__INTEL_COMPILER)
-
-#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
-  __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0"		\
-	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
-	   : "0"  ((USItype)(s2)),					\
-	     "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
-	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
-#endif
-
-#if defined (__amd64__) && W_TYPE_SIZE == 64
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
-  __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0"		\
-	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
-	   : "0"  ((UDItype)(s2)),					\
-	     "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
-	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
-/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
-   processor running in 32-bit mode, since the carry flag then gets the 32-bit
-   carry.  */
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
-  __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0"	\
-	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
-	   : "r"  (s2), "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0))
-#endif
-
-#endif /* __GNUC__ */
-
-/* Generic C version, used when no asm version was defined above. */
-#ifndef add_sssaaaa
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
-  do {									\
-    UWtype __s0, __s1, __c0, __c1;					\
-    __s0 = (a0) + (b0);							\
-    __s1 = (a1) + (b1);							\
-    __c0 = __s0 < (a0);							\
-    __c1 = __s1 < (a1);							\
-    (s0) = __s0;							\
-    __s1 = __s1 + __c0;							\
-    (s1) = __s1;							\
-    (s2) += __c1 + (__s1 < __c0);					\
-  } while (0)
-#endif
-
-/* Precomputed per-divisor data handed to mpn_div_qr_1u_pi2. */
-struct precomp_div_1_pi2
-{
-  mp_limb_t dip[2];	/* inverse limbs: dip[1] is read as di1, dip[0] as di0 */
-  mp_limb_t d;		/* the divisor, unshifted; asserted to have its high bit clear */
-  int norm_cnt;		/* left shift applied to d and to the dividend limbs */
-};
-
-/* Divides {up, un} by the unnormalized single-limb divisor pd->d (high
-   bit clear, asserted).  The divisor is shifted left by pd->norm_cnt and
-   the dividend limbs are shifted by the same count as they are read.
-   Quotient limbs are stored at qp from qp[un-1] downwards, two per step;
-   when un is odd the last step is a single-limb one.  Returns the
-   remainder, shifted back right by norm_cnt.  Requires un >= 2.
-
-   NOTE(review): the right shifts by GMP_NUMB_BITS - cnt assume
-   norm_cnt > 0; presumably that follows from the high-bit-clear
-   precondition -- confirm where norm_cnt is computed.  */
-mp_limb_t
-mpn_div_qr_1u_pi2 (mp_ptr qp,
-		   mp_srcptr up, mp_size_t un,
-		   struct precomp_div_1_pi2 *pd)
-{
-  mp_size_t i;
-  mp_limb_t r, u2, u1, u0;
-  mp_limb_t d0, di1, di0;
-  mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
-  mp_limb_t cnd;
-  int cnt;
-
-  ASSERT (un >= 2);
-  ASSERT ((pd->d & GMP_NUMB_HIGHBIT) == 0);
-  ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
-  ASSERT_MPN (up, un);
-
-/* q3, q2, q1 name the limbs of the quotient estimate; they alias the
-   temporaries that hold those limbs when the additions are done. */
-#define q3 q3a
-#define q2 q2b
-#define q1 q1b
-
-  /* up[2] is the most significant dividend limb; r starts as the bits
-     the shift pushes out of its top. */
-  up += un - 3;
-  cnt = pd->norm_cnt;
-  r = up[2] >> (GMP_NUMB_BITS - cnt);
-  d0 = pd->d << cnt;
-
-  qp += un - 2;
-
-  di1 = pd->dip[1];
-  di0 = pd->dip[0];
-
-  for (i = un - 3; i >= 0; i -= 2)
-    {
-      /* Two shifted dividend limbs, each assembled from a pair of
-	 neighbouring source limbs. */
-      u2 = r;
-      u1 = (up[2] << cnt) | (up[1] >> (GMP_NUMB_BITS - cnt));
-      u0 = (up[1] << cnt) | (up[0] >> (GMP_NUMB_BITS - cnt));
-
-      /* Dividend in {r,u1,u0} */
-
-      umul_ppmm (q1d,q0d, u1, di0);
-      umul_ppmm (q2b,q1b, u1, di1);
-      q2b++;				/* cannot spill */
-      add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
-
-      umul_ppmm (q2c,q1c, u2,  di0);
-      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
-      umul_ppmm (q3a,q2a, u2, di1);
-
-      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
-
-      q3 += r;
-
-      /* Candidate remainder, computed modulo the limb size. */
-      r = u0 - q2 * d0;
-
-      /* When r >= q1, add d0 back and take one off <q3,q2>. */
-      cnd = (r >= q1);
-      r += d0 & -cnd;
-      sub_ddmmss (q3,q2,  q3,q2,  0,cnd);
-
-      /* Rarely needed second adjustment, in the other direction. */
-      if (UNLIKELY (r >= d0))
-	{
-	  r -= d0;
-	  add_ssaaaa (q3,q2,  q3,q2,  0,1);
-	}
-
-      qp[0] = q2;
-      qp[1] = q3;
-
-      up -= 2;
-      qp -= 2;
-    }
-
-  if ((un & 1) != 0)
-    {
-      /* un odd: only the shifted low part of the lowest source limb is
-	 left; one ordinary preinv step with the high inverse limb. */
-      u2 = r;
-      u1 = (up[2] << cnt);
-
-      udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
-      qp[1] = q3;
-    }
-  else
-    {
-      /* un even: one more two-limb step, same arithmetic as the loop
-	 body, with zero bits shifted in below the lowest source limb. */
-      u2 = r;
-      u1 = (up[2] << cnt) | (up[1] >> (GMP_NUMB_BITS - cnt));
-      u0 = (up[1] << cnt);
-
-      /* Dividend in {r,u1,u0} */
-
-      umul_ppmm (q1d,q0d, u1, di0);
-      umul_ppmm (q2b,q1b, u1, di1);
-      q2b++;				/* cannot spill */
-      add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
-
-      umul_ppmm (q2c,q1c, u2,  di0);
-      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
-      umul_ppmm (q3a,q2a, u2, di1);
-
-      add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
-
-      q3 += r;
-
-      r = u0 - q2 * d0;
-
-      cnd = (r >= q1);
-      r += d0 & -cnd;
-      sub_ddmmss (q3,q2,  q3,q2,  0,cnd);
-
-      if (UNLIKELY (r >= d0))
-	{
-	  r -= d0;
-	  add_ssaaaa (q3,q2,  q3,q2,  0,1);
-	}
-
-      qp[0] = q2;
-      qp[1] = q3;
-    }
-
-  /* Undo the normalization shift on the remainder. */
-  return r >> cnt;
-
-#undef q3
-#undef q2
-#undef q1
-}
diff --git a/gmp/mpn/generic/div_qr_2.c b/gmp/mpn/generic/div_qr_2.c
deleted file mode 100644
index cb07e0e3b4..0000000000
--- a/gmp/mpn/generic/div_qr_2.c
+++ /dev/null
@@ -1,332 +0,0 @@
-/* mpn_div_qr_2 -- Divide natural numbers, producing both remainder and
-   quotient.  The divisor is two limbs.
-
-   Contributed to the GNU project by Torbjorn Granlund and Niels Möller
-
-   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-
-Copyright 1993-1996, 1999-2002, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#ifndef DIV_QR_2_PI2_THRESHOLD
-/* Disabled unless explicitly tuned. */
-#define DIV_QR_2_PI2_THRESHOLD MP_LIMB_T_MAX
-#endif
-
-#ifndef SANITY_CHECK
-#define SANITY_CHECK 0
-#endif
-
-/* Define some longlong.h-style macros, but for wider operations.
-   * add_sssaaaa is like longlong.h's add_ssaaaa but propagating
-     carry-out into an additional sum operand.  s2 is both input and
-     output: the generic C version adds the carry out of
-     <a1,a0> + <b1,b0> to the value s2 already holds.
-   * add_csaac accepts two addends and a carry in, and generates a sum
-     and a carry out.  A little like a "full adder".  In the generic C
-     version: s = a + b + ci (wrapping), co = the carry out of that sum.
-*/
-#if defined (__GNUC__)  && ! defined (__INTEL_COMPILER)
-
-#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
-  __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0"		\
-	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
-	   : "0"  ((USItype)(s2)),					\
-	     "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
-	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
-#define add_csaac(co, s, a, b, ci)					\
-  __asm__ ("bt\t$0, %2\n\tadc\t%5, %k1\n\tadc\t%k0, %k0"		\
-	   : "=r" (co), "=r" (s)					\
-	   : "rm"  ((USItype)(ci)), "0" (CNST_LIMB(0)),			\
-	     "%1" ((USItype)(a)), "g" ((USItype)(b)))
-#endif
-
-#if defined (__amd64__) && W_TYPE_SIZE == 64
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
-  __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0"		\
-	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
-	   : "0"  ((UDItype)(s2)),					\
-	     "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
-	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
-#define add_csaac(co, s, a, b, ci)					\
-  __asm__ ("bt\t$0, %2\n\tadc\t%5, %q1\n\tadc\t%q0, %q0"		\
-	   : "=r" (co), "=r" (s)					\
-	   : "rm"  ((UDItype)(ci)), "0" (CNST_LIMB(0)),			\
-	     "%1" ((UDItype)(a)), "g" ((UDItype)(b)))
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
-/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
-   processor running in 32-bit mode, since the carry flag then gets the 32-bit
-   carry.  */
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
-  __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0"	\
-	   : "=r" (s2), "=&r" (s1), "=&r" (s0)				\
-	   : "r"  (s2), "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0))
-#endif
-
-#endif /* __GNUC__ */
-
-/* Generic C versions, used when no asm version was defined above. */
-#ifndef add_sssaaaa
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0)				\
-  do {									\
-    UWtype __s0, __s1, __c0, __c1;					\
-    __s0 = (a0) + (b0);							\
-    __s1 = (a1) + (b1);							\
-    __c0 = __s0 < (a0);							\
-    __c1 = __s1 < (a1);							\
-    (s0) = __s0;							\
-    __s1 = __s1 + __c0;							\
-    (s1) = __s1;							\
-    (s2) += __c1 + (__s1 < __c0);					\
-  } while (0)
-#endif
-
-#ifndef add_csaac
-#define add_csaac(co, s, a, b, ci)					\
-  do {									\
-    UWtype __s, __c;							\
-    __s = (a) + (b);							\
-    __c = __s < (a);							\
-    __s = __s + (ci);							\
-    (s) = __s;								\
-    (co) = __c + (__s < (ci));						\
-  } while (0)
-#endif
-
-/* Divides the four-limb value <n3,n2,n1,n0> by the two-limb divisor
-   <d1,d0>, given the two inverse limbs <di1,di0> (as produced by
-   invert_4by2 in this file).  Stores the two quotient limbs in <q1,q0>
-   and the two remainder limbs in <r1,r0>.
-
-   Typically used with r1, r0 same as n3, n2. Other types of overlap
-   between inputs and outputs are not supported. */
-#define udiv_qr_4by2(q1,q0, r1,r0, n3,n2,n1,n0, d1,d0, di1,di0)		\
-  do {									\
-    mp_limb_t _q3, _q2a, _q2, _q1, _q2c, _q1c, _q1d, _q0;		\
-    mp_limb_t _t1, _t0;							\
-    mp_limb_t _c, _mask;						\
-									\
-    umul_ppmm (_q3,_q2a, n3, di1);					\
-    umul_ppmm (_q2,_q1, n2, di1);					\
-    umul_ppmm (_q2c,_q1c, n3, di0);					\
-    add_sssaaaa (_q3,_q2,_q1, _q2,_q1, _q2c,_q1c);			\
-    umul_ppmm (_q1d,_q0, n2, di0);					\
-    add_sssaaaa (_q3,_q2,_q1, _q2,_q1, _q2a,_q1d);			\
-									\
-    add_ssaaaa (r1, r0, n3, n2, CNST_LIMB(0), CNST_LIMB(1));		\
-									\
-    /* [q3,q2,q1,q0] += [r1,r0,n1,n0], with <r1,r0> = <n3,n2> + 1 */	\
-    add_csaac (_c, _q0, _q0, n0, CNST_LIMB(0));				\
-    add_csaac (_c, _q1, _q1, n1, _c);					\
-    add_csaac (_c, _q2, _q2, r0, _c);					\
-    _q3 = _q3 + r1 + _c;						\
-									\
-    umul_ppmm (_t1,_t0, _q2, d0);					\
-    _t1 += _q2 * d1 + _q3 * d0;						\
-									\
-    sub_ddmmss (r1, r0, n1, n0, _t1, _t0);				\
-									\
-    _mask = -(mp_limb_t) (r1 >= _q1 & (r1 > _q1 | r0 >= _q0));  /* (r1,r0) >= (q1,q0) */  \
-    add_ssaaaa (r1, r0, r1, r0, d1 & _mask, d0 & _mask);		\
-    sub_ddmmss (_q3, _q2, _q3, _q2, CNST_LIMB(0), -_mask);		\
-									\
-    if (UNLIKELY (r1 >= d1))						\
-      {									\
-	if (r1 > d1 || r0 >= d0)					\
-	  {								\
-	    sub_ddmmss (r1, r0, r1, r0, d1, d0);			\
-	    add_ssaaaa (_q3, _q2, _q3, _q2, CNST_LIMB(0), CNST_LIMB(1));\
-	  }								\
-      }									\
-    (q1) = _q3;								\
-    (q0) = _q2;								\
-  } while (0)
-
-/* Computes the two-limb inverse of <d1, d0> that mpn_div_qr_2 passes on
-   to the 4/2 division loop, and stores it as di[1] = v1 (high limb),
-   di[0] = v0 (low limb).  Per the comment in the body, <1, v1, v0> is
-   (B^4 - 1) / <d1, d0>.  When SANITY_CHECK is nonzero the result is
-   verified with ASSERT_ALWAYS. */
-static void
-invert_4by2 (mp_ptr di, mp_limb_t d1, mp_limb_t d0)
-{
-  mp_limb_t v1, v0, p1, t1, t0, p0, mask;
-  /* Start from the single-limb inverse of d1, then adjust v1 for d0. */
-  invert_limb (v1, d1);
-  p1 = d1 * v1;
-  /* <1, v1> * d1 = <B-1, p1> */
-  p1 += d0;
-  if (p1 < d0)
-    {
-      v1--;
-      mask = -(mp_limb_t) (p1 >= d1);
-      p1 -= d1;
-      v1 += mask;
-      p1 -= mask & d1;
-    }
-  /* <1, v1> * d1 + d0 = <B-1, p1> */
-  umul_ppmm (t1, p0, d0, v1);
-  p1 += t1;
-  if (p1 < t1)
-    {
-      if (UNLIKELY (p1 >= d1))
-	{
-	  if (p1 > d1 || p0 >= d0)
-	    {
-	      sub_ddmmss (p1, p0, p1, p0, d1, d0);
-	      v1--;
-	    }
-	}
-      sub_ddmmss (p1, p0, p1, p0, d1, d0);
-      v1--;
-    }
-  /* Now v1 is the 3/2 inverse, <1, v1> * <d1, d0> = <B-1, p1, p0>,
-   * with <p1, p0> + <d1, d0> >= B^2.
-   *
-   * The 4/2 inverse is (B^4 - 1) / <d1, d0> = <1, v1, v0>. The
-   * partial remainder after <1, v1> is
-   *
-   * B^4 - 1 - B <1, v1> <d1, d0> = <B-1, B-1, B-1, B-1> - <B-1, p1, p0, 0>
-   *                              = <~p1, ~p0, B-1>
-   */
-  /* Only the quotient limb v0 is kept; the remainder <t1, t0> is not
-     used afterwards. */
-  udiv_qr_3by2 (v0, t1, t0, ~p1, ~p0, MP_LIMB_T_MAX, d1, d0, v1);
-  di[0] = v0;
-  di[1] = v1;
-
-#if SANITY_CHECK
-  {
-    /* Multiply <d1,d0> by <v1,v0>, then add the divisor at limb offset 2
-       and again at offset 0, asserting the expected limbs and carries. */
-    mp_limb_t tp[4];
-    mp_limb_t dp[2];
-    dp[0] = d0;
-    dp[1] = d1;
-    mpn_mul_n (tp, dp, di, 2);
-    ASSERT_ALWAYS (mpn_add_n (tp+2, tp+2, dp, 2) == 0);
-    ASSERT_ALWAYS (tp[2] == MP_LIMB_T_MAX);
-    ASSERT_ALWAYS (tp[3] == MP_LIMB_T_MAX);
-    ASSERT_ALWAYS (mpn_add_n (tp, tp, dp, 2) == 1);
-  }
-#endif
-}
-
-/* 4/2 loop, for normalized divisor.  Divides {np, nn} by <d1, d0> using
-   the two inverse limbs <di1, di0>, producing two quotient limbs per
-   iteration and finishing with one 3/2 step when a single limb is left.
-   Writes the nn-2 low quotient limbs at {qp, nn-2} and the remainder at
-   rp[1], rp[0]; returns the high quotient limb (0 or 1).  Requires
-   nn >= 2 and d1 with its high bit set (both asserted). */
-static mp_limb_t
-mpn_div_qr_2n_pi2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
-		   mp_limb_t d1, mp_limb_t d0, mp_limb_t di1, mp_limb_t di0)
-{
-  mp_limb_t qh;
-  mp_size_t i;
-  mp_limb_t r1, r0;
-
-  ASSERT (nn >= 2);
-  ASSERT (d1 & GMP_NUMB_HIGHBIT);
-
-  r1 = np[nn-1];
-  r0 = np[nn-2];
-
-  /* If the top two limbs are >= the divisor, subtract it once and
-     record a high quotient limb of 1. */
-  qh = 0;
-  if (r1 >= d1 && (r1 > d1 || r0 >= d0))
-    {
-#if GMP_NAIL_BITS == 0
-      sub_ddmmss (r1, r0, r1, r0, d1, d0);
-#else
-      r0 = r0 - d0;
-      r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
-      r0 &= GMP_NUMB_MASK;
-#endif
-      qh = 1;
-    }
-
-  /* <r1, r0> is the running remainder; each pass brings in two more
-     dividend limbs and stores two quotient limbs. */
-  for (i = nn - 2; i >= 2; i -= 2)
-    {
-      mp_limb_t n1, n0, q1, q0;
-      n1 = np[i-1];
-      n0 = np[i-2];
-      udiv_qr_4by2 (q1, q0, r1, r0, r1, r0, n1, n0, d1, d0, di1, di0);
-      qp[i-1] = q1;
-      qp[i-2] = q0;
-    }
-
-  /* One dividend limb left over: finish with a 3/2 step that uses only
-     the high inverse limb. */
-  if (i > 0)
-    {
-      mp_limb_t q;
-      udiv_qr_3by2 (q, r1, r0, r1, r0, np[0], d1, d0, di1);
-      qp[0] = q;
-    }
-  rp[1] = r1;
-  rp[0] = r0;
-
-  return qh;
-}
-
-
-/* Divide num {np,nn} by den {dp,2} and write the nn-2 least
-   significant quotient limbs at qp and the 2 long remainder at rp.
-   Return the most significant limb of the quotient.
-
-   Preconditions:
-   1. qp must either not overlap with the input operands at all, or
-      qp >= np + 2 must hold true.  (This means that it's possible to put
-      the quotient in the high part of {np,nn}, right above the remainder.)
-   2. nn >= 2.
-   3. dp[1] > 0 (asserted).
-
-   A divisor whose high limb has its top bit set goes to the normalized
-   pi1 or pi2 loop, chosen by DIV_QR_2_PI2_THRESHOLD.  Any other divisor
-   is shifted left by the leading-zero count of its high limb and
-   passed, together with that shift, to mpn_div_qr_2u_pi1.  */
-
-mp_limb_t
-mpn_div_qr_2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
-	      mp_srcptr dp)
-{
-  mp_limb_t d1;
-  mp_limb_t d0;
-  gmp_pi1_t dinv;	/* 3/2 inverse, used by both pi1 paths */
-
-  ASSERT (nn >= 2);
-  ASSERT (! MPN_OVERLAP_P (qp, nn-2, np, nn) || qp >= np + 2);
-  ASSERT_MPN (np, nn);
-  ASSERT_MPN (dp, 2);
-
-  d1 = dp[1]; d0 = dp[0];
-
-  ASSERT (d1 > 0);
-
-  if (UNLIKELY (d1 & GMP_NUMB_HIGHBIT))
-    {
-      /* Divisor already normalized. */
-      if (BELOW_THRESHOLD (nn, DIV_QR_2_PI2_THRESHOLD))
-	{
-	  invert_pi1 (dinv, d1, d0);
-	  return mpn_div_qr_2n_pi1 (qp, rp, np, nn, d1, d0, dinv.inv32);
-	}
-      else
-	{
-	  mp_limb_t di[2];
-	  invert_4by2 (di, d1, d0);
-	  return mpn_div_qr_2n_pi2 (qp, rp, np, nn, d1, d0, di[1], di[0]);
-	}
-    }
-  else
-    {
-      /* Normalize the divisor here; the callee is given the shifted
-	 <d1, d0> together with the shift count.  d1 > 0 with its top
-	 bit clear, so count_leading_zeros is applied to a nonzero limb. */
-      int shift;
-      count_leading_zeros (shift, d1);
-      d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
-      d0 <<= shift;
-      invert_pi1 (dinv, d1, d0);
-      return mpn_div_qr_2u_pi1 (qp, rp, np, nn, d1, d0, shift, dinv.inv32);
-    }
-}
diff --git a/gmp/mpn/generic/div_qr_2n_pi1.c b/gmp/mpn/generic/div_qr_2n_pi1.c
deleted file mode 100644
index da500e2170..0000000000
--- a/gmp/mpn/generic/div_qr_2n_pi1.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/* mpn_div_qr_2n_pi1
-
-   Contributed to the GNU project by Torbjorn Granlund and Niels Möller
-
-   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-
-Copyright 1993-1996, 1999-2002, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-/* 3/2 loop, for normalized divisor.  Divides {np, nn} by <d1, d0> one
-   limb at a time using the inverse limb di.  The nn-2 low quotient limbs
-   go to {qp, nn-2}, the remainder to rp[1], rp[0], and the high quotient
-   limb (0 or 1) is the return value. */
-mp_limb_t
-mpn_div_qr_2n_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
-		   mp_limb_t d1, mp_limb_t d0, mp_limb_t di)
-{
-  mp_limb_t rem_hi, rem_lo;
-  mp_limb_t top_q;
-  mp_size_t k;
-
-  ASSERT (nn >= 2);
-  ASSERT (d1 & GMP_NUMB_HIGHBIT);
-
-  /* Seed the running remainder with the two most significant limbs. */
-  rem_hi = np[nn - 1];
-  rem_lo = np[nn - 2];
-
-  /* <rem_hi, rem_lo> >= <d1, d0>: subtract the divisor once. */
-  top_q = 0;
-  if (rem_hi > d1 || (rem_hi == d1 && rem_lo >= d0))
-    {
-#if GMP_NAIL_BITS == 0
-      sub_ddmmss (rem_hi, rem_lo, rem_hi, rem_lo, d1, d0);
-#else
-      rem_lo = rem_lo - d0;
-      rem_hi = rem_hi - d1 - (rem_lo >> GMP_LIMB_BITS - 1);
-      rem_lo &= GMP_NUMB_MASK;
-#endif
-      top_q = 1;
-    }
-
-  /* k counts the dividend limbs still to bring in; each one is read
-     before the quotient limb at the same index is stored. */
-  for (k = nn - 2; k > 0; k--)
-    {
-      mp_limb_t next_limb, quot;
-
-      next_limb = np[k - 1];
-      udiv_qr_3by2 (quot, rem_hi, rem_lo, rem_hi, rem_lo, next_limb, d1, d0, di);
-      qp[k - 1] = quot;
-    }
-
-  rp[1] = rem_hi;
-  rp[0] = rem_lo;
-
-  return top_q;
-}
diff --git a/gmp/mpn/generic/div_qr_2u_pi1.c b/gmp/mpn/generic/div_qr_2u_pi1.c
deleted file mode 100644
index 0b9ddf5753..0000000000
--- a/gmp/mpn/generic/div_qr_2u_pi1.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/* mpn_div_qr_2u_pi1
-
-   Contributed to the GNU project by Niels Möller
-
-   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-/* 3/2 loop, for unnormalized divisor. Caller must pass shifted d1 and
-   d0, while {np,nn} is shifted on the fly. */
-mp_limb_t
-mpn_div_qr_2u_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
-		   mp_limb_t d1, mp_limb_t d0, int shift, mp_limb_t di)
-{
-  mp_limb_t qh;
-  mp_limb_t r2, r1, r0;
-  mp_size_t i;
-
-  ASSERT (nn >= 2);
-  ASSERT (d1 & GMP_NUMB_HIGHBIT);
-  ASSERT (shift > 0);
-
-  r2 = np[nn-1] >> (GMP_LIMB_BITS - shift);
-  r1 = (np[nn-1] << shift) | (np[nn-2] >> (GMP_LIMB_BITS - shift));
-  r0 = np[nn-2] << shift;
-
-  udiv_qr_3by2 (qh, r2, r1, r2, r1, r0, d1, d0, di);
-
-  for (i = nn - 2 - 1; i >= 0; i--)
-    {
-      mp_limb_t q;
-      r0 = np[i];
-      r1 |= r0 >> (GMP_LIMB_BITS - shift);
-      r0 <<= shift;
-      udiv_qr_3by2 (q, r2, r1, r2, r1, r0, d1, d0, di);
-      qp[i] = q;
-    }
-
-  rp[0] = (r1 >> shift) | (r2 << (GMP_LIMB_BITS - shift));
-  rp[1] = r2 >> shift;
-
-  return qh;
-}
diff --git a/gmp/mpn/generic/dive_1.c b/gmp/mpn/generic/dive_1.c
index 1c0a4e894d..27df57b80e 100644
--- a/gmp/mpn/generic/dive_1.c
+++ b/gmp/mpn/generic/dive_1.c
@@ -4,33 +4,22 @@
    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
    FUTURE GNU MP RELEASES.
 
-Copyright 2000-2003, 2005, 2013 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -41,7 +30,7 @@ see https://www.gnu.org/licenses/.  */
 /* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
    q will only be correct if d divides a exactly.
 
-   A separate loop is used for shift==0 because n<<GMP_LIMB_BITS doesn't
+   A separate loop is used for shift==0 because n<<BITS_PER_MP_LIMB doesn't
    give zero on all CPUs (for instance it doesn't on the x86s).  This
    separate loop might run faster too, helping odd divisors.
 
@@ -61,7 +50,7 @@ see https://www.gnu.org/licenses/.  */
    faster on some CPUs and would mean just the shift==0 style loop would be
    needed.
 
-   If n<<GMP_LIMB_BITS gives zero on a particular CPU then the separate
+   If n<<BITS_PER_MP_LIMB gives zero on a particular CPU then the separate
    shift==0 loop is unnecessary, and could be eliminated if there's no great
    speed difference.
 
@@ -87,6 +76,14 @@ mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
   ASSERT_MPN (src, size);
   ASSERT_LIMB (divisor);
 
+  s = src[0];
+
+  if (size == 1)
+    {
+      dst[0] = s / divisor;
+      return;
+    }
+
   if ((divisor & 1) == 0)
     {
       count_trailing_zeros (shift, divisor);
@@ -101,39 +98,40 @@ mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
   if (shift != 0)
     {
       c = 0;
+      i = 0;
+      size--;
 
-      s = src[0];
-
-      for (i = 1; i < size; i++)
+      do
 	{
-	  s_next = src[i];
+	  s_next = src[i+1];
 	  ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
 	  s = s_next;
 
 	  SUBC_LIMB (c, l, ls, c);
 
 	  l = (l * inverse) & GMP_NUMB_MASK;
-	  dst[i - 1] = l;
+	  dst[i] = l;
 
 	  umul_ppmm (h, dummy, l, divisor);
 	  c += h;
+
+	  i++;
 	}
       while (i < size);
 
       ls = s >> shift;
       l = ls - c;
       l = (l * inverse) & GMP_NUMB_MASK;
-      dst[size - 1] = l;
+      dst[i] = l;
     }
   else
     {
-      s = src[0];
-
       l = (s * inverse) & GMP_NUMB_MASK;
       dst[0] = l;
+      i = 1;
       c = 0;
 
-      for (i = 1; i < size; i++)
+      do
 	{
 	  umul_ppmm (h, dummy, l, divisor);
 	  c += h;
@@ -143,6 +141,8 @@ mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
 
 	  l = (l * inverse) & GMP_NUMB_MASK;
 	  dst[i] = l;
+	  i++;
 	}
+      while (i < size);
     }
 }
diff --git a/gmp/mpn/generic/diveby3.c b/gmp/mpn/generic/diveby3.c
index 2ffd9fe777..6293f65a89 100644
--- a/gmp/mpn/generic/diveby3.c
+++ b/gmp/mpn/generic/diveby3.c
@@ -1,32 +1,21 @@
 /* mpn_divexact_by3c -- mpn exact division by 3.
 
-Copyright 2000-2003, 2008 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/mpn/generic/divexact.c b/gmp/mpn/generic/divexact.c
index 47a47e3d80..a0e439cbee 100644
--- a/gmp/mpn/generic/divexact.c
+++ b/gmp/mpn/generic/divexact.c
@@ -4,104 +4,28 @@
 
    Contributed to the GNU project by Torbjorn Granlund.
 
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
 
-Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+Copyright 2006, 2007 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#if 1
-void
-mpn_divexact (mp_ptr qp,
-	      mp_srcptr np, mp_size_t nn,
-	      mp_srcptr dp, mp_size_t dn)
-{
-  unsigned shift;
-  mp_size_t qn;
-  mp_ptr tp;
-  TMP_DECL;
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-  ASSERT (dn > 0);
-  ASSERT (nn >= dn);
-  ASSERT (dp[dn-1] > 0);
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-  while (dp[0] == 0)
-    {
-      ASSERT (np[0] == 0);
-      dp++;
-      np++;
-      dn--;
-      nn--;
-    }
-
-  if (dn == 1)
-    {
-      MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);
-      return;
-    }
-
-  TMP_MARK;
-
-  qn = nn + 1 - dn;
-  count_trailing_zeros (shift, dp[0]);
-
-  if (shift > 0)
-    {
-      mp_ptr wp;
-      mp_size_t ss;
-      ss = (dn > qn) ? qn + 1 : dn;
-
-      tp = TMP_ALLOC_LIMBS (ss);
-      mpn_rshift (tp, dp, ss, shift);
-      dp = tp;
-
-      /* Since we have excluded dn == 1, we have nn > qn, and we need
-	 to shift one limb beyond qn. */
-      wp = TMP_ALLOC_LIMBS (qn + 1);
-      mpn_rshift (wp, np, qn + 1, shift);
-      np = wp;
-    }
-
-  if (dn > qn)
-    dn = qn;
-
-  tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
-  mpn_bdiv_q (qp, np, qn, dp, dn, tp);
-  TMP_FREE;
-}
-
-#else
 
 /* We use the Jebelean's bidirectional exact division algorithm.  This is
    somewhat naively implemented, with equal quotient parts done by 2-adic
@@ -120,8 +44,17 @@ mpn_divexact (mp_ptr qp,
    * It makes the msb part 1 or 2 limbs larger than the lsb part, in spite of
      that the latter is faster.  We should at least reverse this, but perhaps
      we should make the lsb part considerably larger.  (How do we tune this?)
+
+   Perhaps we could somehow use 2-adic division for both parts, not as now
+   truncating division for the upper part and 2-adic for the lower part.
 */
 
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
 mp_size_t
 mpn_divexact_itch (mp_size_t nn, mp_size_t dn)
 {
@@ -143,8 +76,7 @@ mpn_divexact (mp_ptr qp,
   int cnt;
   mp_ptr xdp;
   mp_limb_t di;
-  mp_limb_t cy;
-  gmp_pi1_t dinv;
+  mp_limb_t dip[2], xp[2], cy;
   TMP_DECL;
 
   TMP_MARK;
@@ -158,7 +90,7 @@ mpn_divexact (mp_ptr qp,
       MPN_COPY (tp, np, qn);
       binvert_limb (di, dp[0]);  di = -di;
       dn = MIN (dn, qn);
-      mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+      mpn_sb_bdiv_q (qp, tp, qn, dp, dn, di);
       TMP_FREE;
       return;
     }
@@ -175,14 +107,14 @@ mpn_divexact (mp_ptr qp,
 	  MPN_COPY (tp, np, qn);
 	  binvert_limb (di, dp[0]);  di = -di;
 	  dn = MIN (dn, qn);
-	  mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+	  mpn_sb_bdiv_q (qp, tp, qn, dp, dn, di);
 	}
       else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
 	{
 	  tp = scratch;
 	  MPN_COPY (tp, np, qn);
 	  binvert_limb (di, dp[0]);  di = -di;
-	  mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+	  mpn_dc_bdiv_q (qp, tp, qn, dp, dn, di);
 	}
       else
 	{
@@ -248,14 +180,23 @@ mpn_divexact (mp_ptr qp,
       MPN_COPY (tp, np + nn - nn1, nn1);
     }
 
-  invert_pi1 (dinv, xdp[qn1 - 1], xdp[qn1 - 2]);
   if (BELOW_THRESHOLD (qn1, DC_DIVAPPR_Q_THRESHOLD))
     {
-      qp[qn0 - 1 + nn1 - qn1] = mpn_sbpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dinv.inv32);
+      /* Compute divisor inverse.  */
+      cy = mpn_add_1 (xp, xdp + qn1 - 2, 2, 1);
+      if (cy != 0)
+	dip[0] = dip[1] = 0;
+      else
+	{
+	  mp_limb_t scratch[10];	/* FIXME */
+	  mpn_invert (dip, xp, 2, scratch);
+	}
+
+      qp[qn0 - 1 + nn1 - qn1] = mpn_sb_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dip);
     }
   else if (BELOW_THRESHOLD (qn1, MU_DIVAPPR_Q_THRESHOLD))
     {
-      qp[qn0 - 1 + nn1 - qn1] = mpn_dcpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, &dinv);
+      qp[qn0 - 1 + nn1 - qn1] = mpn_dc_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1);
     }
   else
     {
@@ -274,12 +215,12 @@ mpn_divexact (mp_ptr qp,
   if (BELOW_THRESHOLD (qn0, DC_BDIV_Q_THRESHOLD))
     {
       MPN_COPY (tp, np, qn0);
-      mpn_sbpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
+      mpn_sb_bdiv_q (qp, tp, qn0, dp, qn0, di);
     }
   else if (BELOW_THRESHOLD (qn0, MU_BDIV_Q_THRESHOLD))
     {
       MPN_COPY (tp, np, qn0);
-      mpn_dcpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
+      mpn_dc_bdiv_q (qp, tp, qn0, dp, qn0, di);
     }
   else
     {
@@ -291,4 +232,3 @@ mpn_divexact (mp_ptr qp,
 
   TMP_FREE;
 }
-#endif
diff --git a/gmp/mpn/generic/divis.c b/gmp/mpn/generic/divis.c
index 9e162e60d2..b05ecd8a78 100644
--- a/gmp/mpn/generic/divis.c
+++ b/gmp/mpn/generic/divis.c
@@ -4,80 +4,86 @@
    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
    FUTURE GNU MP RELEASES.
 
-Copyright 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2005 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
 
 
-/* Determine whether A={ap,an} is divisible by D={dp,dn}.  Must have both
-   operands normalized, meaning high limbs non-zero, except that an==0 is
+/* Determine whether {ap,asize} is divisible by {dp,dsize}.  Must have both
+   operands normalized, meaning high limbs non-zero, except that asize==0 is
    allowed.
 
-   There usually won't be many low zero bits on D, but the checks for this
+   There usually won't be many low zero bits on d, but the checks for this
    are fast and might pick up a few operand combinations, in particular they
-   might reduce D to fit the single-limb mod_1/modexact_1 code.
+   might reduce d to fit the single-limb mod_1/modexact_1 code.
 
    Future:
 
+   This is currently not much faster than the user doing an mpz_tdiv_r
+   and testing for a zero remainder, but hopefully it can be improved.
+
+   mpn_bdivmod is one possibility, but it only trades udiv_qrnnd's for
+   multiplies, it won't save crossproducts the way it can in mpz_divexact.
+   Definitely worthwhile on small operands for most processors, but a
+   sub-quadratic version will be wanted before it can be used on all sizes.
+
    Getting the remainder limb by limb would make an early exit possible on
    finding a non-zero.  This would probably have to be bdivmod style so
    there's no addback, but it would need a multi-precision inverse and so
    might be slower than the plain method (on small sizes at least).
 
-   When D must be normalized (shifted to low bit set), it's possible to
-   suppress the bit-shifting of A down, as long as it's already been checked
-   that A has at least as many trailing zero bits as D.  */
+   When d must be normalized (shifted to high bit set), it's possible to
+   just append a low zero limb to "a" rather than bit-shifting as
+   mpn_tdiv_qr does internally, so long as it's already been checked that a
+   has at least as many trailing zeros bits as d.  Or equivalently, pass
+   qxn==1 to mpn_tdiv_qr, if/when it accepts that.
+
+   When called from mpz_congruent_p, {ap,asize} is a temporary which can be
+   destroyed.  Maybe it'd be possible to get into mpn_tdiv_qr at a lower
+   level to save copying it, or maybe that function could accept rp==ap.
+
+   Could use __attribute__ ((regparm (2))) on i386, so the parameters
+   wouldn't need extra stack when called from mpz_divisible_p, but a
+   pre-release gcc 3 didn't generate particularly good register juggling in
+   that case, so this isn't done for now.  */
 
 int
-mpn_divisible_p (mp_srcptr ap, mp_size_t an,
-		 mp_srcptr dp, mp_size_t dn)
+mpn_divisible_p (mp_srcptr ap, mp_size_t asize,
+		 mp_srcptr dp, mp_size_t dsize)
 {
   mp_limb_t  alow, dlow, dmask;
-  mp_ptr     qp, rp, tp;
+  mp_ptr     qp, rp;
   mp_size_t  i;
-  mp_limb_t di;
-  unsigned  twos;
   TMP_DECL;
 
-  ASSERT (an >= 0);
-  ASSERT (an == 0 || ap[an-1] != 0);
-  ASSERT (dn >= 1);
-  ASSERT (dp[dn-1] != 0);
-  ASSERT_MPN (ap, an);
-  ASSERT_MPN (dp, dn);
+  ASSERT (asize >= 0);
+  ASSERT (asize == 0 || ap[asize-1] != 0);
+  ASSERT (dsize >= 1);
+  ASSERT (dp[dsize-1] != 0);
+  ASSERT_MPN (ap, asize);
+  ASSERT_MPN (dp, dsize);
 
   /* When a<d only a==0 is divisible.
-     Notice this test covers all cases of an==0. */
-  if (an < dn)
-    return (an == 0);
+     Notice this test covers all cases of asize==0. */
+  if (asize < dsize)
+    return (asize == 0);
 
   /* Strip low zero limbs from d, requiring a==0 on those. */
   for (;;)
@@ -91,9 +97,9 @@ mpn_divisible_p (mp_srcptr ap, mp_size_t an,
       if (alow != 0)
 	return 0;  /* a has fewer low zero limbs than d, so not divisible */
 
-      /* a!=0 and d!=0 so won't get to n==0 */
-      an--; ASSERT (an >= 1);
-      dn--; ASSERT (dn >= 1);
+      /* a!=0 and d!=0 so won't get to size==0 */
+      asize--; ASSERT (asize >= 1);
+      dsize--; ASSERT (dsize >= 1);
       ap++;
       dp++;
     }
@@ -103,88 +109,41 @@ mpn_divisible_p (mp_srcptr ap, mp_size_t an,
   if ((alow & dmask) != 0)
     return 0;
 
-  if (dn == 1)
+  if (dsize == 1)
     {
-      if (ABOVE_THRESHOLD (an, BMOD_1_TO_MOD_1_THRESHOLD))
-	return mpn_mod_1 (ap, an, dlow) == 0;
+      if (BELOW_THRESHOLD (asize, MODEXACT_1_ODD_THRESHOLD))
+	return mpn_mod_1 (ap, asize, dlow) == 0;
 
-      count_trailing_zeros (twos, dlow);
-      dlow >>= twos;
-      return mpn_modexact_1_odd (ap, an, dlow) == 0;
+      if ((dlow & 1) == 0)
+	{
+	  unsigned  twos;
+	  count_trailing_zeros (twos, dlow);
+	  dlow >>= twos;
+	}
+      return mpn_modexact_1_odd (ap, asize, dlow) == 0;
     }
 
-  if (dn == 2)
+  if (dsize == 2)
     {
       mp_limb_t  dsecond = dp[1];
       if (dsecond <= dmask)
 	{
+	  unsigned  twos;
 	  count_trailing_zeros (twos, dlow);
 	  dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));
 	  ASSERT_LIMB (dlow);
-	  return MPN_MOD_OR_MODEXACT_1_ODD (ap, an, dlow) == 0;
+	  return MPN_MOD_OR_MODEXACT_1_ODD (ap, asize, dlow) == 0;
 	}
     }
 
-  /* Should we compute Q = A * D^(-1) mod B^k,
-                       R = A - Q * D  mod B^k
-     here, for some small values of k?  Then check if R = 0 (mod B^k).  */
-
-  /* We could also compute A' = A mod T and D' = D mod P, for some
-     P = 3 * 5 * 7 * 11 ..., and then check if any prime factor from P
-     dividing D' also divides A'.  */
-
   TMP_MARK;
 
-  rp = TMP_ALLOC_LIMBS (an + 1);
-  qp = TMP_ALLOC_LIMBS (an - dn + 1); /* FIXME: Could we avoid this? */
-
-  count_trailing_zeros (twos, dp[0]);
-
-  if (twos != 0)
-    {
-      tp = TMP_ALLOC_LIMBS (dn);
-      ASSERT_NOCARRY (mpn_rshift (tp, dp, dn, twos));
-      dp = tp;
+  rp = TMP_ALLOC_LIMBS (asize+1);
+  qp = rp + dsize;
 
-      ASSERT_NOCARRY (mpn_rshift (rp, ap, an, twos));
-    }
-  else
-    {
-      MPN_COPY (rp, ap, an);
-    }
-  if (rp[an - 1] >= dp[dn - 1])
-    {
-      rp[an] = 0;
-      an++;
-    }
-  else if (an == dn)
-    {
-      TMP_FREE;
-      return 0;
-    }
-
-  ASSERT (an > dn);		/* requirement of functions below */
-
-  if (BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD) ||
-      BELOW_THRESHOLD (an - dn, DC_BDIV_QR_THRESHOLD))
-    {
-      binvert_limb (di, dp[0]);
-      mpn_sbpi1_bdiv_qr (qp, rp, an, dp, dn, -di);
-      rp += an - dn;
-    }
-  else if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
-    {
-      binvert_limb (di, dp[0]);
-      mpn_dcpi1_bdiv_qr (qp, rp, an, dp, dn, -di);
-      rp += an - dn;
-    }
-  else
-    {
-      tp = TMP_ALLOC_LIMBS (mpn_mu_bdiv_qr_itch (an, dn));
-      mpn_mu_bdiv_qr (qp, rp, rp, an, dp, dn, tp);
-    }
+  mpn_tdiv_qr (qp, rp, (mp_size_t) 0, ap, asize, dp, dsize);
 
-  /* test for {rp,dn} zero or non-zero */
+  /* test for {rp,dsize} zero or non-zero */
   i = 0;
   do
     {
@@ -194,7 +153,7 @@ mpn_divisible_p (mp_srcptr ap, mp_size_t an,
 	  return 0;
 	}
     }
-  while (++i < dn);
+  while (++i < dsize);
 
   TMP_FREE;
   return 1;
diff --git a/gmp/mpn/generic/divrem.c b/gmp/mpn/generic/divrem.c
index f420992746..999ffdd347 100644
--- a/gmp/mpn/generic/divrem.c
+++ b/gmp/mpn/generic/divrem.c
@@ -1,33 +1,24 @@
 /* mpn_divrem -- Divide natural numbers, producing both remainder and
-   quotient.  This is now just a middle layer calling mpn_tdiv_qr.
+   quotient.  This is now just a middle layer for calling the new
+   internal mpn_tdiv_qr.
 
-Copyright 1993-1997, 1999-2002, 2005 Free Software Foundation, Inc.
+Copyright 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2005 Free
+Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -56,7 +47,7 @@ mpn_divrem (mp_ptr qp, mp_size_t qxn,
       TMP_DECL;
 
       TMP_MARK;
-      q2p = TMP_ALLOC_LIMBS (nn + qxn);
+      q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
 
       np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);
       qn = nn + qxn - 1;
@@ -81,11 +72,11 @@ mpn_divrem (mp_ptr qp, mp_size_t qxn,
       if (UNLIKELY (qxn != 0))
 	{
 	  mp_ptr n2p;
-	  n2p = TMP_ALLOC_LIMBS (nn + qxn);
+	  n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
 	  MPN_ZERO (n2p, qxn);
 	  MPN_COPY (n2p + qxn, np, nn);
-	  q2p = TMP_ALLOC_LIMBS (nn - dn + qxn + 1);
-	  rp = TMP_ALLOC_LIMBS (dn);
+	  q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);
+	  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
 	  mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);
 	  MPN_COPY (np, rp, dn);
 	  qn = nn - dn + qxn;
@@ -94,8 +85,8 @@ mpn_divrem (mp_ptr qp, mp_size_t qxn,
 	}
       else
 	{
-	  q2p = TMP_ALLOC_LIMBS (nn - dn + 1);
-	  rp = TMP_ALLOC_LIMBS (dn);
+	  q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);
+	  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
 	  mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);
 	  MPN_COPY (np, rp, dn);	/* overwrite np area with remainder */
 	  qn = nn - dn;
diff --git a/gmp/mpn/generic/divrem_1.c b/gmp/mpn/generic/divrem_1.c
index 9157b5735e..c416946294 100644
--- a/gmp/mpn/generic/divrem_1.c
+++ b/gmp/mpn/generic/divrem_1.c
@@ -1,33 +1,22 @@
 /* mpn_divrem_1 -- mpn by limb division.
 
-Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
+Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002, 2003 Free Software
 Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -167,7 +156,7 @@ mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
   else
     {
       /* Most significant bit of divisor == 0.  */
-      int cnt;
+      int norm;
 
       /* Skip a division if high < divisor (high quotient 0).  Testing here
 	 before normalizing will still skip as often as possible.  */
@@ -189,28 +178,28 @@ mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
 	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
 	goto plain;
 
-      count_leading_zeros (cnt, d);
-      d <<= cnt;
-      r <<= cnt;
+      count_leading_zeros (norm, d);
+      d <<= norm;
+      r <<= norm;
 
       if (UDIV_NEEDS_NORMALIZATION
 	  && BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
 	{
-	  mp_limb_t nshift;
 	  if (un != 0)
 	    {
 	      n1 = up[un - 1] << GMP_NAIL_BITS;
-	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
+	      r |= (n1 >> (GMP_LIMB_BITS - norm));
 	      for (i = un - 2; i >= 0; i--)
 		{
 		  n0 = up[i] << GMP_NAIL_BITS;
-		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
-		  udiv_qrnnd (*qp, r, r, nshift, d);
+		  udiv_qrnnd (*qp, r, r,
+			      (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
+			      d);
 		  r >>= GMP_NAIL_BITS;
 		  qp--;
 		  n1 = n0;
 		}
-	      udiv_qrnnd (*qp, r, r, n1 << cnt, d);
+	      udiv_qrnnd (*qp, r, r, n1 << norm, d);
 	      r >>= GMP_NAIL_BITS;
 	      qp--;
 	    }
@@ -220,26 +209,27 @@ mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
 	      r >>= GMP_NAIL_BITS;
 	      qp--;
 	    }
-	  return r >> cnt;
+	  return r >> norm;
 	}
       else
 	{
-	  mp_limb_t  dinv, nshift;
+	  mp_limb_t  dinv;
 	  invert_limb (dinv, d);
 	  if (un != 0)
 	    {
 	      n1 = up[un - 1] << GMP_NAIL_BITS;
-	      r |= (n1 >> (GMP_LIMB_BITS - cnt));
+	      r |= (n1 >> (GMP_LIMB_BITS - norm));
 	      for (i = un - 2; i >= 0; i--)
 		{
 		  n0 = up[i] << GMP_NAIL_BITS;
-		  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
-		  udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
+		  udiv_qrnnd_preinv (*qp, r, r,
+				     ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
+				     d, dinv);
 		  r >>= GMP_NAIL_BITS;
 		  qp--;
 		  n1 = n0;
 		}
-	      udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
+	      udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
 	      r >>= GMP_NAIL_BITS;
 	      qp--;
 	    }
@@ -249,7 +239,7 @@ mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
 	      r >>= GMP_NAIL_BITS;
 	      qp--;
 	    }
-	  return r >> cnt;
+	  return r >> norm;
 	}
     }
 }
diff --git a/gmp/mpn/generic/divrem_2.c b/gmp/mpn/generic/divrem_2.c
index 30d24bb102..ba761dc36c 100644
--- a/gmp/mpn/generic/divrem_2.c
+++ b/gmp/mpn/generic/divrem_2.c
@@ -1,119 +1,179 @@
 /* mpn_divrem_2 -- Divide natural numbers, producing both remainder and
    quotient.  The divisor is two limbs.
 
-   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+   RELEASE.
 
 
-Copyright 1993-1996, 1999-2002 Free Software Foundation, Inc.
+Copyright 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
 
 
-/* Divide num {np,nn} by den {dp,2} and write the nn-2 least significant
-   quotient limbs at qp and the 2 long remainder at np.  If qxn is non-zero,
-   generate that many fraction bits and append them after the other quotient
-   limbs.  Return the most significant limb of the quotient, this is always 0
-   or 1.
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+   meaning the quotient size where that should happen, the quotient size
+   being how many udiv divisions will be done.
+
+   The default is to use preinv always, CPUs where this doesn't suit have
+   tuned thresholds.  Note in particular that preinv should certainly be
+   used if that's the only division available (USE_PREINV_ALWAYS).  */
+
+#ifndef DIVREM_2_THRESHOLD
+#define DIVREM_2_THRESHOLD  0
+#endif
+
+
+/* Divide num (NP/NSIZE) by den (DP/2) and write
+   the NSIZE-2 least significant quotient limbs at QP
+   and the 2 long remainder at NP.  If QEXTRA_LIMBS is
+   non-zero, generate that many fraction bits and append them after the
+   other quotient limbs.
+   Return the most significant limb of the quotient, this is always 0 or 1.
 
    Preconditions:
+   0. NSIZE >= 2.
    1. The most significant bit of the divisor must be set.
-   2. qp must either not overlap with the input operands at all, or
-      qp >= np + 2 must hold true.  (This means that it's possible to put
-      the quotient in the high part of {np,nn}, right above the remainder.
-   3. nn >= 2, even if qxn is non-zero.  */
+   2. QP must either not overlap with the input operands at all, or
+      QP + 2 >= NP must hold true.  (This means that it's
+      possible to put the quotient in the high part of NUM, right after the
+      remainder in NUM.)
+   3. NSIZE >= 2, even if QEXTRA_LIMBS is non-zero.  */
 
 mp_limb_t
 mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
 	      mp_ptr np, mp_size_t nn,
 	      mp_srcptr dp)
 {
-  mp_limb_t most_significant_q_limb;
+  mp_limb_t most_significant_q_limb = 0;
   mp_size_t i;
-  mp_limb_t r1, r0, d1, d0;
-  gmp_pi1_t di;
+  mp_limb_t n1, n0, n2;
+  mp_limb_t d1, d0;
+  mp_limb_t d1inv;
+  int use_preinv;
 
   ASSERT (nn >= 2);
   ASSERT (qxn >= 0);
   ASSERT (dp[1] & GMP_NUMB_HIGHBIT);
-  ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp >= np+2);
+  ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp+2 >= np);
   ASSERT_MPN (np, nn);
   ASSERT_MPN (dp, 2);
 
   np += nn - 2;
   d1 = dp[1];
   d0 = dp[0];
-  r1 = np[1];
-  r0 = np[0];
+  n1 = np[1];
+  n0 = np[0];
 
-  most_significant_q_limb = 0;
-  if (r1 >= d1 && (r1 > d1 || r0 >= d0))
+  if (n1 >= d1 && (n1 > d1 || n0 >= d0))
     {
 #if GMP_NAIL_BITS == 0
-      sub_ddmmss (r1, r0, r1, r0, d1, d0);
+      sub_ddmmss (n1, n0, n1, n0, d1, d0);
 #else
-      r0 = r0 - d0;
-      r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
-      r0 &= GMP_NUMB_MASK;
+      n0 = n0 - d0;
+      n1 = n1 - d1 - (n0 >> GMP_LIMB_BITS - 1);
+      n0 &= GMP_NUMB_MASK;
 #endif
       most_significant_q_limb = 1;
     }
 
-  invert_pi1 (di, d1, d0);
+  use_preinv = ABOVE_THRESHOLD (qxn + nn - 2, DIVREM_2_THRESHOLD);
+  if (use_preinv)
+    invert_limb (d1inv, d1);
 
-  qp += qxn;
-
-  for (i = nn - 2 - 1; i >= 0; i--)
+  for (i = qxn + nn - 2 - 1; i >= 0; i--)
     {
-      mp_limb_t n0, q;
-      n0 = np[-1];
-      udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di.inv32);
-      np--;
-      qp[i] = q;
-    }
+      mp_limb_t q;
+      mp_limb_t r;
 
-  if (UNLIKELY (qxn != 0))
-    {
-      qp -= qxn;
-      for (i = qxn - 1; i >= 0; i--)
+      if (i >= qxn)
+	np--;
+      else
+	np[0] = 0;
+
+      if (n1 == d1)
+	{
+	  /* Q should be either 111..111 or 111..110.  Need special handling
+	     of this rare case as normal division would give overflow.  */
+	  q = GMP_NUMB_MASK;
+
+	  r = (n0 + d1) & GMP_NUMB_MASK;
+	  if (r < d1)	/* Carry in the addition? */
+	    {
+#if GMP_NAIL_BITS == 0
+	      add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
+#else
+	      n0 = np[0] + d0;
+	      n1 = (r - d0 + (n0 >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
+	      n0 &= GMP_NUMB_MASK;
+#endif
+	      qp[i] = q;
+	      continue;
+	    }
+	  n1 = d0 - (d0 != 0);
+	  n0 = -d0 & GMP_NUMB_MASK;
+	}
+      else
 	{
-	  mp_limb_t q;
-	  udiv_qr_3by2 (q, r1, r0, r1, r0, CNST_LIMB(0), d1, d0, di.inv32);
-	  qp[i] = q;
+	  if (use_preinv)
+	    udiv_qrnnd_preinv (q, r, n1, n0, d1, d1inv);
+	  else
+	    udiv_qrnnd (q, r, n1, n0 << GMP_NAIL_BITS, d1 << GMP_NAIL_BITS);
+	  r >>= GMP_NAIL_BITS;
+	  umul_ppmm (n1, n0, d0, q << GMP_NAIL_BITS);
+	  n0 >>= GMP_NAIL_BITS;
 	}
-    }
 
-  np[1] = r1;
-  np[0] = r0;
+      n2 = np[0];
+
+    q_test:
+      if (n1 > r || (n1 == r && n0 > n2))
+	{
+	  /* The estimated Q was too large.  */
+	  q--;
+
+#if GMP_NAIL_BITS == 0
+	  sub_ddmmss (n1, n0, n1, n0, 0, d0);
+#else
+	  n0 = n0 - d0;
+	  n1 = n1 - (n0 >> GMP_LIMB_BITS - 1);
+	  n0 &= GMP_NUMB_MASK;
+#endif
+	  r += d1;
+	  if (r >= d1)	/* If not carry, test Q again.  */
+	    goto q_test;
+	}
+
+      qp[i] = q;
+#if GMP_NAIL_BITS == 0
+      sub_ddmmss (n1, n0, r, n2, n1, n0);
+#else
+      n0 = n2 - n0;
+      n1 = r - n1 - (n0 >> GMP_LIMB_BITS - 1);
+      n0 &= GMP_NUMB_MASK;
+#endif
+    }
+  np[1] = n1;
+  np[0] = n0;
 
   return most_significant_q_limb;
 }
diff --git a/gmp/mpn/generic/dump.c b/gmp/mpn/generic/dump.c
index 3a73fe49e3..38309996cc 100644
--- a/gmp/mpn/generic/dump.c
+++ b/gmp/mpn/generic/dump.c
@@ -3,33 +3,22 @@
    FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
 
-Copyright 1996, 2000-2002, 2005 Free Software Foundation, Inc.
+Copyright 1996, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include <stdio.h>
 #include "gmp.h"
diff --git a/gmp/mpn/generic/fib2_ui.c b/gmp/mpn/generic/fib2_ui.c
index eb6e56e736..a39d538262 100644
--- a/gmp/mpn/generic/fib2_ui.c
+++ b/gmp/mpn/generic/fib2_ui.c
@@ -4,37 +4,28 @@
    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
    FUTURE GNU MP RELEASES.
 
-Copyright 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2005 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include <stdio.h>
 #include "gmp.h"
 #include "gmp-impl.h"
+#include "longlong.h"
+
 
 /* change this to "#define TRACE(x) x" for diagnostics */
 #define TRACE(x)
@@ -61,13 +52,20 @@ see https://www.gnu.org/licenses/.  */
    This property of F[4m+3] can be verified by induction on F[4m+3] =
    7*F[4m-1] - F[4m-5], that formula being a standard lucas sequence
    identity U[i+j] = U[i]*V[j] - U[i-j]*Q^j.
-*/
+
+   Enhancements:
+
+   If there was an mpn_addlshift, it'd be possible to eliminate the yp
+   temporary, using xp=F[k]^2, fp=F[k-1]^2, f1p=xp+fp, fp+=4*fp, fp-=f1p,
+   fp+=2*(-1)^n, etc.  */
 
 mp_size_t
 mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
 {
+  mp_ptr         xp, yp;
   mp_size_t      size;
   unsigned long  nfirst, mask;
+  TMP_DECL;
 
   TRACE (printf ("mpn_fib2_ui n=%lu\n", n));
 
@@ -87,15 +85,15 @@ mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
   if (mask != 1)
     {
       mp_size_t  alloc;
-      mp_ptr        xp;
-      TMP_DECL;
 
       TMP_MARK;
       alloc = MPN_FIB2_SIZE (n);
-      xp = TMP_ALLOC_LIMBS (alloc);
+      TMP_ALLOC_LIMBS_2 (xp,alloc, yp,alloc);
 
       do
 	{
+	  mp_limb_t  c;
+
 	  /* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
 	     n&mask upwards.
 
@@ -116,65 +114,45 @@ mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
 	  /* f1p[size-1] might be zero, but this occurs rarely, so it's not
 	     worth bothering checking for it */
 	  ASSERT (alloc >= 2*size);
-	  mpn_sqr (xp, fp,  size);
-	  mpn_sqr (fp, f1p, size);
+	  mpn_sqr_n (xp, fp,  size);
+	  mpn_sqr_n (yp, f1p, size);
 	  size *= 2;
 
 	  /* Shrink if possible.  Since fp was normalized there'll be at
 	     most one high zero on xp (and if there is then there's one on
 	     yp too).  */
-	  ASSERT (xp[size-1] != 0 || fp[size-1] == 0);
+	  ASSERT (xp[size-1] != 0 || yp[size-1] == 0);
 	  size -= (xp[size-1] == 0);
 	  ASSERT (xp[size-1] != 0);  /* only one xp high zero */
 
-	  /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2. */
-	  f1p[size] = mpn_add_n (f1p, xp, fp, size);
-
 	  /* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
 	     n&mask is the low bit of our implied k.  */
-#if HAVE_NATIVE_mpn_rsblsh2_n || HAVE_NATIVE_mpn_rsblsh_n
-#if HAVE_NATIVE_mpn_rsblsh2_n
-	  fp[size] = mpn_rsblsh2_n (fp, fp, xp, size);
-#else /* HAVE_NATIVE_mpn_rsblsh_n */
-	  fp[size] = mpn_rsblsh_n (fp, fp, xp, size, 2);
-#endif
-	  if ((n & mask) == 0)
-	    MPN_INCR_U(fp, size + 1, 2);	/* possible +2 */
-	  else
-	  {
-	    ASSERT (fp[0] >= 2);
-	    fp[0] -= 2;				/* possible -2 */
-	  }
-#else
-	  {
-	    mp_limb_t  c;
-
-	    c = mpn_lshift (xp, xp, size, 2);
-	    xp[0] |= (n & mask ? 0 : 2);	/* possible +2 */
-	    c -= mpn_sub_n (fp, xp, fp, size);
-	    ASSERT (n & mask ? fp[0] != 0 && fp[0] != 1 : 1);
-	    fp[0] -= (n & mask ? 2 : 0);	/* possible -2 */
-	    fp[size] = c;
-	  }
-#endif
+	  c = mpn_lshift (fp, xp, size, 2);
+	  fp[0] |= (n & mask ? 0 : 2);	 /* possible +2 */
+	  c -= mpn_sub_n (fp, fp, yp, size);
+	  ASSERT (n & (mask << 1) ? fp[0] != 0 && fp[0] != 1 : 1);
+	  fp[0] -= (n & mask ? 2 : 0);	 /* possible -2 */
 	  ASSERT (alloc >= size+1);
-	  size += (fp[size] != 0);
+	  xp[size] = 0;
+	  yp[size] = 0;
+	  fp[size] = c;
+	  size += (c != 0);
+
+	  /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2.
+	     F[2k-1]<F[2k+1] so no carry out of "size" limbs. */
+	  ASSERT_NOCARRY (mpn_add_n (f1p, xp, yp, size));
 
 	  /* now n&mask is the new bit of n being considered */
 	  mask >>= 1;
 
 	  /* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
 	     F[2k+1] and F[2k-1].  */
-	  if (n & mask)
-	    ASSERT_NOCARRY (mpn_sub_n (f1p, fp, f1p, size));
-	  else {
-	    ASSERT_NOCARRY (mpn_sub_n ( fp, fp, f1p, size));
-
-	    /* Can have a high zero after replacing F[2k+1] with F[2k].
-	       f1p will have a high zero if fp does. */
-	    ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
-	    size -= (fp[size-1] == 0);
-	  }
+	  ASSERT_NOCARRY (mpn_sub_n ((n & mask ? f1p : fp), fp, f1p, size));
+
+	  /* Can have a high zero after replacing F[2k+1] with F[2k].
+	     f1p will have a high zero if fp does. */
+	  ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
+	  size -= (fp[size-1] == 0);
 	}
       while (mask != 1);
 
diff --git a/gmp/mpn/generic/gcd.c b/gmp/mpn/generic/gcd.c
index b14e1ad888..542e0fe7b8 100644
--- a/gmp/mpn/generic/gcd.c
+++ b/gmp/mpn/generic/gcd.c
@@ -1,33 +1,22 @@
 /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
 
-Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
+2004, 2005, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -35,7 +24,7 @@ see https://www.gnu.org/licenses/.  */
 
 /* Uses the HGCD operation described in
 
-     N. Möller, On Schönhage's algorithm and subquadratic integer gcd
+     N. Möller, On Schönhage's algorithm and subquadratic integer gcd
      computation, Math. Comp. 77 (2008), 589-607.
 
   to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
@@ -62,76 +51,6 @@ mp_size_t p_table[P_TABLE_SIZE];
 #define CHOOSE_P(n) (2*(n) / 3)
 #endif
 
-struct gcd_ctx
-{
-  mp_ptr gp;
-  mp_size_t gn;
-};
-
-static void
-gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
-	  mp_srcptr qp, mp_size_t qn, int d)
-{
-  struct gcd_ctx *ctx = (struct gcd_ctx *) p;
-  MPN_COPY (ctx->gp, gp, gn);
-  ctx->gn = gn;
-}
-
-#if GMP_NAIL_BITS > 0
-/* Nail supports should be easy, replacing the sub_ddmmss with nails
- * logic. */
-#error Nails not supported.
-#endif
-
-/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
-   Both U and V must be odd. */
-static inline mp_size_t
-gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
-{
-  mp_limb_t u0, u1, v0, v1;
-  mp_size_t gn;
-
-  u0 = up[0];
-  u1 = up[1];
-  v0 = vp[0];
-  v1 = vp[1];
-
-  ASSERT (u0 & 1);
-  ASSERT (v0 & 1);
-
-  /* Check for u0 != v0 needed to ensure that argument to
-   * count_trailing_zeros is non-zero. */
-  while (u1 != v1 && u0 != v0)
-    {
-      unsigned long int r;
-      if (u1 > v1)
-	{
-	  sub_ddmmss (u1, u0, u1, u0, v1, v0);
-	  count_trailing_zeros (r, u0);
-	  u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
-	  u1 >>= r;
-	}
-      else  /* u1 < v1.  */
-	{
-	  sub_ddmmss (v1, v0, v1, v0, u1, u0);
-	  count_trailing_zeros (r, v0);
-	  v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
-	  v1 >>= r;
-	}
-    }
-
-  gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
-
-  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
-  if (u1 == v1 && u0 == v0)
-    return gn;
-
-  v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
-  gp[0] = mpn_gcd_1 (gp, gn, v0);
-
-  return 1;
-}
-
 mp_size_t
 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
 {
@@ -139,17 +58,13 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
   mp_size_t scratch;
   mp_size_t matrix_scratch;
 
-  struct gcd_ctx ctx;
+  mp_size_t gn;
   mp_ptr tp;
   TMP_DECL;
 
-  ASSERT (usize >= n);
-  ASSERT (n > 0);
-  ASSERT (vp[n-1] > 0);
-
   /* FIXME: Check for small sizes first, before setting up temporary
      storage etc. */
-  talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
+  talloc = MPN_GCD_LEHMER_N_ITCH(n);
 
   /* For initial division */
   scratch = usize - n + 1;
@@ -192,13 +107,11 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
       if (mpn_zero_p (up, n))
 	{
 	  MPN_COPY (gp, vp, n);
-	  ctx.gn = n;
-	  goto done;
+	  TMP_FREE;
+	  return n;
 	}
     }
 
-  ctx.gp = gp;
-
 #if TUNE_GCD_P
   while (CHOOSE_P (n) > 0)
 #else
@@ -221,90 +134,153 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
       else
 	{
 	  /* Temporary storage n */
-	  n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
+	  n = mpn_gcd_subdiv_step (gp, &gn, up, vp, n, tp);
 	  if (n == 0)
-	    goto done;
+	    {
+	      TMP_FREE;
+	      return gn;
+	    }
 	}
     }
 
-  while (n > 2)
-    {
-      struct hgcd_matrix1 M;
-      mp_limb_t uh, ul, vh, vl;
-      mp_limb_t mask;
+  gn = mpn_gcd_lehmer_n (gp, up, vp, n, tp);
+  TMP_FREE;
+  return gn;
+}
 
-      mask = up[n-1] | vp[n-1];
-      ASSERT (mask > 0);
+#ifdef TUNE_GCD_P
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include "speed.h"
 
-      if (mask & GMP_NUMB_HIGHBIT)
-	{
-	  uh = up[n-1]; ul = up[n-2];
-	  vh = vp[n-1]; vl = vp[n-2];
-	}
-      else
-	{
-	  int shift;
+static int
+compare_double(const void *ap, const void *bp)
+{
+  double a = * (const double *) ap;
+  double b = * (const double *) bp;
+
+  if (a < b)
+    return -1;
+  else if (a > b)
+    return 1;
+  else
+    return 0;
+}
 
-	  count_leading_zeros (shift, mask);
-	  uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
-	  ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
-	  vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
-	  vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
-	}
+static double
+median (double *v, size_t n)
+{
+  qsort(v, n, sizeof(*v), compare_double);
 
-      /* Try an mpn_hgcd2 step */
-      if (mpn_hgcd2 (uh, ul, vh, vl, &M))
-	{
-	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
-	  MP_PTR_SWAP (up, tp);
-	}
-      else
-	{
-	  /* mpn_hgcd2 has failed. Then either one of a or b is very
-	     small, or the difference is very small. Perform one
-	     subtraction followed by one division. */
+  return v[n/2];
+}
 
-	  /* Temporary storage n */
-	  n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
-	  if (n == 0)
-	    goto done;
-	}
-    }
+#define TIME(res, code) do {				\
+  double time_measurement[5];				\
+  unsigned time_i;					\
+							\
+  for (time_i = 0; time_i < 5; time_i++)		\
+    {							\
+      speed_starttime();				\
+      code;						\
+      time_measurement[time_i] = speed_endtime();	\
+    }							\
+  res = median(time_measurement, 5);			\
+} while (0)
+
+int
+main(int argc, char *argv)
+{
+  gmp_randstate_t rands;
+  mp_size_t n;
+  mp_ptr ap;
+  mp_ptr bp;
+  mp_ptr up;
+  mp_ptr vp;
+  mp_ptr gp;
+  mp_ptr tp;
+  TMP_DECL;
 
-  ASSERT(up[n-1] | vp[n-1]);
+  /* Unbuffered so if output is redirected to a file it isn't lost if the
+     program is killed part way through.  */
+  setbuf (stdout, NULL);
+  setbuf (stderr, NULL);
 
-  if (n == 1)
-    {
-      *gp = mpn_gcd_1(up, 1, vp[0]);
-      ctx.gn = 1;
-      goto done;
-    }
+  gmp_randinit_default (rands);
 
-  /* Due to the calling convention for mpn_gcd, at most one can be
-     even. */
+  TMP_MARK;
 
-  if (! (up[0] & 1))
-    MP_PTR_SWAP (up, vp);
+  ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  tp = TMP_ALLOC_LIMBS (MPN_GCD_LEHMER_N_ITCH (P_TABLE_SIZE));
 
-  ASSERT (up[0] & 1);
+  mpn_random (ap, P_TABLE_SIZE);
+  mpn_random (bp, P_TABLE_SIZE);
 
-  if (vp[0] == 0)
-    {
-      *gp = mpn_gcd_1 (up, 2, vp[1]);
-      ctx.gn = 1;
-      goto done;
-    }
-  else if (! (vp[0] & 1))
+  memset (p_table, 0, sizeof(p_table));
+
+  for (n = 100; n++; n < P_TABLE_SIZE)
     {
-      int r;
-      count_trailing_zeros (r, vp[0]);
-      vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r);
-      vp[1] >>= r;
-    }
+      mp_size_t p;
+      mp_size_t best_p;
+      double best_time;
+      double lehmer_time;
+
+      if (ap[n-1] == 0)
+	ap[n-1] = 1;
+
+      if (bp[n-1] == 0)
+	bp[n-1] = 1;
+
+      p_table[n] = 0;
+      TIME(lehmer_time, {
+	  MPN_COPY (up, ap, n);
+	  MPN_COPY (vp, bp, n);
+	  mpn_gcd_lehmer_n (gp, up, vp, n, tp);
+	});
 
-  ctx.gn = gcd_2(gp, up, vp);
+      best_time = lehmer_time;
+      best_p = 0;
 
-done:
+      for (p = n * 0.48; p < n * 0.77; p++)
+	{
+	  double t;
+
+	  p_table[n] = p;
+
+	  TIME(t, {
+	      MPN_COPY (up, ap, n);
+	      MPN_COPY (vp, bp, n);
+	      mpn_gcd (gp, up, n, vp, n);
+	    });
+
+	  if (t < best_time)
+	    {
+	      best_time = t;
+	      best_p = p;
+	    }
+	}
+      printf("%6d %6d %5.3g", n, best_p, (double) best_p / n);
+      if (best_p > 0)
+	{
+	  double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
+	  printf(" %5.3g%%", speedup);
+	  if (speedup < 1.0)
+	    {
+	      printf(" (ignored)");
+	      best_p = 0;
+	    }
+	}
+      printf("\n");
+
+      p_table[n] = best_p;
+    }
   TMP_FREE;
-  return ctx.gn;
+  gmp_randclear(rands);
+  return 0;
 }
+#endif /* TUNE_GCD_P */
diff --git a/gmp/mpn/generic/gcd_1.c b/gmp/mpn/generic/gcd_1.c
index f6dcb4a2eb..73be15134c 100644
--- a/gmp/mpn/generic/gcd_1.c
+++ b/gmp/mpn/generic/gcd_1.c
@@ -1,54 +1,26 @@
 /* mpn_gcd_1 -- mpn and limb greatest common divisor.
 
-Copyright 1994, 1996, 2000, 2001, 2009, 2012 Free Software Foundation, Inc.
+Copyright 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
 
-#ifndef GCD_1_METHOD
-#define GCD_1_METHOD 2
-#endif
-
-#define USE_ZEROTAB 0
-
-#if USE_ZEROTAB
-#define MAXSHIFT 4
-#define MASK ((1 << MAXSHIFT) - 1)
-static const unsigned char zerotab[1 << MAXSHIFT] =
-{
-#if MAXSHIFT > 4
-  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-#endif
-  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
-};
-#endif
 
 /* Does not work for U == 0 or V == 0.  It would be tough to make it work for
    V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t.
@@ -109,10 +81,6 @@ mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
       goto strip_u_maybe;
     }
 
-  ASSERT (ulimb & 1);
-  ASSERT (vlimb & 1);
-
-#if GCD_1_METHOD == 1
   while (ulimb != vlimb)
     {
       ASSERT (ulimb & 1);
@@ -141,58 +109,6 @@ mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
 	  while ((vlimb & 1) == 0);
 	}
     }
-#else
-# if GCD_1_METHOD  == 2
-
-  ulimb >>= 1;
-  vlimb >>= 1;
-
-  while (ulimb != vlimb)
-    {
-      int c;
-      mp_limb_t t;
-      mp_limb_t vgtu;
-
-      t = ulimb - vlimb;
-      vgtu = LIMB_HIGHBIT_TO_MASK (t);
-
-      /* v <-- min (u, v) */
-      vlimb += (vgtu & t);
-
-      /* u <-- |u - v| */
-      ulimb = (t ^ vgtu) - vgtu;
-
-#if USE_ZEROTAB
-      /* Number of trailing zeros is the same no matter if we look at
-       * t or ulimb, but using t gives more parallelism. */
-      c = zerotab[t & MASK];
-
-      while (UNLIKELY (c == MAXSHIFT))
-	{
-	  ulimb >>= MAXSHIFT;
-	  if (0)
-	  strip_u_maybe:
-	    vlimb >>= 1;
-
-	  c = zerotab[ulimb & MASK];
-	}
-#else
-      if (0)
-	{
-	strip_u_maybe:
-	  vlimb >>= 1;
-	  t = ulimb;
-	}
-      count_trailing_zeros (c, t);
-#endif
-      ulimb >>= (c + 1);
-    }
-
-  vlimb = (vlimb << 1) | 1;
-# else
-#  error Unknown GCD_1_METHOD
-# endif
-#endif
 
  done:
   return vlimb << zero_bits;
diff --git a/gmp/mpn/generic/gcd_lehmer.c b/gmp/mpn/generic/gcd_lehmer.c
new file mode 100644
index 0000000000..37fd3c590d
--- /dev/null
+++ b/gmp/mpn/generic/gcd_lehmer.c
@@ -0,0 +1,160 @@
+/* gcd_lehmer.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
+   Both U and V must be odd. */
+static inline mp_size_t
+gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
+{
+  mp_limb_t u0, u1, v0, v1;
+  mp_size_t gn;
+
+  u0 = up[0];
+  u1 = up[1];
+  v0 = vp[0];
+  v1 = vp[1];
+
+  ASSERT (u0 & 1);
+  ASSERT (v0 & 1);
+
+  /* Check for u0 != v0 needed to ensure that argument to
+   * count_trailing_zeros is non-zero. */
+  while (u1 != v1 && u0 != v0)
+    {
+      unsigned long int r;
+      if (u1 > v1)
+	{
+	  u1 -= v1 + (u0 < v0);
+	  u0 = (u0 - v0) & GMP_NUMB_MASK;
+	  count_trailing_zeros (r, u0);
+	  u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
+	  u1 >>= r;
+	}
+      else  /* u1 < v1.  */
+	{
+	  v1 -= u1 + (v0 < u0);
+	  v0 = (v0 - u0) & GMP_NUMB_MASK;
+	  count_trailing_zeros (r, v0);
+	  v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
+	  v1 >>= r;
+	}
+    }
+
+  gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
+
+  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
+  if (u1 == v1 && u0 == v0)
+    return gn;
+
+  v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
+  gp[0] = mpn_gcd_1 (gp, gn, v0);
+
+  return 1;
+}
+
+/* Temporary storage: n */
+mp_size_t
+mpn_gcd_lehmer_n (mp_ptr gp, mp_ptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
+{
+  /* Relax this requirement, and normalize at the start? Must disallow
+     A = B = 0, though. */
+  ASSERT(ap[n-1] > 0 || bp[n-1] > 0);
+
+  while (n > 2)
+    {
+      struct hgcd_matrix1 M;
+      mp_limb_t ah, al, bh, bl;
+      mp_limb_t mask;
+
+      mask = ap[n-1] | bp[n-1];
+      ASSERT (mask > 0);
+
+      if (mask & GMP_NUMB_HIGHBIT)
+	{
+	  ah = ap[n-1]; al = ap[n-2];
+	  bh = bp[n-1]; bl = bp[n-2];
+	}
+      else
+	{
+	  int shift;
+
+	  count_leading_zeros (shift, mask);
+	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+	}
+
+      /* Try an mpn_hgcd2 step */
+      if (mpn_hgcd2 (ah, al, bh, bl, &M))
+	{
+	  n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
+	  MP_PTR_SWAP (ap, tp);
+	}
+      else
+	{
+	  /* mpn_hgcd2 has failed. Then either one of a or b is very
+	     small, or the difference is very small. Perform one
+	     subtraction followed by one division. */
+	  mp_size_t gn;
+
+	  /* Temporary storage n */
+	  n = mpn_gcd_subdiv_step (gp, &gn, ap, bp, n, tp);
+	  if (n == 0)
+	    return gn;
+	}
+    }
+
+  if (n == 1)
+    {
+      *gp = mpn_gcd_1(ap, 1, bp[0]);
+      return 1;
+    }
+
+  /* Due to the calling convention for mpn_gcd, at most one can be
+     even. */
+
+  if (! (ap[0] & 1))
+    MP_PTR_SWAP (ap, bp);
+
+  ASSERT (ap[0] & 1);
+
+  if (bp[0] == 0)
+    {
+      *gp = mpn_gcd_1 (ap, 2, bp[1]);
+      return 1;
+    }
+  else if (! (bp[0] & 1))
+    {
+      int r;
+      count_trailing_zeros (r, bp[0]);
+      bp[0] = ((bp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (bp[0] >> r);
+      bp[1] >>= r;
+    }
+
+  return gcd_2(gp, ap, bp);
+}
diff --git a/gmp/mpn/generic/gcd_subdiv_step.c b/gmp/mpn/generic/gcd_subdiv_step.c
index 18634bec9f..47c0c26c86 100644
--- a/gmp/mpn/generic/gcd_subdiv_step.c
+++ b/gmp/mpn/generic/gcd_subdiv_step.c
@@ -4,35 +4,22 @@
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2003-2005, 2008, 2010, 2011 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-#include <stdlib.h>		/* for NULL */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -40,47 +27,17 @@ see https://www.gnu.org/licenses/.  */
 
 /* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or
    b is small, or the difference is small. Perform one subtraction
-   followed by one division. The normal case is to compute the reduced
-   a and b, and return the new size.
-
-   If s == 0 (used for gcd and gcdext), returns zero if the gcd is
-   found.
-
-   If s > 0, don't reduce to size <= s, and return zero if no
-   reduction is possible (if either a, b or |a-b| is of size <= s). */
-
-/* The hook function is called as
-
-     hook(ctx, gp, gn, qp, qn, d)
-
-   in the following cases:
-
-   + If A = B at the start, G is the gcd, Q is NULL, d = -1.
-
-   + If one input is zero at the start, G is the gcd, Q is NULL,
-     d = 0 if A = G and d = 1 if B = G.
-
-   Otherwise, if d = 0 we have just subtracted a multiple of A from B,
-   and if d = 1 we have subtracted a multiple of B from A.
-
-   + If A = B after subtraction, G is the gcd, Q is NULL.
-
-   + If we get a zero remainder after division, G is the gcd, Q is the
-     quotient.
-
-   + Otherwise, G is NULL, Q is the quotient (often 1).
-
- */
+   followed by one division. If the gcd is found, stores it in gp and
+   *gn, and returns zero. Otherwise, compute the reduced a and b, and
+   return the new size. */
 
+/* FIXME: Check when the smaller number is a single limb, and invoke
+ * mpn_gcd_1. */
 mp_size_t
-mpn_gcd_subdiv_step (mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t s,
-		     gcd_subdiv_step_hook *hook, void *ctx,
-		     mp_ptr tp)
+mpn_gcd_subdiv_step (mp_ptr gp, mp_size_t *gn,
+		     mp_ptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
 {
-  static const mp_limb_t one = CNST_LIMB(1);
-  mp_size_t an, bn, qn;
-
-  int swapped;
+  mp_size_t an, bn;
 
   ASSERT (n > 0);
   ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
@@ -89,117 +46,59 @@ mpn_gcd_subdiv_step (mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t s,
   MPN_NORMALIZE (ap, an);
   MPN_NORMALIZE (bp, bn);
 
-  swapped = 0;
-
-  /* Arrange so that a < b, subtract b -= a, and maintain
-     normalization. */
-  if (an == bn)
+  if (UNLIKELY (an == 0))
     {
-      int c;
-      MPN_CMP (c, ap, bp, an);
-      if (UNLIKELY (c == 0))
-	{
-	  /* For gcdext, return the smallest of the two cofactors, so
-	     pass d = -1. */
-	  if (s == 0)
-	    hook (ctx, ap, an, NULL, 0, -1);
-	  return 0;
-	}
-      else if (c > 0)
-	{
-	  MP_PTR_SWAP (ap, bp);
-	  swapped ^= 1;
-	}
-    }
-  else
-    {
-      if (an > bn)
-	{
-	  MPN_PTR_SWAP (ap, an, bp, bn);
-	  swapped ^= 1;
-	}
-    }
-  if (an <= s)
-    {
-      if (s == 0)
-	hook (ctx, bp, bn, NULL, 0, swapped ^ 1);
+    return_b:
+      MPN_COPY (gp, bp, bn);
+      *gn = bn;
       return 0;
     }
-
-  ASSERT_NOCARRY (mpn_sub (bp, bp, bn, ap, an));
-  MPN_NORMALIZE (bp, bn);
-  ASSERT (bn > 0);
-
-  if (bn <= s)
+  else if (UNLIKELY (bn == 0))
     {
-      /* Undo subtraction. */
-      mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
-      if (cy > 0)
-	bp[an] = cy;
+    return_a:
+      MPN_COPY (gp, ap, an);
+      *gn = an;
       return 0;
     }
 
-  /* Arrange so that a < b */
-  if (an == bn)
+  /* Arrange so that a > b, subtract a -= b, and maintain
+     normalization. */
+  if (an < bn)
+    MPN_PTR_SWAP (ap, an, bp, bn);
+  else if (an == bn)
     {
       int c;
       MPN_CMP (c, ap, bp, an);
       if (UNLIKELY (c == 0))
-	{
-	  if (s > 0)
-	    /* Just record subtraction and return */
-	    hook (ctx, NULL, 0, &one, 1, swapped);
-	  else
-	    /* Found gcd. */
-	    hook (ctx, bp, bn, NULL, 0, swapped);
-	  return 0;
-	}
-
-      hook (ctx, NULL, 0, &one, 1, swapped);
-
-      if (c > 0)
-	{
-	  MP_PTR_SWAP (ap, bp);
-	  swapped ^= 1;
-	}
+	goto return_a;
+      else if (c < 0)
+	MP_PTR_SWAP (ap, bp);
     }
-  else
-    {
-      hook (ctx, NULL, 0, &one, 1, swapped);
 
-      if (an > bn)
-	{
-	  MPN_PTR_SWAP (ap, an, bp, bn);
-	  swapped ^= 1;
-	}
+  ASSERT_NOCARRY (mpn_sub (ap, ap, an, bp, bn));
+  MPN_NORMALIZE (ap, an);
+  ASSERT (an > 0);
+
+  /* Arrange so that a > b, and divide a = q b + r */
+  /* FIXME: an < bn happens when we have cancellation. If that is the
+     common case, then we could reverse the roles of a and b to avoid
+     the swap. */
+  if (an < bn)
+    MPN_PTR_SWAP (ap, an, bp, bn);
+  else if (an == bn)
+    {
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      if (UNLIKELY (c == 0))
+	goto return_a;
+      else if (c < 0)
+	MP_PTR_SWAP (ap, bp);
     }
 
-  mpn_tdiv_qr (tp, bp, 0, bp, bn, ap, an);
-  qn = bn - an + 1;
-  bn = an;
-  MPN_NORMALIZE (bp, bn);
+  mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
 
-  if (UNLIKELY (bn <= s))
-    {
-      if (s == 0)
-	{
-	  hook (ctx, ap, an, tp, qn, swapped);
-	  return 0;
-	}
-
-      /* Quotient is one too large, so decrement it and add back A. */
-      if (bn > 0)
-	{
-	  mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
-	  if (cy)
-	    bp[an++] = cy;
-	}
-      else
-	MPN_COPY (bp, ap, an);
-
-      MPN_DECR_U (tp, qn, 1);
-    }
+  if (mpn_zero_p (ap, bn))
+    goto return_b;
 
-  hook (ctx, NULL, 0, tp, qn, swapped);
-  return an;
+  return bn;
 }
diff --git a/gmp/mpn/generic/gcdext.c b/gmp/mpn/generic/gcdext.c
index 1c4ff75aab..38487ae66d 100644
--- a/gmp/mpn/generic/gcdext.c
+++ b/gmp/mpn/generic/gcdext.c
@@ -1,33 +1,22 @@
 /* mpn_gcdext -- Extended Greatest Common Divisor.
 
-Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
-Inc.
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -96,10 +85,10 @@ hgcd_mul_matrix_vector (struct hgcd_matrix *M,
   return n;
 }
 
-#define COMPUTE_V_ITCH(n) (2*(n))
+#define COMPUTE_V_ITCH(n) (2*(n) + 1)
 
 /* Computes |v| = |(g - u a)| / b, where u may be positive or
-   negative, and v is of the opposite sign. max(a, b) is of size n, u and
+   negative, and v is of the opposite sign. a, b are of size n, u and
    v at most size n, and v must have space for n+1 limbs. */
 static mp_size_t
 compute_v (mp_ptr vp,
@@ -119,11 +108,9 @@ compute_v (mp_ptr vp,
 
   size = ABS (usize);
   ASSERT (size <= n);
-  ASSERT (up[size-1] > 0);
 
   an = n;
   MPN_NORMALIZE (ap, an);
-  ASSERT (gn <= an);
 
   if (an >= size)
     mpn_mul (tp, ap, an, up, size);
@@ -132,6 +119,8 @@ compute_v (mp_ptr vp,
 
   size += an;
 
+  ASSERT (gn <= size);
+
   if (usize > 0)
     {
       /* |v| = -v = (u a - g) / b */
@@ -142,11 +131,11 @@ compute_v (mp_ptr vp,
 	return 0;
     }
   else
-    { /* |v| = v = (g - u a) / b = (g + |u| a) / b. Since g <= a,
-	 (g + |u| a) always fits in (|usize| + an) limbs. */
-
-      ASSERT_NOCARRY (mpn_add (tp, tp, size, gp, gn));
-      size -= (tp[size - 1] == 0);
+    { /* usize < 0 */
+      /* |v| = v = (g - u a) / b = (g + |u| a) / b */
+      mp_limb_t cy = mpn_add (tp, tp, size, gp, gn);
+      if (cy)
+	tp[size++] = cy;
     }
 
   /* Now divide t / b. There must be no remainder */
@@ -157,9 +146,21 @@ compute_v (mp_ptr vp,
   vn = size + 1 - bn;
   ASSERT (vn <= n + 1);
 
-  mpn_divexact (vp, tp, size, bp, bn);
+  /* FIXME: Use divexact. Or do the entire calculation mod 2^{n *
+     GMP_NUMB_BITS}. */
+  mpn_tdiv_qr (vp, tp, 0, tp, size, bp, bn);
   vn -= (vp[vn-1] == 0);
 
+  /* Remainder must be zero */
+#if WANT_ASSERT
+  {
+    mp_size_t i;
+    for (i = 0; i < bn; i++)
+      {
+	ASSERT (tp[i] == 0);
+      }
+  }
+#endif
   return vn;
 }
 
@@ -180,8 +181,7 @@ compute_v (mp_ptr vp,
    For the lehmer call after the loop, Let T denote
    GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
    u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
-   for u, T+1 for v and 2T scratch space. In all, 7T + 3 is
-   sufficient for both operations.
+   + 1 for v and 2T + 1 scratch space. In all, 7T + 3 is sufficient.
 
 */
 
@@ -204,7 +204,6 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
   mp_size_t matrix_scratch;
   mp_size_t ualloc = n + 1;
 
-  struct gcdext_ctx ctx;
   mp_size_t un;
   mp_ptr u0;
   mp_ptr u1;
@@ -215,7 +214,6 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
 
   ASSERT (an >= n);
   ASSERT (n > 0);
-  ASSERT (bp[n-1] > 0);
 
   TMP_MARK;
 
@@ -284,10 +282,6 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
   u0 = tp; tp += ualloc;
   u1 = tp; tp += ualloc;
 
-  ctx.gp = gp;
-  ctx.up = up;
-  ctx.usize = usizep;
-
   {
     /* For the first hgcd call, there are no u updates, and it makes
        some sense to use a different choice for p. */
@@ -321,22 +315,21 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
 	/* mpn_hgcd has failed. Then either one of a or b is very
 	   small, or the difference is very small. Perform one
 	   subtraction followed by one division. */
-	u1[0] = 1;
+	mp_size_t gn;
+	mp_size_t updated_un = 1;
 
-	ctx.u0 = u0;
-	ctx.u1 = u1;
-	ctx.tp = tp + n; /* ualloc */
-	ctx.un = 1;
+	u1[0] = 1;
 
-	/* Temporary storage n */
-	n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+	/* Temporary storage 2n + 1 */
+	n = mpn_gcdext_subdiv_step (gp, &gn, up, usizep, ap, bp, n,
+				    u0, u1, &updated_un, tp, tp + n);
 	if (n == 0)
 	  {
 	    TMP_FREE;
-	    return ctx.gn;
+	    return gn;
 	  }
 
-	un = ctx.un;
+	un = updated_un;
 	ASSERT (un < ualloc);
       }
   }
@@ -378,45 +371,22 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
 	  /* mpn_hgcd has failed. Then either one of a or b is very
 	     small, or the difference is very small. Perform one
 	     subtraction followed by one division. */
-	  ctx.u0 = u0;
-	  ctx.u1 = u1;
-	  ctx.tp = tp + n; /* ualloc */
-	  ctx.un = un;
+	  mp_size_t gn;
+	  mp_size_t updated_un = un;
 
-	  /* Temporary storage n */
-	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+	  /* Temporary storage 2n + 1 */
+	  n = mpn_gcdext_subdiv_step (gp, &gn, up, usizep, ap, bp, n,
+				      u0, u1, &updated_un, tp, tp + n);
 	  if (n == 0)
 	    {
 	      TMP_FREE;
-	      return ctx.gn;
+	      return gn;
 	    }
 
-	  un = ctx.un;
+	  un = updated_un;
 	  ASSERT (un < ualloc);
 	}
     }
-  /* We have A = ... a + ... b
-	     B =  u0 a +  u1 b
-
-	     a = u1  A + ... B
-	     b = -u0 A + ... B
-
-     with bounds
-
-       |u0|, |u1| <= B / min(a, b)
-
-     We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
-     in which case the only reduction done so far is a = A - k B for
-     some k.
-
-     Compute g = u a + v b = (u u1 - v u0) A + (...) B
-     Here, u, v are bounded by
-
-       |u| <= b,
-       |v| <= a
-  */
-
-  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
 
   if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
     {
@@ -426,10 +396,7 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
       MPN_COPY (gp, ap, n);
 
       MPN_CMP (c, u0, u1, un);
-      /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
-	 this case we choose the cofactor + 1, corresponding to G = A
-	 - k B, rather than -1, corresponding to G = - A + (k+1) B. */
-      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
+      ASSERT (c != 0);
       if (c < 0)
 	{
 	  MPN_NORMALIZE (u0, un);
@@ -446,9 +413,10 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
       TMP_FREE;
       return n;
     }
-  else if (UNLIKELY (u0[0] == 0) && un == 1)
+  else if (mpn_zero_p (u0, un))
     {
       mp_size_t gn;
+      ASSERT (un == 1);
       ASSERT (u1[0] == 1);
 
       /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
@@ -459,6 +427,23 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
     }
   else
     {
+      /* We have A = ... a + ... b
+		 B =  u0 a +  u1 b
+
+		 a = u1  A + ... B
+		 b = -u0 A + ... B
+
+	 with bounds
+
+	   |u0|, |u1| <= B / min(a, b)
+
+	 Compute g = u a + v b = (u u1 - v u0) A + (...) B
+	 Here, u, v are bounded by
+
+	 |u| <= b,
+	 |v| <= a
+      */
+
       mp_size_t u0n;
       mp_size_t u1n;
       mp_size_t lehmer_un;
@@ -478,8 +463,6 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
 
       u0n = un;
       MPN_NORMALIZE (u0, u0n);
-      ASSERT (u0n > 0);
-
       if (lehmer_un == 0)
 	{
 	  /* u == 0  ==>  v = g / b == 1  ==> g = - u0 A + (...) B */
@@ -505,12 +488,25 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
 
       u1n = un;
       MPN_NORMALIZE (u1, u1n);
-      ASSERT (u1n > 0);
+
+      /* It's possible that u0 = 1, u1 = 0 */
+      if (u1n == 0)
+	{
+	  ASSERT (un == 1);
+	  ASSERT (u0[0] == 1);
+
+	  /* u1 == 0 ==> u u1 + v u0 = v */
+	  MPN_COPY (up, lehmer_vp, lehmer_vn);
+	  *usizep = negate ? lehmer_vn : - lehmer_vn;
+
+	  TMP_FREE;
+	  return gn;
+	}
 
       ASSERT (lehmer_un + u1n <= ualloc);
       ASSERT (lehmer_vn + u0n <= ualloc);
 
-      /* We may still have v == 0 */
+      /* Now u0, u1, u are non-zero. We may still have v == 0 */
 
       /* Compute u u0 */
       if (lehmer_un <= u1n)
diff --git a/gmp/mpn/generic/gcdext_1.c b/gmp/mpn/generic/gcdext_1.c
index ea46cceb72..f1dd9ee963 100644
--- a/gmp/mpn/generic/gcdext_1.c
+++ b/gmp/mpn/generic/gcdext_1.c
@@ -1,273 +1,27 @@
 /* mpn_gcdext -- Extended Greatest Common Divisor.
 
-Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc.
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
 
-#ifndef GCDEXT_1_USE_BINARY
-#define GCDEXT_1_USE_BINARY 0
-#endif
-
-#ifndef GCDEXT_1_BINARY_METHOD
-#define GCDEXT_1_BINARY_METHOD 2
-#endif
-
-#ifndef USE_ZEROTAB
-#define USE_ZEROTAB 1
-#endif
-
-#if GCDEXT_1_USE_BINARY
-
-#if USE_ZEROTAB
-static unsigned char zerotab[0x40] = {
-  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
-};
-#endif
-
-mp_limb_t
-mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
-	      mp_limb_t u, mp_limb_t v)
-{
-  /* Maintain
-
-     U = t1 u + t0 v
-     V = s1 u + s0 v
-
-     where U, V are the inputs (without any shared power of two),
-     and the matrix has determinant ± 2^{shift}.
-  */
-  mp_limb_t s0 = 1;
-  mp_limb_t t0 = 0;
-  mp_limb_t s1 = 0;
-  mp_limb_t t1 = 1;
-  mp_limb_t ug;
-  mp_limb_t vg;
-  mp_limb_t ugh;
-  mp_limb_t vgh;
-  unsigned zero_bits;
-  unsigned shift;
-  unsigned i;
-#if GCDEXT_1_BINARY_METHOD == 2
-  mp_limb_t det_sign;
-#endif
-
-  ASSERT (u > 0);
-  ASSERT (v > 0);
-
-  count_trailing_zeros (zero_bits, u | v);
-  u >>= zero_bits;
-  v >>= zero_bits;
-
-  if ((u & 1) == 0)
-    {
-      count_trailing_zeros (shift, u);
-      u >>= shift;
-      t1 <<= shift;
-    }
-  else if ((v & 1) == 0)
-    {
-      count_trailing_zeros (shift, v);
-      v >>= shift;
-      s0 <<= shift;
-    }
-  else
-    shift = 0;
-
-#if GCDEXT_1_BINARY_METHOD == 1
-  while (u != v)
-    {
-      unsigned count;
-      if (u > v)
-	{
-	  u -= v;
-#if USE_ZEROTAB
-	  count = zerotab [u & 0x3f];
-	  u >>= count;
-	  if (UNLIKELY (count == 6))
-	    {
-	      unsigned c;
-	      do
-		{
-		  c = zerotab[u & 0x3f];
-		  u >>= c;
-		  count += c;
-		}
-	      while (c == 6);
-	    }
-#else
-	  count_trailing_zeros (count, u);
-	  u >>= count;
-#endif
-	  t0 += t1; t1 <<= count;
-	  s0 += s1; s1 <<= count;
-	}
-      else
-	{
-	  v -= u;
-#if USE_ZEROTAB
-	  count = zerotab [v & 0x3f];
-	  v >>= count;
-	  if (UNLIKELY (count == 6))
-	    {
-	      unsigned c;
-	      do
-		{
-		  c = zerotab[v & 0x3f];
-		  v >>= c;
-		  count += c;
-		}
-	      while (c == 6);
-	    }
-#else
-	  count_trailing_zeros (count, v);
-	  v >>= count;
-#endif
-	  t1 += t0; t0 <<= count;
-	  s1 += s0; s0 <<= count;
-	}
-      shift += count;
-    }
-#else
-# if GCDEXT_1_BINARY_METHOD == 2
-  u >>= 1;
-  v >>= 1;
-
-  det_sign = 0;
-
-  while (u != v)
-    {
-      unsigned count;
-      mp_limb_t d =  u - v;
-      mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
-      mp_limb_t sx;
-      mp_limb_t tx;
-
-      /* When v <= u (vgtu == 0), the updates are:
-
-	   (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. M factor)
-	   (t1, t0) <-- (t1 << count, t0 + t1)
-
-	 and when v > 0, the updates are
-
-	   (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))
-	   (t1, t0) <-- (t0 << count, t0 + t1)
-
-	 and similarly for s1, s0
-      */
-
-      /* v <-- min (u, v) */
-      v += (vgtu & d);
-
-      /* u <-- |u - v| */
-      u = (d ^ vgtu) - vgtu;
-
-      /* Number of trailing zeros is the same no matter if we look at
-       * d or u, but using d gives more parallelism. */
-#if USE_ZEROTAB
-      count = zerotab[d & 0x3f];
-      if (UNLIKELY (count == 6))
-	{
-	  unsigned c = 6;
-	  do
-	    {
-	      d >>= c;
-	      c = zerotab[d & 0x3f];
-	      count += c;
-	    }
-	  while (c == 6);
-	}
-#else
-      count_trailing_zeros (count, d);
-#endif
-      det_sign ^= vgtu;
-
-      tx = vgtu & (t0 - t1);
-      sx = vgtu & (s0 - s1);
-      t0 += t1;
-      s0 += s1;
-      t1 += tx;
-      s1 += sx;
-
-      count++;
-      u >>= count;
-      t1 <<= count;
-      s1 <<= count;
-      shift += count;
-    }
-  u = (u << 1) + 1;
-# else /* GCDEXT_1_BINARY_METHOD == 2 */
-#  error Unknown GCDEXT_1_BINARY_METHOD
-# endif
-#endif
-
-  /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
-  ug = t0 + t1;
-  vg = s0 + s1;
-
-  ugh = ug/2 + (ug & 1);
-  vgh = vg/2 + (vg & 1);
-
-  /* Now ±2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
-     s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
-  for (i = 0; i < shift; i++)
-    {
-      mp_limb_t mask = - ( (s0 | t0) & 1);
-
-      s0 /= 2;
-      t0 /= 2;
-      s0 += mask & vgh;
-      t0 += mask & ugh;
-    }
-  /* FIXME: Try simplifying this condition. */
-  if ( (s0 > 1 && 2*s0 >= vg) || (t0 > 1 && 2*t0 >= ug) )
-    {
-      s0 -= vg;
-      t0 -= ug;
-    }
-#if GCDEXT_1_BINARY_METHOD == 2
-  /* Conditional negation. */
-  s0 = (s0 ^ det_sign) - det_sign;
-  t0 = (t0 ^ det_sign) - det_sign;
-#endif
-  *sp = s0;
-  *tp = -t0;
-
-  return u << zero_bits;
-}
-
-#else /* !GCDEXT_1_USE_BINARY */
-
 
 /* FIXME: Takes two single-word limbs. It could be extended to a
  * function that accepts a bignum for the first input, and only
@@ -325,4 +79,3 @@ mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
       v1 -= q * v0;
     }
 }
-#endif /* !GCDEXT_1_USE_BINARY */
diff --git a/gmp/mpn/generic/gcdext_lehmer.c b/gmp/mpn/generic/gcdext_lehmer.c
index 547f69a409..8599a4f554 100644
--- a/gmp/mpn/generic/gcdext_lehmer.c
+++ b/gmp/mpn/generic/gcdext_lehmer.c
@@ -1,146 +1,31 @@
 /* mpn_gcdext -- Extended Greatest Common Divisor.
 
-Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
-Inc.
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
 
-/* Here, d is the index of the cofactor to update. FIXME: Could use qn
-   = 0 for the common case q = 1. */
-void
-mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
-		 mp_srcptr qp, mp_size_t qn, int d)
-{
-  struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
-  mp_size_t un = ctx->un;
-
-  if (gp)
-    {
-      mp_srcptr up;
-
-      ASSERT (gn > 0);
-      ASSERT (gp[gn-1] > 0);
-
-      MPN_COPY (ctx->gp, gp, gn);
-      ctx->gn = gn;
-
-      if (d < 0)
-	{
-	  int c;
-
-	  /* Must return the smallest cofactor, +u1 or -u0 */
-	  MPN_CMP (c, ctx->u0, ctx->u1, un);
-	  ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
-
-	  d = c < 0;
-	}
-
-      up = d ? ctx->u0 : ctx->u1;
-
-      MPN_NORMALIZE (up, un);
-      MPN_COPY (ctx->up, up, un);
-
-      *ctx->usize = d ? -un : un;
-    }
-  else
-    {
-      mp_limb_t cy;
-      mp_ptr u0 = ctx->u0;
-      mp_ptr u1 = ctx->u1;
-
-      ASSERT (d >= 0);
-
-      if (d)
-	MP_PTR_SWAP (u0, u1);
-
-      qn -= (qp[qn-1] == 0);
-
-      /* Update u0 += q  * u1 */
-      if (qn == 1)
-	{
-	  mp_limb_t q = qp[0];
-
-	  if (q == 1)
-	    /* A common case. */
-	    cy = mpn_add_n (u0, u0, u1, un);
-	  else
-	    cy = mpn_addmul_1 (u0, u1, un, q);
-	}
-      else
-	{
-	  mp_size_t u1n;
-	  mp_ptr tp;
-
-	  u1n = un;
-	  MPN_NORMALIZE (u1, u1n);
-
-	  if (u1n == 0)
-	    return;
-
-	  /* Should always have u1n == un here, and u1 >= u0. The
-	     reason is that we alternate adding u0 to u1 and u1 to u0
-	     (corresponding to subtractions a - b and b - a), and we
-	     can get a large quotient only just after a switch, which
-	     means that we'll add (a multiple of) the larger u to the
-	     smaller. */
-
-	  tp = ctx->tp;
-
-	  if (qn > u1n)
-	    mpn_mul (tp, qp, qn, u1, u1n);
-	  else
-	    mpn_mul (tp, u1, u1n, qp, qn);
-
-	  u1n += qn;
-	  u1n -= tp[u1n-1] == 0;
-
-	  if (u1n >= un)
-	    {
-	      cy = mpn_add (u0, tp, u1n, u0, un);
-	      un = u1n;
-	    }
-	  else
-	    /* Note: Unlikely case, maybe never happens? */
-	    cy = mpn_add (u0, u0, un, tp, u1n);
-
-	}
-      u0[un] = cy;
-      ctx->un = un + (cy > 0);
-    }
-}
-
-/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
-   the matrix-vector multiplication adjusting a, b. If hgcd fails, we
-   need at most n for the quotient and n+1 for the u update (reusing
-   the extra u). In all, 4n + 3. */
+/* Temporary storage: 3*(n+1) for u. n+1 for the matrix-vector
+   multiplications (if hgcd2 succeeds). If hgcd2 fails, n+1 limbs are
+   needed for the division, with at most n for the quotient, and n+1 for
+   the product q u0. In all, 4n + 3. */
 
 mp_size_t
 mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
@@ -156,16 +41,8 @@ mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
    * which correspond to the first column of the inverse
    *
    *   M^{-1} = (u1, -v1; -u0, v0)
-   *
-   * This implies that
-   *
-   *   a =  u1 A (mod B)
-   *   b = -u0 A (mod B)
-   *
-   * where A, B denotes the input values.
    */
 
-  struct gcdext_ctx ctx;
   mp_size_t un;
   mp_ptr u0;
   mp_ptr u1;
@@ -178,10 +55,6 @@ mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
 
   u1[0] = 1; un = 1;
 
-  ctx.gp = gp;
-  ctx.up = up;
-  ctx.usize = usize;
-
   /* FIXME: Handle n == 2 differently, after the loop? */
   while (n >= 2)
     {
@@ -223,7 +96,7 @@ mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
       /* Try an mpn_nhgcd2 step */
       if (mpn_hgcd2 (ah, al, bh, bl, &M))
 	{
-	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
+	  n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
 	  MP_PTR_SWAP (ap, tp);
 	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
 	  MP_PTR_SWAP (u0, u2);
@@ -233,18 +106,17 @@ mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
 	  /* mpn_hgcd2 has failed. Then either one of a or b is very
 	     small, or the difference is very small. Perform one
 	     subtraction followed by one division. */
-	  ctx.u0 = u0;
-	  ctx.u1 = u1;
-	  ctx.tp = u2;
-	  ctx.un = un;
+	  mp_size_t gn;
+	  mp_size_t updated_un = un;
 
 	  /* Temporary storage n for the quotient and ualloc for the
 	     new cofactor. */
-	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+	  n = mpn_gcdext_subdiv_step (gp, &gn, up, usize, ap, bp, n,
+				      u0, u1, &updated_un, tp, u2);
 	  if (n == 0)
-	    return ctx.gn;
+	    return gn;
 
-	  un = ctx.un;
+	  un = updated_un;
 	}
     }
   ASSERT_ALWAYS (ap[0] > 0);
diff --git a/gmp/mpn/generic/gcdext_subdiv_step.c b/gmp/mpn/generic/gcdext_subdiv_step.c
new file mode 100644
index 0000000000..d54b3bdee1
--- /dev/null
+++ b/gmp/mpn/generic/gcdext_subdiv_step.c
@@ -0,0 +1,197 @@
+/* gcdext_subdiv_step.c.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
+   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
+   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or
+   b is small, or the difference is small. Performs one subtraction
+   followed by one division. If the gcd is found, stores it in gp and
+   *gn, stores a cofactor in up with signed size *usizep, and returns 0.
+   Otherwise reduces a and b, updates u0, u1 and *unp, returns the new n. */
+
+/* Temporary storage: Needs n limbs for the quotient, at qp. tp must
+   point to an area large enough for the resulting cofactor, plus one
+   limb extra. All in all, 2N + 1 if N is a bound for both inputs and
+   outputs. u1 gets a carry limb written at u1[un], so needs un + 1 limbs. */
+mp_size_t
+mpn_gcdext_subdiv_step (mp_ptr gp, mp_size_t *gn, mp_ptr up, mp_size_t *usizep,
+			mp_ptr ap, mp_ptr bp, mp_size_t n,
+			mp_ptr u0, mp_ptr u1, mp_size_t *unp,
+			mp_ptr qp, mp_ptr tp)
+{
+  mp_size_t an, bn, un;
+  mp_size_t qn;
+  mp_size_t u0n;
+
+  int swapped;	/* nonzero while a/b and u0/u1 are exchanged locally */
+
+  an = bn = n;
+
+  ASSERT (an > 0);
+  ASSERT (ap[an-1] > 0 || bp[an-1] > 0);	/* n is tight for at least one input */
+
+  MPN_NORMALIZE (ap, an);	/* an, bn become the true limb counts */
+  MPN_NORMALIZE (bp, bn);
+
+  un = *unp;
+
+  swapped = 0;
+
+  if (UNLIKELY (an == 0))
+    {
+    return_b:	/* also reached by goto below whenever b is the gcd */
+      MPN_COPY (gp, bp, bn);
+      *gn = bn;
+
+      MPN_NORMALIZE (u0, un);
+      MPN_COPY (up, u0, un);
+
+      *usizep = swapped ? un : -un;	/* sign flips if a, b were exchanged */
+
+      return 0;
+    }
+  else if (UNLIKELY (bn == 0))
+    {
+      MPN_COPY (gp, ap, an);	/* b == 0, so a is the gcd */
+      *gn = an;
+
+      MPN_NORMALIZE (u1, un);
+      MPN_COPY (up, u1, un);
+
+      *usizep = swapped ? -un : un;	/* swapped is still 0 here */
+
+      return 0;
+    }
+
+  /* Arrange so that a > b, subtract a -= b, and maintain
+     normalization. Only the local pointers and sizes are exchanged. */
+  if (an < bn)
+    {
+      MPN_PTR_SWAP (ap, an, bp, bn);
+      MP_PTR_SWAP (u0, u1);
+      swapped ^= 1;
+    }
+  else if (an == bn)
+    {
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      if (UNLIKELY (c == 0))
+	{
+	  MPN_COPY (gp, ap, an);	/* a == b, so either one is the gcd */
+	  *gn = an;
+
+	  /* Must return the smallest cofactor, +u1 or -u0 */
+	  MPN_CMP (c, u0, u1, un);
+	  ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
+
+	  if (c < 0)
+	    {
+	      MPN_NORMALIZE (u0, un);
+	      MPN_COPY (up, u0, un);
+	      swapped ^= 1;	/* makes the sign below come out as for u0 */
+	    }
+	  else
+	    {
+	      MPN_NORMALIZE_NOT_ZERO (u1, un);
+	      MPN_COPY (up, u1, un);
+	    }
+
+	  *usizep = swapped ? -un : un;
+	  return 0;
+	}
+      else if (c < 0)
+	{
+	  MP_PTR_SWAP (ap, bp);	/* sizes are equal, only pointers move */
+	  MP_PTR_SWAP (u0, u1);
+	  swapped ^= 1;
+	}
+    }
+  /* Reduce a -= b, u1 += u0 */
+  ASSERT_NOCARRY (mpn_sub (ap, ap, an, bp, bn));
+  MPN_NORMALIZE (ap, an);
+  ASSERT (an > 0);	/* a > b held strictly, so the difference is nonzero */
+
+  u1[un] = mpn_add_n (u1, u1, u0, un);	/* carry goes into limb un */
+  un += (u1[un] > 0);
+
+  /* Arrange so that a > b, and divide a = q b + r */
+  if (an < bn)
+    {
+      MPN_PTR_SWAP (ap, an, bp, bn);
+      MP_PTR_SWAP (u0, u1);
+      swapped ^= 1;
+    }
+  else if (an == bn)
+    {
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      if (UNLIKELY (c == 0))
+	goto return_b;	/* a == b after the subtraction: b is the gcd */
+      else if (c < 0)
+	{
+	  MP_PTR_SWAP (ap, bp);
+	  MP_PTR_SWAP (u0, u1);
+	  swapped ^= 1;
+	}
+    }
+
+  /* Reduce a -= q b, u1 += q u0 */
+  qn = an - bn + 1;
+  mpn_tdiv_qr (qp, ap, 0, ap, an, bp, bn);	/* quotient at qp, remainder at ap */
+
+  if (mpn_zero_p (ap, bn))
+    goto return_b;	/* b divides a exactly: b is the gcd */
+
+  n = bn;	/* size returned for the reduced pair */
+
+  /* Update u1 += q u0 */
+  u0n = un;
+  MPN_NORMALIZE (u0, u0n);
+
+  if (u0n > 0)	/* nothing to add when u0 is zero */
+    {
+      qn -= (qp[qn - 1] == 0);	/* top quotient limb may be zero */
+
+      if (qn > u0n)	/* pass the longer operand to mpn_mul first */
+	mpn_mul (tp, qp, qn, u0, u0n);
+      else
+	mpn_mul (tp, u0, u0n, qp, qn);
+
+      if (qn + u0n > un)	/* likewise mpn_add takes the longer operand first */
+	{
+	  ASSERT_NOCARRY (mpn_add (u1, tp, qn + u0n, u1, un));
+	  un = qn + u0n;
+	  un -= (u1[un-1] == 0);	/* product's top limb may be zero */
+	}
+      else
+	{
+	  u1[un] = mpn_add (u1, u1, un, tp, qn + u0n);
+	  un += (u1[un] > 0);
+	}
+    }
+
+  *unp = un;	/* report the updated cofactor size to the caller */
+  return n;
+}
diff --git a/gmp/mpn/generic/get_d.c b/gmp/mpn/generic/get_d.c
index d73d314856..cf4ae86efc 100644
--- a/gmp/mpn/generic/get_d.c
+++ b/gmp/mpn/generic/get_d.c
@@ -4,33 +4,22 @@
    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
    FUTURE GNU MP RELEASES.
 
-Copyright 2003, 2004, 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
+Copyright 2003, 2004 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -40,20 +29,33 @@ see https://www.gnu.org/licenses/.  */
 #define _GMP_IEEE_FLOATS 0
 #endif
 
+#if ! _GMP_IEEE_FLOATS
+/* dummy definition, just to let dead code compile */
+union ieee_double_extract {
+  struct {
+    int manh, manl, sig, exp;
+  } s;
+  double d;
+};
+#endif
+
 /* To force use of the generic C code for testing, put
    "#define _GMP_IEEE_FLOATS 0" at this point.  */
 
 
+
 /* In alpha gcc prior to 3.4, signed DI comparisons involving constants are
    rearranged from "x < n" to "x+(-n) < 0", which is of course hopelessly
    wrong if that addition overflows.
 
-   The workaround here avoids this bug by ensuring n is not a literal constant.
-   Note that this is alpha specific.  The offending transformation is/was in
-   alpha.c alpha_emit_conditional_branch() under "We want to use cmpcc/bcc".
+   The workaround here avoids this bug by ensuring n is not a literal
+   constant.  Note that this is alpha specific.  The offending transformation
+   is/was in alpha.c alpha_emit_conditional_branch() under "We want to use
+   cmpcc/bcc".
 
-   Bizarrely, this happens also with Cray cc on alphaev5-cray-unicosmk2.0.6.X,
-   and has the same solution.  Don't know why or how.  */
+   Bizarrely, it turns out this happens also with Cray cc on
+   alphaev5-cray-unicosmk2.0.6.X, and has the same solution.  Don't know why
+   or how.  */
 
 #if HAVE_HOST_CPU_FAMILY_alpha				\
   && ((defined (__GNUC__) && ! __GMP_GNUC_PREREQ(3,4))	\
@@ -68,73 +70,69 @@ static volatile const long CONST_NEG_1022_SUB_53 = -1022 - 53;
 #endif
 
 
-/* Return the value {ptr,size}*2^exp, and negative if sign<0.  Must have
-   size>=1, and a non-zero high limb ptr[size-1].
 
-   When we know the fp format, the result is truncated towards zero.  This is
-   consistent with other gmp conversions, like mpz_set_f or mpz_set_q, and is
-   easy to implement and test.
+/* Return the value {ptr,size}*2^exp, and negative if sign<0.
+   Must have size>=1, and a non-zero high limb ptr[size-1].
 
-   When we do not know the format, such truncation seems much harder.  One
-   would need to defeat any rounding mode, including round-up.
+   {ptr,size} is truncated towards zero.  This is consistent with other gmp
+   conversions, like mpz_set_f or mpz_set_q, and is easy to implement and
+   test.
+
+   In the past conversions had attempted (imperfectly) to let the hardware
+   float rounding mode take effect, but that gets tricky since multiple
+   roundings need to be avoided, or taken into account, and denorms mean the
+   effective precision of the mantissa is not constant.  (For reference,
+   mpz_get_d on IEEE systems was ok, except it operated on the absolute
+   value.  mpf_get_d and mpq_get_d suffered from multiple roundings and from
+   not always using enough bits to get the rounding right.)
 
    It's felt that GMP is not primarily concerned with hardware floats, and
    really isn't enhanced by getting involved with hardware rounding modes
-   (which could even be some weird unknown style), so something unambiguous and
-   straightforward is best.
+   (which could even be some weird unknown style), so something unambiguous
+   and straightforward is best.
 
 
    The IEEE code below is the usual case, it knows either a 32-bit or 64-bit
    limb and is done with shifts and masks.  The 64-bit case in particular
    should come out nice and compact.
 
-   The generic code used to work one bit at a time, which was not only slow,
-   but implicitly relied upon denorms for intermediates, since the lowest bits'
-   weight of a perfectly valid fp number underflows in non-denorm.  Therefore,
-   the generic code now works limb-per-limb, initially creating a number x such
-   that 1 <= x <= BASE.  (BASE is reached only as result of rounding.)  Then
-   x's exponent is scaled with explicit code (not ldexp to avoid libm
-   dependency).  It is a tap-dance to avoid underflow or overflow, beware!
+   The generic code works one bit at a time, which will be quite slow, but
+   should support any binary-based "double" and be safe against any rounding
+   mode.  Note in particular it works on IEEE systems too.
 
 
    Traps:
 
-   Hardware traps for overflow to infinity, underflow to zero, or unsupported
-   denorms may or may not be taken.  The IEEE code works bitwise and so
-   probably won't trigger them, the generic code works by float operations and
-   so probably will.  This difference might be thought less than ideal, but
-   again its felt straightforward code is better than trying to get intimate
-   with hardware exceptions (of perhaps unknown nature).
+   Hardware traps for overflow to infinity, underflow to zero, or
+   unsupported denorms may or may not be taken.  The IEEE code works bitwise
+   and so probably won't trigger them, the generic code works by float
+   operations and so probably will.  This difference might be thought less
+   than ideal, but again it's felt straightforward code is better than trying
+   to get intimate with hardware exceptions (of perhaps unknown nature).
 
 
    Not done:
 
-   mpz_get_d in the past handled size==1 with a cast limb->double.  This might
-   still be worthwhile there (for up to the mantissa many bits), but for
-   mpn_get_d here, the cost of applying "exp" to the resulting exponent would
-   probably use up any benefit a cast may have over bit twiddling.  Also, if
-   the exponent is pushed into denorm range then bit twiddling is the only
-   option, to ensure the desired truncation is obtained.
+   mpz_get_d in the past handled size==1 with a cast limb->double.  This
+   might still be worthwhile there (for up to the mantissa many bits), but
+   for mpn_get_d here, the cost of applying "exp" to the resulting exponent
+   would probably use up any benefit a cast may have over bit twiddling.
+   Also, if the exponent is pushed into denorm range then bit twiddling is
+   the only option, to ensure the desired truncation is obtained.
 
 
    Other:
 
    For reference, note that HPPA 8000, 8200, 8500 and 8600 trap FCNV,UDW,DBL
-   to the kernel for values >= 2^63.  This makes it slow, and worse the kernel
-   Linux (what versions?) apparently uses untested code in its trap handling
-   routines, and gets the sign wrong.  We don't use such a limb-to-double
-   cast, neither in the IEEE or generic code.  */
-
+   to the kernel for values >= 2^63.  This makes it slow, and worse the
+   Linux kernel (what versions?) apparently uses untested code in its trap
+   handling routines, and gets the sign wrong.  We don't use such a limb to
+   double cast, in either the IEEE or the generic code.  */
 
 
-#undef FORMAT_RECOGNIZED
-
 double
 mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
 {
-  int lshift, nbits;
-  mp_limb_t x, mhi, mlo;
-
   ASSERT (size >= 0);
   ASSERT_MPN (up, size);
   ASSERT (size == 0 || up[size-1] != 0);
@@ -146,11 +144,10 @@ mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
      overflow.  After this exp can of course be reduced to anywhere within
      the {up,size} region without underflow.  */
   if (UNLIKELY ((unsigned long) (GMP_NUMB_BITS * size)
-		> ((unsigned long) LONG_MAX - exp)))
+		> (unsigned long) (LONG_MAX - exp)))
     {
-#if _GMP_IEEE_FLOATS
-      goto ieee_infinity;
-#endif
+      if (_GMP_IEEE_FLOATS)
+	goto ieee_infinity;
 
       /* generic */
       exp = LONG_MAX;
@@ -160,253 +157,334 @@ mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
       exp += GMP_NUMB_BITS * size;
     }
 
-#if _GMP_IEEE_FLOATS
-    {
-      union ieee_double_extract u;
-
-      up += size;
 
+#if 1
+{
+  int lshift, nbits;
+  union ieee_double_extract u;
+  mp_limb_t x, mhi, mlo;
 #if GMP_LIMB_BITS == 64
-      mlo = up[-1];
-      count_leading_zeros (lshift, mlo);
+  mp_limb_t m;
+  up += size;
+  m = *--up;
+  count_leading_zeros (lshift, m);
 
-      exp -= (lshift - GMP_NAIL_BITS) + 1;
-      mlo <<= lshift;
+  exp -= (lshift - GMP_NAIL_BITS) + 1;
+  m <<= lshift;
 
-      nbits = GMP_LIMB_BITS - lshift;
+  nbits = GMP_LIMB_BITS - lshift;
 
-      if (nbits < 53 && size > 1)
+  if (nbits < 53 && size > 1)
+    {
+      x = *--up;
+      x <<= GMP_NAIL_BITS;
+      x >>= nbits;
+      m |= x;
+      nbits += GMP_NUMB_BITS;
+
+      if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2)
 	{
-	  x = up[-2];
+	  x = *--up;
 	  x <<= GMP_NAIL_BITS;
 	  x >>= nbits;
-	  mlo |= x;
+	  m |= x;
 	  nbits += GMP_NUMB_BITS;
-
-	  if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2)
-	    {
-	      x = up[-3];
-	      x <<= GMP_NAIL_BITS;
-	      x >>= nbits;
-	      mlo |= x;
-	      nbits += GMP_NUMB_BITS;
-	    }
 	}
-      mhi = mlo >> (32 + 11);
-      mlo = mlo >> 11;		/* later implicitly truncated to 32 bits */
+    }
+  mhi = m >> (32 + 11);
+  mlo = m >> 11;
 #endif
 #if GMP_LIMB_BITS == 32
-      x = *--up;
-      count_leading_zeros (lshift, x);
+  up += size;
+  x = *--up, size--;
+  count_leading_zeros (lshift, x);
 
-      exp -= (lshift - GMP_NAIL_BITS) + 1;
-      x <<= lshift;
-      mhi = x >> 11;
+  exp -= (lshift - GMP_NAIL_BITS) + 1;
+  x <<= lshift;
+  mhi = x >> 11;
 
-      if (lshift < 11)		/* FIXME: never true if NUMB < 20 bits */
+  if (lshift < 11)		/* FIXME: never true if NUMB < 20 bits */
+    {
+      /* All 20 bits in mhi */
+      mlo = x << 21;
+      /* >= 1 bit in mlo */
+      nbits = GMP_LIMB_BITS - lshift - 21;
+    }
+  else
+    {
+      if (size != 0)
 	{
-	  /* All 20 bits in mhi */
-	  mlo = x << 21;
-	  /* >= 1 bit in mlo */
-	  nbits = GMP_LIMB_BITS - lshift - 21;
+	  nbits = GMP_LIMB_BITS - lshift;
+
+	  x = *--up, size--;
+	  x <<= GMP_NAIL_BITS;
+	  mhi |= x >> nbits >> 11;
+
+	  mlo = x << GMP_LIMB_BITS - nbits - 11;
+	  nbits = nbits + 11 - GMP_NAIL_BITS;
 	}
       else
 	{
-	  if (size > 1)
-	    {
-	      nbits = GMP_LIMB_BITS - lshift;
-
-	      x = *--up, size--;
-	      x <<= GMP_NAIL_BITS;
-	      mhi |= x >> nbits >> 11;
-
-	      mlo = x << GMP_LIMB_BITS - nbits - 11;
-	      nbits = nbits + 11 - GMP_NAIL_BITS;
-	    }
-	  else
-	    {
-	      mlo = 0;
-	      goto done;
-	    }
+	  mlo = 0;
+	  goto done;
 	}
+    }
 
-      /* Now all needed bits in mhi have been accumulated.  Add bits to mlo.  */
+  if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size != 0)
+    {
+      x = *--up, size--;
+      x <<= GMP_NAIL_BITS;
+      x >>= nbits;
+      mlo |= x;
+      nbits += GMP_NUMB_BITS;
 
-      if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size > 1)
+      if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size != 0)
 	{
-	  x = up[-1];
+	  x = *--up, size--;
 	  x <<= GMP_NAIL_BITS;
 	  x >>= nbits;
 	  mlo |= x;
 	  nbits += GMP_NUMB_BITS;
 
-	  if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size > 2)
+	  if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size != 0)
 	    {
-	      x = up[-2];
+	      x = *--up;
 	      x <<= GMP_NAIL_BITS;
 	      x >>= nbits;
 	      mlo |= x;
 	      nbits += GMP_NUMB_BITS;
-
-	      if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size > 3)
-		{
-		  x = up[-3];
-		  x <<= GMP_NAIL_BITS;
-		  x >>= nbits;
-		  mlo |= x;
-		  nbits += GMP_NUMB_BITS;
-		}
 	    }
 	}
+    }
 
-    done:;
+ done:;
 
 #endif
+  {
+    if (UNLIKELY (exp >= CONST_1024))
+      {
+	/* overflow, return infinity */
+      ieee_infinity:
+	mhi = 0;
+	mlo = 0;
+	exp = 1024;
+      }
+    else if (UNLIKELY (exp <= CONST_NEG_1023))
+      {
+	int rshift;
+
+	if (LIKELY (exp <= CONST_NEG_1022_SUB_53))
+	  return 0.0;	 /* denorm underflows to zero */
+
+	rshift = -1022 - exp;
+	ASSERT (rshift > 0 && rshift < 53);
+#if GMP_LIMB_BITS > 53
+	mlo >>= rshift;
+	mhi = mlo >> 32;
+#else
+	if (rshift >= 32)
+	  {
+	    mlo = mhi;
+	    mhi = 0;
+	    rshift -= 32;
+	  }
+	lshift = GMP_LIMB_BITS - rshift;
+	mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift);
+	mhi >>= rshift;
+#endif
+	exp = -1023;
+      }
+  }
+  u.s.manh = mhi;
+  u.s.manl = mlo;
+  u.s.exp = exp + 1023;
+  u.s.sig = (sign < 0);
+  return u.d;
+}
+#else
+
+
+#define ONE_LIMB    (GMP_LIMB_BITS == 64 && 2*GMP_NUMB_BITS >= 53)
+#define TWO_LIMBS   (GMP_LIMB_BITS == 32 && 3*GMP_NUMB_BITS >= 53)
+
+  if (_GMP_IEEE_FLOATS && (ONE_LIMB || TWO_LIMBS))
+    {
+      union ieee_double_extract	 u;
+      mp_limb_t	 m0, m1, m2, rmask;
+      int	 lshift, rshift;
+
+      m0 = up[size-1];			    /* high limb */
+      m1 = (size >= 2 ? up[size-2] : 0);   /* second highest limb */
+      count_leading_zeros (lshift, m0);
+
+      /* relative to just under high non-zero bit */
+      exp -= (lshift - GMP_NAIL_BITS) + 1;
+
+      if (ONE_LIMB)
+	{
+	  /* lshift to have high of m0 non-zero, and collapse nails */
+	  rshift = GMP_LIMB_BITS - lshift;
+	  m1 <<= GMP_NAIL_BITS;
+	  rmask = GMP_NAIL_BITS == 0 && lshift == 0 ? 0 : MP_LIMB_T_MAX;
+	  m0 = (m0 << lshift) | ((m1 >> rshift) & rmask);
+
+	  /* rshift back to have bit 53 of m0 the high non-zero */
+	  m0 >>= 11;
+	}
+      else /* TWO_LIMBS */
+	{
+	  m2 = (size >= 3 ? up[size-3] : 0);  /* third highest limb */
+
+	  /* collapse nails from m1 and m2 */
+#if GMP_NAIL_BITS != 0
+	  m1 = (m1 << GMP_NAIL_BITS) | (m2 >> (GMP_NUMB_BITS-GMP_NAIL_BITS));
+	  m2 <<= 2*GMP_NAIL_BITS;
+#endif
+
+	  /* lshift to have high of m0:m1 non-zero, collapse nails from m0 */
+	  rshift = GMP_LIMB_BITS - lshift;
+	  rmask = (GMP_NAIL_BITS == 0 && lshift == 0 ? 0 : MP_LIMB_T_MAX);
+	  m0 = (m0 << lshift) | ((m1 >> rshift) & rmask);
+	  m1 = (m1 << lshift) | ((m2 >> rshift) & rmask);
+
+	  /* rshift back to have bit 53 of m0:m1 the high non-zero */
+	  m1 = (m1 >> 11) | (m0 << (GMP_LIMB_BITS-11));
+	  m0 >>= 11;
+	}
+
       if (UNLIKELY (exp >= CONST_1024))
 	{
 	  /* overflow, return infinity */
 	ieee_infinity:
-	  mhi = 0;
-	  mlo = 0;
+	  m0 = 0;
+	  m1 = 0;
 	  exp = 1024;
 	}
       else if (UNLIKELY (exp <= CONST_NEG_1023))
 	{
-	  int rshift;
-
 	  if (LIKELY (exp <= CONST_NEG_1022_SUB_53))
 	    return 0.0;	 /* denorm underflows to zero */
 
 	  rshift = -1022 - exp;
 	  ASSERT (rshift > 0 && rshift < 53);
-#if GMP_LIMB_BITS > 53
-	  mlo >>= rshift;
-	  mhi = mlo >> 32;
-#else
-	  if (rshift >= 32)
+	  if (ONE_LIMB)
 	    {
-	      mlo = mhi;
-	      mhi = 0;
-	      rshift -= 32;
+	      m0 >>= rshift;
 	    }
-	  lshift = GMP_LIMB_BITS - rshift;
-	  mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift);
-	  mhi >>= rshift;
-#endif
-	  exp = -1023;
-	}
-      u.s.manh = mhi;
-      u.s.manl = mlo;
-      u.s.exp = exp + 1023;
-      u.s.sig = (sign < 0);
-      return u.d;
-    }
-#define FORMAT_RECOGNIZED 1
-#endif
-
-#if HAVE_DOUBLE_VAX_D
-    {
-      union double_extract u;
-
-      up += size;
-
-      mhi = up[-1];
-
-      count_leading_zeros (lshift, mhi);
-      exp -= lshift;
-      mhi <<= lshift;
-
-      mlo = 0;
-      if (size > 1)
-	{
-	  mlo = up[-2];
-	  if (lshift != 0)
-	    mhi += mlo >> (GMP_LIMB_BITS - lshift);
-	  mlo <<= lshift;
-
-	  if (size > 2 && lshift > 8)
+	  else /* TWO_LIMBS */
 	    {
-	      x = up[-3];
-	      mlo += x >> (GMP_LIMB_BITS - lshift);
+	      if (rshift >= 32)
+		{
+		  m1 = m0;
+		  m0 = 0;
+		  rshift -= 32;
+		}
+	      lshift = GMP_LIMB_BITS - rshift;
+	      m1 = (m1 >> rshift) | (rshift == 0 ? 0 : m0 << lshift);
+	      m0 >>= rshift;
 	    }
+	  exp = -1023;
 	}
 
-      if (UNLIKELY (exp >= 128))
+      if (ONE_LIMB)
 	{
-	  /* overflow, return maximum number */
-	  mhi = 0xffffffff;
-	  mlo = 0xffffffff;
-	  exp = 127;
+#if GMP_LIMB_BITS > 32	/* avoid compiler warning about big shift */
+	  u.s.manh = m0 >> 32;
+#endif
+	  u.s.manl = m0;
 	}
-      else if (UNLIKELY (exp < -128))
+      else /* TWO_LIMBS */
 	{
-	  return 0.0;	 /* underflows to zero */
+	  u.s.manh = m0;
+	  u.s.manl = m1;
 	}
 
-      u.s.man3 = mhi >> 24;	/* drop msb, since implicit */
-      u.s.man2 = mhi >> 8;
-      u.s.man1 = (mhi << 8) + (mlo >> 24);
-      u.s.man0 = mlo >> 8;
-      u.s.exp = exp + 128;
-      u.s.sig = sign < 0;
+      u.s.exp = exp + 1023;
+      u.s.sig = (sign < 0);
       return u.d;
     }
-#define FORMAT_RECOGNIZED 1
-#endif
-
-#if ! FORMAT_RECOGNIZED
-    {      /* Non-IEEE or strange limb size, do something generic. */
-      mp_size_t i;
-      double d, weight;
-      unsigned long uexp;
-
-      /* First generate an fp number disregarding exp, instead keeping things
-	 within the numb base factor from 1, which should prevent overflow and
-	 underflow even for the most exponent limited fp formats.  The
-	 termination criteria should be refined, since we now include too many
-	 limbs.  */
-      weight = 1/MP_BASE_AS_DOUBLE;
-      d = up[size - 1];
-      for (i = size - 2; i >= 0; i--)
+  else
+    {
+      /* Non-IEEE or strange limb size, do something generic. */
+
+      mp_size_t	     i;
+      mp_limb_t	     limb, bit;
+      int	     shift;
+      double	     base, factor, prev_factor, d, new_d, diff;
+
+      /* "limb" is "up[i]" the limb being examined, "bit" is a mask for the
+	 bit being examined, initially the highest non-zero bit.  */
+      i = size-1;
+      limb = up[i];
+      count_leading_zeros (shift, limb);
+      bit = GMP_LIMB_HIGHBIT >> shift;
+
+      /* relative to just under high non-zero bit */
+      exp -= (shift - GMP_NAIL_BITS) + 1;
+
+      /* Power up "factor" to 2^exp, being the value of the "bit" in "limb"
+	 being examined.  */
+      base = (exp >= 0 ? 2.0 : 0.5);
+      exp = ABS (exp);
+      factor = 1.0;
+      for (;;)
 	{
-	  d += up[i] * weight;
-	  weight /= MP_BASE_AS_DOUBLE;
-	  if (weight == 0)
+	  if (exp & 1)
+	    {
+	      prev_factor = factor;
+	      factor *= base;
+	      FORCE_DOUBLE (factor);
+	      if (factor == 0.0)
+		return 0.0;	/* underflow */
+	      if (factor == prev_factor)
+		{
+		  d = factor;	  /* overflow, apparent infinity */
+		  goto generic_done;
+		}
+	    }
+	  exp >>= 1;
+	  if (exp == 0)
 	    break;
+	  base *= base;
 	}
 
-      /* Now apply exp.  */
-      exp -= GMP_NUMB_BITS;
-      if (exp > 0)
-	{
-	  weight = 2.0;
-	  uexp = exp;
-	}
-      else
-	{
-	  weight = 0.5;
-	  uexp = 1 - (unsigned long) (exp + 1);
-	}
-#if 1
-      /* Square-and-multiply exponentiation.  */
-      if (uexp & 1)
-	d *= weight;
-      while (uexp >>= 1)
-	{
-	  weight *= weight;
-	  if (uexp & 1)
-	    d *= weight;
-	}
-#else
-      /* Plain exponentiation.  */
-      while (uexp > 0)
+      /* Add a "factor" for each non-zero bit, working from high to low.
+	 Stop if any rounding occurs, hence implementing a truncation.
+
+	 Note no attention is paid to DBL_MANT_DIG, since the effective
+	 number of bits in the mantissa isn't constant when in denorm range.
+	 We also encountered an ARM system with apparently somewhat doubtful
+	 software floats where DBL_MANT_DIG claimed 53 bits but only 32
+	 actually worked.  */
+
+      d = factor;  /* high bit */
+      for (;;)
 	{
-	  d *= weight;
-	  uexp--;
+	  factor *= 0.5;  /* next bit */
+	  bit >>= 1;
+	  if (bit == 0)
+	    {
+	      /* next limb, if any */
+	      i--;
+	      if (i < 0)
+		break;
+	      limb = up[i];
+	      bit = GMP_NUMB_HIGHBIT;
+	    }
+
+	  if (bit & limb)
+	    {
+	      new_d = d + factor;
+	      FORCE_DOUBLE (new_d);
+	      diff = new_d - d;
+	      if (diff != factor)
+		break;	 /* rounding occurred, stop now */
+	      d = new_d;
+	    }
 	}
-#endif
 
-      return sign >= 0 ? d : -d;
+    generic_done:
+      return (sign >= 0 ? d : -d);
     }
 #endif
 }
diff --git a/gmp/mpn/generic/get_str.c b/gmp/mpn/generic/get_str.c
index 42e93c9cee..df007578cc 100644
--- a/gmp/mpn/generic/get_str.c
+++ b/gmp/mpn/generic/get_str.c
@@ -7,34 +7,23 @@
    FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE
    GNU MP RELEASE.
 
-Copyright 1991-1994, 1996, 2000-2002, 2004, 2006-2008, 2011, 2012 Free Software
-Foundation, Inc.
+Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,
+2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -361,8 +350,7 @@ mpn_dc_get_str (unsigned char *str, size_t len,
 
 
 /* There are no leading zeros on the digits generated at str, but that's not
-   currently a documented feature.  The current mpz_out_str and mpz_get_str
-   rely on it.  */
+   currently a documented feature.  */
 
 size_t
 mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
@@ -394,7 +382,7 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
       int bit_pos;
       mp_size_t i;
       unsigned char *s = str;
-      mp_bitcnt_t bits;
+      unsigned long bits;
 
       n1 = up[un - 1];
       count_leading_zeros (cnt, n1);
@@ -403,11 +391,11 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
 	 R + bits_per_digit * n when input ends in nth least significant
 	 nibble. */
 
-      bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
+      bits = GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
       cnt = bits % bits_per_digit;
       if (cnt != 0)
 	bits += bits_per_digit - cnt;
-      bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;
+      bit_pos = bits - (un - 1) * GMP_NUMB_BITS;
 
       /* Fast loop for bit output.  */
       i = un - 1;
@@ -451,12 +439,9 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
     mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp;
     mp_limb_t cy;
     mp_size_t shift;
-    size_t ndig;
-
-    DIGITS_IN_BASE_PER_LIMB (ndig, un, base);
-    xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */
 
     n_pows = 0;
+    xn = 1 + un*(mp_bases[base].chars_per_bit_exactly*GMP_NUMB_BITS)/mp_bases[base].chars_per_limb;
     for (pn = xn; pn != 1; pn = (pn + 1) >> 1)
       {
 	exptab[n_pows] = pn;
@@ -488,7 +473,7 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
 
 	ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_get_str_powtab_alloc (un));
 
-	mpn_sqr (t, p, n);
+	mpn_sqr_n (t, p, n);
 
 	digits_in_base *= 2;
 	n *= 2;  n -= t[n - 1] == 0;
@@ -546,7 +531,7 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
 
   /* Using our precomputed powers, now in powtab[], convert our number.  */
   tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));
-  out_len = mpn_dc_get_str (str, 0, up, un, powtab + (pi - 1), tmp) - str;
+  out_len = mpn_dc_get_str (str, 0, up, un, powtab - 1 + pi, tmp) - str;
   TMP_FREE;
 
   return out_len;
diff --git a/gmp/mpn/generic/gmp-mparam.h b/gmp/mpn/generic/gmp-mparam.h
index 7dc057aa0c..b22b96ef67 100644
--- a/gmp/mpn/generic/gmp-mparam.h
+++ b/gmp/mpn/generic/gmp-mparam.h
@@ -5,29 +5,18 @@ Copyright 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
-/* Values for GMP_LIMB_BITS etc will be determined by ./configure and put
+/* Values for BITS_PER_MP_LIMB etc will be determined by ./configure and put
    in config.h. */
diff --git a/gmp/mpn/generic/hgcd.c b/gmp/mpn/generic/hgcd.c
index e27a9bdd82..5fc650bbd9 100644
--- a/gmp/mpn/generic/hgcd.c
+++ b/gmp/mpn/generic/hgcd.c
@@ -4,38 +4,497 @@
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
 
+/* Set M to the 2x2 identity, carving four elements of (n+1)/2 + 1 limbs each
+   out of p: elements need at most ceil(n/2) - 1 limbs, plus two limbs extra. */
+void
+mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
+{
+  mp_size_t s = (n+1)/2 + 1;	/* limbs reserved per matrix element */
+  M->alloc = s;
+  M->n = 1;
+  MPN_ZERO (p, 4 * s);		/* clear all four elements */
+  M->p[0][0] = p;
+  M->p[0][1] = p + s;
+  M->p[1][0] = p + 2 * s;
+  M->p[1][1] = p + 3 * s;
+
+  M->p[0][0][0] = M->p[1][1][0] = 1;	/* ones on the diagonal */
+}
+
+/* Update column COL of M in place by adding column (1-COL) to it. */
+static void
+hgcd_matrix_update_1 (struct hgcd_matrix *M, unsigned col)
+{
+  mp_limb_t c0, c1;
+  ASSERT (col < 2);
+
+  c0 = mpn_add_n (M->p[0][col], M->p[0][0], M->p[0][1], M->n);
+  c1 = mpn_add_n (M->p[1][col], M->p[1][0], M->p[1][1], M->n);
+
+  M->p[0][col][M->n] = c0;	/* carry-out becomes the limb at index M->n */
+  M->p[1][col][M->n] = c1;
+
+  M->n += (c0 | c1) != 0;	/* grow by one limb only if either row carried */
+  ASSERT (M->n < M->alloc);
+}
+
+/* Update column COL of M in place, adding Q times column (1-COL), where
+ * Q = {qp, qn}.  Temporary storage at tp: qn + n <= M->alloc limbs, with n
+ * the size of the largest element in column 1 - COL. */
+static void
+hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
+		      unsigned col, mp_ptr tp)
+{
+  ASSERT (col < 2);
+
+  if (qn == 1)
+    {
+      mp_limb_t q = qp[0];
+      mp_limb_t c0, c1;
+
+      c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
+      c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
+
+      M->p[0][col][M->n] = c0;
+      M->p[1][col][M->n] = c1;
+
+      M->n += (c0 | c1) != 0;
+    }
+  else
+    {
+      unsigned row;
+
+      /* Carries for the unlikely case that we get both high words
+	 from the multiplication and carries from the addition. */
+      mp_limb_t c[2];
+      mp_size_t n;
+
+      /* The matrix will not necessarily grow in size by qn, so we
+	 need normalization in order not to overflow M. */
+
+      for (n = M->n; n + qn > M->n; n--)
+	{
+	  ASSERT (n > 0);
+	  if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
+	    break;
+	}
+
+      ASSERT (qn + n <= M->alloc);
+
+      for (row = 0; row < 2; row++)
+	{
+	  if (qn <= n)
+	    mpn_mul (tp, M->p[row][1-col], n, qp, qn);
+	  else
+	    mpn_mul (tp, qp, qn, M->p[row][1-col], n);
+
+	  ASSERT (n + qn >= M->n);
+	  c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
+	}
+      if (c[0] | c[1])
+	{
+	  M->n = n + qn + 1;
+	  M->p[0][col][M->n-1] = c[0];	/* carry is the new top limb, at index */
+	  M->p[1][col][M->n-1] = c[1];	/* n + qn, above the n + qn sum limbs */
+	}
+      else
+	{
+	  n += qn;
+	  n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
+	  if (n > M->n)
+	    M->n = n;
+	}
+    }
+
+  ASSERT (M->n < M->alloc);
+}
+
+/* Multiply M by M1 from the right. Since the M1 elements fit in
+   GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
+   M->n limbs of temporary space at tp. */
+static void
+hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
+		   mp_ptr tp)
+{
+  mp_size_t n0, n1;
+
+  /* Could avoid copy by some swapping of pointers. */
+  MPN_COPY (tp, M->p[0][0], M->n);	/* row 0 */
+  n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
+  MPN_COPY (tp, M->p[1][0], M->n);	/* row 1 */
+  n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
+
+  /* Depends on zero initialization (presumably of limbs above the smaller row size) */
+  M->n = MAX(n0, n1);
+  ASSERT (M->n < M->alloc);
+}
+
+/* Perform a few steps, using some of mpn_hgcd2, subtraction and
+   division. Reduces the size by almost one limb or more, but never
+   below the given size s. Return new size for a and b, or 0 if no
+   more steps are possible.
+
+   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
+   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
+   fails, needs space for the quotient, qn <= n - s + 1 limbs, and for
+   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
+   resulting size of M.
+
+   If N is the input size to the calling hgcd, then s = floor(N/2) +
+   1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
+   < N, so N is sufficient.
+*/
+
+static mp_size_t
+hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
+	   struct hgcd_matrix *M, mp_ptr tp)
+{
+  struct hgcd_matrix1 M1;
+  mp_limb_t mask;
+  mp_limb_t ah, al, bh, bl;
+  mp_size_t an, bn, qn;
+  int col;
+
+  ASSERT (n > s);
+
+  mask = ap[n-1] | bp[n-1];
+  ASSERT (mask > 0);
+
+  if (n == s + 1)
+    {
+      if (mask < 4)
+	goto subtract;
+
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else if (mask & GMP_NUMB_HIGHBIT)
+    {
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else
+    {
+      int shift;
+
+      count_leading_zeros (shift, mask);
+      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+    }
+
+  /* Try an mpn_hgcd2 step */
+  if (mpn_hgcd2 (ah, al, bh, bl, &M1))
+    {
+      /* Multiply M <- M * M1 */
+      hgcd_matrix_mul_1 (M, &M1, tp);
+
+      /* Can't swap inputs, so we need to copy. */
+      MPN_COPY (tp, ap, n);
+      /* Multiply M1^{-1} (a;b) */
+      return mpn_hgcd_mul_matrix1_inverse_vector (&M1, ap, tp, bp, n);
+    }
+
+ subtract:
+  /* There are two ways in which mpn_hgcd2 can fail. Either one of ah and
+     bh was too small, or ah, bh were (almost) equal. Perform one
+     subtraction step (for possible cancellation of high limbs),
+     followed by one division. */
+
+  /* Since we must ensure that #(a-b) > s, we handle cancellation of
+     high limbs explicitly up front. (FIXME: Or is it better to just
+     subtract, normalize, and use an addition to undo if it turns out
+     the difference is too small?) */
+  for (an = n; an > s; an--)
+    if (ap[an-1] != bp[an-1])
+      break;
+
+  if (an == s)
+    return 0;	/* a and b agree in every limb at index s and above */
+
+  /* Maintain a > b. When needed, swap a and b, and let col keep track
+     of how to update M. */
+  if (ap[an-1] > bp[an-1])
+    {
+      /* a is largest. In the subtraction step, we need to update
+	 column 1 of M */
+      col = 1;
+    }
+  else
+    {
+      MP_PTR_SWAP (ap, bp);
+      col = 0;
+    }
+
+  bn = n;
+  MPN_NORMALIZE (bp, bn);
+  if (bn <= s)
+    return 0;
+
+  /* We have #a, #b > s. When is it possible that #(a-b) < s? For
+     cancellation to happen, the numbers must be of the form
+
+       a = x + 1, 0,            ..., 0,            al
+       b = x    , GMP_NUMB_MAX, ..., GMP_NUMB_MAX, bl
+
+     where al, bl denotes the least significant k limbs. If al < bl,
+     then #(a-b) < k, and if also high(al) != 0, high(bl) != GMP_NUMB_MAX,
+     then #(a-b) = k. If al >= bl, then #(a-b) = k + 1. */
+
+  if (ap[an-1] == bp[an-1] + 1)
+    {
+      mp_size_t k;
+      int c;
+      for (k = an-1; k > s; k--)
+	if (ap[k-1] != 0 || bp[k-1] != GMP_NUMB_MAX)
+	  break;
+
+      MPN_CMP (c, ap, bp, k);
+      if (c < 0)
+	{
+	  mp_limb_t cy;
+
+	  /* The limbs from k and up are cancelled. */
+	  if (k == s)
+	    return 0;
+	  cy = mpn_sub_n (ap, ap, bp, k);
+	  ASSERT (cy == 1);
+	  an = k;
+	}
+      else
+	{
+	  ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, k));
+	  ap[k] = 1;
+	  an = k + 1;
+	}
+    }
+  else
+    ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, an));
+
+  ASSERT (an > s);
+  ASSERT (ap[an-1] > 0);
+  ASSERT (bn > s);
+  ASSERT (bp[bn-1] > 0);
+
+  hgcd_matrix_update_1 (M, col);
+
+  if (an < bn)
+    {
+      MPN_PTR_SWAP (ap, an, bp, bn);
+      col ^= 1;
+    }
+  else if (an == bn)
+    {
+      int c;
+      MPN_CMP (c, ap, bp, an);
+      if (c < 0)
+	{
+	  MP_PTR_SWAP (ap, bp);
+	  col ^= 1;
+	}
+    }
+
+  /* Divide a / b. */
+  qn = an + 1 - bn;
+
+  /* FIXME: We could use an approximate division, that may return a
+     too small quotient, and only guarantee that the size of r is
+     almost the size of b. FIXME: Let ap and remainder overlap. */
+  mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
+  qn -= (tp[qn -1] == 0);	/* strip a zero top limb of the quotient */
+
+  /* Normalize remainder */
+  an = bn;
+  for ( ; an > s; an--)
+    if (ap[an-1] > 0)
+      break;
+
+  if (an <= s)
+    {
+      /* Quotient is too large */
+      mp_limb_t cy;
+
+      cy = mpn_add (ap, bp, bn, ap, an);
+
+      if (cy > 0)
+	{
+	  ASSERT (bn < n);
+	  ap[bn] = cy;
+	  bp[bn] = 0;
+	  bn++;
+	}
+
+      MPN_DECR_U (tp, qn, 1);
+      qn -= (tp[qn-1] == 0);
+    }
+
+  if (qn > 0)
+    hgcd_matrix_update_q (M, tp, qn, col, tp + qn);
+
+  return bn;
+}
+
+/* Reduces a,b until |a-b| fits in n/2 + 1 limbs, by repeated hgcd_step
+   calls. Constructs matrix M with elements of size at most (n+1)/2 - 1.
+   Returns new size of a, b, or zero if no reduction is possible. */
+mp_size_t
+mpn_hgcd_lehmer (mp_ptr ap, mp_ptr bp, mp_size_t n,
+		 struct hgcd_matrix *M, mp_ptr tp)
+{
+  mp_size_t s = n/2 + 1;
+  mp_size_t nn;
+
+  ASSERT (n > s);
+  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+
+  nn = hgcd_step (n, ap, bp, s, M, tp);
+  if (!nn)
+    return 0;	/* not a single step was possible */
+
+  for (;;)
+    {
+      n = nn;
+      ASSERT (n > s);
+      nn = hgcd_step (n, ap, bp, s, M, tp);
+      if (!nn )
+	return n;	/* no more steps: n is the size after the last one */
+    }
+}
+
+/* Multiply M by M1 from the right. Needs 4*(M->n + M1->n) + 5 limbs
+   of temporary storage (see mpn_matrix22_mul_itch). */
+void
+mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
+		     mp_ptr tp)
+{
+  mp_size_t n;
+
+  /* About the new size of M's elements. Since M1's diagonal elements
+     are > 0, no element can decrease. The new elements are of size
+     M->n + M1->n, one limb more or less. The computation of the
+     matrix product produces elements of size M->n + M1->n + 1. But
+     the true size, after normalization, may be three limbs smaller. */
+
+  /* FIXME: Strassen multiplication gives only a small speedup. In FFT
+     multiplication range, this function could be sped up quite a lot
+     using invariance. */
+  ASSERT (M->n + M1->n < M->alloc);
+
+  ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
+	   | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
+
+  ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
+	   | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
+
+  mpn_matrix22_mul (M->p[0][0], M->p[0][1],
+		    M->p[1][0], M->p[1][1], M->n,
+		    M1->p[0][0], M1->p[0][1],
+		    M1->p[1][0], M1->p[1][1], M1->n, tp);
+
+  /* Index of last potentially non-zero limb, size is one greater. */
+  n = M->n + M1->n;
+
+  /* Step down up to three times while limb n is zero in all four elements. */
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+  ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
+
+  M->n = n + 1;
+}
+
+/* Multiplies the least significant p limbs of (a;b) by M^-1, in place.
+   Returns the new size of a, b. Temporary space needed: 2 * (p + M->n) limbs. */
+mp_size_t
+mpn_hgcd_matrix_adjust (struct hgcd_matrix *M,
+			mp_size_t n, mp_ptr ap, mp_ptr bp,
+			mp_size_t p, mp_ptr tp)
+{
+  /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
+     = (r11 a - r01 b; - r10 a + r00 b) */
+
+  mp_ptr t0 = tp;
+  mp_ptr t1 = tp + p + M->n;
+  mp_limb_t ah, bh;
+  mp_limb_t cy;
+
+  ASSERT (p + M->n  < n);
+
+  /* First compute the two values depending on a, before overwriting a */
+
+  if (M->n >= p)
+    {
+      mpn_mul (t0, M->p[1][1], M->n, ap, p);
+      mpn_mul (t1, M->p[1][0], M->n, ap, p);
+    }
+  else
+    {
+      mpn_mul (t0, ap, p, M->p[1][1], M->n);
+      mpn_mul (t1, ap, p, M->p[1][0], M->n);
+    }
+
+  /* Update a */
+  MPN_COPY (ap, t0, p);
+  ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
+
+  if (M->n >= p)
+    mpn_mul (t0, M->p[0][1], M->n, bp, p);
+  else
+    mpn_mul (t0, bp, p, M->p[0][1], M->n);
+
+  cy = mpn_sub (ap, ap, n, t0, p + M->n);
+  ASSERT (cy <= ah);
+  ah -= cy;
+
+  /* Update b */
+  if (M->n >= p)
+    mpn_mul (t0, M->p[0][0], M->n, bp, p);
+  else
+    mpn_mul (t0, bp, p, M->p[0][0], M->n);
+
+  MPN_COPY (bp, t0, p);
+  bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
+  cy = mpn_sub (bp, bp, n, t1, p + M->n);
+  ASSERT (cy <= bh);
+  bh -= cy;
+
+  if (ah > 0 || bh > 0)
+    {
+      ap[n] = ah;	/* a surviving carry becomes limb n of both numbers */
+      bp[n] = bh;
+      n++;
+    }
+  else
+    {
+      /* The subtraction can reduce the size by at most one limb. */
+      if (ap[n-1] == 0 && bp[n-1] == 0)
+	n--;
+    }
+  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+  return n;
+}
 
 /* Size analysis for hgcd:
 
@@ -46,15 +505,16 @@ see https://www.gnu.org/licenses/.  */
 
    Let S(r) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1)
    = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,
-   and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total,
-   4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12.
+   and for the hgcd_matrix_mul, we may need 4 ceil(n/2) + 1. In total,
+   4 * ceil(n/4) + 4 ceil(n/2) + 5 <= 12 ceil(n/4) + 5.
 
    For the recursive call, we need S(n1) = S(ceil(n/2)).
 
-   S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2))
-	<= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k))
-	<= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k))
-	<= 20 ceil(n/4) + 22k + S(ceil(n/2^k))
+   S(n) <= 12*ceil(n/4) + 5 + S(ceil(n/2))
+	<= 12*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 5k + S(ceil(n/2^k))
+	<= 12*(2 ceil(n/4) + k) + 5k + S(n/2^k)
+	<= 24 ceil(n/4) + 17k + S(n/2^k)
+
 */
 
 mp_size_t
@@ -65,14 +525,15 @@ mpn_hgcd_itch (mp_size_t n)
   mp_size_t nscaled;
 
   if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
-    return n;
+    return MPN_HGCD_LEHMER_ITCH (n);
 
   /* Get the recursion depth. */
   nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
   count_leading_zeros (count, nscaled);
   k = GMP_LIMB_BITS - count;
 
-  return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
+  return 24 * ((n+3) / 4) + 17 * k
+    + MPN_HGCD_LEHMER_ITCH (HGCD_THRESHOLD);
 }
 
 /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
@@ -84,8 +545,9 @@ mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
 	  struct hgcd_matrix *M, mp_ptr tp)
 {
   mp_size_t s = n/2 + 1;
+  mp_size_t n2 = (3*n)/4 + 1;
 
-  mp_size_t nn;
+  mp_size_t p, nn;
   int success = 0;
 
   if (n <= s)
@@ -97,83 +559,65 @@ mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
 
   ASSERT ((n+1)/2 - 1 < M->alloc);
 
-  if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
-    {
-      mp_size_t n2 = (3*n)/4 + 1;
-      mp_size_t p = n/2;
+  if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
+    return mpn_hgcd_lehmer (ap, bp, n, M, tp);
 
-      nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
-      if (nn)
-	{
-	  n = nn;
-	  success = 1;
-	}
+  p = n/2;
+  nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
+  if (nn > 0)
+    {
+      /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
+	 = 2 (n - 1) */
+      n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
+      success = 1;
+    }
+  while (n > n2)
+    {
+      /* Needs n + 1 storage */
+      nn = hgcd_step (n, ap, bp, s, M, tp);
+      if (!nn)
+	return success ? n : 0;
+      n = nn;
+      success = 1;
+    }
 
-      /* NOTE: It appears this loop never runs more than once (at
-	 least when not recursing to hgcd_appr). */
-      while (n > n2)
-	{
-	  /* Needs n + 1 storage */
-	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
-	  if (!nn)
-	    return success ? n : 0;
+  if (n > s + 2)
+    {
+      struct hgcd_matrix M1;
+      mp_size_t scratch;
 
-	  n = nn;
-	  success = 1;
-	}
+      p = 2*s - n + 1;
+      scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
 
-      if (n > s + 2)
+      mpn_hgcd_matrix_init(&M1, n - p, tp);
+      nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
+      if (nn > 0)
 	{
-	  struct hgcd_matrix M1;
-	  mp_size_t scratch;
-
-	  p = 2*s - n + 1;
-	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
-
-	  mpn_hgcd_matrix_init(&M1, n - p, tp);
-
-	  /* FIXME: Should use hgcd_reduce, but that may require more
-	     scratch space, which requires review. */
-
-	  nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
-	  if (nn > 0)
-	    {
-	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
-	      ASSERT (M->n + 2 >= M1.n);
-
-	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
-		 then either q or q + 1 is a correct quotient, and M1 will
-		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
-		 rules out the case that the size of M * M1 is much
-		 smaller than the expected M->n + M1->n. */
-
-	      ASSERT (M->n + M1.n < M->alloc);
-
-	      /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
-		 = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
-	      n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
-
-	      /* We need a bound for of M->n + M1.n. Let n be the original
-		 input size. Then
-
-		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
-
-		 and it follows that
-
-		 M.n + M1.n <= ceil(n/2) + 1
-
-		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
-		 amount of needed scratch space. */
-	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
-	      success = 1;
-	    }
+	  /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+	  ASSERT (M->n + 2 >= M1.n);
+
+	  /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+	     then either q or q + 1 is a correct quotient, and M1 will
+	     start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+	     rules out the case that the size of M * M1 is much
+	     smaller than the expected M->n + M1->n. */
+
+	  ASSERT (M->n + M1.n < M->alloc);
+
+	  /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
+	     = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
+	  n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
+	  /* Needs 4 ceil(n/2) + 1 */
+	  mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+	  success = 1;
 	}
     }
 
+  /* This really is the base case */
   for (;;)
     {
       /* Needs s+3 < n */
-      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+      nn = hgcd_step (n, ap, bp, s, M, tp);
       if (!nn)
 	return success ? n : 0;
 
diff --git a/gmp/mpn/generic/hgcd2.c b/gmp/mpn/generic/hgcd2.c
index 129637063f..ffc8c44f67 100644
--- a/gmp/mpn/generic/hgcd2.c
+++ b/gmp/mpn/generic/hgcd2.c
@@ -4,33 +4,23 @@
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 1996, 1998, 2000-2004, 2008, 2012 Free Software Foundation, Inc.
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -209,7 +199,7 @@ div2 (mp_ptr rp,
 
 /* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
    matrix M. Returns 1 if we make progress, i.e. can perform at least
-   one subtraction. Otherwise returns zero. */
+   one subtraction. Otherwise returns zero.. */
 
 /* FIXME: Possible optimizations:
 
@@ -348,6 +338,8 @@ mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
   for (;;)
     {
       ASSERT (ah >= bh);
+      if (ah == bh)
+	break;
 
       ah -= bh;
       if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
@@ -377,6 +369,8 @@ mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
 	}
     subtract_a1:
       ASSERT (bh >= ah);
+      if (ah == bh)
+	break;
 
       bh -= ah;
       if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
@@ -445,3 +439,31 @@ mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
   n += (ah | bh) > 0;
   return n;
 }
+
+/* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
+   the left. Uses three buffers, to avoid a copy. Returns the new size. */
+mp_size_t
+mpn_hgcd_mul_matrix1_inverse_vector (const struct hgcd_matrix1 *M,
+				     mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
+{
+  mp_limb_t h0, h1;
+
+  /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
+
+     r  = u11 * a
+     r -= u01 * b
+     b *= u00
+     b -= u10 * a
+  */
+
+  h0 =    mpn_mul_1 (rp, ap, n, M->u[1][1]);
+  h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
+  ASSERT (h0 == h1);	/* high limb of the product and the borrow must cancel */
+
+  h0 =    mpn_mul_1 (bp, bp, n, M->u[0][0]);
+  h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
+  ASSERT (h0 == h1);	/* likewise for b */
+
+  n -= (rp[n-1] | bp[n-1]) == 0;	/* shrink by one if both top limbs are zero */
+  return n;
+}
diff --git a/gmp/mpn/generic/hgcd2_jacobi.c b/gmp/mpn/generic/hgcd2_jacobi.c
deleted file mode 100644
index e59c32a341..0000000000
--- a/gmp/mpn/generic/hgcd2_jacobi.c
+++ /dev/null
@@ -1,366 +0,0 @@
-/* hgcd2_jacobi.c
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 1996, 1998, 2000-2004, 2008, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#if GMP_NAIL_BITS > 0
-#error Nails not supported.
-#endif
-
-/* FIXME: Duplicated in hgcd2.c. Should move to gmp-impl.h, and
-   possibly be renamed. */
-static inline mp_limb_t
-div1 (mp_ptr rp,
-      mp_limb_t n0,
-      mp_limb_t d0)
-{
-  mp_limb_t q = 0;
-
-  if ((mp_limb_signed_t) n0 < 0)
-    {
-      int cnt;
-      for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
-	{
-	  d0 = d0 << 1;
-	}
-
-      q = 0;
-      while (cnt)
-	{
-	  q <<= 1;
-	  if (n0 >= d0)
-	    {
-	      n0 = n0 - d0;
-	      q |= 1;
-	    }
-	  d0 = d0 >> 1;
-	  cnt--;
-	}
-    }
-  else
-    {
-      int cnt;
-      for (cnt = 0; n0 >= d0; cnt++)
-	{
-	  d0 = d0 << 1;
-	}
-
-      q = 0;
-      while (cnt)
-	{
-	  d0 = d0 >> 1;
-	  q <<= 1;
-	  if (n0 >= d0)
-	    {
-	      n0 = n0 - d0;
-	      q |= 1;
-	    }
-	  cnt--;
-	}
-    }
-  *rp = n0;
-  return q;
-}
-
-/* Two-limb division optimized for small quotients.  */
-static inline mp_limb_t
-div2 (mp_ptr rp,
-      mp_limb_t nh, mp_limb_t nl,
-      mp_limb_t dh, mp_limb_t dl)
-{
-  mp_limb_t q = 0;
-
-  if ((mp_limb_signed_t) nh < 0)
-    {
-      int cnt;
-      for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
-	{
-	  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
-	  dl = dl << 1;
-	}
-
-      while (cnt)
-	{
-	  q <<= 1;
-	  if (nh > dh || (nh == dh && nl >= dl))
-	    {
-	      sub_ddmmss (nh, nl, nh, nl, dh, dl);
-	      q |= 1;
-	    }
-	  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
-	  dh = dh >> 1;
-	  cnt--;
-	}
-    }
-  else
-    {
-      int cnt;
-      for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
-	{
-	  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
-	  dl = dl << 1;
-	}
-
-      while (cnt)
-	{
-	  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
-	  dh = dh >> 1;
-	  q <<= 1;
-	  if (nh > dh || (nh == dh && nl >= dl))
-	    {
-	      sub_ddmmss (nh, nl, nh, nl, dh, dl);
-	      q |= 1;
-	    }
-	  cnt--;
-	}
-    }
-
-  rp[0] = nl;
-  rp[1] = nh;
-
-  return q;
-}
-
-int
-mpn_hgcd2_jacobi (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
-		  struct hgcd_matrix1 *M, unsigned *bitsp)
-{
-  mp_limb_t u00, u01, u10, u11;
-  unsigned bits = *bitsp;
-
-  if (ah < 2 || bh < 2)
-    return 0;
-
-  if (ah > bh || (ah == bh && al > bl))
-    {
-      sub_ddmmss (ah, al, ah, al, bh, bl);
-      if (ah < 2)
-	return 0;
-
-      u00 = u01 = u11 = 1;
-      u10 = 0;
-      bits = mpn_jacobi_update (bits, 1, 1);
-    }
-  else
-    {
-      sub_ddmmss (bh, bl, bh, bl, ah, al);
-      if (bh < 2)
-	return 0;
-
-      u00 = u10 = u11 = 1;
-      u01 = 0;
-      bits = mpn_jacobi_update (bits, 0, 1);
-    }
-
-  if (ah < bh)
-    goto subtract_a;
-
-  for (;;)
-    {
-      ASSERT (ah >= bh);
-      if (ah == bh)
-	goto done;
-
-      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
-	{
-	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
-	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
-
-	  break;
-	}
-
-      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
-	 1), affecting the second column of M. */
-      ASSERT (ah > bh);
-      sub_ddmmss (ah, al, ah, al, bh, bl);
-
-      if (ah < 2)
-	goto done;
-
-      if (ah <= bh)
-	{
-	  /* Use q = 1 */
-	  u01 += u00;
-	  u11 += u10;
-	  bits = mpn_jacobi_update (bits, 1, 1);
-	}
-      else
-	{
-	  mp_limb_t r[2];
-	  mp_limb_t q = div2 (r, ah, al, bh, bl);
-	  al = r[0]; ah = r[1];
-	  if (ah < 2)
-	    {
-	      /* A is too small, but q is correct. */
-	      u01 += q * u00;
-	      u11 += q * u10;
-	      bits = mpn_jacobi_update (bits, 1, q & 3);
-	      goto done;
-	    }
-	  q++;
-	  u01 += q * u00;
-	  u11 += q * u10;
-	  bits = mpn_jacobi_update (bits, 1, q & 3);
-	}
-    subtract_a:
-      ASSERT (bh >= ah);
-      if (ah == bh)
-	goto done;
-
-      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
-	{
-	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
-	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
-
-	  goto subtract_a1;
-	}
-
-      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
-	 1), affecting the first column of M. */
-      sub_ddmmss (bh, bl, bh, bl, ah, al);
-
-      if (bh < 2)
-	goto done;
-
-      if (bh <= ah)
-	{
-	  /* Use q = 1 */
-	  u00 += u01;
-	  u10 += u11;
-	  bits = mpn_jacobi_update (bits, 0, 1);
-	}
-      else
-	{
-	  mp_limb_t r[2];
-	  mp_limb_t q = div2 (r, bh, bl, ah, al);
-	  bl = r[0]; bh = r[1];
-	  if (bh < 2)
-	    {
-	      /* B is too small, but q is correct. */
-	      u00 += q * u01;
-	      u10 += q * u11;
-	      bits = mpn_jacobi_update (bits, 0, q & 3);
-	      goto done;
-	    }
-	  q++;
-	  u00 += q * u01;
-	  u10 += q * u11;
-	  bits = mpn_jacobi_update (bits, 0, q & 3);
-	}
-    }
-
-  /* NOTE: Since we discard the least significant half limb, we don't
-     get a truly maximal M (corresponding to |a - b| <
-     2^{GMP_LIMB_BITS +1}). */
-  /* Single precision loop */
-  for (;;)
-    {
-      ASSERT (ah >= bh);
-      if (ah == bh)
-	break;
-
-      ah -= bh;
-      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
-	break;
-
-      if (ah <= bh)
-	{
-	  /* Use q = 1 */
-	  u01 += u00;
-	  u11 += u10;
-	  bits = mpn_jacobi_update (bits, 1, 1);
-	}
-      else
-	{
-	  mp_limb_t r;
-	  mp_limb_t q = div1 (&r, ah, bh);
-	  ah = r;
-	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
-	    {
-	      /* A is too small, but q is correct. */
-	      u01 += q * u00;
-	      u11 += q * u10;
-	      bits = mpn_jacobi_update (bits, 1, q & 3);
-	      break;
-	    }
-	  q++;
-	  u01 += q * u00;
-	  u11 += q * u10;
-	  bits = mpn_jacobi_update (bits, 1, q & 3);
-	}
-    subtract_a1:
-      ASSERT (bh >= ah);
-      if (ah == bh)
-	break;
-
-      bh -= ah;
-      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
-	break;
-
-      if (bh <= ah)
-	{
-	  /* Use q = 1 */
-	  u00 += u01;
-	  u10 += u11;
-	  bits = mpn_jacobi_update (bits, 0, 1);
-	}
-      else
-	{
-	  mp_limb_t r;
-	  mp_limb_t q = div1 (&r, bh, ah);
-	  bh = r;
-	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
-	    {
-	      /* B is too small, but q is correct. */
-	      u00 += q * u01;
-	      u10 += q * u11;
-	      bits = mpn_jacobi_update (bits, 0, q & 3);
-	      break;
-	    }
-	  q++;
-	  u00 += q * u01;
-	  u10 += q * u11;
-	  bits = mpn_jacobi_update (bits, 0, q & 3);
-	}
-    }
-
- done:
-  M->u[0][0] = u00; M->u[0][1] = u01;
-  M->u[1][0] = u10; M->u[1][1] = u11;
-  *bitsp = bits;
-
-  return 1;
-}
diff --git a/gmp/mpn/generic/hgcd_appr.c b/gmp/mpn/generic/hgcd_appr.c
deleted file mode 100644
index 660219372f..0000000000
--- a/gmp/mpn/generic/hgcd_appr.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/* hgcd_appr.c.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Identical to mpn_hgcd_itch. FIXME: Do we really need to add
-   HGCD_THRESHOLD at the end? */
-mp_size_t
-mpn_hgcd_appr_itch (mp_size_t n)
-{
-  if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
-    return n;
-  else
-    {
-      unsigned k;
-      int count;
-      mp_size_t nscaled;
-
-      /* Get the recursion depth. */
-      nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
-      count_leading_zeros (count, nscaled);
-      k = GMP_LIMB_BITS - count;
-
-      return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
-    }
-}
-
-/* Destroys inputs. */
-int
-mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
-	       struct hgcd_matrix *M, mp_ptr tp)
-{
-  mp_size_t s;
-  int success = 0;
-
-  ASSERT (n > 0);
-
-  ASSERT ((ap[n-1] | bp[n-1]) != 0);
-
-  if (n <= 2)
-    /* Implies s = n. A fairly uninteresting case but exercised by the
-       random inputs of the testsuite. */
-    return 0;
-
-  ASSERT ((n+1)/2 - 1 < M->alloc);
-
-  /* We aim for reduction to GMP_NUMB_BITS * s bits. But each time
-     we discard some of the least significant limbs, we must keep one
-     additional bit to account for the truncation error. We maintain
-     the GMP_NUMB_BITS * s - extra_bits as the current target size. */
-
-  s = n/2 + 1;
-  if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
-    {
-      unsigned extra_bits = 0;
-
-      while (n > 2)
-	{
-	  mp_size_t nn;
-
-	  ASSERT (n > s);
-	  ASSERT (n <= 2*s);
-
-	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
-	  if (!nn)
-	    break;
-
-	  n = nn;
-	  success = 1;
-
-	  /* We can truncate and discard the lower p bits whenever nbits <=
-	     2*sbits - p. To account for the truncation error, we must
-	     adjust
-
-	     sbits <-- sbits + 1 - p,
-
-	     rather than just sbits <-- sbits - p. This adjustment makes
-	     the produced matrix slightly smaller than it could be. */
-
-	  if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
-	    {
-	      mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
-
-	      if (extra_bits == 0)
-		{
-		  /* We cross a limb boundary and bump s. We can't do that
-		     if the result is that it makes min(U, V)
-		     smaller than 2^{GMP_NUMB_BITS} s. */
-		  if (s + 1 == n
-		      || mpn_zero_p (ap + s + 1, n - s - 1)
-		      || mpn_zero_p (bp + s + 1, n - s - 1))
-		    continue;
-
-		  extra_bits = GMP_NUMB_BITS - 1;
-		  s++;
-		}
-	      else
-		{
-		  extra_bits--;
-		}
-
-	      /* Drop the p least significant limbs */
-	      ap += p; bp += p; n -= p; s -= p;
-	    }
-	}
-
-      ASSERT (s > 0);
-
-      if (extra_bits > 0)
-	{
-	  /* We can get here only if we have dropped at least one of the least
-	     significant limbs, so we can decrement ap and bp. We can then shift
-	     left by extra_bits, using mpn_rshift by GMP_NUMB_BITS - extra_bits. */
-	  /* NOTE: In the unlikely case that n is large, it would be preferable
-	     to do an initial subdiv step to reduce the size before shifting,
-	     but that would mean duplicating mpn_gcd_subdiv_step with a bit
-	     count rather than a limb count. */
-	  ap--; bp--;
-	  ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
-	  bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
-	  n += (ap[n] | bp[n]) > 0;
-
-	  ASSERT (success);
-
-	  while (n > 2)
-	    {
-	      mp_size_t nn;
-
-	      ASSERT (n > s);
-	      ASSERT (n <= 2*s);
-
-	      nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
-
-	      if (!nn)
-		return 1;
-
-	      n = nn;
-	    }
-	}
-
-      if (n == 2)
-	{
-	  struct hgcd_matrix1 M1;
-	  ASSERT (s == 1);
-
-	  if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
-	    {
-	      /* Multiply M <- M * M1 */
-	      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
-	      success = 1;
-	    }
-	}
-      return success;
-    }
-  else
-    {
-      mp_size_t n2 = (3*n)/4 + 1;
-      mp_size_t p = n/2;
-      mp_size_t nn;
-
-      nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
-      if (nn)
-	{
-	  n = nn;
-	  /* FIXME: Discard some of the low limbs immediately? */
-	  success = 1;
-	}
-
-      while (n > n2)
-	{
-	  mp_size_t nn;
-
-	  /* Needs n + 1 storage */
-	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
-	  if (!nn)
-	    return success;
-
-	  n = nn;
-	  success = 1;
-	}
-      if (n > s + 2)
-	{
-	  struct hgcd_matrix M1;
-	  mp_size_t scratch;
-
-	  p = 2*s - n + 1;
-	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
-
-	  mpn_hgcd_matrix_init(&M1, n - p, tp);
-	  if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
-	    {
-	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
-	      ASSERT (M->n + 2 >= M1.n);
-
-	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
-		 then either q or q + 1 is a correct quotient, and M1 will
-		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
-		 rules out the case that the size of M * M1 is much
-		 smaller than the expected M->n + M1->n. */
-
-	      ASSERT (M->n + M1.n < M->alloc);
-
-	      /* We need a bound for M->n + M1.n. Let n be the original
-		 input size. Then
-
-		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
-
-		 and it follows that
-
-		 M.n + M1.n <= ceil(n/2) + 1
-
-		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
-		 amount of needed scratch space. */
-	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
-	      return 1;
-	    }
-	}
-
-      for(;;)
-	{
-	  mp_size_t nn;
-
-	  ASSERT (n > s);
-	  ASSERT (n <= 2*s);
-
-	  nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
-
-	  if (!nn)
-	    return success;
-
-	  n = nn;
-	  success = 1;
-	}
-    }
-}
diff --git a/gmp/mpn/generic/hgcd_jacobi.c b/gmp/mpn/generic/hgcd_jacobi.c
deleted file mode 100644
index 0a49e5b3a7..0000000000
--- a/gmp/mpn/generic/hgcd_jacobi.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/* hgcd_jacobi.c.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* This file is almost a copy of hgcd.c, with some added calls to
-   mpn_jacobi_update */
-
-struct hgcd_jacobi_ctx
-{
-  struct hgcd_matrix *M;
-  unsigned *bitsp;
-};
-
-static void
-hgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
-		  mp_srcptr qp, mp_size_t qn, int d)
-{
-  ASSERT (!gp);
-  ASSERT (d >= 0);
-
-  MPN_NORMALIZE (qp, qn);
-  if (qn > 0)
-    {
-      struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p;
-      /* NOTES: This is a bit ugly. A tp area is passed to
-	 gcd_subdiv_step, which stores q at the start of that area. We
-	 now use the rest. */
-      mp_ptr tp = (mp_ptr) qp + qn;
-
-      mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp);
-      *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3);
-    }
-}
-
-/* Perform a few steps, using some of mpn_hgcd2, subtraction and
-   division. Reduces the size by almost one limb or more, but never
-   below the given size s. Return new size for a and b, or 0 if no
-   more steps are possible.
-
-   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
-   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
-   fails, needs space for the quotient, qn <= n - s + 1 limbs, and for
-   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
-   resulting size of M.
-
-   If N is the input size to the calling hgcd, then s = floor(N/2) +
-   1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
-   < N, so N is sufficient.
-*/
-
-static mp_size_t
-hgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
-		  struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
-{
-  struct hgcd_matrix1 M1;
-  mp_limb_t mask;
-  mp_limb_t ah, al, bh, bl;
-
-  ASSERT (n > s);
-
-  mask = ap[n-1] | bp[n-1];
-  ASSERT (mask > 0);
-
-  if (n == s + 1)
-    {
-      if (mask < 4)
-	goto subtract;
-
-      ah = ap[n-1]; al = ap[n-2];
-      bh = bp[n-1]; bl = bp[n-2];
-    }
-  else if (mask & GMP_NUMB_HIGHBIT)
-    {
-      ah = ap[n-1]; al = ap[n-2];
-      bh = bp[n-1]; bl = bp[n-2];
-    }
-  else
-    {
-      int shift;
-
-      count_leading_zeros (shift, mask);
-      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
-      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
-      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
-      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
-    }
-
-  /* Try an mpn_hgcd2 step */
-  if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp))
-    {
-      /* Multiply M <- M * M1 */
-      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
-
-      /* Can't swap inputs, so we need to copy. */
-      MPN_COPY (tp, ap, n);
-      /* Multiply M1^{-1} (a;b) */
-      return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
-    }
-
- subtract:
-  {
-    struct hgcd_jacobi_ctx ctx;
-    ctx.M = M;
-    ctx.bitsp = bitsp;
-
-    return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp);
-  }
-}
-
-/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
-   with elements of size at most (n+1)/2 - 1. Returns new size of a,
-   b, or zero if no reduction is possible. */
-
-/* Same scratch requirements as for mpn_hgcd. */
-mp_size_t
-mpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n,
-		 struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
-{
-  mp_size_t s = n/2 + 1;
-
-  mp_size_t nn;
-  int success = 0;
-
-  if (n <= s)
-    /* Happens when n <= 2, a fairly uninteresting case but exercised
-       by the random inputs of the testsuite. */
-    return 0;
-
-  ASSERT ((ap[n-1] | bp[n-1]) > 0);
-
-  ASSERT ((n+1)/2 - 1 < M->alloc);
-
-  if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
-    {
-      mp_size_t n2 = (3*n)/4 + 1;
-      mp_size_t p = n/2;
-
-      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp);
-      if (nn > 0)
-	{
-	  /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
-	     = 2 (n - 1) */
-	  n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
-	  success = 1;
-	}
-      while (n > n2)
-	{
-	  /* Needs n + 1 storage */
-	  nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
-	  if (!nn)
-	    return success ? n : 0;
-	  n = nn;
-	  success = 1;
-	}
-
-      if (n > s + 2)
-	{
-	  struct hgcd_matrix M1;
-	  mp_size_t scratch;
-
-	  p = 2*s - n + 1;
-	  scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
-
-	  mpn_hgcd_matrix_init(&M1, n - p, tp);
-	  nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch);
-	  if (nn > 0)
-	    {
-	      /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
-	      ASSERT (M->n + 2 >= M1.n);
-
-	      /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
-		 then either q or q + 1 is a correct quotient, and M1 will
-		 start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
-		 rules out the case that the size of M * M1 is much
-		 smaller than the expected M->n + M1->n. */
-
-	      ASSERT (M->n + M1.n < M->alloc);
-
-	      /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
-		 = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
-	      n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
-
-	      /* We need a bound for M->n + M1.n. Let n be the original
-		 input size. Then
-
-		 ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
-
-		 and it follows that
-
-		 M.n + M1.n <= ceil(n/2) + 1
-
-		 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
-		 amount of needed scratch space. */
-	      mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
-	      success = 1;
-	    }
-	}
-    }
-
-  for (;;)
-    {
-      /* Needs s+3 < n */
-      nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
-      if (!nn)
-	return success ? n : 0;
-
-      n = nn;
-      success = 1;
-    }
-}
diff --git a/gmp/mpn/generic/hgcd_matrix.c b/gmp/mpn/generic/hgcd_matrix.c
deleted file mode 100644
index d9db331603..0000000000
--- a/gmp/mpn/generic/hgcd_matrix.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/* hgcd_matrix.c.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2003-2005, 2008, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* For input of size n, matrix elements are of size at most ceil(n/2)
-   - 1, but we need two limbs extra. */
-void
-mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
-{
-  mp_size_t s = (n+1)/2 + 1;
-  M->alloc = s;
-  M->n = 1;
-  MPN_ZERO (p, 4 * s);
-  M->p[0][0] = p;
-  M->p[0][1] = p + s;
-  M->p[1][0] = p + 2 * s;
-  M->p[1][1] = p + 3 * s;
-
-  M->p[0][0][0] = M->p[1][1][0] = 1;
-}
-
-/* Update column COL, adding in Q * column (1-COL). Temporary storage:
- * qn + n <= M->alloc, where n is the size of the largest element in
- * column 1 - COL. */
-void
-mpn_hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
-			  unsigned col, mp_ptr tp)
-{
-  ASSERT (col < 2);
-
-  if (qn == 1)
-    {
-      mp_limb_t q = qp[0];
-      mp_limb_t c0, c1;
-
-      c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
-      c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
-
-      M->p[0][col][M->n] = c0;
-      M->p[1][col][M->n] = c1;
-
-      M->n += (c0 | c1) != 0;
-    }
-  else
-    {
-      unsigned row;
-
-      /* Carries for the unlikely case that we get both high words
-	 from the multiplication and carries from the addition. */
-      mp_limb_t c[2];
-      mp_size_t n;
-
-      /* The matrix will not necessarily grow in size by qn, so we
-	 need normalization in order not to overflow M. */
-
-      for (n = M->n; n + qn > M->n; n--)
-	{
-	  ASSERT (n > 0);
-	  if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
-	    break;
-	}
-
-      ASSERT (qn + n <= M->alloc);
-
-      for (row = 0; row < 2; row++)
-	{
-	  if (qn <= n)
-	    mpn_mul (tp, M->p[row][1-col], n, qp, qn);
-	  else
-	    mpn_mul (tp, qp, qn, M->p[row][1-col], n);
-
-	  ASSERT (n + qn >= M->n);
-	  c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
-	}
-
-      n += qn;
-
-      if (c[0] | c[1])
-	{
-	  M->p[0][col][n] = c[0];
-	  M->p[1][col][n] = c[1];
-	  n++;
-	}
-      else
-	{
-	  n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
-	  ASSERT (n >= M->n);
-	}
-      M->n = n;
-    }
-
-  ASSERT (M->n < M->alloc);
-}
-
-/* Multiply M by M1 from the right. Since the M1 elements fit in
-   GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
-   temporary space M->n */
-void
-mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
-		       mp_ptr tp)
-{
-  mp_size_t n0, n1;
-
-  /* Could avoid copy by some swapping of pointers. */
-  MPN_COPY (tp, M->p[0][0], M->n);
-  n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
-  MPN_COPY (tp, M->p[1][0], M->n);
-  n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
-
-  /* Depends on zero initialization */
-  M->n = MAX(n0, n1);
-  ASSERT (M->n < M->alloc);
-}
-
-/* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs
-   of temporary storage (see mpn_matrix22_mul_itch). */
-void
-mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
-		     mp_ptr tp)
-{
-  mp_size_t n;
-
-  /* About the new size of M's elements. Since M1's diagonal elements
-     are > 0, no element can decrease. The new elements are of size
-     M->n + M1->n, one limb more or less. The computation of the
-     matrix product produces elements of size M->n + M1->n + 1. But
-     the true size, after normalization, may be three limbs smaller.
-
-     The reason that the product has normalized size >= M->n + M1->n -
-     2 is subtle. It depends on the fact that M and M1 can be factored
-     as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have
-     M ending with a large power and M1 starting with a large power of
-     the same matrix. */
-
-  /* FIXME: Strassen multiplication gives only a small speedup. In FFT
-     multiplication range, this function could be sped up quite a lot
-     using invariance. */
-  ASSERT (M->n + M1->n < M->alloc);
-
-  ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
-	   | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
-
-  ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
-	   | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
-
-  mpn_matrix22_mul (M->p[0][0], M->p[0][1],
-		    M->p[1][0], M->p[1][1], M->n,
-		    M1->p[0][0], M1->p[0][1],
-		    M1->p[1][0], M1->p[1][1], M1->n, tp);
-
-  /* Index of last potentially non-zero limb, size is one greater. */
-  n = M->n + M1->n;
-
-  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
-  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
-  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
-
-  ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
-
-  M->n = n + 1;
-}
-
-/* Multiplies the least significant p limbs of (a;b) by M^-1.
-   Temporary space needed: 2 * (p + M->n) */
-mp_size_t
-mpn_hgcd_matrix_adjust (const struct hgcd_matrix *M,
-			mp_size_t n, mp_ptr ap, mp_ptr bp,
-			mp_size_t p, mp_ptr tp)
-{
-  /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
-     = (r11 a - r01 b; - r10 a + r00 b) */
-
-  mp_ptr t0 = tp;
-  mp_ptr t1 = tp + p + M->n;
-  mp_limb_t ah, bh;
-  mp_limb_t cy;
-
-  ASSERT (p + M->n  < n);
-
-  /* First compute the two values depending on a, before overwriting a */
-
-  if (M->n >= p)
-    {
-      mpn_mul (t0, M->p[1][1], M->n, ap, p);
-      mpn_mul (t1, M->p[1][0], M->n, ap, p);
-    }
-  else
-    {
-      mpn_mul (t0, ap, p, M->p[1][1], M->n);
-      mpn_mul (t1, ap, p, M->p[1][0], M->n);
-    }
-
-  /* Update a */
-  MPN_COPY (ap, t0, p);
-  ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
-
-  if (M->n >= p)
-    mpn_mul (t0, M->p[0][1], M->n, bp, p);
-  else
-    mpn_mul (t0, bp, p, M->p[0][1], M->n);
-
-  cy = mpn_sub (ap, ap, n, t0, p + M->n);
-  ASSERT (cy <= ah);
-  ah -= cy;
-
-  /* Update b */
-  if (M->n >= p)
-    mpn_mul (t0, M->p[0][0], M->n, bp, p);
-  else
-    mpn_mul (t0, bp, p, M->p[0][0], M->n);
-
-  MPN_COPY (bp, t0, p);
-  bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
-  cy = mpn_sub (bp, bp, n, t1, p + M->n);
-  ASSERT (cy <= bh);
-  bh -= cy;
-
-  if (ah > 0 || bh > 0)
-    {
-      ap[n] = ah;
-      bp[n] = bh;
-      n++;
-    }
-  else
-    {
-      /* The subtraction can reduce the size by at most one limb. */
-      if (ap[n-1] == 0 && bp[n-1] == 0)
-	n--;
-    }
-  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
-  return n;
-}
diff --git a/gmp/mpn/generic/hgcd_reduce.c b/gmp/mpn/generic/hgcd_reduce.c
deleted file mode 100644
index 6f3d61ecea..0000000000
--- a/gmp/mpn/generic/hgcd_reduce.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/* hgcd_reduce.c.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Computes R -= A * B. Result must be non-negative. Normalized down
-   to size an, and resulting size is returned. */
-static mp_size_t
-submul (mp_ptr rp, mp_size_t rn,
-	mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
-{
-  mp_ptr tp;
-  TMP_DECL;
-
-  ASSERT (bn > 0);
-  ASSERT (an >= bn);
-  ASSERT (rn >= an);
-  ASSERT (an + bn <= rn + 1);
-
-  TMP_MARK;
-  tp = TMP_ALLOC_LIMBS (an + bn);
-
-  mpn_mul (tp, ap, an, bp, bn);
-  if (an + bn > rn)
-    {
-      ASSERT (tp[rn] == 0);
-      bn--;
-    }
-  ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn));
-  TMP_FREE;
-
-  while (rn > an && (rp[rn-1] == 0))
-    rn--;
-
-  return rn;
-}
-
-/* Computes (a, b)  <--  M^{-1} (a; b) */
-/* FIXME:
-    x Take scratch parameter, and figure out scratch need.
-
-    x Use some fallback for small M->n?
-*/
-static mp_size_t
-hgcd_matrix_apply (const struct hgcd_matrix *M,
-		   mp_ptr ap, mp_ptr bp,
-		   mp_size_t n)
-{
-  mp_size_t an, bn, un, vn, nn;
-  mp_size_t mn[2][2];
-  mp_size_t modn;
-  mp_ptr tp, sp, scratch;
-  mp_limb_t cy;
-  unsigned i, j;
-
-  TMP_DECL;
-
-  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
-
-  an = n;
-  MPN_NORMALIZE (ap, an);
-  bn = n;
-  MPN_NORMALIZE (bp, bn);
-
-  for (i = 0; i < 2; i++)
-    for (j = 0; j < 2; j++)
-      {
-	mp_size_t k;
-	k = M->n;
-	MPN_NORMALIZE (M->p[i][j], k);
-	mn[i][j] = k;
-      }
-
-  ASSERT (mn[0][0] > 0);
-  ASSERT (mn[1][1] > 0);
-  ASSERT ( (mn[0][1] | mn[1][0]) > 0);
-
-  TMP_MARK;
-
-  if (mn[0][1] == 0)
-    {
-      /* A unchanged, M = (1, 0; q, 1) */
-      ASSERT (mn[0][0] == 1);
-      ASSERT (M->p[0][0][0] == 1);
-      ASSERT (mn[1][1] == 1);
-      ASSERT (M->p[1][1][0] == 1);
-
-      /* Put B <-- B - q A */
-      nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);
-    }
-  else if (mn[1][0] == 0)
-    {
-      /* B unchanged, M = (1, q; 0, 1) */
-      ASSERT (mn[0][0] == 1);
-      ASSERT (M->p[0][0][0] == 1);
-      ASSERT (mn[1][1] == 1);
-      ASSERT (M->p[1][1][0] == 1);
-
-      /* Put A  <-- A - q * B */
-      nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);
-    }
-  else
-    {
-      /* A = m00 a + m01 b  ==> a <= A / m00, b <= A / m01.
-	 B = m10 a + m11 b  ==> a <= B / m10, b <= B / m11. */
-      un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;
-      vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;
-
-      nn = MAX (un, vn);
-      /* In the range of interest, mulmod_bnm1 should always beat mullo. */
-      modn = mpn_mulmod_bnm1_next_size (nn + 1);
-
-      scratch = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (modn, modn, M->n));
-      tp = TMP_ALLOC_LIMBS (modn);
-      sp = TMP_ALLOC_LIMBS (modn);
-
-      ASSERT (n <= 2*modn);
-
-      if (n > modn)
-	{
-	  cy = mpn_add (ap, ap, modn, ap + modn, n - modn);
-	  MPN_INCR_U (ap, modn, cy);
-
-	  cy = mpn_add (bp, bp, modn, bp + modn, n - modn);
-	  MPN_INCR_U (bp, modn, cy);
-
-	  n = modn;
-	}
-
-      mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);
-      mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);
-
-      /* FIXME: Handle the small n case in some better way. */
-      if (n + mn[1][1] < modn)
-	MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);
-      if (n + mn[0][1] < modn)
-	MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);
-
-      cy = mpn_sub_n (tp, tp, sp, modn);
-      MPN_DECR_U (tp, modn, cy);
-
-      ASSERT (mpn_zero_p (tp + nn, modn - nn));
-
-      mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);
-      MPN_COPY (ap, tp, nn);
-      mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);
-
-      if (n + mn[1][0] < modn)
-	MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);
-      if (n + mn[0][0] < modn)
-	MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);
-
-      cy = mpn_sub_n (tp, tp, sp, modn);
-      MPN_DECR_U (tp, modn, cy);
-
-      ASSERT (mpn_zero_p (tp + nn, modn - nn));
-      MPN_COPY (bp, tp, nn);
-
-      while ( (ap[nn-1] | bp[nn-1]) == 0)
-	{
-	  nn--;
-	  ASSERT (nn > 0);
-	}
-    }
-  TMP_FREE;
-
-  return nn;
-}
-
-mp_size_t
-mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p)
-{
-  mp_size_t itch;
-  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
-    {
-      itch = mpn_hgcd_itch (n-p);
-
-      /* For arbitrary p, the storage for _adjust is 2*(p + M->n) <= 2 *
-	 (p + ceil((n-p)/2) - 1) <= n + p - 1 */
-      if (itch < n + p - 1)
-	itch = n + p - 1;
-    }
-  else
-    {
-      itch = 2*(n-p) + mpn_hgcd_itch (n-p);
-      /* Currently, hgcd_matrix_apply allocates its own storage. */
-    }
-  return itch;
-}
-
-/* FIXME: Document storage need. */
-mp_size_t
-mpn_hgcd_reduce (struct hgcd_matrix *M,
-		 mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p,
-		 mp_ptr tp)
-{
-  mp_size_t nn;
-  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
-    {
-      nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
-      if (nn > 0)
-	/* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
-	   = 2 (n - 1) */
-	return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
-    }
-  else
-    {
-      MPN_COPY (tp, ap + p, n - p);
-      MPN_COPY (tp + n - p, bp + p, n - p);
-      if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p)))
-	return hgcd_matrix_apply (M, ap, bp, n);
-    }
-  return 0;
-}
diff --git a/gmp/mpn/generic/hgcd_step.c b/gmp/mpn/generic/hgcd_step.c
deleted file mode 100644
index e58894ff3b..0000000000
--- a/gmp/mpn/generic/hgcd_step.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/* hgcd_step.c.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-static void
-hgcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
-	   mp_srcptr qp, mp_size_t qn, int d)
-{
-  ASSERT (!gp);
-  ASSERT (d >= 0);
-  ASSERT (d <= 1);
-
-  MPN_NORMALIZE (qp, qn);
-  if (qn > 0)
-    {
-      struct hgcd_matrix *M = (struct hgcd_matrix *) p;
-      /* NOTES: This is a bit ugly. A tp area is passed to
-	 gcd_subdiv_step, which stores q at the start of that area. We
-	 now use the rest. */
-      mp_ptr tp = (mp_ptr) qp + qn;
-      mpn_hgcd_matrix_update_q (M, qp, qn, d, tp);
-    }
-}
-
-/* Perform a few steps, using some of mpn_hgcd2, subtraction and
-   division. Reduces the size by almost one limb or more, but never
-   below the given size s. Return new size for a and b, or 0 if no
-   more steps are possible.
-
-   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
-   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
-   fails, needs space for the quotient, qn <= n - s limbs, and for
-   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
-   (resulting size of M) + 1.
-
-   If N is the input size to the calling hgcd, then s = floor(N/2) +
-   1, M->n < N, qn + product size <= n - s + n - s + 1 = 2 (n - s) + 1
-   <= N.
-*/
-
-mp_size_t
-mpn_hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
-	       struct hgcd_matrix *M, mp_ptr tp)
-{
-  struct hgcd_matrix1 M1;
-  mp_limb_t mask;
-  mp_limb_t ah, al, bh, bl;
-
-  ASSERT (n > s);
-
-  mask = ap[n-1] | bp[n-1];
-  ASSERT (mask > 0);
-
-  if (n == s + 1)
-    {
-      if (mask < 4)
-	goto subtract;
-
-      ah = ap[n-1]; al = ap[n-2];
-      bh = bp[n-1]; bl = bp[n-2];
-    }
-  else if (mask & GMP_NUMB_HIGHBIT)
-    {
-      ah = ap[n-1]; al = ap[n-2];
-      bh = bp[n-1]; bl = bp[n-2];
-    }
-  else
-    {
-      int shift;
-
-      count_leading_zeros (shift, mask);
-      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
-      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
-      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
-      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
-    }
-
-  /* Try an mpn_hgcd2 step */
-  if (mpn_hgcd2 (ah, al, bh, bl, &M1))
-    {
-      /* Multiply M <- M * M1 */
-      mpn_hgcd_matrix_mul_1 (M, &M1, tp);
-
-      /* Can't swap inputs, so we need to copy. */
-      MPN_COPY (tp, ap, n);
-      /* Multiply M1^{-1} (a;b) */
-      return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
-    }
-
- subtract:
-
-  return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_hook, M, tp);
-}
diff --git a/gmp/mpn/generic/invert.c b/gmp/mpn/generic/invert.c
index 4bc459d728..e40d3611e6 100644
--- a/gmp/mpn/generic/invert.c
+++ b/gmp/mpn/generic/invert.c
@@ -1,91 +1,60 @@
-/* invert.c -- Compute floor((B^{2n}-1)/U) - B^n.
+/* Compute {up,n}^(-1).
 
-   Contributed to the GNU project by Marco Bodrato.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright (C) 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
+Copyright (C) 2007 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
+#include <stdlib.h>
 #include "gmp.h"
 #include "gmp-impl.h"
-#include "longlong.h"
+
+/* Formulas:
+	z = 2z-(zz)d
+	z = 2z-(zd)z
+	z = z(2-zd)
+	z = z-z*(zd-1)
+	z = z+z*(1-zd)
+*/
+
+mp_size_t
+mpn_invert_itch (mp_size_t n)
+{
+  return 3 * n + 2;
+}
 
 void
 mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
 {
-  ASSERT (n > 0);
-  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
-  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
-  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
-  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
-
-  if (n == 1)
-    invert_limb (*ip, *dp);
-  else {
-    TMP_DECL;
-
-    TMP_MARK;
-    if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
-      {
-	/* Maximum scratch needed by this branch: 2*n */
-	mp_size_t i;
-	mp_ptr xp;
-
-	xp = scratch;				/* 2 * n limbs */
-	for (i = n - 1; i >= 0; i--)
-	  xp[i] = GMP_NUMB_MAX;
-	mpn_com (xp + n, dp, n);
-	if (n == 2) {
-	  mpn_divrem_2 (ip, 0, xp, 4, dp);
-	} else {
-	  gmp_pi1_t inv;
-	  invert_pi1 (inv, dp[n-1], dp[n-2]);
-	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
-	  mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
-	}
-      }
-    else { /* Use approximated inverse; correct the result if needed. */
-      mp_limb_t e; /* The possible error in the approximate inverse */
+  mp_ptr np, rp;
+  mp_size_t i;
+  TMP_DECL;
+
+  TMP_MARK;
+  if (scratch == NULL)
+    {
+      scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (n));
+    }
 
-      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
-      e = mpn_ni_invertappr (ip, dp, n, scratch);
+  np = scratch;					/* 2 * n limbs */
+  rp = scratch + 2 * n;				/* n + 2 limbs */
+  for (i = n - 1; i >= 0; i--)
+    np[i] = ~CNST_LIMB(0);
+  mpn_com_n (np + n, dp, n);
+  mpn_tdiv_qr (rp, ip, 0L, np, 2 * n, dp, n);
+  MPN_COPY (ip, rp, n);
 
-      if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
-	/* Code to detect and correct the "off by one" approximation. */
-	mpn_mul_n (scratch, ip, dp, n);
-	ASSERT_NOCARRY (mpn_add_n (scratch + n, scratch + n, dp, n));
-	if (! mpn_add (scratch, scratch, 2*n, dp, n))
-	  MPN_INCR_U (ip, n, 1); /* The value was wrong, correct it.  */
-      }
-    }
-    TMP_FREE;
-  }
+  TMP_FREE;
 }
diff --git a/gmp/mpn/generic/invertappr.c b/gmp/mpn/generic/invertappr.c
deleted file mode 100644
index 12326b8b75..0000000000
--- a/gmp/mpn/generic/invertappr.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* mpn_invertappr and helper functions.  Compute I such that
-   floor((B^{2n}-1)/U - 1 <= I + B^n <= floor((B^{2n}-1)/U.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   The algorithm used here was inspired by ApproximateReciprocal from "Modern
-   Computer Arithmetic", by Richard P. Brent and Paul Zimmermann.  Special
-   thanks to Paul Zimmermann for his very valuable suggestions on all the
-   theoretical aspects during the work on this code.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright (C) 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-/* FIXME: Remove NULL and TMP_*, as soon as all the callers properly
-   allocate and pass the scratch to the function. */
-#include <stdlib.h>		/* for NULL */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* FIXME: The iterative version splits the operand in two slightly unbalanced
-   parts, the use of log_2 (or counting the bits) underestimate the maximum
-   number of iterations.  */
-
-#if TUNE_PROGRAM_BUILD
-#define NPOWS \
- ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
-#define MAYBE_dcpi1_divappr   1
-#else
-#define NPOWS \
- ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (INV_NEWTON_THRESHOLD))
-#define MAYBE_dcpi1_divappr \
-  (INV_NEWTON_THRESHOLD < DC_DIVAPPR_Q_THRESHOLD)
-#if (INV_NEWTON_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD) && \
-    (INV_APPR_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD)
-#undef  INV_MULMOD_BNM1_THRESHOLD
-#define INV_MULMOD_BNM1_THRESHOLD 0 /* always when Newton */
-#endif
-#endif
-
-/* All the three functions mpn{,_bc,_ni}_invertappr (ip, dp, n, scratch), take
-   the strictly normalised value {dp,n} (i.e., most significant bit must be set)
-   as an input, and compute {ip,n}: the approximate reciprocal of {dp,n}.
-
-   Let e = mpn*_invertappr (ip, dp, n, scratch) be the returned value; the
-   following conditions are satisfied by the output:
-     0 <= e <= 1;
-     {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1+e) .
-   I.e. e=0 means that the result {ip,n} equals the one given by mpn_invert.
-	e=1 means that the result _may_ be one less than expected.
-
-   The _bc version returns e=1 most of the time.
-   The _ni version should return e=0 most of the time; only about 1% of
-   possible random input should give e=1.
-
-   When the strict result is needed, i.e., e=0 in the relation above:
-     {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1) ;
-   the function mpn_invert (ip, dp, n, scratch) should be used instead.  */
-
-/* Maximum scratch needed by this branch (at tp): 3*n + 2 */
-static mp_limb_t
-mpn_bc_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr tp)
-{
-  mp_ptr xp;
-
-  ASSERT (n > 0);
-  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
-  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
-  ASSERT (! MPN_OVERLAP_P (ip, n, tp, mpn_invertappr_itch(n)));
-  ASSERT (! MPN_OVERLAP_P (dp, n, tp, mpn_invertappr_itch(n)));
-
-  /* Compute a base value of r limbs. */
-  if (n == 1)
-    invert_limb (*ip, *dp);
-  else {
-    mp_size_t i;
-    xp = tp + n + 2;				/* 2 * n limbs */
-
-    for (i = n - 1; i >= 0; i--)
-      xp[i] = GMP_NUMB_MAX;
-    mpn_com (xp + n, dp, n);
-
-    /* Now xp contains B^2n - {dp,n}*B^n - 1 */
-
-    /* FIXME: if mpn_*pi1_divappr_q handles n==2, use it! */
-    if (n == 2) {
-      mpn_divrem_2 (ip, 0, xp, 4, dp);
-    } else {
-      gmp_pi1_t inv;
-      invert_pi1 (inv, dp[n-1], dp[n-2]);
-      if (! MAYBE_dcpi1_divappr
-	  || BELOW_THRESHOLD (n, DC_DIVAPPR_Q_THRESHOLD))
-	mpn_sbpi1_divappr_q (ip, xp, 2 * n, dp, n, inv.inv32);
-      else
-	mpn_dcpi1_divappr_q (ip, xp, 2 * n, dp, n, &inv);
-      MPN_DECR_U(ip, n, 1);
-      return 1;
-    }
-  }
-  return 0;
-}
-
-/* mpn_ni_invertappr: computes the approximate reciprocal using Newton's
-   iterations (at least one).
-
-   Inspired by Algorithm "ApproximateReciprocal", published in "Modern Computer
-   Arithmetic" by Richard P. Brent and Paul Zimmermann, algorithm 3.5, page 121
-   in version 0.4 of the book.
-
-   Some adaptations were introduced, to allow product mod B^m-1 and return the
-   value e.
-
-   USE_MUL_N = 1 (default) introduces a correction in such a way that "the
-   value of B^{n+h}-T computed at step 8 cannot exceed B^n-1" (the book reads
-   "2B^n-1").  This correction should not require to modify the proof.
-
-   We use a wrapped product modulo B^m-1.  NOTE: is there any normalisation
-   problem for the [0] class?  It shouldn't: we compute 2*|A*X_h - B^{n+h}| <
-   B^m-1.  We may get [0] if and only if we get AX_h = B^{n+h}.  This can
-   happen only if A=B^{n}/2, but this implies X_h = B^{h}*2-1 i.e., AX_h =
-   B^{n+h} - A, then we get into the "negative" branch, where X_h is not
-   incremented (because A < B^n).
-
-   FIXME: the scratch for mulmod_bnm1 does not currently fit in the scratch, it
-   is allocated apart.  */
-
-#define USE_MUL_N 1
-
-mp_limb_t
-mpn_ni_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
-{
-  mp_limb_t cy;
-  mp_ptr xp;
-  mp_size_t rn, mn;
-  mp_size_t sizes[NPOWS], *sizp;
-  mp_ptr tp;
-  TMP_DECL;
-#define rp scratch
-
-  ASSERT (n > 2);
-  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
-  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
-  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
-  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
-
-  /* Compute the computation precisions from highest to lowest, leaving the
-     base case size in 'rn'.  */
-  sizp = sizes;
-  rn = n;
-  do {
-    *sizp = rn;
-    rn = ((rn) >> 1) + 1;
-    sizp ++;
-  } while (ABOVE_THRESHOLD (rn, INV_NEWTON_THRESHOLD));
-
-  /* We search the inverse of 0.{dp,n}, we compute it as 1.{ip,n} */
-  dp += n;
-  ip += n;
-
-  /* Compute a base value of rn limbs. */
-  mpn_bc_invertappr (ip - rn, dp - rn, rn, scratch);
-
-  TMP_MARK;
-
-  if (ABOVE_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD))
-    {
-      mn = mpn_mulmod_bnm1_next_size (n + 1);
-      tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mn, n, (n >> 1) + 1));
-    }
-  /* Use Newton's iterations to get the desired precision.*/
-
-  /* define rp scratch; 2rn + 1 limbs <= 2(n>>1 + 1) + 1 <= n + 3  limbs */
-  /* Maximum scratch needed by this branch <= 3*n + 2 */
-  xp = scratch + n + 3;				/*  n + rn limbs */
-  while (1) {
-    mp_limb_t method;
-
-    n = *--sizp;
-    /*
-      v    n  v
-      +----+--+
-      ^ rn ^
-    */
-
-    /* Compute i_jd . */
-    if (BELOW_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD)
-	|| ((mn = mpn_mulmod_bnm1_next_size (n + 1)) > (n + rn))) {
-      /* FIXME: We do only need {xp,n+1}*/
-      mpn_mul (xp, dp - n, n, ip - rn, rn);
-      mpn_add_n (xp + rn, xp + rn, dp - n, n - rn + 1);
-      method = 1; /* Remember we used (truncated) product */
-      /* We computed cy.{xp,rn+n} <- 1.{ip,rn} * 0.{dp,n} */
-    } else { /* Use B^n-1 wraparound */
-      mpn_mulmod_bnm1 (xp, mn, dp - n, n, ip - rn, rn, tp);
-      /* We computed {xp,mn} <- {ip,rn} * {dp,n} mod (B^mn-1) */
-      /* We know that 2*|ip*dp + dp*B^rn - B^{rn+n}| < B^mn-1 */
-      /* Add dp*B^rn mod (B^mn-1) */
-      ASSERT (n >= mn - rn);
-      xp[mn] = 1 + mpn_add_n (xp + rn, xp + rn, dp - n, mn - rn);
-      cy = mpn_add_n (xp, xp, dp - (n - (mn - rn)), n - (mn - rn));
-      MPN_INCR_U (xp + n - (mn - rn), mn + 1 - n + (mn - rn), cy);
-      ASSERT (n + rn >=  mn);
-      /* Subtract B^{rn+n} */
-      MPN_DECR_U (xp + rn + n - mn, 2*mn + 1 - rn - n, 1);
-      if (xp[mn])
-	MPN_INCR_U (xp, mn, xp[mn] - 1);
-      else
-	MPN_DECR_U (xp, mn, 1);
-      method = 0; /* Remember we are working Mod B^m-1 */
-    }
-
-    if (xp[n] < 2) { /* "positive" residue class */
-      cy = 1;
-      while (xp[n] || mpn_cmp (xp, dp - n, n)>0) {
-	xp[n] -= mpn_sub_n (xp, xp, dp - n, n);
-	cy ++;
-      }
-      MPN_DECR_U(ip - rn, rn, cy);
-      ASSERT (cy <= 4); /* at most 3 cycles for the while above */
-      ASSERT_NOCARRY (mpn_sub_n (xp, dp - n, xp, n));
-      ASSERT (xp[n] == 0);
-    } else { /* "negative" residue class */
-      mpn_com (xp, xp, n + 1);
-      MPN_INCR_U(xp, n + 1, method);
-      ASSERT (xp[n] <= 1);
-#if USE_MUL_N
-      if (xp[n]) {
-	MPN_INCR_U(ip - rn, rn, 1);
-	ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
-      }
-#endif
-    }
-
-    /* Compute x_ju_j. FIXME:We need {rp+rn,rn}, mulhi? */
-#if USE_MUL_N
-    mpn_mul_n (rp, xp + n - rn, ip - rn, rn);
-#else
-    rp[2*rn] = 0;
-    mpn_mul (rp, xp + n - rn, rn + xp[n], ip - rn, rn);
-#endif
-    /* We need _only_ the carry from the next addition  */
-    /* Anyway 2rn-n <= 2... we don't need to optimise.  */
-    cy = mpn_add_n (rp + rn, rp + rn, xp + n - rn, 2*rn - n);
-    cy = mpn_add_nc (ip - n, rp + 3*rn - n, xp + rn, n - rn, cy);
-    MPN_INCR_U (ip - rn, rn, cy + (1-USE_MUL_N)*(rp[2*rn] + xp[n]));
-    if (sizp == sizes) { /* Get out of the cycle */
-      /* Check for possible carry propagation from below. */
-      cy = rp[3*rn - n - 1] > GMP_NUMB_MAX - 7; /* Be conservative. */
-/*    cy = mpn_add_1 (rp + rn, rp + rn, 2*rn - n, 4); */
-      break;
-    }
-    rn = n;
-  }
-  TMP_FREE;
-
-  return cy;
-#undef rp
-}
-
-mp_limb_t
-mpn_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
-{
-  mp_limb_t res;
-  TMP_DECL;
-
-  TMP_MARK;
-
-  if (scratch == NULL)
-    scratch = TMP_ALLOC_LIMBS (mpn_invertappr_itch (n));
-
-  ASSERT (n > 0);
-  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
-  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
-  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
-  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
-
-  if (BELOW_THRESHOLD (n, INV_NEWTON_THRESHOLD))
-    res = mpn_bc_invertappr (ip, dp, n, scratch);
-  else
-    res = mpn_ni_invertappr (ip, dp, n, scratch);
-
-  TMP_FREE;
-  return res;
-}
diff --git a/gmp/mpn/generic/jacbase.c b/gmp/mpn/generic/jacbase.c
index cd52bc9513..6972a130d9 100644
--- a/gmp/mpn/generic/jacbase.c
+++ b/gmp/mpn/generic/jacbase.c
@@ -3,33 +3,22 @@
    THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO
    INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP.
 
-Copyright 1999-2002, 2010 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -83,15 +72,15 @@ see https://www.gnu.org/licenses/.  */
 #define PROCESS_TWOS_EVEN               \
   {                                     \
     int  two, mask, shift;              \
-					\
+                                        \
     two = JACOBI_TWO_U_BIT1 (b);        \
     mask = (~a & 2);                    \
     a >>= 1;                            \
-					\
+                                        \
     shift = (~a & 1);                   \
     a >>= shift;                        \
     result_bit1 ^= two ^ (two & mask);  \
-					\
+                                        \
     while ((a & 1) == 0)                \
       {                                 \
 	a >>= 1;                        \
@@ -102,14 +91,14 @@ see https://www.gnu.org/licenses/.  */
 #define PROCESS_TWOS_ANY                \
   {                                     \
     int  two, mask, shift;              \
-					\
+                                        \
     two = JACOBI_TWO_U_BIT1 (b);        \
     shift = (~a & 1);                   \
     a >>= shift;                        \
-					\
+                                        \
     mask = shift << 1;                  \
     result_bit1 ^= (two & mask);        \
-					\
+                                        \
     while ((a & 1) == 0)                \
       {                                 \
 	a >>= 1;                        \
@@ -119,9 +108,9 @@ see https://www.gnu.org/licenses/.  */
   }
 #endif
 
-#if JACOBI_BASE_METHOD < 4
+
 /* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but
-   with a restricted range of inputs accepted, namely b>1, b odd.
+   with a restricted range of inputs accepted, namely b>1, b odd, and a<=b.
 
    The initial result_bit1 is taken as a parameter for the convenience of
    mpz_kronecker_ui() et al.  The sign changes both here and in those
@@ -133,13 +122,17 @@ see https://www.gnu.org/licenses/.  */
 
    Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be
    possible, but a couple of tests suggest it's not a significant speedup,
-   and may even be a slowdown, so what's here is good enough for now. */
+   and may even be a slowdown, so what's here is good enough for now.
+
+   Future: The code doesn't demand a<=b actually, so maybe this could be
+   relaxed.  All the places this is used currently call with a<=b though.  */
 
 int
 mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
 {
   ASSERT (b & 1);  /* b odd */
   ASSERT (b != 1);
+  ASSERT (a <= b);
 
   if (a == 0)
     return 0;
@@ -148,15 +141,11 @@ mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
   if (a == 1)
     goto done;
 
-  if (a >= b)
-    goto a_gt_b;
-
   for (;;)
     {
       result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);
       MP_LIMB_T_SWAP (a, b);
 
-    a_gt_b:
       do
 	{
 	  /* working on (a/b), a,b odd, a>=b */
@@ -177,67 +166,3 @@ mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
  done:
   return JACOBI_BIT1_TO_PN (result_bit1);
 }
-#endif
-
-#if JACOBI_BASE_METHOD == 4
-/* Computes (a/b) for odd b > 1 and any a. The initial bit is taken as a
- * parameter. We have no need for the convention that the sign is in
- * bit 1, internally we use bit 0. */
-
-/* FIXME: Could try table-based count_trailing_zeros. */
-int
-mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int bit)
-{
-  int c;
-
-  ASSERT (b & 1);
-  ASSERT (b > 1);
-
-  if (a == 0)
-    /* This is the only line which depends on b > 1 */
-    return 0;
-
-  bit >>= 1;
-
-  /* Below, we represent a and b shifted right so that the least
-     significant one bit is implicit. */
-
-  b >>= 1;
-
-  count_trailing_zeros (c, a);
-  bit ^= c & (b ^ (b >> 1));
-
-  /* We may have c==GMP_LIMB_BITS-1, so we can't use a>>c+1. */
-  a >>= c;
-  a >>= 1;
-
-  do
-    {
-      mp_limb_t t = a - b;
-      mp_limb_t bgta = LIMB_HIGHBIT_TO_MASK (t);
-
-      if (t == 0)
-	return 0;
-
-      /* If b > a, invoke reciprocity */
-      bit ^= (bgta & a & b);
-
-      /* b <-- min (a, b) */
-      b += (bgta & t);
-
-      /* a <-- |a - b| */
-      a = (t ^ bgta) - bgta;
-
-      /* Number of trailing zeros is the same no matter if we look at
-       * t or a, but using t gives more parallelism. */
-      count_trailing_zeros (c, t);
-      c ++;
-      /* (2/b) = -1 if b = 3 or 5 mod 8 */
-      bit ^= c & (b ^ (b >> 1));
-      a >>= c;
-    }
-  while (b > 0);
-
-  return 1-2*(bit & 1);
-}
-#endif /* JACOBI_BASE_METHOD == 4 */
diff --git a/gmp/mpn/generic/jacobi.c b/gmp/mpn/generic/jacobi.c
deleted file mode 100644
index bdc3ec67da..0000000000
--- a/gmp/mpn/generic/jacobi.c
+++ /dev/null
@@ -1,295 +0,0 @@
-/* jacobi.c
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation,
-Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#ifndef JACOBI_DC_THRESHOLD
-#define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
-#endif
-
-/* Schönhage's rules:
- *
- * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
- *
- * If r1 is odd, then
- *
- *   (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1)
- *
- * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
- *
- * If r1 is even, r2 must be odd. We have
- *
- *   (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)
- *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)
- *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)
- *
- * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
- * q1 times gives
- *
- *   (r1 | r0) = (r1 | r2) = (r3 | r2)
- *
- * On the other hand, if r1 = 2 (mod 4), the sign factor is
- * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
- *
- *   (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2
- *   = q1 (r0-1)/2 + q1 (q1-1)/2
- *
- * and we can summarize the even case as
- *
- *   (r1 | r0) = t(r1, r0, q1) (r3 | r2)
- *
- * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
- *
- * What about termination? The remainder sequence ends with (0|1) = 1
- * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
- * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
- * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
- *
- * Examples: (11|15) = - (15|11) = - (4|11)
- *            (4|11) =    (4| 3) =   (1| 3)
- *            (1| 3) = (3|1) = (0|1) = 1
- *
- *             (2|7) = (2|1) = (0|1) = 1
- *
- * Detail:     (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
- *             (2|5) = (2-5|5) = (-1|5)(3|5) =  (5|3) =  (2|3)
- *             (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
- *
- */
-
-/* In principle, the state consists of four variables: e (one bit), a,
-   b (two bits each), d (one bit). Collected factors are (-1)^e. a and
-   b are the least significant bits of the current remainders. d
-   (denominator) is 0 if we're currently subtracting multiplies of a
-   from b, and 1 if we're subtracting b from a.
-
-   e is stored in the least significant bit, while a, b and d are
-   coded as only 13 distinct values in bits 1-4, according to the
-   following table. For rows not mentioning d, the value is either
-   implied, or it doesn't matter. */
-
-#if WANT_ASSERT
-static const struct
-{
-  unsigned char a;
-  unsigned char b;
-} decode_table[13] = {
-  /*  0 */ { 0, 1 },
-  /*  1 */ { 0, 3 },
-  /*  2 */ { 1, 1 },
-  /*  3 */ { 1, 3 },
-  /*  4 */ { 2, 1 },
-  /*  5 */ { 2, 3 },
-  /*  6 */ { 3, 1 },
-  /*  7 */ { 3, 3 }, /* d = 1 */
-  /*  8 */ { 1, 0 },
-  /*  9 */ { 1, 2 },
-  /* 10 */ { 3, 0 },
-  /* 11 */ { 3, 2 },
-  /* 12 */ { 3, 3 }, /* d = 0 */
-};
-#define JACOBI_A(bits) (decode_table[(bits)>>1].a)
-#define JACOBI_B(bits) (decode_table[(bits)>>1].b)
-#endif /* WANT_ASSERT */
-
-const unsigned char jacobi_table[208] = {
-#include "jacobitab.h"
-};
-
-#define BITS_FAIL 31
-
-static void
-jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
-	     mp_srcptr qp, mp_size_t qn, int d)
-{
-  unsigned *bitsp = (unsigned *) p;
-
-  if (gp)
-    {
-      ASSERT (gn > 0);
-      if (gn != 1 || gp[0] != 1)
-	{
-	  *bitsp = BITS_FAIL;
-	  return;
-	}
-    }
-
-  if (qp)
-    {
-      ASSERT (qn > 0);
-      ASSERT (d >= 0);
-      *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);
-    }
-}
-
-#define CHOOSE_P(n) (2*(n) / 3)
-
-int
-mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
-{
-  mp_size_t scratch;
-  mp_size_t matrix_scratch;
-  mp_ptr tp;
-
-  TMP_DECL;
-
-  ASSERT (n > 0);
-  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
-  ASSERT ( (bp[0] | ap[0]) & 1);
-
-  /* FIXME: Check for small sizes first, before setting up temporary
-     storage etc. */
-  scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
-
-  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
-    {
-      mp_size_t hgcd_scratch;
-      mp_size_t update_scratch;
-      mp_size_t p = CHOOSE_P (n);
-      mp_size_t dc_scratch;
-
-      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
-      hgcd_scratch = mpn_hgcd_itch (n - p);
-      update_scratch = p + n - 1;
-
-      dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
-      if (dc_scratch > scratch)
-	scratch = dc_scratch;
-    }
-
-  TMP_MARK;
-  tp = TMP_ALLOC_LIMBS(scratch);
-
-  while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
-    {
-      struct hgcd_matrix M;
-      mp_size_t p = 2*n/3;
-      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
-      mp_size_t nn;
-      mpn_hgcd_matrix_init (&M, n - p, tp);
-
-      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
-			    tp + matrix_scratch);
-      if (nn > 0)
-	{
-	  ASSERT (M.n <= (n - p - 1)/2);
-	  ASSERT (M.n + p <= (p + n - 1) / 2);
-	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
-	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
-	}
-      else
-	{
-	  /* Temporary storage n */
-	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
-	  if (!n)
-	    {
-	      TMP_FREE;
-	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
-	    }
-	}
-    }
-
-  while (n > 2)
-    {
-      struct hgcd_matrix1 M;
-      mp_limb_t ah, al, bh, bl;
-      mp_limb_t mask;
-
-      mask = ap[n-1] | bp[n-1];
-      ASSERT (mask > 0);
-
-      if (mask & GMP_NUMB_HIGHBIT)
-	{
-	  ah = ap[n-1]; al = ap[n-2];
-	  bh = bp[n-1]; bl = bp[n-2];
-	}
-      else
-	{
-	  int shift;
-
-	  count_leading_zeros (shift, mask);
-	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
-	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
-	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
-	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
-	}
-
-      /* Try an mpn_nhgcd2 step */
-      if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
-	{
-	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
-	  MP_PTR_SWAP (ap, tp);
-	}
-      else
-	{
-	  /* mpn_hgcd2 has failed. Then either one of a or b is very
-	     small, or the difference is very small. Perform one
-	     subtraction followed by one division. */
-	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
-	  if (!n)
-	    {
-	      TMP_FREE;
-	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
-	    }
-	}
-    }
-
-  if (bits >= 16)
-    MP_PTR_SWAP (ap, bp);
-
-  ASSERT (bp[0] & 1);
-
-  if (n == 1)
-    {
-      mp_limb_t al, bl;
-      al = ap[0];
-      bl = bp[0];
-
-      TMP_FREE;
-      if (bl == 1)
-	return 1 - 2*(bits & 1);
-      else
-	return mpn_jacobi_base (al, bl, bits << 1);
-    }
-
-  else
-    {
-      int res = mpn_jacobi_2 (ap, bp, bits & 1);
-      TMP_FREE;
-      return res;
-    }
-}
diff --git a/gmp/mpn/generic/jacobi_2.c b/gmp/mpn/generic/jacobi_2.c
deleted file mode 100644
index 9f480f7834..0000000000
--- a/gmp/mpn/generic/jacobi_2.c
+++ /dev/null
@@ -1,352 +0,0 @@
-/* jacobi_2.c
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 1996, 1998, 2000-2004, 2008, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#ifndef JACOBI_2_METHOD
-#define JACOBI_2_METHOD 2
-#endif
-
-/* Computes (a / b) where b is odd, and a and b are otherwise arbitrary
-   two-limb numbers. */
-#if JACOBI_2_METHOD == 1
-int
-mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
-{
-  mp_limb_t ah, al, bh, bl;
-  int c;
-
-  al = ap[0];
-  ah = ap[1];
-  bl = bp[0];
-  bh = bp[1];
-
-  ASSERT (bl & 1);
-
-  bl = ((bh << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK) | (bl >> 1);
-  bh >>= 1;
-
-  if ( (bh | bl) == 0)
-    return 1 - 2*(bit & 1);
-
-  if ( (ah | al) == 0)
-    return 0;
-
-  if (al == 0)
-    {
-      al = ah;
-      ah = 0;
-      bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
-    }
-  count_trailing_zeros (c, al);
-  bit ^= c & (bl ^ (bl >> 1));
-
-  c++;
-  if (UNLIKELY (c == GMP_NUMB_BITS))
-    {
-      al = ah;
-      ah = 0;
-    }
-  else
-    {
-      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
-      ah >>= c;
-    }
-  while ( (ah | bh) > 0)
-    {
-      mp_limb_t th, tl;
-      mp_limb_t bgta;
-
-      sub_ddmmss (th, tl, ah, al, bh, bl);
-      if ( (tl | th) == 0)
-	return 0;
-
-      bgta = LIMB_HIGHBIT_TO_MASK (th);
-
-      /* If b > a, invoke reciprocity */
-      bit ^= (bgta & al & bl);
-
-      /* b <-- min (a, b) */
-      add_ssaaaa (bh, bl, bh, bl, th & bgta, tl & bgta);
-
-      if ( (bh | bl) == 0)
-	return 1 - 2*(bit & 1);
-
-      /* a <-- |a - b| */
-      al = (bgta ^ tl) - bgta;
-      ah = (bgta ^ th);
-
-      if (UNLIKELY (al == 0))
-	{
-	  /* If b > a, al == 0 implies that we have a carry to
-	     propagate. */
-	  al = ah - bgta;
-	  ah = 0;
-	  bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
-	}
-      count_trailing_zeros (c, al);
-      c++;
-      bit ^= c & (bl ^ (bl >> 1));
-
-      if (UNLIKELY (c == GMP_NUMB_BITS))
-	{
-	  al = ah;
-	  ah = 0;
-	}
-      else
-	{
-	  al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
-	  ah >>= c;
-	}
-    }
-
-  ASSERT (bl > 0);
-
-  while ( (al | bl) & GMP_LIMB_HIGHBIT)
-    {
-      /* Need an extra comparison to get the mask. */
-      mp_limb_t t = al - bl;
-      mp_limb_t bgta = - (bl > al);
-
-      if (t == 0)
-	return 0;
-
-      /* If b > a, invoke reciprocity */
-      bit ^= (bgta & al & bl);
-
-      /* b <-- min (a, b) */
-      bl += (bgta & t);
-
-      /* a <-- |a - b| */
-      al = (t ^ bgta) - bgta;
-
-      /* Number of trailing zeros is the same no matter if we look at
-       * t or a, but using t gives more parallelism. */
-      count_trailing_zeros (c, t);
-      c ++;
-      /* (2/b) = -1 if b = 3 or 5 mod 8 */
-      bit ^= c & (bl ^ (bl >> 1));
-
-      if (UNLIKELY (c == GMP_NUMB_BITS))
-	return 1 - 2*(bit & 1);
-
-      al >>= c;
-    }
-
-  /* Here we have a little impedance mismatch. Better to inline it? */
-  return mpn_jacobi_base (2*al+1, 2*bl+1, bit << 1);
-}
-#elif JACOBI_2_METHOD == 2
-int
-mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
-{
-  mp_limb_t ah, al, bh, bl;
-  int c;
-
-  al = ap[0];
-  ah = ap[1];
-  bl = bp[0];
-  bh = bp[1];
-
-  ASSERT (bl & 1);
-
-  /* Use bit 1. */
-  bit <<= 1;
-
-  if (bh == 0 && bl == 1)
-    /* (a|1) = 1 */
-    return 1 - (bit & 2);
-
-  if (al == 0)
-    {
-      if (ah == 0)
-	/* (0|b) = 0, b > 1 */
-	return 0;
-
-      count_trailing_zeros (c, ah);
-      bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
-
-      al = bl;
-      bl = ah >> c;
-
-      if (bl == 1)
-	/* (1|b) = 1 */
-	return 1 - (bit & 2);
-
-      ah = bh;
-
-      bit ^= al & bl;
-
-      goto b_reduced;
-    }
-  if ( (al & 1) == 0)
-    {
-      count_trailing_zeros (c, al);
-
-      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
-      ah >>= c;
-      bit ^= (c << 1) & (bl ^ (bl >> 1));
-    }
-  if (ah == 0)
-    {
-      if (bh > 0)
-	{
-	  bit ^= al & bl;
-	  MP_LIMB_T_SWAP (al, bl);
-	  ah = bh;
-	  goto b_reduced;
-	}
-      goto ab_reduced;
-    }
-
-  while (bh > 0)
-    {
-      /* Compute (a|b) */
-      while (ah > bh)
-	{
-	  sub_ddmmss (ah, al, ah, al, bh, bl);
-	  if (al == 0)
-	    {
-	      count_trailing_zeros (c, ah);
-	      bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
-
-	      al = bl;
-	      bl = ah >> c;
-	      ah = bh;
-
-	      bit ^= al & bl;
-	      goto b_reduced;
-	    }
-	  count_trailing_zeros (c, al);
-	  bit ^= (c << 1) & (bl ^ (bl >> 1));
-	  al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
-	  ah >>= c;
-	}
-      if (ah == bh)
-	goto cancel_hi;
-
-      if (ah == 0)
-	{
-	  bit ^= al & bl;
-	  MP_LIMB_T_SWAP (al, bl);
-	  ah = bh;
-	  break;
-	}
-
-      bit ^= al & bl;
-
-      /* Compute (b|a) */
-      while (bh > ah)
-	{
-	  sub_ddmmss (bh, bl, bh, bl, ah, al);
-	  if (bl == 0)
-	    {
-	      count_trailing_zeros (c, bh);
-	      bit ^= ((GMP_NUMB_BITS + c) << 1) & (al ^ (al >> 1));
-
-	      bl = bh >> c;
-	      bit ^= al & bl;
-	      goto b_reduced;
-	    }
-	  count_trailing_zeros (c, bl);
-	  bit ^= (c << 1) & (al ^ (al >> 1));
-	  bl = ((bh << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (bl >> c);
-	  bh >>= c;
-	}
-      bit ^= al & bl;
-
-      /* Compute (a|b) */
-      if (ah == bh)
-	{
-	cancel_hi:
-	  if (al < bl)
-	    {
-	      MP_LIMB_T_SWAP (al, bl);
-	      bit ^= al & bl;
-	    }
-	  al -= bl;
-	  if (al == 0)
-	    return 0;
-
-	  count_trailing_zeros (c, al);
-	  bit ^= (c << 1) & (bl ^ (bl >> 1));
-	  al >>= c;
-
-	  if (al == 1)
-	    return 1 - (bit & 2);
-
-	  MP_LIMB_T_SWAP (al, bl);
-	  bit ^= al & bl;
-	  break;
-	}
-    }
-
- b_reduced:
-  /* Compute (a|b), with b a single limb. */
-  ASSERT (bl & 1);
-
-  if (bl == 1)
-    /* (a|1) = 1 */
-    return 1 - (bit & 2);
-
-  while (ah > 0)
-    {
-      ah -= (al < bl);
-      al -= bl;
-      if (al == 0)
-	{
-	  if (ah == 0)
-	    return 0;
-	  count_trailing_zeros (c, ah);
-	  bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
-	  al = ah >> c;
-	  goto ab_reduced;
-	}
-      count_trailing_zeros (c, al);
-
-      al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
-      ah >>= c;
-      bit ^= (c << 1) & (bl ^ (bl >> 1));
-    }
- ab_reduced:
-  ASSERT (bl & 1);
-  ASSERT (bl > 1);
-
-  return mpn_jacobi_base (al, bl, bit);
-}
-#else
-#error Unsupported value for JACOBI_2_METHOD
-#endif
diff --git a/gmp/mpn/generic/logops_n.c b/gmp/mpn/generic/logops_n.c
deleted file mode 100644
index 1b534ff4ba..0000000000
--- a/gmp/mpn/generic/logops_n.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#ifdef OPERATION_and_n
-#define func __MPN(and_n)
-#define call mpn_and_n
-#endif
-
-#ifdef OPERATION_andn_n
-#define func __MPN(andn_n)
-#define call mpn_andn_n
-#endif
-
-#ifdef OPERATION_nand_n
-#define func __MPN(nand_n)
-#define call mpn_nand_n
-#endif
-
-#ifdef OPERATION_ior_n
-#define func __MPN(ior_n)
-#define call mpn_ior_n
-#endif
-
-#ifdef OPERATION_iorn_n
-#define func __MPN(iorn_n)
-#define call mpn_iorn_n
-#endif
-
-#ifdef OPERATION_nior_n
-#define func __MPN(nior_n)
-#define call mpn_nior_n
-#endif
-
-#ifdef OPERATION_xor_n
-#define func __MPN(xor_n)
-#define call mpn_xor_n
-#endif
-
-#ifdef OPERATION_xnor_n
-#define func __MPN(xnor_n)
-#define call mpn_xnor_n
-#endif
-
-void
-func (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
-{
-  call (rp, up, vp, n);
-}
diff --git a/gmp/mpn/generic/lshift.c b/gmp/mpn/generic/lshift.c
index 5182632976..fdc7e4423e 100644
--- a/gmp/mpn/generic/lshift.c
+++ b/gmp/mpn/generic/lshift.c
@@ -1,32 +1,22 @@
 /* mpn_lshift -- Shift left low level.
 
-Copyright 1991, 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/mpn/generic/lshiftc.c b/gmp/mpn/generic/lshiftc.c
deleted file mode 100644
index e8051b7b93..0000000000
--- a/gmp/mpn/generic/lshiftc.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/* mpn_lshiftc -- Shift left low level with complement.
-
-Copyright 1991, 1993, 1994, 1996, 2000-2002, 2009 Free Software Foundation,
-Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Shift U (pointed to by up and n limbs long) cnt bits to the left
-   and store the n least significant limbs of the result at rp.
-   Return the bits shifted out from the most significant limb.
-
-   Argument constraints:
-   1. 0 < cnt < GMP_NUMB_BITS.
-   2. If the result is to be written over the input, rp must be >= up.
-*/
-
-mp_limb_t
-mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
-{
-  mp_limb_t high_limb, low_limb;
-  unsigned int tnc;
-  mp_size_t i;
-  mp_limb_t retval;
-
-  ASSERT (n >= 1);
-  ASSERT (cnt >= 1);
-  ASSERT (cnt < GMP_NUMB_BITS);
-  ASSERT (MPN_SAME_OR_DECR_P (rp, up, n));
-
-  up += n;
-  rp += n;
-
-  tnc = GMP_NUMB_BITS - cnt;
-  low_limb = *--up;
-  retval = low_limb >> tnc;
-  high_limb = (low_limb << cnt);
-
-  for (i = n - 1; i != 0; i--)
-    {
-      low_limb = *--up;
-      *--rp = (~(high_limb | (low_limb >> tnc))) & GMP_NUMB_MASK;
-      high_limb = low_limb << cnt;
-    }
-  *--rp = (~high_limb) & GMP_NUMB_MASK;
-
-  return retval;
-}
diff --git a/gmp/mpn/generic/matrix22_mul.c b/gmp/mpn/generic/matrix22_mul.c
index 59531eb1b2..f979385d9d 100644
--- a/gmp/mpn/generic/matrix22_mul.c
+++ b/gmp/mpn/generic/matrix22_mul.c
@@ -1,38 +1,25 @@
 /* matrix22_mul.c.
 
-   Contributed by Niels Möller and Marco Bodrato.
-
    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2003-2005, 2008, 2009 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -83,198 +70,143 @@ mpn_matrix22_mul_itch (mp_size_t rn, mp_size_t mn)
       || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
     return 3*rn + 2*mn;
   else
-    return 3*(rn + mn) + 5;
+    return 4*(rn + mn) + 5;
 }
 
 /* Algorithm:
 
     / s0 \   /  1  0  0  0 \ / r0 \
-    | s1 |   |  0  1  0  1 | | r1 |
-    | s2 |   |  0  0 -1  1 | | r2 |
-    | s3 | = |  0  1 -1  1 | \ r3 /
-    | s4 |   | -1  1 -1  1 |
-    | s5 |   |  0  1  0  0 |
-    \ s6 /   \  0  0  1  0 /
+    | s1 |   |  0  1  0  0 | | r1 |
+    | s2 |   |  0  0  1  1 | | r2 |
+    | s3 | = | -1  0  1  1 | \ r3 /
+    | s4 |   |  1  0 -1  0 |
+    | s5 |   |  1  1 -1 -1 |
+    \ s6 /   \  0  0  0  1 /
 
     / t0 \   /  1  0  0  0 \ / m0 \
-    | t1 |   |  0  1  0  1 | | m1 |
-    | t2 |   |  0  0 -1  1 | | m2 |
-    | t3 | = |  0  1 -1  1 | \ m3 /
-    | t4 |   | -1  1 -1  1 |
-    | t5 |   |  0  1  0  0 |
-    \ t6 /   \  0  0  1  0 /
-
-  Note: the two matrices above are the same, but s_i and t_i are used
-  in the same product, only for i<4, see "A Strassen-like Matrix
-  Multiplication suited for squaring and higher power computation" by
-  M. Bodrato, in Proceedings of ISSAC 2010.
-
-    / r0 \   / 1 0  0  0  0  1  0 \ / s0*t0 \
-    | r1 | = | 0 0 -1  1 -1  1  0 | | s1*t1 |
-    | r2 |   | 0 1  0 -1  0 -1 -1 | | s2*t2 |
-    \ r3 /   \ 0 1  1 -1  0 -1  0 / | s3*t3 |
-				    | s4*t5 |
-				    | s5*t6 |
-				    \ s6*t4 /
-
-  The scheduling uses two temporaries U0 and U1 to store products, and
-  two, S0 and T0, to store combinations of entries of the two
-  operands.
+    | t1 |   |  0  0  1  0 | | m1 |
+    | t2 |   | -1  1  0  0 | | m2 |
+    | t3 | = |  1 -1  0  1 | \ m3 /
+    | t4 |   |  0 -1  0  1 |
+    | t5 |   |  0  0  0  1 |
+    \ t6 /   \ -1  1  1 -1 /
+
+    / r0 \   / 1 1 0 0 0 0 0 \ / s0 * t0 \
+    | r1 | = | 1 0 1 1 0 1 0 | | s1 * t1 |
+    | r2 |   | 1 0 0 1 1 0 1 | | s2 * t2 |
+    \ r3 /   \ 1 0 1 1 1 0 0 / | s3 * t3 |
+			       | s4 * t4 |
+			       | s5 * t5 |
+			       \ s6 * t6 /
 */
 
 /* Computes R = R * M. Elements are numbers R = (r0, r1; r2, r3).
  *
  * Resulting elements are of size up to rn + mn + 1.
  *
- * Temporary storage: 3 rn + 3 mn + 5. */
+ * Temporary storage: 4 rn + 4 mn + 5. */
 void
 mpn_matrix22_mul_strassen (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
 			   mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
 			   mp_ptr tp)
 {
-  mp_ptr s0, t0, u0, u1;
-  int r1s, r3s, s0s, t0s, u1s;
-  s0 = tp; tp += rn + 1;
-  t0 = tp; tp += mn + 1;
+  mp_ptr s2, s3, t2, t3, u0, u1;
+  int r2s, r3s, s3s, t2s, t3s, u0s, u1s;
+  s2 = tp; tp += rn;
+  s3 = tp; tp += rn + 1;
+  t2 = tp; tp += mn;
+  t3 = tp; tp += mn + 1;
   u0 = tp; tp += rn + mn + 1;
   u1 = tp; /* rn + mn + 2 */
 
-  MUL (u0, r1, rn, m2, mn);		/* u5 = s5 * t6 */
-  r3s = abs_sub_n (r3, r3, r2, rn);	/* r3 - r2 */
-  if (r3s)
-    {
-      r1s = abs_sub_n (r1, r1, r3, rn);
-      r1[rn] = 0;
-    }
-  else
-    {
-      r1[rn] = mpn_add_n (r1, r1, r3, rn);
-      r1s = 0;				/* r1 - r2 + r3  */
-    }
-  if (r1s)
-    {
-      s0[rn] = mpn_add_n (s0, r1, r0, rn);
-      s0s = 0;
-    }
-  else if (r1[rn] != 0)
-    {
-      s0[rn] = r1[rn] - mpn_sub_n (s0, r1, r0, rn);
-      s0s = 1;				/* s4 = -r0 + r1 - r2 + r3 */
-					/* Reverse sign! */
-    }
-  else
-    {
-      s0s = abs_sub_n (s0, r0, r1, rn);
-      s0[rn] = 0;
-    }
-  MUL (u1, r0, rn, m0, mn);		/* u0 = s0 * t0 */
-  r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);
-  ASSERT (r0[rn+mn] < 2);		/* u0 + u5 */
+  MUL (u0, r0, rn, m0, mn); /* 0 */
+  MUL (u1, r1, rn, m2, mn); /* 1 */
 
-  t0s = abs_sub_n (t0, m3, m2, mn);
-  u1s = r3s^t0s^1;			/* Reverse sign! */
-  MUL (u1, r3, rn, t0, mn);		/* u2 = s2 * t2 */
-  u1[rn+mn] = 0;
-  if (t0s)
-    {
-      t0s = abs_sub_n (t0, m1, t0, mn);
-      t0[mn] = 0;
-    }
-  else
-    {
-      t0[mn] = mpn_add_n (t0, t0, m1, mn);
-    }
+  MPN_COPY (s2, r3, rn);
 
-  /* FIXME: Could be simplified if we had space for rn + mn + 2 limbs
-     at r3. I'd expect that for matrices of random size, the high
-     words t0[mn] and r1[rn] are non-zero with a pretty small
-     probability. If that can be confirmed this should be done as an
-     unconditional rn x (mn+1) followed by an if (UNLIKELY (r1[rn]))
-     add_n. */
-  if (t0[mn] != 0)
+  r3[rn] = mpn_add_n (r3, r3, r2, rn);
+  r0[rn] = 0;
+  s3s = abs_sub_n (s3, r3, r0, rn + 1);
+  t2s = abs_sub_n (t2, m1, m0, mn);
+  if (t2s)
     {
-      MUL (r3, r1, rn, t0, mn + 1);	/* u3 = s3 * t3 */
-      ASSERT (r1[rn] < 2);
-      if (r1[rn] != 0)
-	mpn_add_n (r3 + rn, r3 + rn, t0, mn + 1);
+      t3[mn] = mpn_add_n (t3, m3, t2, mn);
+      t3s = 0;
     }
   else
     {
-      MUL (r3, r1, rn + 1, t0, mn);
+      t3s = abs_sub_n (t3, m3, t2, mn);
+      t3[mn] = 0;
     }
 
-  ASSERT (r3[rn+mn] < 4);
+  r2s = abs_sub_n (r2, r0, r2, rn);
+  r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);
 
-  u0[rn+mn] = 0;
-  if (r1s^t0s)
+  MUL(u1, s3, rn+1, t3, mn+1); /* 3 */
+  u1s = s3s ^ t3s;
+  ASSERT (u1[rn+mn+1] == 0);
+  ASSERT (u1[rn+mn] < 4);
+
+  if (u1s)
     {
-      r3s = abs_sub_n (r3, u0, r3, rn + mn + 1);
+      u0[rn+mn] = 0;
+      u0s = abs_sub_n (u0, u0, u1, rn + mn + 1);
     }
   else
     {
-      ASSERT_NOCARRY (mpn_add_n (r3, r3, u0, rn + mn + 1));
-      r3s = 0;				/* u3 + u5 */
+      u0[rn+mn] = u1[rn+mn] + mpn_add_n (u0, u0, u1, rn + mn);
+      u0s = 0;
     }
+  MUL(u1, r3, rn + 1, t2, mn); /* 2 */
+  u1s = t2s;
+  ASSERT (u1[rn+mn] < 2);
 
-  if (t0s)
-    {
-      t0[mn] = mpn_add_n (t0, t0, m0, mn);
-    }
-  else if (t0[mn] != 0)
-    {
-      t0[mn] -= mpn_sub_n (t0, t0, m0, mn);
-    }
-  else
+  u1s = add_signed_n (u1, u0, u0s, u1, u1s, rn + mn + 1);
+
+  t2s = abs_sub_n (t2, m3, m1, mn);
+  if (s3s)
     {
-      t0s = abs_sub_n (t0, t0, m0, mn);
+      s3[rn] += mpn_add_n (s3, s3, r1, rn);
+      s3s = 0;
     }
-  MUL (u0, r2, rn, t0, mn + 1);		/* u6 = s6 * t4 */
-  ASSERT (u0[rn+mn] < 2);
-  if (r1s)
+  else if (s3[rn] > 0)
     {
-      ASSERT_NOCARRY (mpn_sub_n (r1, r2, r1, rn));
+      s3[rn] -= mpn_sub_n (s3, s3, r1, rn);
+      s3s = 1;
     }
   else
     {
-      r1[rn] += mpn_add_n (r1, r1, r2, rn);
-    }
-  rn++;
-  t0s = add_signed_n (r2, r3, r3s, u0, t0s, rn + mn);
-					/* u3 + u5 + u6 */
-  ASSERT (r2[rn+mn-1] < 4);
-  r3s = add_signed_n (r3, r3, r3s, u1, u1s, rn + mn);
-					/* -u2 + u3 + u5  */
-  ASSERT (r3[rn+mn-1] < 3);
-  MUL (u0, s0, rn, m1, mn);		/* u4 = s4 * t5 */
-  ASSERT (u0[rn+mn-1] < 2);
-  t0[mn] = mpn_add_n (t0, m3, m1, mn);
-  MUL (u1, r1, rn, t0, mn + 1);		/* u1 = s1 * t1 */
-  mn += rn;
-  ASSERT (u1[mn-1] < 4);
-  ASSERT (u1[mn] == 0);
-  ASSERT_NOCARRY (add_signed_n (r1, r3, r3s, u0, s0s, mn));
-					/* -u2 + u3 - u4 + u5  */
-  ASSERT (r1[mn-1] < 2);
-  if (r3s)
-    {
-      ASSERT_NOCARRY (mpn_add_n (r3, u1, r3, mn));
+      s3s = abs_sub_n (s3, r1, s3, rn);
     }
-  else
+  MUL (r1, s3, rn+1, m3, mn); /* 5 */
+  ASSERT_NOCARRY(add_signed_n (r1, r1, s3s, u1, u1s, rn + mn + 1));
+  ASSERT (r1[rn + mn] < 2);
+
+  MUL (r3, r2, rn, t2, mn); /* 4 */
+  r3s = r2s ^ t2s;
+  r3[rn + mn] = 0;
+  u0s = add_signed_n (u0, u0, u0s, r3, r3s, rn + mn + 1);
+  ASSERT_NOCARRY (add_signed_n (r3, r3, r3s, u1, u1s, rn + mn + 1));
+  ASSERT (r3[rn + mn] < 2);
+
+  if (t3s)
     {
-      ASSERT_NOCARRY (mpn_sub_n (r3, u1, r3, mn));
-					/* u1 + u2 - u3 - u5  */
+      t3[mn] += mpn_add_n (t3, m2, t3, mn);
+      t3s = 0;
     }
-  ASSERT (r3[mn-1] < 2);
-  if (t0s)
+  else if (t3[mn] > 0)
     {
-      ASSERT_NOCARRY (mpn_add_n (r2, u1, r2, mn));
+      t3[mn] -= mpn_sub_n (t3, t3, m2, mn);
+      t3s = 1;
     }
   else
     {
-      ASSERT_NOCARRY (mpn_sub_n (r2, u1, r2, mn));
-					/* u1 - u3 - u5 - u6  */
+      t3s = abs_sub_n (t3, m2, t3, mn);
     }
-  ASSERT (r2[mn-1] < 2);
+  MUL (r2, s2, rn, t3, mn + 1); /* 6 */
+
+  ASSERT_NOCARRY (add_signed_n (r2, r2, t3s, u0, u0s, rn + mn + 1));
+  ASSERT (r2[rn + mn] < 2);
 }
 
 void
diff --git a/gmp/mpn/generic/matrix22_mul1_inverse_vector.c b/gmp/mpn/generic/matrix22_mul1_inverse_vector.c
deleted file mode 100644
index 83b2fb5134..0000000000
--- a/gmp/mpn/generic/matrix22_mul1_inverse_vector.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/* matrix22_mul1_inverse_vector.c
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2008, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
-   the left. Uses three buffers, to avoid a copy. */
-mp_size_t
-mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *M,
-				  mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
-{
-  mp_limb_t h0, h1;
-
-  /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
-
-     r  = u11 * a
-     r -= u01 * b
-     b *= u00
-     b -= u10 * a
-  */
-
-  h0 =    mpn_mul_1 (rp, ap, n, M->u[1][1]);
-  h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
-  ASSERT (h0 == h1);
-
-  h0 =    mpn_mul_1 (bp, bp, n, M->u[0][0]);
-  h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
-  ASSERT (h0 == h1);
-
-  n -= (rp[n-1] | bp[n-1]) == 0;
-  return n;
-}
diff --git a/gmp/mpn/generic/mod_1.c b/gmp/mpn/generic/mod_1.c
index 0212020201..7c892814e1 100644
--- a/gmp/mpn/generic/mod_1.c
+++ b/gmp/mpn/generic/mod_1.c
@@ -3,34 +3,23 @@
    Return the single-limb remainder.
    There are no constraints on the value of the divisor.
 
-Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007-2009, 2012 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007, 2008, 2009 Free
+Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -53,43 +42,18 @@ see https://www.gnu.org/licenses/.  */
 #define MOD_1_UNNORM_THRESHOLD  0
 #endif
 
-#ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
-#define MOD_1U_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#ifndef MOD_1_1_THRESHOLD
+#define MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
 #endif
 
-#ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
-#define MOD_1N_TO_MOD_1_1_THRESHOLD  MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#ifndef MOD_1_2_THRESHOLD
+#define MOD_1_2_THRESHOLD  10
 #endif
 
-#ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD  10
+#ifndef MOD_1_4_THRESHOLD
+#define MOD_1_4_THRESHOLD  120
 #endif
 
-#ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD  20
-#endif
-
-#if TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p
-/* Duplicates declarations in tune/speed.h */
-mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
-mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
-
-void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
-void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
-
-#undef mpn_mod_1_1p
-#define mpn_mod_1_1p(ap, n, b, pre)			     \
-  (mod_1_1p_method == 1 ? mpn_mod_1_1p_1 (ap, n, b, pre)     \
-   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_2 (ap, n, b, pre)  \
-      : __gmpn_mod_1_1p (ap, n, b, pre)))
-
-#undef mpn_mod_1_1p_cps
-#define mpn_mod_1_1p_cps(pre, b)				\
-  (mod_1_1p_method == 1 ? mpn_mod_1_1p_cps_1 (pre, b)		\
-   : (mod_1_1p_method == 2 ? mpn_mod_1_1p_cps_2 (pre, b)	\
-      : __gmpn_mod_1_1p_cps (pre, b)))
-#endif /* TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p */
-
 
 /* The comments in mpn/generic/divrem_1.c apply here too.
 
@@ -150,12 +114,12 @@ mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
   if (UDIV_NEEDS_NORMALIZATION
       && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
     {
-      mp_limb_t nshift;
       for (i = un - 2; i >= 0; i--)
 	{
 	  n0 = up[i] << GMP_NAIL_BITS;
-	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
-	  udiv_qrnnd (dummy, r, r, nshift, d);
+	  udiv_qrnnd (dummy, r, r,
+		      (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
+		      d);
 	  r >>= GMP_NAIL_BITS;
 	  n1 = n0;
 	}
@@ -165,18 +129,19 @@ mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
     }
   else
     {
-      mp_limb_t inv, nshift;
+      mp_limb_t inv;
       invert_limb (inv, d);
 
       for (i = un - 2; i >= 0; i--)
 	{
 	  n0 = up[i] << GMP_NAIL_BITS;
-	  nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
-	  udiv_rnnd_preinv (r, r, nshift, d, inv);
+	  udiv_qrnnd_preinv (dummy, r, r,
+			     (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
+			     d, inv);
 	  r >>= GMP_NAIL_BITS;
 	  n1 = n0;
 	}
-      udiv_rnnd_preinv (r, r, n1 << cnt, d, inv);
+      udiv_qrnnd_preinv (dummy, r, r, n1 << cnt, d, inv);
       r >>= GMP_NAIL_BITS;
       return r >> cnt;
     }
@@ -222,7 +187,7 @@ mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
       for (i = un - 1; i >= 0; i--)
 	{
 	  n0 = up[i] << GMP_NAIL_BITS;
-	  udiv_rnnd_preinv (r, r, n0, d, inv);
+	  udiv_qrnnd_preinv (dummy, r, r, n0, d, inv);
 	  r >>= GMP_NAIL_BITS;
 	}
       return r;
@@ -242,40 +207,29 @@ mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
 
   if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
     {
-      if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
-	{
-	  return mpn_mod_1_norm (ap, n, b);
-	}
-      else
-	{
-	  mp_limb_t pre[4];
-	  mpn_mod_1_1p_cps (pre, b);
-	  return mpn_mod_1_1p (ap, n, b, pre);
-	}
+      /* The functions below do not handle this large divisor.  */
+      return mpn_mod_1_norm (ap, n, b);
+    }
+  else if (BELOW_THRESHOLD (n, MOD_1_1_THRESHOLD))
+    {
+      return mpn_mod_1_unnorm (ap, n, b);
+    }
+  else if (BELOW_THRESHOLD (n, MOD_1_2_THRESHOLD))
+    {
+      mp_limb_t pre[4];
+      mpn_mod_1s_1p_cps (pre, b);
+      return mpn_mod_1s_1p (ap, n, b << pre[1], pre);
+    }
+  else if (BELOW_THRESHOLD (n, MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
+    {
+      mp_limb_t pre[5];
+      mpn_mod_1s_2p_cps (pre, b);
+      return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
     }
   else
     {
-      if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
-	{
-	  return mpn_mod_1_unnorm (ap, n, b);
-	}
-      else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
-	{
-	  mp_limb_t pre[4];
-	  mpn_mod_1_1p_cps (pre, b);
-	  return mpn_mod_1_1p (ap, n, b << pre[1], pre);
-	}
-      else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
-	{
-	  mp_limb_t pre[5];
-	  mpn_mod_1s_2p_cps (pre, b);
-	  return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
-	}
-      else
-	{
-	  mp_limb_t pre[7];
-	  mpn_mod_1s_4p_cps (pre, b);
-	  return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
-	}
+      mp_limb_t pre[7];
+      mpn_mod_1s_4p_cps (pre, b);
+      return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
     }
 }
diff --git a/gmp/mpn/generic/mod_1_1.c b/gmp/mpn/generic/mod_1_1.c
index 2e111399ed..27c7f8f1b6 100644
--- a/gmp/mpn/generic/mod_1_1.c
+++ b/gmp/mpn/generic/mod_1_1.c
@@ -1,208 +1,74 @@
-/* mpn_mod_1_1p (ap, n, b, cps)
+/* mpn_mod_1s_1p (ap, n, b, cps)
    Divide (ap,,n) by b.  Return the single-limb remainder.
+   Requires that b < B / 2.
 
-   Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
-   Based on a suggestion by Peter L. Montgomery.
+   Contributed to the GNU project by Torbjorn Granlund.
 
    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2008-2011, 2013 Free Software Foundation, Inc.
+Copyright 2008, 2009 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
 
-#ifndef MOD_1_1P_METHOD
-# define MOD_1_1P_METHOD 1    /* need to make sure this is 2 for asm testing */
-#endif
-
-/* Define some longlong.h-style macros, but for wider operations.
- * add_mssaaaa is like longlong.h's add_ssaaaa, but also generates
- * carry out, in the form of a mask. */
-
-#if defined (__GNUC__)
-
-#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
-  __asm__ (  "add	%6, %k2\n\t"					\
-	     "adc	%4, %k1\n\t"					\
-	     "sbb	%k0, %k0"					\
-	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
-	   : "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
-	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
-  __asm__ (  "add	%6, %q2\n\t"					\
-	     "adc	%4, %q1\n\t"					\
-	     "sbb	%q0, %q0"					\
-	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
-	   : "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
-	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
-#endif
-
-#if defined (__sparc__) && W_TYPE_SIZE == 32
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
-  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
-	     "addxcc	%r3, %4, %1\n\t"				\
-	     "subx	%%g0, %%g0, %0"					\
-	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
-	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
-	 __CLOBBER_CC)
-#endif
-
-#if defined (__sparc__) && W_TYPE_SIZE == 64
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
-  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
-	     "addccc	%r7, %8, %%g0\n\t"				\
-	     "addccc	%r3, %4, %1\n\t"				\
-	     "clr	%0\n\t"						\
-	     "movcs	%%xcc, -1, %0"					\
-	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
-	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),		\
-	     "rJ" ((al) >> 32), "rI" ((bl) >> 32)			\
-	 __CLOBBER_CC)
-#if __VIS__ >= 0x300
-#undef add_mssaaaa
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
-  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
-	     "addxccc	%r3, %4, %1\n\t"				\
-	     "clr	%0\n\t"						\
-	     "movcs	%%xcc, -1, %0"					\
-	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
-	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
-	 __CLOBBER_CC)
-#endif
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
-/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
-   processor running in 32-bit mode, since the carry flag then gets the 32-bit
-   carry.  */
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
-  __asm__ (  "add%I6c	%2, %5, %6\n\t"					\
-	     "adde	%1, %3, %4\n\t"					\
-	     "subfe	%0, %0, %0\n\t"					\
-	     "nor	%0, %0, %0"					\
-	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
-	   : "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0))
-#endif
-
-#if defined (__s390x__) && W_TYPE_SIZE == 64
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
-  __asm__ (  "algr	%2, %6\n\t"					\
-	     "alcgr	%1, %4\n\t"					\
-	     "lghi	%0, 0\n\t"					\
-	     "alcgr	%0, %0\n\t"					\
-	     "lcgr	%0, %0"						\
-	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
-	   : "1"  ((UDItype)(a1)), "r" ((UDItype)(b1)),			\
-	     "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
-#endif
-
-#if defined (__arm__) && W_TYPE_SIZE == 32
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
-  __asm__ (  "adds	%2, %5, %6\n\t"					\
-	     "adcs	%1, %3, %4\n\t"					\
-	     "movcc	%0, #0\n\t"					\
-	     "movcs	%0, #-1"					\
-	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
-	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
-#endif
-#endif /* defined (__GNUC__) */
-
-#ifndef add_mssaaaa
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
-  do {									\
-    UWtype __s0, __s1, __c0, __c1;					\
-    __s0 = (a0) + (b0);							\
-    __s1 = (a1) + (b1);							\
-    __c0 = __s0 < (a0);							\
-    __c1 = __s1 < (a1);							\
-    (s0) = __s0;							\
-    __s1 = __s1 + __c0;							\
-    (s1) = __s1;							\
-    (m) = - (__c1 + (__s1 < __c0));					\
-  } while (0)
-#endif
-
-#if MOD_1_1P_METHOD == 1
 void
-mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
+mpn_mod_1s_1p_cps (mp_limb_t cps[4], mp_limb_t b)
 {
   mp_limb_t bi;
   mp_limb_t B1modb, B2modb;
   int cnt;
 
+  ASSERT (b <= GMP_NUMB_MAX / 2);
+
   count_leading_zeros (cnt, b);
 
   b <<= cnt;
   invert_limb (bi, b);
 
-  cps[0] = bi;
-  cps[1] = cnt;
-
-  B1modb = -b;
-  if (LIKELY (cnt != 0))
-    B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
   ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
-  cps[2] = B1modb >> cnt;
+  udiv_rnd_preinv (B2modb, B1modb, b, bi);
+
+  B1modb >>= cnt;
+  B2modb >>= cnt;
 
-  /* In the normalized case, this can be simplified to
-   *
-   *   B2modb = - b * bi;
-   *   ASSERT (B2modb <= b);    // NB: equality iff b = B/2
-   */
-  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
-  cps[3] = B2modb >> cnt;
+  cps[0] = bi;
+  cps[1] = cnt;
+  cps[2] = B1modb;
+  cps[3] = B2modb;
 }
 
 mp_limb_t
-mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
+mpn_mod_1s_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
 {
-  mp_limb_t rh, rl, bi, ph, pl, r;
+  mp_limb_t rh, rl, bi, q, ph, pl, r;
   mp_limb_t B1modb, B2modb;
   mp_size_t i;
   int cnt;
-  mp_limb_t mask;
-
-  ASSERT (n >= 2);		/* fix tuneup.c if this is changed */
 
   B1modb = bmodb[2];
   B2modb = bmodb[3];
 
-  rl = ap[n - 1];
-  umul_ppmm (ph, pl, rl, B1modb);
-  add_ssaaaa (rh, rl, ph, pl, CNST_LIMB(0), ap[n - 2]);
+  umul_ppmm (ph, pl, ap[n - 1], B1modb);
+  add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
 
   for (i = n - 3; i >= 0; i -= 1)
     {
@@ -211,122 +77,28 @@ mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
 	    + HI(rr)  * (B^2 mod b)		<= (B-1)(b-1)
       */
       umul_ppmm (ph, pl, rl, B1modb);
-      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i]);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[i]);
 
       umul_ppmm (rh, rl, rh, B2modb);
       add_ssaaaa (rh, rl, rh, rl, ph, pl);
     }
 
-  cnt = bmodb[1];
   bi = bmodb[0];
-
-  if (LIKELY (cnt != 0))
-    rh = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-
-  mask = -(mp_limb_t) (rh >= b);
-  rh -= mask & b;
-
-  udiv_rnnd_preinv (r, rh, rl << cnt, b, bi);
-
-  return r >> cnt;
-}
-#endif /* MOD_1_1P_METHOD == 1 */
-
-#if MOD_1_1P_METHOD == 2
-void
-mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
-{
-  mp_limb_t bi;
-  mp_limb_t B2modb;
-  int cnt;
-
-  count_leading_zeros (cnt, b);
-
-  b <<= cnt;
-  invert_limb (bi, b);
-
-  cps[0] = bi;
-  cps[1] = cnt;
-
-  if (LIKELY (cnt != 0))
-    {
-      mp_limb_t B1modb = -b;
-      B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
-      ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
-      cps[2] = B1modb >> cnt;
-    }
-  B2modb = - b * bi;
-  ASSERT (B2modb <= b);    // NB: equality iff b = B/2
-  cps[3] = B2modb;
-}
-
-mp_limb_t
-mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
-{
-  int cnt;
-  mp_limb_t bi, B1modb;
-  mp_limb_t r0, r1;
-  mp_limb_t r;
-
-  ASSERT (n >= 2);		/* fix tuneup.c if this is changed */
-
-  r0 = ap[n-2];
-  r1 = ap[n-1];
-
-  if (n > 2)
-    {
-      mp_limb_t B2modb, B2mb;
-      mp_limb_t p0, p1;
-      mp_limb_t r2;
-      mp_size_t j;
-
-      B2modb = bmodb[3];
-      B2mb = B2modb - b;
-
-      umul_ppmm (p1, p0, r1, B2modb);
-      add_mssaaaa (r2, r1, r0, r0, ap[n-3], p1, p0);
-
-      for (j = n-4; j >= 0; j--)
-	{
-	  mp_limb_t cy;
-	  /* mp_limb_t t = r0 + B2mb; */
-	  umul_ppmm (p1, p0, r1, B2modb);
-
-	  ADDC_LIMB (cy, r0, r0, r2 & B2modb);
-	  /* Alternative, for cmov: if (cy) r0 = t; */
-	  r0 -= (-cy) & b;
-	  add_mssaaaa (r2, r1, r0, r0, ap[j], p1, p0);
-	}
-
-      r1 -= (r2 & b);
-    }
-
   cnt = bmodb[1];
+#if 1
+  {
+    mp_limb_t mask;
+    r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+    mask = -(mp_limb_t) (r >= b);
+    r -= mask & b;
+  }
+#else
+  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
+		     (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
+  ASSERT (q <= 1);	/* optimize for small quotient? */
+#endif
 
-  if (LIKELY (cnt != 0))
-    {
-      mp_limb_t t;
-      mp_limb_t B1modb = bmodb[2];
-
-      umul_ppmm (r1, t, r1, B1modb);
-      r0 += t;
-      r1 += (r0 < t);
-
-      /* Normalize */
-      r1 = (r1 << cnt) | (r0 >> (GMP_LIMB_BITS - cnt));
-      r0 <<= cnt;
-
-      /* NOTE: Might get r1 == b here, but udiv_rnnd_preinv allows that. */
-    }
-  else
-    {
-      mp_limb_t mask = -(mp_limb_t) (r1 >= b);
-      r1 -= mask & b;
-    }
-
-  bi = bmodb[0];
+  udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
 
-  udiv_rnnd_preinv (r, r1, r0, b, bi);
   return r >> cnt;
 }
-#endif /* MOD_1_1P_METHOD == 2 */
diff --git a/gmp/mpn/generic/mod_1_2.c b/gmp/mpn/generic/mod_1_2.c
index 7acf3dbdd1..ffadd536de 100644
--- a/gmp/mpn/generic/mod_1_2.c
+++ b/gmp/mpn/generic/mod_1_2.c
@@ -3,39 +3,27 @@
    Requires that b < B / 2.
 
    Contributed to the GNU project by Torbjorn Granlund.
-   Based on a suggestion by Peter L. Montgomery.
 
    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2008-2010 Free Software Foundation, Inc.
+Copyright 2008, 2009 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -48,75 +36,49 @@ mpn_mod_1s_2p_cps (mp_limb_t cps[5], mp_limb_t b)
   mp_limb_t B1modb, B2modb, B3modb;
   int cnt;
 
-  ASSERT (b <= (~(mp_limb_t) 0) / 2);
+  ASSERT (b <= GMP_NUMB_MAX / 2);
 
   count_leading_zeros (cnt, b);
 
   b <<= cnt;
   invert_limb (bi, b);
 
-  cps[0] = bi;
-  cps[1] = cnt;
-
   B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
   ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
-  cps[2] = B1modb >> cnt;
+  udiv_rnd_preinv (B2modb, B1modb, b, bi);
+  udiv_rnd_preinv (B3modb, B2modb, b, bi);
 
-  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+  cps[0] = bi;
+  cps[1] = cnt;
+  cps[2] = B1modb >> cnt;
   cps[3] = B2modb >> cnt;
-
-  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
   cps[4] = B3modb >> cnt;
-
-#if WANT_ASSERT
-  {
-    int i;
-    b = cps[2];
-    for (i = 3; i <= 4; i++)
-      {
-	b += cps[i];
-	ASSERT (b >= cps[i]);
-      }
-  }
-#endif
 }
 
 mp_limb_t
-mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[5])
+mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[5])
 {
-  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+  mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
   mp_limb_t B1modb, B2modb, B3modb;
   mp_size_t i;
   int cnt;
 
-  ASSERT (n >= 1);
-
   B1modb = cps[2];
   B2modb = cps[3];
   B3modb = cps[4];
 
   if ((n & 1) != 0)
     {
-      if (n == 1)
-	{
-	  rl = ap[n - 1];
-	  bi = cps[0];
-	  cnt = cps[1];
-	  udiv_rnnd_preinv (r, rl >> (GMP_LIMB_BITS - cnt),
-			     rl << cnt, b, bi);
-	  return r >> cnt;
-	}
-
-      umul_ppmm (ph, pl, ap[n - 2], B1modb);
-      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
       umul_ppmm (rh, rl, ap[n - 1], B2modb);
+      umul_ppmm (ph, pl, ap[n - 2], B1modb);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
       add_ssaaaa (rh, rl, rh, rl, ph, pl);
       n--;
     }
   else
     {
-      rh = ap[n - 1];
-      rl = ap[n - 2];
+      umul_ppmm (rh, rl, ap[n - 1], B1modb);
+      add_ssaaaa (rh, rl, rh, rl, 0, ap[n - 2]);
     }
 
   for (i = n - 4; i >= 0; i -= 2)
@@ -127,7 +89,7 @@ mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[5])
 	    + HI(rr)  * (B^3 mod b)		<= (B-1)(b-1)
       */
       umul_ppmm (ph, pl, ap[i + 1], B1modb);
-      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
 
       umul_ppmm (ch, cl, rl, B2modb);
       add_ssaaaa (ph, pl, ph, pl, ch, cl);
@@ -136,14 +98,20 @@ mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[5])
       add_ssaaaa (rh, rl, rh, rl, ph, pl);
     }
 
-  umul_ppmm (rh, cl, rh, B1modb);
-  add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
-
-  cnt = cps[1];
   bi = cps[0];
+  cnt = cps[1];
 
+#if 1
+  umul_ppmm (rh, cl, rh, B1modb);
+  add_ssaaaa (rh, rl, rh, rl, 0, cl);
   r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+#else
+  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
+		     (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
+  ASSERT (q <= 2);	/* optimize for small quotient? */
+#endif
+
+  udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
 
   return r >> cnt;
 }
diff --git a/gmp/mpn/generic/mod_1_3.c b/gmp/mpn/generic/mod_1_3.c
index f4137f4315..77989fc0ae 100644
--- a/gmp/mpn/generic/mod_1_3.c
+++ b/gmp/mpn/generic/mod_1_3.c
@@ -3,39 +3,27 @@
    Requires that d < B / 3.
 
    Contributed to the GNU project by Torbjorn Granlund.
-   Based on a suggestion by Peter L. Montgomery.
 
    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2008-2010, 2013 Free Software Foundation, Inc.
+Copyright 2008, 2009 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -48,82 +36,46 @@ mpn_mod_1s_3p_cps (mp_limb_t cps[6], mp_limb_t b)
   mp_limb_t B1modb, B2modb, B3modb, B4modb;
   int cnt;
 
-  ASSERT (b <= (~(mp_limb_t) 0) / 3);
+  ASSERT (b <= GMP_NUMB_MAX / 3);
 
   count_leading_zeros (cnt, b);
 
   b <<= cnt;
   invert_limb (bi, b);
 
-  cps[0] = bi;
-  cps[1] = cnt;
-
   B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
   ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
-  cps[2] = B1modb >> cnt;
+  udiv_rnd_preinv (B2modb, B1modb, b, bi);
+  udiv_rnd_preinv (B3modb, B2modb, b, bi);
+  udiv_rnd_preinv (B4modb, B3modb, b, bi);
 
-  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+  cps[0] = bi;
+  cps[1] = cnt;
+  cps[2] = B1modb >> cnt;
   cps[3] = B2modb >> cnt;
-
-  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
   cps[4] = B3modb >> cnt;
-
-  udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
   cps[5] = B4modb >> cnt;
-
-#if WANT_ASSERT
-  {
-    int i;
-    b = cps[2];
-    for (i = 3; i <= 5; i++)
-      {
-	b += cps[i];
-	ASSERT (b >= cps[i]);
-      }
-  }
-#endif
 }
 
 mp_limb_t
-mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[6])
+mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[6])
 {
-  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+  mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
   mp_limb_t B1modb, B2modb, B3modb, B4modb;
   mp_size_t i;
   int cnt;
 
-  ASSERT (n >= 1);
-
   B1modb = cps[2];
   B2modb = cps[3];
   B3modb = cps[4];
   B4modb = cps[5];
 
-  /* We compute n mod 3 in a tricky way, which works except for when n is so
-     close to the maximum size that we don't need to support it.  The final
-     cast to int is a workaround for HP cc.  */
-  switch ((int) ((mp_limb_t) n * MODLIMB_INVERSE_3 >> (GMP_NUMB_BITS - 2)))
-    {
-    case 0:
-      umul_ppmm (ph, pl, ap[n - 2], B1modb);
-      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
-      umul_ppmm (rh, rl, ap[n - 1], B2modb);
-      add_ssaaaa (rh, rl, rh, rl, ph, pl);
-      n -= 3;
-      break;
-    case 2:	/* n mod 3 = 1 */
-      rh = 0;
-      rl = ap[n - 1];
-      n -= 1;
-      break;
-    case 1:	/* n mod 3 = 2 */
-      rh = ap[n - 1];
-      rl = ap[n - 2];
-      n -= 2;
-      break;
-    }
+  umul_ppmm (ph, pl, ap[n - 2], B1modb);
+  add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
+  umul_ppmm (ch, cl, ap[n - 1], B2modb);
+  add_ssaaaa (rh, rl, ph, pl, ch, cl);
 
-  for (i = n - 3; i >= 0; i -= 3)
+  for (i = n - 6; i >= 0; i -= 3)
     {
       /* rr = ap[i]				< B
 	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
@@ -132,7 +84,7 @@ mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[6])
 	    + HI(rr)  * (B^4 mod b)		<= (B-1)(b-1)
       */
       umul_ppmm (ph, pl, ap[i + 1], B1modb);
-      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
 
       umul_ppmm (ch, cl, ap[i + 2], B2modb);
       add_ssaaaa (ph, pl, ph, pl, ch, cl);
@@ -144,14 +96,35 @@ mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[6])
       add_ssaaaa (rh, rl, rh, rl, ph, pl);
     }
 
-  umul_ppmm (rh, cl, rh, B1modb);
-  add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+  if (i >= -2)
+    {
+      umul_ppmm (ph, pl, rl, B1modb);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 2]);
+      umul_ppmm (rh, rl, rh, B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      if (i >= -1)
+	{
+	  umul_ppmm (ph, pl, rl, B1modb);
+	  add_ssaaaa (ph, pl, ph, pl, 0, ap[0]);
+	  umul_ppmm (rh, rl, rh, B2modb);
+	  add_ssaaaa (rh, rl, rh, rl, ph, pl);
+	}
+    }
 
-  cnt = cps[1];
   bi = cps[0];
+  cnt = cps[1];
 
+#if 1
+  umul_ppmm (rh, cl, rh, B1modb);
+  add_ssaaaa (rh, rl, rh, rl, 0, cl);
   r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+#else
+  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
+		     (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
+  ASSERT (q <= 3);	/* optimize for small quotient? */
+#endif
+
+  udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
 
   return r >> cnt;
 }
diff --git a/gmp/mpn/generic/mod_1_4.c b/gmp/mpn/generic/mod_1_4.c
index 716a0c66de..74893386a9 100644
--- a/gmp/mpn/generic/mod_1_4.c
+++ b/gmp/mpn/generic/mod_1_4.c
@@ -1,41 +1,29 @@
-/* mpn_mod_1s_4p (ap, n, b, cps)
+/* mpn_mod_1s_4p (ap, n, b, cps)
    Divide (ap,,n) by b.  Return the single-limb remainder.
    Requires that d < B / 4.
 
    Contributed to the GNU project by Torbjorn Granlund.
-   Based on a suggestion by Peter L. Montgomery.
 
    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2008-2010 Free Software Foundation, Inc.
+Copyright 2008, 2009 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -48,92 +36,53 @@ mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
   mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
   int cnt;
 
-  ASSERT (b <= (~(mp_limb_t) 0) / 4);
+  ASSERT (b <= GMP_NUMB_MAX / 4);
 
   count_leading_zeros (cnt, b);
 
   b <<= cnt;
   invert_limb (bi, b);
 
-  cps[0] = bi;
-  cps[1] = cnt;
-
   B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
   ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
-  cps[2] = B1modb >> cnt;
+  udiv_rnd_preinv (B2modb, B1modb, b, bi);
+  udiv_rnd_preinv (B3modb, B2modb, b, bi);
+  udiv_rnd_preinv (B4modb, B3modb, b, bi);
+  udiv_rnd_preinv (B5modb, B4modb, b, bi);
 
-  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+  cps[0] = bi;
+  cps[1] = cnt;
+  cps[2] = B1modb >> cnt;
   cps[3] = B2modb >> cnt;
-
-  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
   cps[4] = B3modb >> cnt;
-
-  udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
   cps[5] = B4modb >> cnt;
-
-  udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
   cps[6] = B5modb >> cnt;
-
-#if WANT_ASSERT
-  {
-    int i;
-    b = cps[2];
-    for (i = 3; i <= 6; i++)
-      {
-	b += cps[i];
-	ASSERT (b >= cps[i]);
-      }
-  }
-#endif
 }
 
 mp_limb_t
-mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
+mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7])
 {
-  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+  mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
   mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
   mp_size_t i;
   int cnt;
 
-  ASSERT (n >= 1);
-
   B1modb = cps[2];
   B2modb = cps[3];
   B3modb = cps[4];
   B4modb = cps[5];
   B5modb = cps[6];
 
-  switch (n & 3)
-    {
-    case 0:
-      umul_ppmm (ph, pl, ap[n - 3], B1modb);
-      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
-      umul_ppmm (ch, cl, ap[n - 2], B2modb);
-      add_ssaaaa (ph, pl, ph, pl, ch, cl);
-      umul_ppmm (rh, rl, ap[n - 1], B3modb);
-      add_ssaaaa (rh, rl, rh, rl, ph, pl);
-      n -= 4;
-      break;
-    case 1:
-      rh = 0;
-      rl = ap[n - 1];
-      n -= 1;
-      break;
-    case 2:
-      rh = ap[n - 1];
-      rl = ap[n - 2];
-      n -= 2;
-      break;
-    case 3:
-      umul_ppmm (ph, pl, ap[n - 2], B1modb);
-      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
-      umul_ppmm (rh, rl, ap[n - 1], B2modb);
-      add_ssaaaa (rh, rl, rh, rl, ph, pl);
-      n -= 3;
-      break;
-    }
+  umul_ppmm (ph, pl, ap[n - 3], B1modb);
+  add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
+
+  umul_ppmm (ch, cl, ap[n - 2], B2modb);
+  add_ssaaaa (ph, pl, ph, pl, ch, cl);
 
-  for (i = n - 4; i >= 0; i -= 4)
+  umul_ppmm (ch, cl, ap[n - 1], B3modb);
+  add_ssaaaa (rh, rl, ph, pl, ch, cl);
+
+  for (i = n - 8; i >= 0; i -= 4)
     {
       /* rr = ap[i]				< B
 	    + ap[i+1] * (B mod b)		<= (B-1)(b-1)
@@ -143,7 +92,7 @@ mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
 	    + HI(rr)  * (B^5 mod b)		<= (B-1)(b-1)
       */
       umul_ppmm (ph, pl, ap[i + 1], B1modb);
-      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
 
       umul_ppmm (ch, cl, ap[i + 2], B2modb);
       add_ssaaaa (ph, pl, ph, pl, ch, cl);
@@ -158,14 +107,42 @@ mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
       add_ssaaaa (rh, rl, rh, rl, ph, pl);
     }
 
-  umul_ppmm (rh, cl, rh, B1modb);
-  add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+  if (i >= -3)
+    {
+      umul_ppmm (ph, pl, rl, B1modb);
+      add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 3]);
+      umul_ppmm (rh, rl, rh, B2modb);
+      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+      if (i >= -2)
+	{
+	  umul_ppmm (ph, pl, rl, B1modb);
+	  add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 2]);
+	  umul_ppmm (rh, rl, rh, B2modb);
+	  add_ssaaaa (rh, rl, rh, rl, ph, pl);
+	  if (i >= -1)
+	    {
+	      umul_ppmm (ph, pl, rl, B1modb);
+	      add_ssaaaa (ph, pl, ph, pl, 0, ap[0]);
+	      umul_ppmm (rh, rl, rh, B2modb);
+	      add_ssaaaa (rh, rl, rh, rl, ph, pl);
+	    }
+	}
+    }
 
-  cnt = cps[1];
   bi = cps[0];
+  cnt = cps[1];
 
+#if 1
+  umul_ppmm (rh, cl, rh, B1modb);
+  add_ssaaaa (rh, rl, rh, rl, 0, cl);
   r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+#else
+  udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
+		     (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
+  ASSERT (q <= 4);	/* optimize for small quotient? */
+#endif
+
+  udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
 
   return r >> cnt;
 }
diff --git a/gmp/mpn/generic/mod_34lsub1.c b/gmp/mpn/generic/mod_34lsub1.c
index 7c07af7acc..6bd149892d 100644
--- a/gmp/mpn/generic/mod_34lsub1.c
+++ b/gmp/mpn/generic/mod_34lsub1.c
@@ -4,33 +4,22 @@
    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
    FUTURE GNU MP RELEASES.
 
-Copyright 2000-2002 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 #include "gmp.h"
diff --git a/gmp/mpn/generic/mode1o.c b/gmp/mpn/generic/mode1o.c
index ec91da223d..064becdadf 100644
--- a/gmp/mpn/generic/mode1o.c
+++ b/gmp/mpn/generic/mode1o.c
@@ -4,33 +4,22 @@
    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
    FUTURE GNU MP RELEASES.
 
-Copyright 2000-2004 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -41,7 +30,7 @@ see https://www.gnu.org/licenses/.  */
 
            r*B^k + a - c == q*d
 
-   where B=2^GMP_LIMB_BITS, a is {src,size}, k is either size or size-1
+   where B=2^BITS_PER_MP_LIMB, a is {src,size}, k is either size or size-1
    (the caller won't know which), and q is the quotient (discarded).  d must
    be odd, c can be any limb value.
 
diff --git a/gmp/mpn/generic/mu_bdiv_q.c b/gmp/mpn/generic/mu_bdiv_q.c
index 0a8010ec15..3b5f56d088 100644
--- a/gmp/mpn/generic/mu_bdiv_q.c
+++ b/gmp/mpn/generic/mu_bdiv_q.c
@@ -4,44 +4,40 @@
 
    Contributed to the GNU project by Torbjorn Granlund.
 
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
 
-Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2007 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
-/*
-   The idea of the algorithm used herein is to compute a smaller inverted value
-   than used in the standard Barrett algorithm, and thus save time in the
-   Newton iterations, and pay just a small price when using the inverted value
-   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+/* We use the "misunderstanding algorithm" (MU), discovered by Paul Zimmermann
+   and Torbjorn Granlund when Torbjorn misunderstood Paul's explanation of
+   Jebelean's bidirectional exact division algorithm.
+
+   The idea of this algorithm is to compute a smaller inverted value than used
+   in the standard Barrett algorithm, and thus save time in the Newton
+   iterations, and pay just a small price when using the inverted value for
+   developing quotient bits.
+
+   Written by Torbjorn Granlund.  Paul Zimmermann suggested the use of the
+   "wrap around" trick.
 */
 
 #include "gmp.h"
@@ -53,10 +49,11 @@ see https://www.gnu.org/licenses/.  */
 
    Requirements: N >= D
 		 D >= 1
+		 N mod D = 0
 		 D odd
 		 dn >= 2
 		 nn >= 2
-		 scratch space as determined by mpn_mu_bdiv_q_itch(nn,dn).
+		 scratch space as determined by mpn_divexact_itch(nn,dn).
 
    Write quotient to Q = {qp,nn}.
 
@@ -72,10 +69,10 @@ mpn_mu_bdiv_q (mp_ptr qp,
 	       mp_srcptr dp, mp_size_t dn,
 	       mp_ptr scratch)
 {
+  mp_ptr ip;
+  mp_ptr rp;
   mp_size_t qn;
   mp_size_t in;
-  int cy, c0;
-  mp_size_t tn, wn;
 
   qn = nn;
 
@@ -85,52 +82,74 @@ mpn_mu_bdiv_q (mp_ptr qp,
   if (qn > dn)
     {
       mp_size_t b;
+      mp_ptr tp;
+      mp_limb_t cy;
+      int k;
+      mp_size_t m, wn;
+      mp_size_t i;
 
       /* |_______________________|   dividend
 			|________|   divisor  */
 
-#define ip           scratch			/* in */
-#define rp           (scratch + in)		/* dn or rest >= binvert_itch(in) */
-#define tp           (scratch + in + dn)	/* dn+in or next_size(dn) */
-#define scratch_out  (scratch + in + dn + tn)	/* mulmod_bnm1_itch(next_size(dn)) */
-
       /* Compute an inverse size that is a nice partition of the quotient.  */
       b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
       in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
 
       /* Some notes on allocation:
 
-	 When in = dn, R dies when mpn_mullo returns, if in < dn the low in
+	 When in = dn, R dies when mpn_mullow returns, if in < dn the low in
 	 limbs of R dies at that point.  We could save memory by letting T live
 	 just under R, and let the upper part of T expand into R. These changes
 	 should reduce itch to perhaps 3dn.
        */
 
-      mpn_binvert (ip, dp, in, rp);
+      ip = scratch;			/* in limbs */
+      rp = scratch + in;		/* dn limbs */
+      tp = scratch + in + dn;		/* dn + in limbs FIXME: mpn_fft_next_size */
+      scratch += in;			/* Roughly 2in+1 limbs */
+
+      mpn_binvert (ip, dp, in, scratch);
 
       cy = 0;
 
       MPN_COPY (rp, np, dn);
       np += dn;
-      mpn_mullo_n (qp, rp, ip, in);
+      mpn_mullow_n (qp, rp, ip, in);
       qn -= in;
 
+      if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
+	{
+	  k = mpn_fft_best_k (dn, 0);
+	  m = mpn_fft_next_size (dn, k);
+	  wn = dn + in - m;			/* number of wrapped limbs */
+	  ASSERT_ALWAYS (wn >= 0);		/* could handle this below */
+	}
+
       while (qn > in)
 	{
-	  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
-	    mpn_mul (tp, dp, dn, qp, in);	/* mulhi, need tp[dn+in-1...in] */
-	  else
+#if WANT_FFT
+	  if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
 	    {
-	      tn = mpn_mulmod_bnm1_next_size (dn);
-	      mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
-	      wn = dn + in - tn;		/* number of wrapped limbs */
-	      if (wn > 0)
-		{
-		  c0 = mpn_sub_n (tp + tn, tp, rp, wn);
-		  mpn_decr_u (tp + wn, c0);
-		}
+	      /* The two multiplicands are dn and 'in' limbs, with dn >= in.
+		 The relevant part of the result will typically partially wrap,
+		 and that part will come out as subtracted to the right.  The
+		 unwrapped part, m-in limbs at the high end of tp, is the lower
+		 part of the sought product.  The wrapped part, at the low end
+		 of tp, will be subtracted from the low part of the partial
+		 remainder; we undo that operation with another subtraction. */
+	      int c0;
+
+	      mpn_mul_fft (tp, m, dp, dn, qp, in, k);
+
+	      c0 = mpn_sub_n (tp + m, rp, tp, wn);
+
+	      for (i = wn; c0 != 0 && i < in; i++)
+		c0 = tp[i] == GMP_NUMB_MASK;
+	      mpn_incr_u (tp + in, c0);
 	    }
-
+	  else
+#endif
+	    mpn_mul (tp, dp, dn, qp, in);	/* mulhi, need tp[dn+in-1...in] */
 	  qp += in;
 	  if (dn != in)
 	    {
@@ -145,28 +164,29 @@ mpn_mu_bdiv_q (mp_ptr qp,
 	  /* Subtract tp[dn+in-1...dn] from dividend.  */
 	  cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
 	  np += in;
-	  mpn_mullo_n (qp, rp, ip, in);
+	  mpn_mullow_n (qp, rp, ip, in);
 	  qn -= in;
 	}
 
       /* Generate last qn limbs.
 	 FIXME: It should be possible to limit precision here, since qn is
 	 typically somewhat smaller than dn.  No big gains expected.  */
-
-      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
-	mpn_mul (tp, dp, dn, qp, in);		/* mulhi, need tp[qn+in-1...in] */
-      else
+#if WANT_FFT
+      if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
 	{
-	  tn = mpn_mulmod_bnm1_next_size (dn);
-	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
-	  wn = dn + in - tn;			/* number of wrapped limbs */
-	  if (wn > 0)
-	    {
-	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
-	      mpn_decr_u (tp + wn, c0);
-	    }
-	}
+	  int c0;
+
+	  mpn_mul_fft (tp, m, dp, dn, qp, in, k);
+
+	  c0 = mpn_sub_n (tp + m, rp, tp, wn);
 
+	  for (i = wn; c0 != 0 && i < in; i++)
+	    c0 = tp[i] == GMP_NUMB_MASK;
+	  mpn_incr_u (tp + in, c0);
+	}
+      else
+#endif
+	mpn_mul (tp, dp, dn, qp, in);		/* mulhi, need tp[qn+in-1...in] */
       qp += in;
       if (dn != in)
 	{
@@ -179,93 +199,57 @@ mpn_mu_bdiv_q (mp_ptr qp,
 	}
 
       mpn_sub_nc (rp + dn - in, np, tp + dn, qn - (dn - in), cy);
-      mpn_mullo_n (qp, rp, ip, qn);
-
-#undef ip
-#undef rp
-#undef tp
-#undef scratch_out
-   }
+      mpn_mullow_n (qp, rp, ip, qn);
+    }
   else
     {
       /* |_______________________|   dividend
 		|________________|   divisor  */
 
-#define ip           scratch		/* in */
-#define tp           (scratch + in)	/* qn+in or next_size(qn) or rest >= binvert_itch(in) */
-#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(qn)) */
-
       /* Compute half-sized inverse.  */
       in = qn - (qn >> 1);
 
-      mpn_binvert (ip, dp, in, tp);
+      ip = scratch;			/* ceil(qn/2) limbs */
+      rp = scratch + in;		/* ceil(qn/2)+qn limbs */
+      scratch += in;			/* 2*ceil(qn/2)+2 */
 
-      mpn_mullo_n (qp, np, ip, in);		/* low `in' quotient limbs */
+      mpn_binvert (ip, dp, in, scratch);
 
-      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
-	mpn_mul (tp, dp, qn, qp, in);		/* mulhigh */
-      else
+      mpn_mullow_n (qp, np, ip, in);		/* low `in' quotient limbs */
+#if WANT_FFT
+      if (ABOVE_THRESHOLD (qn, MUL_FFT_MODF_THRESHOLD))
 	{
-	  tn = mpn_mulmod_bnm1_next_size (qn);
-	  mpn_mulmod_bnm1 (tp, tn, dp, qn, qp, in, scratch_out);
-	  wn = qn + in - tn;			/* number of wrapped limbs */
-	  if (wn > 0)
-	    {
-	      c0 = mpn_cmp (tp, np, wn) < 0;
-	      mpn_decr_u (tp + wn, c0);
-	    }
+	  int k;
+	  mp_size_t m;
+
+	  k = mpn_fft_best_k (qn, 0);
+	  m = mpn_fft_next_size (qn, k);
+	  mpn_mul_fft (rp, m, dp, qn, qp, in, k);
+	  if (mpn_cmp (np, rp, in) < 0)
+	    mpn_incr_u (rp + in, 1);
 	}
+      else
+#endif
+	mpn_mul (rp, dp, qn, qp, in);		/* mulhigh */
 
-      mpn_sub_n (tp, np + in, tp + in, qn - in);
-      mpn_mullo_n (qp + in, tp, ip, qn - in);	/* high qn-in quotient limbs */
-
-#undef ip
-#undef tp
-#undef scratch_out
+      mpn_sub_n (rp, np + in, rp + in, qn - in);
+      mpn_mullow_n (qp + in, rp, ip, qn - in);	/* high qn-in quotient limbs */
     }
 }
 
 mp_size_t
 mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
 {
-  mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
-  mp_size_t b;
+  mp_size_t qn;
 
   qn = nn;
 
   if (qn > dn)
     {
-      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
-      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
-      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
-	{
-	  tn = dn + in;
-	  itch_out = 0;
-	}
-      else
-	{
-	  tn = mpn_mulmod_bnm1_next_size (dn);
-	  itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
-	}
-      itch_binvert = mpn_binvert_itch (in);
-      itches = dn + tn + itch_out;
-      return in + MAX (itches, itch_binvert);
+      return 4 * dn;		/* FIXME FIXME FIXME need mpn_fft_next_size */
     }
   else
     {
-      in = qn - (qn >> 1);
-      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
-	{
-	  tn = qn + in;
-	  itch_out = 0;
-	}
-      else
-	{
-	  tn = mpn_mulmod_bnm1_next_size (qn);
-	  itch_out = mpn_mulmod_bnm1_itch (tn, qn, in);
-	}
-      itch_binvert = mpn_binvert_itch (in);
-      itches = tn + itch_out;
-      return in + MAX (itches, itch_binvert);
+      return 2 * qn + 1 + 2;	/* FIXME FIXME FIXME need mpn_fft_next_size */
     }
 }
diff --git a/gmp/mpn/generic/mu_bdiv_qr.c b/gmp/mpn/generic/mu_bdiv_qr.c
index d265440f2b..e66b4a117e 100644
--- a/gmp/mpn/generic/mu_bdiv_qr.c
+++ b/gmp/mpn/generic/mu_bdiv_qr.c
@@ -1,289 +1,51 @@
-/* mpn_mu_bdiv_qr(qp,rp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^qn,
-   where qn = nn-dn, storing the result in {qp,qn}.  Overlap allowed between Q
-   and N; all other overlap disallowed.
+/* mpn_mu_bdiv_qr -- divide-and-conquer Hensel division using a variant of
+   Barrett's algorithm, returning quotient and remainder.
 
-   Contributed to the GNU project by Torbjorn Granlund.
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
 
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2005-2007, 2009, 2010, 2012 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2007 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-
-/*
-   The idea of the algorithm used herein is to compute a smaller inverted value
-   than used in the standard Barrett algorithm, and thus save time in the
-   Newton iterations, and pay just a small price when using the inverted value
-   for developing quotient bits.  This algorithm was presented at ICMS 2006.
-*/
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 
 
-/* N = {np,nn}
-   D = {dp,dn}
+/* Computes Hensel binary division of {np, 2*n} by {dp, n}.
+
+   Output:
+
+      q = n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},
 
-   Requirements: N >= D
-		 D >= 1
-		 D odd
-		 dn >= 2
-		 nn >= 2
-		 scratch space as determined by mpn_mu_bdiv_qr_itch(nn,dn).
+      r = (n - q * d) * 2^{-qn * GMP_NUMB_BITS}
 
-   Write quotient to Q = {qp,nn-dn}.
+   Stores q at qp. Stores the n least significant limbs of r at the high half
+   of np, and returns the borrow from the subtraction n - q*d.
 
-   FIXME: When iterating, perhaps do the small step before loop, not after.
-   FIXME: Try to avoid the scalar divisions when computing inverse size.
-   FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible.  In
-	  particular, when dn==in, tp and rp could use the same space.
-*/
-mp_limb_t
+   d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
+
+void
 mpn_mu_bdiv_qr (mp_ptr qp,
 		mp_ptr rp,
 		mp_srcptr np, mp_size_t nn,
 		mp_srcptr dp, mp_size_t dn,
 		mp_ptr scratch)
 {
-  mp_size_t qn;
-  mp_size_t in;
-  mp_limb_t cy, c0;
-  mp_size_t tn, wn;
-
-  qn = nn - dn;
-
-  ASSERT (dn >= 2);
-  ASSERT (qn >= 2);
-
-  if (qn > dn)
-    {
-      mp_size_t b;
-
-      /* |_______________________|   dividend
-			|________|   divisor  */
-
-#define ip           scratch		/* in */
-#define tp           (scratch + in)	/* dn+in or next_size(dn) or rest >= binvert_itch(in) */
-#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
-
-      /* Compute an inverse size that is a nice partition of the quotient.  */
-      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
-      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
-
-      /* Some notes on allocation:
-
-	 When in = dn, R dies when mpn_mullo returns, if in < dn the low in
-	 limbs of R dies at that point.  We could save memory by letting T live
-	 just under R, and let the upper part of T expand into R. These changes
-	 should reduce itch to perhaps 3dn.
-       */
-
-      mpn_binvert (ip, dp, in, tp);
-
-      MPN_COPY (rp, np, dn);
-      np += dn;
-      cy = 0;
-
-      while (qn > in)
-	{
-	  mpn_mullo_n (qp, rp, ip, in);
-
-	  if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
-	    mpn_mul (tp, dp, dn, qp, in);	/* mulhi, need tp[dn+in-1...in] */
-	  else
-	    {
-	      tn = mpn_mulmod_bnm1_next_size (dn);
-	      mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
-	      wn = dn + in - tn;		/* number of wrapped limbs */
-	      if (wn > 0)
-		{
-		  c0 = mpn_sub_n (tp + tn, tp, rp, wn);
-		  mpn_decr_u (tp + wn, c0);
-		}
-	    }
-
-	  qp += in;
-	  qn -= in;
-
-	  if (dn != in)
-	    {
-	      /* Subtract tp[dn-1...in] from partial remainder.  */
-	      cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
-	      if (cy == 2)
-		{
-		  mpn_incr_u (tp + dn, 1);
-		  cy = 1;
-		}
-	    }
-	  /* Subtract tp[dn+in-1...dn] from dividend.  */
-	  cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
-	  np += in;
-	}
-
-      /* Generate last qn limbs.  */
-      mpn_mullo_n (qp, rp, ip, qn);
-
-      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
-	mpn_mul (tp, dp, dn, qp, qn);		/* mulhi, need tp[qn+in-1...in] */
-      else
-	{
-	  tn = mpn_mulmod_bnm1_next_size (dn);
-	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
-	  wn = dn + qn - tn;			/* number of wrapped limbs */
-	  if (wn > 0)
-	    {
-	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
-	      mpn_decr_u (tp + wn, c0);
-	    }
-	}
-
-      if (dn != qn)
-	{
-	  cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
-	  if (cy == 2)
-	    {
-	      mpn_incr_u (tp + dn, 1);
-	      cy = 1;
-	    }
-	}
-      return mpn_sub_nc (rp + dn - qn, np, tp + dn, qn, cy);
-
-#undef ip
-#undef tp
-#undef scratch_out
-    }
-  else
-    {
-      /* |_______________________|   dividend
-		|________________|   divisor  */
-
-#define ip           scratch		/* in */
-#define tp           (scratch + in)	/* dn+in or next_size(dn) or rest >= binvert_itch(in) */
-#define scratch_out  (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
-
-      /* Compute half-sized inverse.  */
-      in = qn - (qn >> 1);
-
-      mpn_binvert (ip, dp, in, tp);
-
-      mpn_mullo_n (qp, np, ip, in);		/* low `in' quotient limbs */
-
-      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
-	mpn_mul (tp, dp, dn, qp, in);		/* mulhigh */
-      else
-	{
-	  tn = mpn_mulmod_bnm1_next_size (dn);
-	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
-	  wn = dn + in - tn;			/* number of wrapped limbs */
-	  if (wn > 0)
-	    {
-	      c0 = mpn_sub_n (tp + tn, tp, np, wn);
-	      mpn_decr_u (tp + wn, c0);
-	    }
-	}
-
-      qp += in;
-      qn -= in;
-
-      cy = mpn_sub_n (rp, np + in, tp + in, dn);
-      mpn_mullo_n (qp, rp, ip, qn);		/* high qn quotient limbs */
-
-      if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
-	mpn_mul (tp, dp, dn, qp, qn);		/* mulhigh */
-      else
-	{
-	  tn = mpn_mulmod_bnm1_next_size (dn);
-	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
-	  wn = dn + qn - tn;			/* number of wrapped limbs */
-	  if (wn > 0)
-	    {
-	      c0 = mpn_sub_n (tp + tn, tp, rp, wn);
-	      mpn_decr_u (tp + wn, c0);
-	    }
-	}
-
-      cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
-      if (cy == 2)
-	{
-	  mpn_incr_u (tp + dn, 1);
-	  cy = 1;
-	}
-      return mpn_sub_nc (rp + dn - qn, np + dn + in, tp + dn, qn, cy);
-
-#undef ip
-#undef tp
-#undef scratch_out
-    }
-}
-
-mp_size_t
-mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
-{
-  mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
-  mp_size_t b;
-
-  qn = nn - dn;
-
-  if (qn > dn)
-    {
-      b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
-      in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
-      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
-	{
-	  tn = dn + in;
-	  itch_out = 0;
-	}
-      else
-	{
-	  tn = mpn_mulmod_bnm1_next_size (dn);
-	  itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
-	}
-      itch_binvert = mpn_binvert_itch (in);
-      itches = tn + itch_out;
-      return in + MAX (itches, itch_binvert);
-    }
-  else
-    {
-      in = qn - (qn >> 1);
-      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
-	{
-	  tn = dn + in;
-	  itch_out = 0;
-	}
-      else
-	{
-	  tn = mpn_mulmod_bnm1_next_size (dn);
-	  itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
-	}
-    }
-  itch_binvert = mpn_binvert_itch (in);
-  itches = tn + itch_out;
-  return in + MAX (itches, itch_binvert);
+  ASSERT_ALWAYS (0);
 }
diff --git a/gmp/mpn/generic/mu_div_q.c b/gmp/mpn/generic/mu_div_q.c
index 8768ba6c60..150e8b77cd 100644
--- a/gmp/mpn/generic/mu_div_q.c
+++ b/gmp/mpn/generic/mu_div_q.c
@@ -1,46 +1,29 @@
-/* mpn_mu_div_q.
+/* mpn_mu_div_q, mpn_preinv_mu_div_q.
 
-   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+   Contributed to the GNU project by Torbjörn Granlund.
 
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
 
-Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2007 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-/*
-   The idea of the algorithm used herein is to compute a smaller inverted value
-   than used in the standard Barrett algorithm, and thus save time in the
-   Newton iterations, and pay just a small price when using the inverted value
-   for developing quotient bits.  This algorithm was presented at ICMS 2006.
-*/
 
 /*
   Things to work on:
@@ -48,14 +31,18 @@ see https://www.gnu.org/licenses/.  */
   1. This is a rudimentary implementation of mpn_mu_div_q.  The algorithm is
      probably close to optimal, except when mpn_mu_divappr_q fails.
 
-  2. We used to fall back to mpn_mu_div_qr when we detect a possible
-     mpn_mu_divappr_q rounding problem, now we multiply and compare.
+     An alternative which could be considered for much simpler code for the
+     complex qn>=dn arm would be to allocate a temporary nn+1 limb buffer, then
+     simply call mpn_mu_divappr_q.  Such a temporary allocation is
+     unfortunately very large.
+
+  2. Instead of falling back to mpn_mu_div_qr when we detect a possible
+     mpn_mu_divappr_q rounding problem, we could multiply and compare.
      Unfortunately, since mpn_mu_divappr_q does not return the partial
-     remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr could
-     solve that.
+     remainder, this also doesn't become optimal.  A mpn_mu_divappr_qr
+     could solve that.
 
-  3. The allocations done here should be made from the scratch area, which
-     then would need to be amended.
+  3. The allocations done here should be made from the scratch area.
 */
 
 #include <stdlib.h>		/* for NULL */
@@ -65,13 +52,13 @@ see https://www.gnu.org/licenses/.  */
 
 mp_limb_t
 mpn_mu_div_q (mp_ptr qp,
-	      mp_srcptr np, mp_size_t nn,
+	      mp_ptr np, mp_size_t nn,
 	      mp_srcptr dp, mp_size_t dn,
 	      mp_ptr scratch)
 {
-  mp_ptr tp, rp;
-  mp_size_t qn;
-  mp_limb_t cy, qh;
+  mp_ptr tp, rp, ip, this_ip;
+  mp_size_t qn, in, this_in;
+  mp_limb_t cy;
   TMP_DECL;
 
   TMP_MARK;
@@ -82,28 +69,59 @@ mpn_mu_div_q (mp_ptr qp,
 
   if (qn >= dn)			/* nn >= 2*dn + 1 */
     {
-       /* |_______________________|   dividend
-			 |________|   divisor  */
+      /* Find max inverse size needed by the two preinv calls.  */
+      if (dn != qn)
+	{
+	  mp_size_t in1, in2;
 
-      rp = TMP_BALLOC_LIMBS (nn + 1);
-      MPN_COPY (rp + 1, np, nn);
-      rp[0] = 0;
+	  in1 = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
+	  in2 = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
+	  in = MAX (in1, in2);
+	}
+      else
+	{
+	  in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
+	}
 
-      qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
-      if (qh != 0)
-	mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);
+      ip = TMP_BALLOC_LIMBS (in + 1);
 
-      cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);
+      if (dn == in)
+	{
+	  MPN_COPY (scratch + 1, dp, in);
+	  scratch[0] = 1;
+	  mpn_invert (ip, scratch, in + 1, NULL);
+	  MPN_COPY_INCR (ip, ip + 1, in);
+	}
+      else
+	{
+	  cy = mpn_add_1 (scratch, dp + dn - (in + 1), in + 1, 1);
+	  if (UNLIKELY (cy != 0))
+	    MPN_ZERO (ip, in);
+	  else
+	    {
+	      mpn_invert (ip, scratch, in + 1, NULL);
+	      MPN_COPY_INCR (ip, ip + 1, in);
+	    }
+	}
 
-      if (UNLIKELY (cy != 0))
+       /* |_______________________|   dividend
+			 |________|   divisor  */
+      rp = TMP_BALLOC_LIMBS (2 * dn + 1);
+      if (dn != qn)		/* FIXME: perhaps mpn_mu_div_qr should DTRT */
 	{
-	  /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
-	     canonically reduced, replace the returned value of B^(qn-dn)+eps
-	     by the largest possible value.  */
-	  mp_size_t i;
-	  for (i = 0; i < qn + 1; i++)
-	    tp[i] = GMP_NUMB_MAX;
+	  this_in = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
+	  this_ip = ip + in - this_in;
+	  mpn_preinv_mu_div_qr (tp + dn + 1, rp + dn + 1, np + dn, qn, dp, dn,
+				this_ip, this_in, scratch);
 	}
+      else
+	MPN_COPY (rp + dn + 1, np + dn, dn);
+
+      MPN_COPY (rp + 1, np, dn);
+      rp[0] = 0;
+      this_in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
+      this_ip = ip + in - this_in;
+      mpn_preinv_mu_divappr_q (tp, rp, 2*dn + 1, dp, dn, this_ip, this_in, scratch);
 
       /* The max error of mpn_mu_divappr_q is +4.  If the low quotient limb is
 	 greater than the max error, we cannot trust the quotient.  */
@@ -113,73 +131,27 @@ mpn_mu_div_q (mp_ptr qp,
 	}
       else
 	{
-	  mp_limb_t cy;
-	  mp_ptr pp;
-
-	  pp = rp;
-	  mpn_mul (pp, tp + 1, qn, dp, dn);
-
-	  cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
-
-	  if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
-	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
-	  else /* Same as above */
-	    MPN_COPY (qp, tp + 1, qn);
+	  /* Fall back to plain mpn_mu_div_qr.  */
+	  mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
 	}
     }
   else
     {
        /* |_______________________|   dividend
 		 |________________|   divisor  */
+      mpn_mu_divappr_q (tp, np + nn - (2*qn + 2), 2*qn + 2, dp + dn - (qn + 1), qn + 1, scratch);
 
-      /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
-	 here becomes 2dn, i.e., more than nn.  This shouldn't hurt, since only
-	 the most significant dn-1 limbs will actually be read, but it is not
-	 pretty.  */
-
-      qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
-			     dp + dn - (qn + 1), qn + 1, scratch);
-
-      /* The max error of mpn_mu_divappr_q is +4, but we get an additional
-         error from the divisor truncation.  */
-      if (tp[0] > 6)
+      if (tp[0] > 4)
 	{
 	  MPN_COPY (qp, tp + 1, qn);
 	}
       else
 	{
-	  mp_limb_t cy;
-
-	  /* FIXME: a shorter product should be enough; we may use already
-	     allocated space... */
-	  rp = TMP_BALLOC_LIMBS (nn);
-	  mpn_mul (rp, dp, dn, tp + 1, qn);
-
-	  cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
-
-	  if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
-	    qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
-	  else /* Same as above */
-	    MPN_COPY (qp, tp + 1, qn);
+	  rp = TMP_BALLOC_LIMBS (dn);
+	  mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
 	}
     }
 
   TMP_FREE;
-  return qh;
-}
-
-mp_size_t
-mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
-{
-  mp_size_t qn;
-
-  qn = nn - dn;
-  if (qn >= dn)
-    {
-      return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
-    }
-  else
-    {
-      return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
-    }
+  return 0;
 }
diff --git a/gmp/mpn/generic/mu_div_qr.c b/gmp/mpn/generic/mu_div_qr.c
index f4700a1ea6..9049e5907a 100644
--- a/gmp/mpn/generic/mu_div_qr.c
+++ b/gmp/mpn/generic/mu_div_qr.c
@@ -7,67 +7,87 @@
 
    Contributed to the GNU project by Torbjorn Granlund.
 
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
 
-Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2007 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+/* We use the "misunderstanding algorithm" (MUA), discovered by Paul Zimmermann
+   and Torbjorn Granlund when Torbjorn misunderstood Paul's explanation of
+   Jebelean's bidirectional exact division algorithm.
 
+   The idea of this algorithm is to compute a smaller inverted value than used
+   in the standard Barrett algorithm, and thus save time in the Newton
+   iterations, and pay just a small price when using the inverted value for
+   developing quotient bits.
 
-/*
-   The idea of the algorithm used herein is to compute a smaller inverted value
-   than used in the standard Barrett algorithm, and thus save time in the
-   Newton iterations, and pay just a small price when using the inverted value
-   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+   Written by Torbjorn Granlund.  Paul Zimmermann suggested the use of the
+   "wrap around" trick.  Based on the GMP divexact code and inspired by code
+   contributed to GMP by Karl Hasselstroem.
 */
 
-/* CAUTION: This code and the code in mu_divappr_q.c should be edited in sync.
+
+/* CAUTION: This code and the code in mu_divappr_q.c should be edited in lockstep.
 
  Things to work on:
 
+  * Passing k isn't a great interface.  Either 'in' should be passed, or
+    determined by the code.
+
+  * The current mpn_mu_div_qr_itch isn't exactly scientifically written.
+    Scratch space buffer overruns are not unlikely before some analysis is
+    applied.  Since scratch requirements are expected to change, such an
+    analysis will have to wait until things settle.
+
+  * This isn't optimal when the remainder isn't needed, since the final
+    multiplication could be made special and take O(1) time on average, in that
+    case.  This is particularly bad when qn << dn.  At some level, code as in
+    GMP 4 mpn_tdiv_qr should be used, effectively dividing the leading 2qn
+    dividend limbs by the qn divisor limbs.
+
   * This isn't optimal when the quotient isn't needed, as it might take a lot
-    of space.  The computation is always needed, though, so there is no time to
-    save with special code.
+    of space.  The computation is always needed, though, so there is no time
+    to save with special code.
 
   * The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
-    demonstrated by the fact that the mpn_invertappr function's scratch needs
-    mean that we need to keep a large allocation long after it is needed.
-    Things are worse as mpn_mul_fft does not accept any scratch parameter,
-    which means we'll have a large memory hole while in mpn_mul_fft.  In
-    general, a peak scratch need in the beginning of a function isn't
-    well-handled by the itch/scratch scheme.
+    demonstrated by the fact that the mpn_invert function's scratch needs mean
+    that we need to keep a large allocation long after it is needed.  Things
+    are worse as mpn_mul_fft does not accept any scratch parameter, which means
+    we'll have a large memory hole while in mpn_mul_fft.  In general, a peak
+    scratch need in the beginning of a function isn't well-handled by the
+    itch/scratch scheme.
+
+  * Some ideas from comments in divexact.c apply to this code too.
 */
 
+/* the NOSTAT stuff handles properly the case where files are concatenated */
+#ifdef NOSTAT
+#undef STAT
+#endif
+
 #ifdef STAT
 #undef STAT
 #define STAT(x) x
 #else
+#define NOSTAT
 #define STAT(x)
 #endif
 
@@ -76,98 +96,65 @@ see https://www.gnu.org/licenses/.  */
 #include "gmp-impl.h"
 
 
-/* FIXME: The MU_DIV_QR_SKEW_THRESHOLD was not analysed properly.  It gives a
-   speedup according to old measurements, but does the decision mechanism
-   really make sense?  It seem like the quotient between dn and qn might be
-   what we really should be checking.  */
-#ifndef MU_DIV_QR_SKEW_THRESHOLD
-#define MU_DIV_QR_SKEW_THRESHOLD 100
-#endif
-
-#ifdef CHECK				/* FIXME: Enable in minithres */
-#undef  MU_DIV_QR_SKEW_THRESHOLD
-#define MU_DIV_QR_SKEW_THRESHOLD 1
-#endif
-
-
-static mp_limb_t mpn_mu_div_qr2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
-
-
-mp_limb_t
-mpn_mu_div_qr (mp_ptr qp,
-	       mp_ptr rp,
-	       mp_srcptr np,
-	       mp_size_t nn,
-	       mp_srcptr dp,
-	       mp_size_t dn,
-	       mp_ptr scratch)
+/* In case k=0 (automatic choice), we distinguish 3 cases:
+   (a) dn < qn:         in = ceil(qn / ceil(qn/dn))
+   (b) dn/3 < qn <= dn: in = ceil(qn / 2)
+   (c) qn <= dn/3:      in = qn
+   In all cases we have in <= dn.
+ */
+mp_size_t
+mpn_mu_div_qr_choose_in (mp_size_t qn, mp_size_t dn, int k)
 {
-  mp_size_t qn;
-  mp_limb_t cy, qh;
+  mp_size_t in;
 
-  qn = nn - dn;
-  if (qn + MU_DIV_QR_SKEW_THRESHOLD < dn)
+  if (k == 0)
     {
-      /* |______________|_ign_first__|   dividend			  nn
-		|_______|_ign_first__|   divisor			  dn
-
-		|______|	     quotient (prel)			  qn
-
-		 |___________________|   quotient * ignored-divisor-part  dn-1
-      */
-
-      /* Compute a preliminary quotient and a partial remainder by dividing the
-	 most significant limbs of each operand.  */
-      qh = mpn_mu_div_qr2 (qp, rp + nn - (2 * qn + 1),
-			   np + nn - (2 * qn + 1), 2 * qn + 1,
-			   dp + dn - (qn + 1), qn + 1,
-			   scratch);
-
-      /* Multiply the quotient by the divisor limbs ignored above.  */
-      if (dn - (qn + 1) > qn)
-	mpn_mul (scratch, dp, dn - (qn + 1), qp, qn);  /* prod is dn-1 limbs */
-      else
-	mpn_mul (scratch, qp, qn, dp, dn - (qn + 1));  /* prod is dn-1 limbs */
-
-      if (qh)
-	cy = mpn_add_n (scratch + qn, scratch + qn, dp, dn - (qn + 1));
+      mp_size_t b;
+      if (qn > dn)
+	{
+	  /* Compute an inverse size that is a nice partition of the quotient.  */
+	  b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+	  in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+	}
+      else if (3 * qn > dn)
+	{
+	  in = (qn - 1) / 2 + 1;	/* b = 2 */
+	}
       else
-	cy = 0;
-      scratch[dn - 1] = cy;
-
-      cy = mpn_sub_n (rp, np, scratch, nn - (2 * qn + 1));
-      cy = mpn_sub_nc (rp + nn - (2 * qn + 1),
-		       rp + nn - (2 * qn + 1),
-		       scratch + nn - (2 * qn + 1),
-		       qn + 1, cy);
-      if (cy)
 	{
-	  qh -= mpn_sub_1 (qp, qp, qn, 1);
-	  mpn_add_n (rp, rp, dp, dn);
+	  in = (qn - 1) / 1 + 1;	/* b = 1 */
 	}
     }
   else
     {
-      qh = mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);
+      mp_size_t xn;
+      xn = MIN (dn, qn);
+      in = (xn - 1) / k + 1;
     }
 
-  return qh;
+  return in;
 }
 
 static mp_limb_t
 mpn_mu_div_qr2 (mp_ptr qp,
 		mp_ptr rp,
-		mp_srcptr np,
+		mp_ptr np,
 		mp_size_t nn,
 		mp_srcptr dp,
 		mp_size_t dn,
 		mp_ptr scratch)
 {
   mp_size_t qn, in;
-  mp_limb_t cy, qh;
+  mp_limb_t cy;
   mp_ptr ip, tp;
 
-  ASSERT (dn > 1);
+  /* FIXME: We should probably not handle tiny operands, but do it for now.  */
+  if (dn == 1)
+    {
+      rp[0] = mpn_divrem_1 (scratch, 0L, np, nn, dp[0]);
+      MPN_COPY (qp, scratch, nn - 1);
+      return scratch[nn - 1];
+    }
 
   qn = nn - dn;
 
@@ -178,7 +165,7 @@ mpn_mu_div_qr2 (mp_ptr qp,
 #if 1
   /* This alternative inverse computation method gets slightly more accurate
      results.  FIXMEs: (1) Temp allocation needs not analysed (2) itch function
-     not adapted (3) mpn_invertappr scratch needs not met.  */
+     not adapted (3) mpn_invert scratch needs not met.  */
   ip = scratch;
   tp = scratch + in + 1;
 
@@ -187,7 +174,7 @@ mpn_mu_div_qr2 (mp_ptr qp,
     {
       MPN_COPY (tp + 1, dp, in);
       tp[0] = 1;
-      mpn_invertappr (ip, tp, in + 1, NULL);
+      mpn_invert (ip, tp, in + 1, NULL);
       MPN_COPY_INCR (ip, ip + 1, in);
     }
   else
@@ -197,7 +184,7 @@ mpn_mu_div_qr2 (mp_ptr qp,
 	MPN_ZERO (ip, in);
       else
 	{
-	  mpn_invertappr (ip, tp, in + 1, NULL);
+	  mpn_invert (ip, tp, in + 1, NULL);
 	  MPN_COPY_INCR (ip, ip + 1, in);
 	}
     }
@@ -213,11 +200,11 @@ mpn_mu_div_qr2 (mp_ptr qp,
     {
       tp[in + 1] = 0;
       MPN_COPY (tp + in + 2, dp, in);
-      mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
+      mpn_invert (tp, tp + in + 1, in + 1, NULL);
     }
   else
     {
-      mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
+      mpn_invert (tp, dp + dn - (in + 1), in + 1, NULL);
     }
   cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
   if (UNLIKELY (cy != 0))
@@ -225,15 +212,24 @@ mpn_mu_div_qr2 (mp_ptr qp,
   MPN_COPY (ip, tp + 1, in);
 #endif
 
-  qh = mpn_preinv_mu_div_qr (qp, rp, np, nn, dp, dn, ip, in, scratch + in);
+/* We can't really handle qh = 1 like this since we'd here clobber N, which is
+   not allowed in the way we've defined this function's API.  */
+#if 0
+  qh = mpn_cmp (np + qn, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (np + qn, np + qn, dp, dn);
+#endif
 
-  return qh;
+  mpn_preinv_mu_div_qr (qp, rp, np, nn, dp, dn, ip, in, scratch + in);
+
+/*  return qh; */
+  return 0;
 }
 
-mp_limb_t
+void
 mpn_preinv_mu_div_qr (mp_ptr qp,
 		      mp_ptr rp,
-		      mp_srcptr np,
+		      mp_ptr np,
 		      mp_size_t nn,
 		      mp_srcptr dp,
 		      mp_size_t dn,
@@ -242,26 +238,24 @@ mpn_preinv_mu_div_qr (mp_ptr qp,
 		      mp_ptr scratch)
 {
   mp_size_t qn;
-  mp_limb_t cy, cx, qh;
+  mp_limb_t cy;
+  mp_ptr tp;
   mp_limb_t r;
-  mp_size_t tn, wn;
-
-#define tp           scratch
-#define scratch_out  (scratch + tn)
 
   qn = nn - dn;
 
+  if (qn == 0)
+    {
+      MPN_COPY (rp, np, dn);
+      return;
+    }
+
+  tp = scratch;
+
   np += qn;
   qp += qn;
 
-  qh = mpn_cmp (np, dp, dn) >= 0;
-  if (qh != 0)
-    mpn_sub_n (rp, np, dp, dn);
-  else
-    MPN_COPY_INCR (rp, np, dn);
-
-  if (qn == 0)
-    return qh;			/* Degenerate use.  Should we allow this? */
+  MPN_COPY (rp, np, dn);
 
   while (qn > 0)
     {
@@ -277,30 +271,36 @@ mpn_preinv_mu_div_qr (mp_ptr qp,
 	 by the upper part of the partial remainder R.  */
       mpn_mul_n (tp, rp + dn - in, ip, in);		/* mulhi  */
       cy = mpn_add_n (qp, tp + in, rp + dn - in, in);	/* I's msb implicit */
-      ASSERT_ALWAYS (cy == 0);
-
-      qn -= in;
+      ASSERT_ALWAYS (cy == 0);			/* FIXME */
 
       /* Compute the product of the quotient block and the divisor D, to be
 	 subtracted from the partial remainder combined with new limbs from the
-	 dividend N.  We only really need the low dn+1 limbs.  */
-
-      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
-	mpn_mul (tp, dp, dn, qp, in);		/* dn+in limbs, high 'in' cancels */
-      else
+	 dividend N.  We only really need the low dn limbs.  */
+#if WANT_FFT
+      if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
 	{
-	  tn = mpn_mulmod_bnm1_next_size (dn + 1);
-	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
-	  wn = dn + in - tn;			/* number of wrapped limbs */
+	  /* Use the wrap-around trick.  */
+	  mp_size_t m, wn;
+	  int k;
+
+	  k = mpn_fft_best_k (dn + 1, 0);
+	  m = mpn_fft_next_size (dn + 1, k);
+	  wn = dn + in - m;			/* number of wrapped limbs */
+
+	  mpn_mul_fft (tp, m, dp, dn, qp, in, k);
+
 	  if (wn > 0)
 	    {
-	      cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
-	      cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
-	      cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
-	      ASSERT_ALWAYS (cx >= cy);
-	      mpn_incr_u (tp, cx - cy);
+	      cy = mpn_add_n (tp, tp, rp + dn - wn, wn);
+	      mpn_incr_u (tp + wn, cy);
+
+	      cy = mpn_cmp (rp + dn - in, tp + dn, m - dn) < 0;
+	      mpn_decr_u (tp, cy);
 	    }
 	}
+      else
+#endif
+	mpn_mul (tp, dp, dn, qp, in);		/* dn+in limbs, high 'in' cancels */
 
       r = rp[dn - in] - tp[dn];
 
@@ -352,65 +352,112 @@ mpn_preinv_mu_div_qr (mp_ptr qp,
 		printf ("\n");
 	      }
 	    );
-    }
 
-  return qh;
+      qn -= in;
+    }
 }
 
-/* In case k=0 (automatic choice), we distinguish 3 cases:
-   (a) dn < qn:         in = ceil(qn / ceil(qn/dn))
-   (b) dn/3 < qn <= dn: in = ceil(qn / 2)
-   (c) qn < dn/3:       in = qn
-   In all cases we have in <= dn.
- */
-mp_size_t
-mpn_mu_div_qr_choose_in (mp_size_t qn, mp_size_t dn, int k)
+#define THRES 100		/* FIXME: somewhat arbitrary */
+
+#ifdef CHECK
+#undef THRES
+#define THRES 1
+#endif
+
+mp_limb_t
+mpn_mu_div_qr (mp_ptr qp,
+	       mp_ptr rp,
+	       mp_ptr np,
+	       mp_size_t nn,
+	       mp_srcptr dp,
+	       mp_size_t dn,
+	       mp_ptr scratch)
 {
-  mp_size_t in;
+  mp_size_t qn;
 
-  if (k == 0)
+  qn = nn - dn;
+  if (qn + THRES < dn)
     {
-      mp_size_t b;
-      if (qn > dn)
-	{
-	  /* Compute an inverse size that is a nice partition of the quotient.  */
-	  b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
-	  in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
-	}
-      else if (3 * qn > dn)
+      /* |______________|________|   dividend				  nn
+		|_______|________|   divisor				  dn
+
+		|______|	     quotient (prel)			  qn
+
+		 |_______________|   quotient * ignored-part-of(divisor)  dn-1
+      */
+
+      mp_limb_t cy, x;
+
+      if (mpn_cmp (np + nn - (qn + 1), dp + dn - (qn + 1), qn + 1) >= 0)
 	{
-	  in = (qn - 1) / 2 + 1;	/* b = 2 */
+	  /* Quotient is 111...111, could optimize this rare case at some point.  */
+	  mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);
+	  return 0;
 	}
+
+      /* Compute a preliminary quotient and a partial remainder by dividing the
+	 most significant limbs of each operand.  */
+      mpn_mu_div_qr2 (qp, rp + nn - (2 * qn + 1),
+		      np + nn - (2 * qn + 1), 2 * qn + 1,
+		      dp + dn - (qn + 1), qn + 1,
+		      scratch);
+
+      /* Multiply the quotient by the divisor limbs ignored above.  */
+      if (dn - (qn + 1) > qn)
+	mpn_mul (scratch, dp, dn - (qn + 1), qp, qn);  /* prod is dn-1 limbs */
       else
+	mpn_mul (scratch, qp, qn, dp, dn - (qn + 1));  /* prod is dn-1 limbs */
+
+      cy = mpn_sub_n (rp, np, scratch, nn - (2 * qn + 1));
+      cy = mpn_sub_nc (rp + nn - (2 * qn + 1),
+		       rp + nn - (2 * qn + 1),
+		       scratch + nn - (2 * qn + 1),
+		       qn, cy);
+      x = rp[dn - 1];
+      rp[dn - 1] = x - cy;
+      if (cy > x)
 	{
-	  in = (qn - 1) / 1 + 1;	/* b = 1 */
+	  mpn_decr_u (qp, 1);
+	  mpn_add_n (rp, rp, dp, dn);
 	}
     }
   else
     {
-      mp_size_t xn;
-      xn = MIN (dn, qn);
-      in = (xn - 1) / k + 1;
+      return mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);
     }
 
-  return in;
+  return 0;			/* FIXME */
 }
 
 mp_size_t
 mpn_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, int mua_k)
 {
-  mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
-  mp_size_t in = mpn_mu_div_qr_choose_in (nn - dn, dn, mua_k);
-  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+  mp_size_t qn, m;
+  int k;
 
-  return in + itch_local + itch_out;
-}
+  /* FIXME: This isn't very carefully written, and might grossly overestimate
+     the amount of scratch needed, and might perhaps also underestimate it,
+     leading to potential buffer overruns.  In particular k=0 might lead to
+     gross overestimates.  */
 
-mp_size_t
-mpn_preinv_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, mp_size_t in)
-{
-  mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
-  mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+  if (dn == 1)
+    return nn;
 
-  return itch_local + itch_out;
+  qn = nn - dn;
+  if (qn >= dn)
+    {
+      k = mpn_fft_best_k (dn + 1, 0);
+      m = mpn_fft_next_size (dn + 1, k);
+      return (mua_k <= 1
+	      ? 6 * dn
+	      : m + 2 * dn);
+    }
+  else
+    {
+      k = mpn_fft_best_k (dn + 1, 0);
+      m = mpn_fft_next_size (dn + 1, k);
+      return (mua_k <= 1
+	      ? m + 4 * qn
+	      : m + 2 * qn);
+    }
 }
diff --git a/gmp/mpn/generic/mu_divappr_q.c b/gmp/mpn/generic/mu_divappr_q.c
index c218b59fee..0a0434399f 100644
--- a/gmp/mpn/generic/mu_divappr_q.c
+++ b/gmp/mpn/generic/mu_divappr_q.c
@@ -7,63 +7,87 @@
 
    Contributed to the GNU project by Torbjorn Granlund.
 
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
 
-Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2007 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
 
+/* We use the "misunderstanding algorithm" (MUA), discovered by Paul Zimmermann
+   and Torbjorn Granlund when Torbjorn misunderstood Paul's explanation of
+   Jebelean's bidirectional exact division algorithm.
 
-/*
-   The idea of the algorithm used herein is to compute a smaller inverted value
-   than used in the standard Barrett algorithm, and thus save time in the
-   Newton iterations, and pay just a small price when using the inverted value
-   for developing quotient bits.  This algorithm was presented at ICMS 2006.
+   The idea of this algorithm is to compute a smaller inverted value than used
+   in the standard Barrett algorithm, and thus save time in the Newton
+   iterations, and pay just a small price when using the inverted value for
+   developing quotient bits.
+
+   Written by Torbjorn Granlund.  Paul Zimmermann suggested the use of the
+   "wrap around" trick.  Based on the GMP divexact code and inspired by code
+   contributed to GMP by Karl Hasselstroem.
 */
 
-/* CAUTION: This code and the code in mu_div_qr.c should be edited in sync.
+
+/* CAUTION: This code and the code in mu_div_qr.c should be edited in lockstep.
 
  Things to work on:
 
+  * Passing k isn't a great interface.  Either 'in' should be passed, or
+    determined by the code.
+
+  * The current mpn_mu_div_qr_itch isn't exactly scientifically written.
+    Scratch space buffer overruns are not unlikely before some analysis is
+    applied.  Since scratch requirements are expected to change, such an
+    analysis will have to wait until things settle.
+
+  * This isn't optimal when the remainder isn't needed, since the final
+    multiplication could be made special and take O(1) time on average, in that
+    case.  This is particularly bad when qn << dn.  At some level, code as in
+    GMP 4 mpn_tdiv_qr should be used, effectively dividing the leading 2qn
+    dividend limbs by the qn divisor limbs.
+
+  * This isn't optimal when the quotient isn't needed, as it might take a lot
+    of space.  The computation is always needed, though, so there is no time
+    to save with special code.
+
   * The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
-    demonstrated by the fact that the mpn_invertappr function's scratch needs
-    mean that we need to keep a large allocation long after it is needed.
-    Things are worse as mpn_mul_fft does not accept any scratch parameter,
-    which means we'll have a large memory hole while in mpn_mul_fft.  In
-    general, a peak scratch need in the beginning of a function isn't
-    well-handled by the itch/scratch scheme.
+    demonstrated by the fact that the mpn_invert function's scratch needs mean
+    that we need to keep a large allocation long after it is needed.  Things
+    are worse as mpn_mul_fft does not accept any scratch parameter, which means
+    we'll have a large memory hole while in mpn_mul_fft.  In general, a peak
+    scratch need in the beginning of a function isn't well-handled by the
+    itch/scratch scheme.
+
+  * Some ideas from comments in divexact.c apply to this code too.
 */
 
+/* the NOSTAT stuff handles properly the case where files are concatenated */
+#ifdef NOSTAT
+#undef STAT
+#endif
+
 #ifdef STAT
 #undef STAT
 #define STAT(x) x
 #else
+#define NOSTAT
 #define STAT(x)
 #endif
 
@@ -72,22 +96,68 @@ see https://www.gnu.org/licenses/.  */
 #include "gmp-impl.h"
 
 
+/* In case k=0 (automatic choice), we distinguish 3 cases:
+   (a) dn < qn:         in = ceil(qn / ceil(qn/dn))
+   (b) dn/3 < qn <= dn: in = ceil(qn / 2)
+   (c) qn <= dn/3:      in = qn
+   In all cases we have in <= dn.
+ */
+mp_size_t
+mpn_mu_divappr_q_choose_in (mp_size_t qn, mp_size_t dn, int k)
+{
+  mp_size_t in;
+
+  if (k == 0)
+    {
+      mp_size_t b;
+      if (qn > dn)
+	{
+	  /* Compute an inverse size that is a nice partition of the quotient.  */
+	  b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
+	  in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+	}
+      else if (3 * qn > dn)
+	{
+	  in = (qn - 1) / 2 + 1;	/* b = 2 */
+	}
+      else
+	{
+	  in = (qn - 1) / 1 + 1;	/* b = 1 */
+	}
+    }
+  else
+    {
+      mp_size_t xn;
+      xn = MIN (dn, qn);
+      in = (xn - 1) / k + 1;
+    }
+
+  return in;
+}
+
 mp_limb_t
 mpn_mu_divappr_q (mp_ptr qp,
-		  mp_srcptr np,
+		  mp_ptr np,
 		  mp_size_t nn,
 		  mp_srcptr dp,
 		  mp_size_t dn,
 		  mp_ptr scratch)
 {
   mp_size_t qn, in;
-  mp_limb_t cy, qh;
+  mp_limb_t cy;
   mp_ptr ip, tp;
 
-  ASSERT (dn > 1);
+  /* FIXME: We should probably not handle tiny operands, but do it for now.  */
+  if (dn == 1)
+    {
+      mpn_divrem_1 (scratch, 0L, np, nn, dp[0]);
+      MPN_COPY (qp, scratch, nn - 1);
+      return scratch[nn - 1];
+    }
 
   qn = nn - dn;
 
+#if 1
   /* If Q is smaller than D, truncate operands. */
   if (qn + 1 < dn)
     {
@@ -95,7 +165,18 @@ mpn_mu_divappr_q (mp_ptr qp,
       nn -= dn - (qn + 1);
       dp += dn - (qn + 1);
       dn = qn + 1;
+
+      /* Since D is cut here, we can have a carry in N'/D' even if we don't
+	 have it for N/D.  */
+      if (mpn_cmp (np + nn - (qn + 1), dp, qn + 1) >= 0)
+	{ /* quotient is 111...111 */
+	  mp_size_t i;
+	  for (i = 0; i <= qn; i ++)
+	    qp[i] = ~ (mp_limb_t) 0;
+	  return 0;
+	}
     }
+#endif
 
   /* Compute the inverse size.  */
   in = mpn_mu_divappr_q_choose_in (qn, dn, 0);
@@ -104,7 +185,7 @@ mpn_mu_divappr_q (mp_ptr qp,
 #if 1
   /* This alternative inverse computation method gets slightly more accurate
      results.  FIXMEs: (1) Temp allocation needs not analysed (2) itch function
-     not adapted (3) mpn_invertappr scratch needs not met.  */
+     not adapted (3) mpn_invert scratch needs not met.  */
   ip = scratch;
   tp = scratch + in + 1;
 
@@ -113,7 +194,7 @@ mpn_mu_divappr_q (mp_ptr qp,
     {
       MPN_COPY (tp + 1, dp, in);
       tp[0] = 1;
-      mpn_invertappr (ip, tp, in + 1, NULL);
+      mpn_invert (ip, tp, in + 1, NULL);
       MPN_COPY_INCR (ip, ip + 1, in);
     }
   else
@@ -123,7 +204,7 @@ mpn_mu_divappr_q (mp_ptr qp,
 	MPN_ZERO (ip, in);
       else
 	{
-	  mpn_invertappr (ip, tp, in + 1, NULL);
+	  mpn_invert (ip, tp, in + 1, NULL);
 	  MPN_COPY_INCR (ip, ip + 1, in);
 	}
     }
@@ -139,11 +220,11 @@ mpn_mu_divappr_q (mp_ptr qp,
     {
       tp[in + 1] = 0;
       MPN_COPY (tp + in + 2, dp, in);
-      mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
+      mpn_invert (tp, tp + in + 1, in + 1, NULL);
     }
   else
     {
-      mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
+      mpn_invert (tp, dp + dn - (in + 1), in + 1, NULL);
     }
   cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
   if (UNLIKELY (cy != 0))
@@ -151,14 +232,23 @@ mpn_mu_divappr_q (mp_ptr qp,
   MPN_COPY (ip, tp + 1, in);
 #endif
 
-  qh = mpn_preinv_mu_divappr_q (qp, np, nn, dp, dn, ip, in, scratch + in);
+/* We can't really handle qh = 1 like this since we'd here clobber N, which is
+   not allowed in the way we've defined this function's API.  */
+#if 0
+  qh = mpn_cmp (np + qn, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (np + qn, np + qn, dp, dn);
+#endif
+
+  mpn_preinv_mu_divappr_q (qp, np, nn, dp, dn, ip, in, scratch + in);
 
-  return qh;
+/*  return qh; */
+  return 0;
 }
 
-mp_limb_t
+void
 mpn_preinv_mu_divappr_q (mp_ptr qp,
-			 mp_srcptr np,
+			 mp_ptr np,
 			 mp_size_t nn,
 			 mp_srcptr dp,
 			 mp_size_t dn,
@@ -166,28 +256,24 @@ mpn_preinv_mu_divappr_q (mp_ptr qp,
 			 mp_size_t in,
 			 mp_ptr scratch)
 {
+  mp_ptr rp;
   mp_size_t qn;
-  mp_limb_t cy, cx, qh;
+  mp_limb_t cy;
+  mp_ptr tp;
   mp_limb_t r;
-  mp_size_t tn, wn;
-
-#define rp           scratch
-#define tp           (scratch + dn)
-#define scratch_out  (scratch + dn + tn)
 
   qn = nn - dn;
 
+  if (qn == 0)
+    return;
+
+  rp = scratch;
+  tp = scratch + dn;
+
   np += qn;
   qp += qn;
 
-  qh = mpn_cmp (np, dp, dn) >= 0;
-  if (qh != 0)
-    mpn_sub_n (rp, np, dp, dn);
-  else
-    MPN_COPY (rp, np, dn);
-
-  if (qn == 0)
-    return qh;			/* Degenerate use.  Should we allow this? */
+  MPN_COPY (rp, np, dn);
 
   while (qn > 0)
     {
@@ -203,7 +289,7 @@ mpn_preinv_mu_divappr_q (mp_ptr qp,
 	 by the upper part of the partial remainder R.  */
       mpn_mul_n (tp, rp + dn - in, ip, in);		/* mulhi  */
       cy = mpn_add_n (qp, tp + in, rp + dn - in, in);	/* I's msb implicit */
-      ASSERT_ALWAYS (cy == 0);
+      ASSERT_ALWAYS (cy == 0);			/* FIXME */
 
       qn -= in;
       if (qn == 0)
@@ -212,23 +298,31 @@ mpn_preinv_mu_divappr_q (mp_ptr qp,
       /* Compute the product of the quotient block and the divisor D, to be
 	 subtracted from the partial remainder combined with new limbs from the
 	 dividend N.  We only really need the low dn limbs.  */
-
-      if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
-	mpn_mul (tp, dp, dn, qp, in);		/* dn+in limbs, high 'in' cancels */
-      else
+#if WANT_FFT
+      if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
 	{
-	  tn = mpn_mulmod_bnm1_next_size (dn + 1);
-	  mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
-	  wn = dn + in - tn;			/* number of wrapped limbs */
+	  /* Use the wrap-around trick.  */
+	  mp_size_t m, wn;
+	  int k;
+
+	  k = mpn_fft_best_k (dn + 1, 0);
+	  m = mpn_fft_next_size (dn + 1, k);
+	  wn = dn + in - m;			/* number of wrapped limbs */
+
+	  mpn_mul_fft (tp, m, dp, dn, qp, in, k);
+
 	  if (wn > 0)
 	    {
-	      cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
-	      cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
-	      cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
-	      ASSERT_ALWAYS (cx >= cy);
-	      mpn_incr_u (tp, cx - cy);
+	      cy = mpn_add_n (tp, tp, rp + dn - wn, wn);
+	      mpn_incr_u (tp + wn, cy);
+
+	      cy = mpn_cmp (rp + dn - in, tp + dn, m - dn) < 0;
+	      mpn_decr_u (tp, cy);
 	    }
 	}
+      else
+#endif
+	mpn_mul (tp, dp, dn, qp, in);		/* dn+in limbs, high 'in' cancels */
 
       r = rp[dn - in] - tp[dn];
 
@@ -284,80 +378,45 @@ mpn_preinv_mu_divappr_q (mp_ptr qp,
 
   /* FIXME: We should perhaps be somewhat more elegant in our rounding of the
      quotient.  For now, just make sure the returned quotient is >= the real
-     quotient; add 3 with saturating arithmetic.  */
+     quotient; add 3, saturating at all 1-bits if the addition carries out.  */
   qn = nn - dn;
-  cy += mpn_add_1 (qp, qp, qn, 3);
+  cy = mpn_add_1 (qp, qp, qn, 3);
   if (cy != 0)
     {
-      if (qh != 0)
-	{
-	  /* Return a quotient of just 1-bits, with qh set.  */
-	  mp_size_t i;
-	  for (i = 0; i < qn; i++)
-	    qp[i] = GMP_NUMB_MAX;
-	}
-      else
-	{
-	  /* Propagate carry into qh.  */
-	  qh = 1;
-	}
+      MPN_ZERO (qp, qn);
+      mpn_sub_1 (qp, qp, qn, 1);
     }
-
-  return qh;
 }
 
-/* In case k=0 (automatic choice), we distinguish 3 cases:
-   (a) dn < qn:         in = ceil(qn / ceil(qn/dn))
-   (b) dn/3 < qn <= dn: in = ceil(qn / 2)
-   (c) qn < dn/3:       in = qn
-   In all cases we have in <= dn.
- */
 mp_size_t
-mpn_mu_divappr_q_choose_in (mp_size_t qn, mp_size_t dn, int k)
+mpn_mu_divappr_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
 {
-  mp_size_t in;
+  mp_size_t qn, m;
+  int k;
 
-  if (k == 0)
-    {
-      mp_size_t b;
-      if (qn > dn)
-	{
-	  /* Compute an inverse size that is a nice partition of the quotient.  */
-	  b = (qn - 1) / dn + 1;	/* ceil(qn/dn), number of blocks */
-	  in = (qn - 1) / b + 1;	/* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
-	}
-      else if (3 * qn > dn)
-	{
-	  in = (qn - 1) / 2 + 1;	/* b = 2 */
-	}
-      else
-	{
-	  in = (qn - 1) / 1 + 1;	/* b = 1 */
-	}
-    }
-  else
-    {
-      mp_size_t xn;
-      xn = MIN (dn, qn);
-      in = (xn - 1) / k + 1;
-    }
+  /* FIXME: This isn't very carefully written, and might grossly overestimate
+     the amount of scratch needed, and might perhaps also underestimate it,
+     leading to potential buffer overruns.  In particular k=0 might lead to
+     gross overestimates.  */
 
-  return in;
-}
-
-mp_size_t
-mpn_mu_divappr_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
-{
-  mp_size_t qn, in, itch_local, itch_out;
+  if (dn == 1)
+    return nn;
 
   qn = nn - dn;
-  if (qn + 1 < dn)
+  if (qn >= dn)
     {
-      dn = qn + 1;
+      k = mpn_fft_best_k (dn + 1, 0);
+      m = mpn_fft_next_size (dn + 1, k);
+      return dn + (mua_k <= 1
+		   ? 6 * dn
+		   : m + 2 * dn);
+    }
+  else
+    {
+      k = mpn_fft_best_k (dn + 1, 0);
+      m = mpn_fft_next_size (dn + 1, k);
+      return dn + (mua_k <= 1
+		   ? m + 4 * qn
+		   : m + 2 * qn);
     }
-  in = mpn_mu_divappr_q_choose_in (qn, dn, mua_k);
-
-  itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
-  itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
-  return in + dn + itch_local + itch_out;
 }
diff --git a/gmp/mpn/generic/mul.c b/gmp/mpn/generic/mul.c
index 2d72df3d4d..489e1f524f 100644
--- a/gmp/mpn/generic/mul.c
+++ b/gmp/mpn/generic/mul.c
@@ -2,34 +2,23 @@
 
    Contributed to the GNU project by Torbjorn Granlund.
 
-Copyright 1991, 1993, 1994, 1996, 1997, 1999-2003, 2005-2007, 2009, 2010, 2012
-Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005,
+2006, 2007 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -39,42 +28,6 @@ see https://www.gnu.org/licenses/.  */
 #define MUL_BASECASE_MAX_UN 500
 #endif
 
-/* Areas where the different toom algorithms can be called (extracted
-   from the t-toom*.c files, and ignoring small constant offsets):
-
-   1/6  1/5 1/4 4/13 1/3 3/8 2/5 5/11 1/2 3/5 2/3 3/4 4/5   1 vn/un
-                                        4/7              6/7
-				       6/11
-                                       |--------------------| toom22 (small)
-                                                           || toom22 (large)
-                                                       |xxxx| toom22 called
-                      |-------------------------------------| toom32
-                                         |xxxxxxxxxxxxxxxx| | toom32 called
-                                               |------------| toom33
-                                                          |x| toom33 called
-             |---------------------------------|            | toom42
-	              |xxxxxxxxxxxxxxxxxxxxxxxx|            | toom42 called
-                                       |--------------------| toom43
-                                               |xxxxxxxxxx|   toom43 called
-         |-----------------------------|                      toom52 (unused)
-                                                   |--------| toom44
-						   |xxxxxxxx| toom44 called
-                              |--------------------|        | toom53
-                                        |xxxxxx|              toom53 called
-    |-------------------------|                               toom62 (unused)
-                                           |----------------| toom54 (unused)
-                      |--------------------|                  toom63
-	                      |xxxxxxxxx|                   | toom63 called
-                          |---------------------------------| toom6h
-						   |xxxxxxxx| toom6h called
-                                  |-------------------------| toom8h (32 bit)
-                 |------------------------------------------| toom8h (64 bit)
-						   |xxxxxxxx| toom8h called
-*/
-
-#define TOOM33_OK(an,bn) (6 + 2 * an < 3 * bn)
-#define TOOM44_OK(an,bn) (12 + 3 * an < 4 * bn)
-
 /* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
    (pointed to by VP, with VN limbs), and store the result at PRODP.  The
    result is UN + VN limbs.  Return the most significant limb of the result.
@@ -87,34 +40,6 @@ see https://www.gnu.org/licenses/.  */
    2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
       the multiplier and the multiplicand.  */
 
-/*
-  * The cutoff lines in the toomX2 and toomX3 code are now exactly between the
-    ideal lines of the surrounding algorithms.  Is that optimal?
-
-  * The toomX3 code now uses a structure similar to the one of toomX2, except
-    that it loops longer in the unbalanced case.  The result is that the
-    remaining area might have un < vn.  Should we fix the toomX2 code in a
-    similar way?
-
-  * The toomX3 code is used for the largest non-FFT unbalanced operands.  It
-    therefore calls mpn_mul recursively for certain cases.
-
-  * Allocate static temp space using THRESHOLD variables (except for toom44
-    when !WANT_FFT).  That way, we can typically have no TMP_ALLOC at all.
-
-  * We sort ToomX2 algorithms together, assuming the toom22, toom32, toom42
-    have the same vn threshold.  This is not true, we should actually use
-    mul_basecase for slightly larger operands for toom32 than for toom22, and
-    even larger for toom42.
-
-  * That problem is even more prevalent for toomX3.  We therefore use special
-    THRESHOLD variables there.
-
-  * Is our ITCH allocation correct?
-*/
-
-#define ITCH (16*vn + 100)
-
 mp_limb_t
 mpn_mul (mp_ptr prodp,
 	 mp_srcptr up, mp_size_t un,
@@ -128,11 +53,13 @@ mpn_mul (mp_ptr prodp,
   if (un == vn)
     {
       if (up == vp)
-	mpn_sqr (prodp, up, un);
+	mpn_sqr_n (prodp, up, un);
       else
 	mpn_mul_n (prodp, up, vp, un);
+      return prodp[2 * un - 1];
     }
-  else if (vn < MUL_TOOM22_THRESHOLD)
+
+  if (vn < MUL_KARATSUBA_THRESHOLD)
     { /* plain schoolbook multiplication */
 
       /* Unless un is very large, or else if have an applicable mpn_mul_N,
@@ -171,9 +98,9 @@ mpn_mul (mp_ptr prodp,
 	    The parts marked with X are the parts whose sums are copied into
 	    the temporary buffer.  */
 
-	  mp_limb_t tp[MUL_TOOM22_THRESHOLD_LIMIT];
+	  mp_limb_t tp[MUL_KARATSUBA_THRESHOLD_LIMIT];
 	  mp_limb_t cy;
-	  ASSERT (MUL_TOOM22_THRESHOLD <= MUL_TOOM22_THRESHOLD_LIMIT);
+          ASSERT (MUL_KARATSUBA_THRESHOLD <= MUL_KARATSUBA_THRESHOLD_LIMIT);
 
 	  mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
 	  prodp += MUL_BASECASE_MAX_UN;
@@ -184,7 +111,7 @@ mpn_mul (mp_ptr prodp,
 	    {
 	      mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
 	      cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
-	      mpn_incr_u (prodp + vn, cy);
+	      mpn_incr_u (prodp + vn, cy);		/* safe? */
 	      prodp += MUL_BASECASE_MAX_UN;
 	      MPN_COPY (tp, prodp, vn);		/* preserve high triangle */
 	      up += MUL_BASECASE_MAX_UN;
@@ -196,233 +123,100 @@ mpn_mul (mp_ptr prodp,
 	    }
 	  else
 	    {
-	      ASSERT (un > 0);
+	      ASSERT_ALWAYS (un > 0);
 	      mpn_mul_basecase (prodp, vp, vn, up, un);
 	    }
 	  cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
-	  mpn_incr_u (prodp + vn, cy);
+	  mpn_incr_u (prodp + vn, cy);		/* safe? */
 	}
+      return prodp[un + vn - 1];
     }
-  else if (BELOW_THRESHOLD (vn, MUL_TOOM33_THRESHOLD))
-    {
-      /* Use ToomX2 variants */
-      mp_ptr scratch;
-      TMP_SDECL; TMP_SMARK;
-
-      scratch = TMP_SALLOC_LIMBS (ITCH);
-
-      /* FIXME: This condition (repeated in the loop below) leaves from a vn*vn
-	 square to a (3vn-1)*vn rectangle.  Leaving such a rectangle is hardly
-	 wise; we would get better balance by slightly moving the bound.  We
-	 will sometimes end up with un < vn, like in the X3 arm below.  */
-      if (un >= 3 * vn)
-	{
-	  mp_limb_t cy;
-	  mp_ptr ws;
-
-	  /* The maximum ws usage is for the mpn_mul result.  */
-	  ws = TMP_SALLOC_LIMBS (4 * vn);
 
-	  mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
-	  un -= 2 * vn;
-	  up += 2 * vn;
-	  prodp += 2 * vn;
-
-	  while (un >= 3 * vn)
-	    {
-	      mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
-	      un -= 2 * vn;
-	      up += 2 * vn;
-	      cy = mpn_add_n (prodp, prodp, ws, vn);
-	      MPN_COPY (prodp + vn, ws + vn, 2 * vn);
-	      mpn_incr_u (prodp + vn, cy);
-	      prodp += 2 * vn;
-	    }
-
-	  /* vn <= un < 3vn */
-
-	  if (4 * un < 5 * vn)
-	    mpn_toom22_mul (ws, up, un, vp, vn, scratch);
-	  else if (4 * un < 7 * vn)
-	    mpn_toom32_mul (ws, up, un, vp, vn, scratch);
-	  else
-	    mpn_toom42_mul (ws, up, un, vp, vn, scratch);
-
-	  cy = mpn_add_n (prodp, prodp, ws, vn);
-	  MPN_COPY (prodp + vn, ws + vn, un);
-	  mpn_incr_u (prodp + vn, cy);
-	}
-      else
-	{
-	  if (4 * un < 5 * vn)
-	    mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
-	  else if (4 * un < 7 * vn)
-	    mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
-	  else
-	    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
-	}
-      TMP_SFREE;
-    }
-  else if (BELOW_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) ||
-	   BELOW_THRESHOLD (3 * vn, MUL_FFT_THRESHOLD))
+  if (ABOVE_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) &&
+      ABOVE_THRESHOLD (vn, MUL_FFT_THRESHOLD / 3)) /* FIXME */
     {
-      /* Handle the largest operands that are not in the FFT range.  The 2nd
-	 condition makes very unbalanced operands avoid the FFT code (except
-	 perhaps as coefficient products of the Toom code.  */
-
-      if (BELOW_THRESHOLD (vn, MUL_TOOM44_THRESHOLD) || !TOOM44_OK (un, vn))
-	{
-	  /* Use ToomX3 variants */
-	  mp_ptr scratch;
-	  TMP_SDECL; TMP_SMARK;
-
-	  scratch = TMP_SALLOC_LIMBS (ITCH);
-
-	  if (2 * un >= 5 * vn)
-	    {
-	      mp_limb_t cy;
-	      mp_ptr ws;
-
-	      /* The maximum ws usage is for the mpn_mul result.  */
-	      ws = TMP_SALLOC_LIMBS (7 * vn >> 1);
-
-	      if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
-		mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
-	      else
-		mpn_toom63_mul (prodp, up, 2 * vn, vp, vn, scratch);
-	      un -= 2 * vn;
-	      up += 2 * vn;
-	      prodp += 2 * vn;
-
-	      while (2 * un >= 5 * vn)	/* un >= 2.5vn */
-		{
-		  if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
-		    mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
-		  else
-		    mpn_toom63_mul (ws, up, 2 * vn, vp, vn, scratch);
-		  un -= 2 * vn;
-		  up += 2 * vn;
-		  cy = mpn_add_n (prodp, prodp, ws, vn);
-		  MPN_COPY (prodp + vn, ws + vn, 2 * vn);
-		  mpn_incr_u (prodp + vn, cy);
-		  prodp += 2 * vn;
-		}
-
-	      /* vn / 2 <= un < 2.5vn */
-
-	      if (un < vn)
-		mpn_mul (ws, vp, vn, up, un);
-	      else
-		mpn_mul (ws, up, un, vp, vn);
-
-	      cy = mpn_add_n (prodp, prodp, ws, vn);
-	      MPN_COPY (prodp + vn, ws + vn, un);
-	      mpn_incr_u (prodp + vn, cy);
-	    }
-	  else
-	    {
-	      if (6 * un < 7 * vn)
-		mpn_toom33_mul (prodp, up, un, vp, vn, scratch);
-	      else if (2 * un < 3 * vn)
-		{
-		  if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM43_THRESHOLD))
-		    mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
-		  else
-		    mpn_toom43_mul (prodp, up, un, vp, vn, scratch);
-		}
-	      else if (6 * un < 11 * vn)
-		{
-		  if (4 * un < 7 * vn)
-		    {
-		      if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM53_THRESHOLD))
-			mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
-		      else
-			mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
-		    }
-		  else
-		    {
-		      if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM53_THRESHOLD))
-			mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
-		      else
-			mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
-		    }
-		}
-	      else
-		{
-		  if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
-		    mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
-		  else
-		    mpn_toom63_mul (prodp, up, un, vp, vn, scratch);
-		}
-	    }
-	  TMP_SFREE;
-	}
-      else
-	{
-	  mp_ptr scratch;
-	  TMP_DECL; TMP_MARK;
-
-	  if (BELOW_THRESHOLD (vn, MUL_TOOM6H_THRESHOLD))
-	    {
-	      scratch = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (un, vn));
-	      mpn_toom44_mul (prodp, up, un, vp, vn, scratch);
-	    }
-	  else if (BELOW_THRESHOLD (vn, MUL_TOOM8H_THRESHOLD))
-	    {
-	      scratch = TMP_ALLOC_LIMBS (mpn_toom6h_mul_itch (un, vn));
-	      mpn_toom6h_mul (prodp, up, un, vp, vn, scratch);
-	    }
-	  else
-	    {
-	      scratch = TMP_ALLOC_LIMBS (mpn_toom8h_mul_itch (un, vn));
-	      mpn_toom8h_mul (prodp, up, un, vp, vn, scratch);
-	    }
-	  TMP_FREE;
-	}
+      mpn_mul_fft_full (prodp, up, un, vp, vn);
+      return prodp[un + vn - 1];
     }
-  else
-    {
-      if (un >= 8 * vn)
-	{
-	  mp_limb_t cy;
-	  mp_ptr ws;
-	  TMP_DECL; TMP_MARK;
-
-	  /* The maximum ws usage is for the mpn_mul result.  */
-	  ws = TMP_BALLOC_LIMBS (9 * vn >> 1);
-
-	  mpn_fft_mul (prodp, up, 3 * vn, vp, vn);
-	  un -= 3 * vn;
-	  up += 3 * vn;
-	  prodp += 3 * vn;
 
-	  while (2 * un >= 7 * vn)	/* un >= 3.5vn  */
-	    {
-	      mpn_fft_mul (ws, up, 3 * vn, vp, vn);
-	      un -= 3 * vn;
-	      up += 3 * vn;
-	      cy = mpn_add_n (prodp, prodp, ws, vn);
-	      MPN_COPY (prodp + vn, ws + vn, 3 * vn);
-	      mpn_incr_u (prodp + vn, cy);
-	      prodp += 3 * vn;
-	    }
-
-	  /* vn / 2 <= un < 3.5vn */
-
-	  if (un < vn)
-	    mpn_mul (ws, vp, vn, up, un);
-	  else
-	    mpn_mul (ws, up, un, vp, vn);
-
-	  cy = mpn_add_n (prodp, prodp, ws, vn);
-	  MPN_COPY (prodp + vn, ws + vn, un);
-	  mpn_incr_u (prodp + vn, cy);
-
-	  TMP_FREE;
-	}
-      else
-	mpn_fft_mul (prodp, up, un, vp, vn);
-    }
+  {
+    mp_ptr ws;
+    mp_ptr scratch;
+#if WANT_ASSERT
+    mp_ptr ssssp;
+#endif
+    TMP_DECL;
+    TMP_MARK;
+
+#define WSALL (4 * vn)
+    ws = TMP_SALLOC_LIMBS (WSALL + 1);
+
+#define ITCH ((un + vn) * 4 + 100)
+    scratch = TMP_ALLOC_LIMBS (ITCH + 1);
+#if WANT_ASSERT
+    ssssp = scratch + ITCH;
+    ws[WSALL] = 0xbabecafe;
+    ssssp[0] = 0xbeef;
+#endif
 
-  return prodp[un + vn - 1];	/* historic */
+    if (un >= 3 * vn)
+      {
+	mp_limb_t cy;
+
+	mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
+	un -= 2 * vn;
+	up += 2 * vn;
+	prodp += 2 * vn;
+
+	while (un >= 3 * vn)
+	  {
+	    mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
+	    un -= 2 * vn;
+	    up += 2 * vn;
+	    cy = mpn_add_n (prodp, prodp, ws, vn);
+	    MPN_COPY (prodp + vn, ws + vn, 2 * vn);
+	    mpn_incr_u (prodp + vn, cy);
+	    prodp += 2 * vn;
+	  }
+
+	if (5 * un > 9 * vn)
+	  {
+	    mpn_toom42_mul (ws, up, un, vp, vn, scratch);
+	    cy = mpn_add_n (prodp, prodp, ws, vn);
+	    MPN_COPY (prodp + vn, ws + vn, un);
+	    mpn_incr_u (prodp + vn, cy);
+	  }
+	else if (9 * un > 10 * vn)
+	  {
+	    mpn_toom32_mul (ws, up, un, vp, vn, scratch);
+	    cy = mpn_add_n (prodp, prodp, ws, vn);
+	    MPN_COPY (prodp + vn, ws + vn, un);
+	    mpn_incr_u (prodp + vn, cy);
+	  }
+	else
+	  {
+	    mpn_toom22_mul (ws, up, un, vp, vn, scratch);
+	    cy = mpn_add_n (prodp, prodp, ws, vn);
+	    MPN_COPY (prodp + vn, ws + vn, un);
+	    mpn_incr_u (prodp + vn, cy);
+	  }
+
+	ASSERT (ws[WSALL] == 0xbabecafe);
+	ASSERT (ssssp[0] == 0xbeef);
+	TMP_FREE;
+	return prodp[un + vn - 1];
+      }
+
+    if (un * 5 > vn * 9)
+      mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+    else if (9 * un > 10 * vn)
+      mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+    else
+      mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
+
+    ASSERT (ws[WSALL] == 0xbabecafe);
+    ASSERT (ssssp[0] == 0xbeef);
+    TMP_FREE;
+    return prodp[un + vn - 1];
+  }
 }
diff --git a/gmp/mpn/generic/mul_1.c b/gmp/mpn/generic/mul_1.c
index 6b2ee59a2c..b8290cc6af 100644
--- a/gmp/mpn/generic/mul_1.c
+++ b/gmp/mpn/generic/mul_1.c
@@ -1,33 +1,23 @@
 /* mpn_mul_1 -- Multiply a limb vector with a single limb and store the
    product in a second limb vector.
 
-Copyright 1991-1994, 1996, 2000-2002 Free Software Foundation, Inc.
+Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/mpn/generic/mul_basecase.c b/gmp/mpn/generic/mul_basecase.c
index 9309ef72c8..4f02545d57 100644
--- a/gmp/mpn/generic/mul_basecase.c
+++ b/gmp/mpn/generic/mul_basecase.c
@@ -4,33 +4,24 @@
    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
 
-Copyright 1991-1994, 1996, 1997, 2000-2002 Free Software Foundation, Inc.
+
+Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -42,7 +33,7 @@ see https://www.gnu.org/licenses/.  */
    Note that prodp gets usize+vsize limbs stored, even if the actual result
    only needs usize+vsize-1.
 
-   There's no good reason to call here with vsize>=MUL_TOOM22_THRESHOLD.
+   There's no good reason to call here with vsize>=MUL_KARATSUBA_THRESHOLD.
    Currently this is allowed, but it might not be in the future.
 
    This is the most critical code for multiplication.  All multiplies rely
diff --git a/gmp/mpn/generic/mul_fft.c b/gmp/mpn/generic/mul_fft.c
index 5e763a3a73..836a89a001 100644
--- a/gmp/mpn/generic/mul_fft.c
+++ b/gmp/mpn/generic/mul_fft.c
@@ -6,33 +6,23 @@
    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 1998-2010, 2012, 2013 Free Software Foundation, Inc.
+Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
+Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 /* References:
@@ -70,79 +60,76 @@ see https://www.gnu.org/licenses/.  */
 #include "gmp-impl.h"
 
 #ifdef WANT_ADDSUB
-#include "generic/add_n_sub_n.c"
-#define HAVE_NATIVE_mpn_add_n_sub_n 1
+#include "generic/addsub_n.c"
+#define HAVE_NATIVE_mpn_addsub_n 1
 #endif
 
-static mp_limb_t mpn_mul_fft_internal (mp_ptr, mp_size_t, int, mp_ptr *,
-				       mp_ptr *, mp_ptr, mp_ptr, mp_size_t,
-				       mp_size_t, mp_size_t, int **, mp_ptr, int);
-static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, mp_size_t, mp_size_t, mp_srcptr,
-				   mp_size_t, mp_size_t, mp_size_t, mp_ptr);
+static mp_limb_t mpn_mul_fft_internal
+__GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, int, int, mp_ptr *, mp_ptr *,
+	      mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_size_t, int **, mp_ptr,
+	      int));
 
 
 /* Find the best k to use for a mod 2^(m*GMP_NUMB_BITS)+1 FFT for m >= n.
-   We have sqr=0 if for a multiply, sqr=1 for a square.
-   There are three generations of this code; we keep the old ones as long as
-   some gmp-mparam.h is not updated.  */
-
-
-/*****************************************************************************/
-
-#if TUNE_PROGRAM_BUILD || (defined (MUL_FFT_TABLE3) && defined (SQR_FFT_TABLE3))
+   sqr==0 for a multiply, sqr==1 for a square.
+   Don't declare it static since it is needed by tuneup.
+*/
+#ifdef MUL_FFT_TABLE2
 
-#ifndef FFT_TABLE3_SIZE		/* When tuning this is defined in gmp-impl.h */
-#if defined (MUL_FFT_TABLE3_SIZE) && defined (SQR_FFT_TABLE3_SIZE)
-#if MUL_FFT_TABLE3_SIZE > SQR_FFT_TABLE3_SIZE
-#define FFT_TABLE3_SIZE MUL_FFT_TABLE3_SIZE
+#if defined (MUL_FFT_TABLE2_SIZE) && defined (SQR_FFT_TABLE2_SIZE)
+#if MUL_FFT_TABLE2_SIZE > SQR_FFT_TABLE2_SIZE
+#define FFT_TABLE2_SIZE MUL_FFT_TABLE2_SIZE
 #else
-#define FFT_TABLE3_SIZE SQR_FFT_TABLE3_SIZE
-#endif
+#define FFT_TABLE2_SIZE SQR_FFT_TABLE2_SIZE
 #endif
 #endif
 
-#ifndef FFT_TABLE3_SIZE
-#define FFT_TABLE3_SIZE 200
+#ifndef FFT_TABLE2_SIZE
+#define FFT_TABLE2_SIZE 200
 #endif
 
-FFT_TABLE_ATTRS struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE] =
+/* FIXME: The format of this should change to need less space.
+   Perhaps put n and k in the same 32-bit word, with n shifted-down
+   (k-2) steps, and k using the 4-5 lowest bits.  That's possible since
+   n-1 is highly divisible.
+   Alternatively, separate n and k out into separate arrays.  */
+struct nk {	/* one entry of mpn_fft_table2, packed as 27 + 5 bitfield bits */
+  unsigned int n:27;	/* size bound compared against the operand size in mpn_fft_best_k */
+  unsigned int k:5;	/* k value mpn_fft_best_k adopts once the size reaches n */
+};
+
+static struct nk mpn_fft_table2[2][FFT_TABLE2_SIZE] =
 {
-  MUL_FFT_TABLE3,
-  SQR_FFT_TABLE3
+  MUL_FFT_TABLE2,
+  SQR_FFT_TABLE2
 };
 
 int
 mpn_fft_best_k (mp_size_t n, int sqr)
 {
-  FFT_TABLE_ATTRS struct fft_table_nk *fft_tab, *tab;
-  mp_size_t tab_n, thres;
+  struct nk *tab;
   int last_k;
 
-  fft_tab = mpn_fft_table3[sqr];
-  last_k = fft_tab->k;
-  for (tab = fft_tab + 1; ; tab++)
+  last_k = 4;
+  for (tab = mpn_fft_table2[sqr] + 1; ; tab++)
     {
-      tab_n = tab->n;
-      thres = tab_n << last_k;
-      if (n <= thres)
+      if (n < tab->n)
 	break;
       last_k = tab->k;
     }
   return last_k;
 }
-
-#define MPN_FFT_BEST_READY 1
 #endif
 
-/*****************************************************************************/
-
-#if ! defined (MPN_FFT_BEST_READY)
+#if !defined (MUL_FFT_TABLE2) || TUNE_PROGRAM_BUILD
 FFT_TABLE_ATTRS mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE] =
 {
   MUL_FFT_TABLE,
   SQR_FFT_TABLE
 };
+#endif
 
+#if !defined (MUL_FFT_TABLE2)
 int
 mpn_fft_best_k (mp_size_t n, int sqr)
 {
@@ -160,9 +147,6 @@ mpn_fft_best_k (mp_size_t n, int sqr)
 }
 #endif
 
-/*****************************************************************************/
-
-
 /* Returns smallest possible number of limbs >= pl for a fft of size 2^k,
    i.e. smallest multiple of 2^k >= pl.
 
@@ -196,97 +180,137 @@ mpn_fft_initl (int **l, int k)
     }
 }
 
+/* Shift {up, n} left by cnt bits, store the complemented result
+   in {rp, n}, and return the shifted-out bits (not complemented).
+   Same as:
+     cc = mpn_lshift (rp, up, n, cnt);
+     mpn_com_n (rp, rp, n);
+     return cc;
 
-/* r <- a*2^d mod 2^(n*GMP_NUMB_BITS)+1 with a = {a, n+1}
+   Assumes n >= 1, 1 <= cnt < GMP_NUMB_BITS, rp >= up.
+*/
+#ifndef HAVE_NATIVE_mpn_lshiftc	/* C fallback, compiled only when no native version is flagged */
+#undef mpn_lshiftc	/* drop any macro of this name before defining the function */
+static mp_limb_t
+mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t high_limb, low_limb;
+  unsigned int tnc;
+  mp_size_t i;
+  mp_limb_t retval;
+
+  up += n;	/* both vectors are walked from the most significant limb down */
+  rp += n;
+
+  tnc = GMP_NUMB_BITS - cnt;	/* complementary (right) shift count */
+  low_limb = *--up;	/* top source limb */
+  retval = low_limb >> tnc;	/* bits shifted out at the top; returned uncomplemented */
+  high_limb = (low_limb << cnt);
+
+  for (i = n - 1; i != 0; i--)	/* emits the upper n-1 result limbs */
+    {
+      low_limb = *--up;
+      *--rp = (~(high_limb | (low_limb >> tnc))) & GMP_NUMB_MASK;	/* complemented limb, masked to the numb bits */
+      high_limb = low_limb << cnt;
+    }
+  *--rp = (~high_limb) & GMP_NUMB_MASK;	/* lowest limb: the cnt vacated low bits become 1s */
+
+  return retval;
+}
+#endif	/* ! HAVE_NATIVE_mpn_lshiftc */
+
+/* r <- a*2^d mod 2^(n*GMP_NUMB_BITS)+1 with a = {a, n+1}
    Assumes a is semi-normalized, i.e. a[n] <= 1.
    r and a must have n+1 limbs, and not overlap.
 */
 static void
-mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t d, mp_size_t n)
+mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, unsigned int d, mp_size_t n)
 {
-  unsigned int sh;
-  mp_size_t m;
+  int sh, negate;
   mp_limb_t cc, rd;
 
   sh = d % GMP_NUMB_BITS;
-  m = d / GMP_NUMB_BITS;
+  d /= GMP_NUMB_BITS;
+  negate = d >= n;
+  if (negate)
+    d -= n;
 
-  if (m >= n)			/* negate */
+  if (negate)
     {
-      /* r[0..m-1]  <-- lshift(a[n-m]..a[n-1], sh)
-	 r[m..n-1]  <-- -lshift(a[0]..a[n-m-1],  sh) */
-
-      m -= n;
+      /* r[0..d-1]  <-- lshift(a[n-d]..a[n-1], sh)
+	 r[d..n-1]  <-- -lshift(a[0]..a[n-d-1],  sh) */
       if (sh != 0)
 	{
 	  /* no out shift below since a[n] <= 1 */
-	  mpn_lshift (r, a + n - m, m + 1, sh);
-	  rd = r[m];
-	  cc = mpn_lshiftc (r + m, a, n - m, sh);
+	  mpn_lshift (r, a + n - d, d + 1, sh);
+	  rd = r[d];
+	  cc = mpn_lshiftc (r + d, a, n - d, sh);
 	}
       else
 	{
-	  MPN_COPY (r, a + n - m, m);
+	  MPN_COPY (r, a + n - d, d);
 	  rd = a[n];
-	  mpn_com (r + m, a, n - m);
+	  mpn_com_n (r + d, a, n - d);
 	  cc = 0;
 	}
 
-      /* add cc to r[0], and add rd to r[m] */
+      /* add cc to r[0], and add rd to r[d] */
 
-      /* now add 1 in r[m], subtract 1 in r[n], i.e. add 1 in r[0] */
+      /* now add 1 in r[d], subtract 1 in r[n], i.e. add 1 in r[0] */
 
       r[n] = 0;
       /* cc < 2^sh <= 2^(GMP_NUMB_BITS-1) thus no overflow here */
       cc++;
       mpn_incr_u (r, cc);
 
-      rd++;
+      rd ++;
       /* rd might overflow when sh=GMP_NUMB_BITS-1 */
       cc = (rd == 0) ? 1 : rd;
-      r = r + m + (rd == 0);
+      r = r + d + (rd == 0);
       mpn_incr_u (r, cc);
+
+      return;
+    }
+
+  /* if negate=0,
+	r[0..d-1]  <-- -lshift(a[n-d]..a[n-1], sh)
+	r[d..n-1]  <-- lshift(a[0]..a[n-d-1],  sh)
+  */
+  if (sh != 0)
+    {
+      /* no out bits below since a[n] <= 1 */
+      mpn_lshiftc (r, a + n - d, d + 1, sh);
+      rd = ~r[d];
+      /* {r, d+1} = {a+n-d, d+1} << sh */
+      cc = mpn_lshift (r + d, a, n - d, sh); /* {r+d, n-d} = {a, n-d}<<sh */
     }
   else
     {
-      /* r[0..m-1]  <-- -lshift(a[n-m]..a[n-1], sh)
-	 r[m..n-1]  <-- lshift(a[0]..a[n-m-1],  sh)  */
-      if (sh != 0)
-	{
-	  /* no out bits below since a[n] <= 1 */
-	  mpn_lshiftc (r, a + n - m, m + 1, sh);
-	  rd = ~r[m];
-	  /* {r, m+1} = {a+n-m, m+1} << sh */
-	  cc = mpn_lshift (r + m, a, n - m, sh); /* {r+m, n-m} = {a, n-m}<<sh */
-	}
-      else
-	{
-	  /* r[m] is not used below, but we save a test for m=0 */
-	  mpn_com (r, a + n - m, m + 1);
-	  rd = a[n];
-	  MPN_COPY (r + m, a, n - m);
-	  cc = 0;
-	}
+      /* r[d] is not used below, but we save a test for d=0 */
+      mpn_com_n (r, a + n - d, d + 1);
+      rd = a[n];
+      MPN_COPY (r + d, a, n - d);
+      cc = 0;
+    }
 
-      /* now complement {r, m}, subtract cc from r[0], subtract rd from r[m] */
+  /* now complement {r, d}, subtract cc from r[0], subtract rd from r[d] */
 
-      /* if m=0 we just have r[0]=a[n] << sh */
-      if (m != 0)
-	{
-	  /* now add 1 in r[0], subtract 1 in r[m] */
-	  if (cc-- == 0) /* then add 1 to r[0] */
-	    cc = mpn_add_1 (r, r, n, CNST_LIMB(1));
-	  cc = mpn_sub_1 (r, r, m, cc) + 1;
-	  /* add 1 to cc instead of rd since rd might overflow */
-	}
+  /* if d=0 we just have r[0]=a[n] << sh */
+  if (d != 0)
+    {
+      /* now add 1 in r[0], subtract 1 in r[d] */
+      if (cc-- == 0) /* then add 1 to r[0] */
+	cc = mpn_add_1 (r, r, n, CNST_LIMB(1));
+      cc = mpn_sub_1 (r, r, d, cc) + 1;
+      /* add 1 to cc instead of rd since rd might overflow */
+    }
 
-      /* now subtract cc and rd from r[m..n] */
+  /* now subtract cc and rd from r[d..n] */
 
-      r[n] = -mpn_sub_1 (r + m, r + m, n - m, cc);
-      r[n] -= mpn_sub_1 (r + m, r + m, n - m, rd);
-      if (r[n] & GMP_LIMB_HIGHBIT)
-	r[n] = mpn_add_1 (r, r, n, CNST_LIMB(1));
-    }
+  r[n] = -mpn_sub_1 (r + d, r + d, n - d, cc);
+  r[n] -= mpn_sub_1 (r + d, r + d, n - d, rd);
+  if (r[n] & GMP_LIMB_HIGHBIT)
+    r[n] = mpn_add_1 (r, r, n, CNST_LIMB(1));
 }
 
 
@@ -294,7 +318,7 @@ mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t d, mp_size_t n)
    Assumes a and b are semi-normalized.
 */
 static inline void
-mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
+mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, int n)
 {
   mp_limb_t c, x;
 
@@ -325,7 +349,7 @@ mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
    Assumes a and b are semi-normalized.
 */
 static inline void
-mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
+mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, int n)
 {
   mp_limb_t c, x;
 
@@ -363,8 +387,8 @@ mpn_fft_fft (mp_ptr *Ap, mp_size_t K, int **ll,
   if (K == 2)
     {
       mp_limb_t cy;
-#if HAVE_NATIVE_mpn_add_n_sub_n
-      cy = mpn_add_n_sub_n (Ap[0], Ap[inc], Ap[0], Ap[inc], n + 1) & 1;
+#if HAVE_NATIVE_mpn_addsub_n
+      cy = mpn_addsub_n (Ap[0], Ap[inc], Ap[0], Ap[inc], n + 1) & 1;
 #else
       MPN_COPY (tp, Ap[0], n + 1);
       mpn_add_n (Ap[0], Ap[0], Ap[inc], n + 1);
@@ -377,14 +401,14 @@ mpn_fft_fft (mp_ptr *Ap, mp_size_t K, int **ll,
     }
   else
     {
-      mp_size_t j, K2 = K >> 1;
+      int j;
       int *lk = *ll;
 
-      mpn_fft_fft (Ap,     K2, ll-1, 2 * omega, n, inc * 2, tp);
-      mpn_fft_fft (Ap+inc, K2, ll-1, 2 * omega, n, inc * 2, tp);
+      mpn_fft_fft (Ap,     K >> 1, ll-1, 2 * omega, n, inc * 2, tp);
+      mpn_fft_fft (Ap+inc, K >> 1, ll-1, 2 * omega, n, inc * 2, tp);
       /* A[2*j*inc]   <- A[2*j*inc] + omega^l[k][2*j*inc] A[(2j+1)inc]
 	 A[(2j+1)inc] <- A[2*j*inc] + omega^l[k][(2j+1)inc] A[(2j+1)inc] */
-      for (j = 0; j < K2; j++, lk += 2, Ap += 2 * inc)
+      for (j = 0; j < (K >> 1); j++, lk += 2, Ap += 2 * inc)
 	{
 	  /* Ap[inc] <- Ap[0] + Ap[inc] * 2^(lk[1] * omega)
 	     Ap[0]   <- Ap[0] + Ap[inc] * 2^(lk[0] * omega) */
@@ -429,7 +453,7 @@ mpn_fft_normalize (mp_ptr ap, mp_size_t n)
 
 /* a[i] <- a[i]*b[i] mod 2^(n*GMP_NUMB_BITS)+1 for 0 <= i < K */
 static void
-mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t K)
+mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, int K)
 {
   int i;
   int sqr = (ap == bp);
@@ -439,13 +463,12 @@ mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t K)
 
   if (n >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
     {
-      mp_size_t K2, nprime2, Nprime2, M2, maxLK, l, Mp2;
-      int k;
-      int **fft_l, *tmp;
+      int k, K2, nprime2, Nprime2, M2, maxLK, l, Mp2;
+      int **_fft_l;
       mp_ptr *Ap, *Bp, A, B, T;
 
       k = mpn_fft_best_k (n, sqr);
-      K2 = (mp_size_t) 1 << k;
+      K2 = 1 << k;
       ASSERT_ALWAYS((n & (K2 - 1)) == 0);
       maxLK = (K2 > GMP_NUMB_BITS) ? K2 : GMP_NUMB_BITS;
       M2 = n * GMP_NUMB_BITS >> k;
@@ -457,10 +480,10 @@ mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t K)
       /* we should ensure that nprime2 is a multiple of the next K */
       if (nprime2 >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
 	{
-	  mp_size_t K3;
+	  unsigned long K3;
 	  for (;;)
 	    {
-	      K3 = (mp_size_t) 1 << mpn_fft_best_k (nprime2, sqr);
+	      K3 = 1L << mpn_fft_best_k (nprime2, sqr);
 	      if ((nprime2 & (K3 - 1)) == 0)
 		break;
 	      nprime2 = (nprime2 + K3 - 1) & -K3;
@@ -472,53 +495,41 @@ mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t K)
 
       Mp2 = Nprime2 >> k;
 
-      Ap = TMP_BALLOC_MP_PTRS (K2);
-      Bp = TMP_BALLOC_MP_PTRS (K2);
-      A = TMP_BALLOC_LIMBS (2 * (nprime2 + 1) << k);
-      T = TMP_BALLOC_LIMBS (2 * (nprime2 + 1));
-      B = A + ((nprime2 + 1) << k);
-      fft_l = TMP_BALLOC_TYPE (k + 1, int *);
-      tmp = TMP_BALLOC_TYPE ((size_t) 2 << k, int);
+      Ap = TMP_ALLOC_MP_PTRS (K2);
+      Bp = TMP_ALLOC_MP_PTRS (K2);
+      A = TMP_ALLOC_LIMBS (2 * K2 * (nprime2 + 1));
+      T = TMP_ALLOC_LIMBS (2 * (nprime2 + 1));
+      B = A + K2 * (nprime2 + 1);
+      _fft_l = TMP_ALLOC_TYPE (k + 1, int *);
       for (i = 0; i <= k; i++)
-	{
-	  fft_l[i] = tmp;
-	  tmp += (mp_size_t) 1 << i;
-	}
+	_fft_l[i] = TMP_ALLOC_TYPE (1<<i, int);
+      mpn_fft_initl (_fft_l, k);
 
-      mpn_fft_initl (fft_l, k);
-
-      TRACE (printf ("recurse: %ldx%ld limbs -> %ld times %ldx%ld (%1.2f)\n", n,
+      TRACE (printf ("recurse: %ldx%ld limbs -> %d times %dx%d (%1.2f)\n", n,
 		    n, K2, nprime2, nprime2, 2.0*(double)n/nprime2/K2));
       for (i = 0; i < K; i++, ap++, bp++)
 	{
-	  mp_limb_t cy;
 	  mpn_fft_normalize (*ap, n);
 	  if (!sqr)
 	    mpn_fft_normalize (*bp, n);
-
-	  mpn_mul_fft_decompose (A, Ap, K2, nprime2, *ap, (l << k) + 1, l, Mp2, T);
-	  if (!sqr)
-	    mpn_mul_fft_decompose (B, Bp, K2, nprime2, *bp, (l << k) + 1, l, Mp2, T);
-
-	  cy = mpn_mul_fft_internal (*ap, n, k, Ap, Bp, A, B, nprime2,
-				     l, Mp2, fft_l, T, sqr);
-	  (*ap)[n] = cy;
+	  mpn_mul_fft_internal (*ap, *ap, *bp, n, k, K2, Ap, Bp, A, B, nprime2,
+				l, Mp2, _fft_l, T, 1);
 	}
     }
   else
     {
       mp_ptr a, b, tp, tpn;
       mp_limb_t cc;
-      mp_size_t n2 = 2 * n;
-      tp = TMP_BALLOC_LIMBS (n2);
+      int n2 = 2 * n;
+      tp = TMP_ALLOC_LIMBS (n2);
       tpn = tp + n;
-      TRACE (printf ("  mpn_mul_n %ld of %ld limbs\n", K, n));
+      TRACE (printf ("  mpn_mul_n %d of %ld limbs\n", K, n));
       for (i = 0; i < K; i++)
 	{
 	  a = *ap++;
 	  b = *bp++;
 	  if (sqr)
-	    mpn_sqr (tp, a, n);
+	    mpn_sqr_n (tp, a, n);
 	  else
 	    mpn_mul_n (tp, b, a, n);
 	  if (a[n] != 0)
@@ -546,13 +557,13 @@ mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t K)
    This condition is also fulfilled at exit.
 */
 static void
-mpn_fft_fftinv (mp_ptr *Ap, mp_size_t K, mp_size_t omega, mp_size_t n, mp_ptr tp)
+mpn_fft_fftinv (mp_ptr *Ap, int K, mp_size_t omega, mp_size_t n, mp_ptr tp)
 {
   if (K == 2)
     {
       mp_limb_t cy;
-#if HAVE_NATIVE_mpn_add_n_sub_n
-      cy = mpn_add_n_sub_n (Ap[0], Ap[1], Ap[0], Ap[1], n + 1) & 1;
+#if HAVE_NATIVE_mpn_addsub_n
+      cy = mpn_addsub_n (Ap[0], Ap[1], Ap[0], Ap[1], n + 1) & 1;
 #else
       MPN_COPY (tp, Ap[0], n + 1);
       mpn_add_n (Ap[0], Ap[0], Ap[1], n + 1);
@@ -565,7 +576,7 @@ mpn_fft_fftinv (mp_ptr *Ap, mp_size_t K, mp_size_t omega, mp_size_t n, mp_ptr tp
     }
   else
     {
-      mp_size_t j, K2 = K >> 1;
+      int j, K2 = K >> 1;
 
       mpn_fft_fftinv (Ap,      K2, 2 * omega, n, tp);
       mpn_fft_fftinv (Ap + K2, K2, 2 * omega, n, tp);
@@ -583,14 +594,15 @@ mpn_fft_fftinv (mp_ptr *Ap, mp_size_t K, mp_size_t omega, mp_size_t n, mp_ptr tp
 }
 
 
-/* R <- A/2^k mod 2^(n*GMP_NUMB_BITS)+1 */
+/* A <- A/2^k mod 2^(n*GMP_NUMB_BITS)+1 */
 static void
-mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t k, mp_size_t n)
+mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, int k, mp_size_t n)
 {
-  mp_bitcnt_t i;
+  int i;
 
   ASSERT (r != a);
-  i = (mp_bitcnt_t) 2 * n * GMP_NUMB_BITS - k;
+  i = 2 * n * GMP_NUMB_BITS;
+  i = (i - k) % i;		/* FIXME: This % looks superfluous */
   mpn_fft_mul_2exp_modF (r, a, i, n);
   /* 1/2^k = 2^(2nL-k) mod 2^(n*GMP_NUMB_BITS)+1 */
   /* normalize so that R < 2^(n*GMP_NUMB_BITS)+1 */
@@ -602,11 +614,13 @@ mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t k, mp_size_t n)
    Returns carry out, i.e. 1 iff {ap,an} = -1 mod 2^(n*GMP_NUMB_BITS)+1,
    then {rp,n}=0.
 */
-static mp_size_t
+static int
 mpn_fft_norm_modF (mp_ptr rp, mp_size_t n, mp_ptr ap, mp_size_t an)
 {
-  mp_size_t l, m, rpn;
+  mp_size_t l;
+  long int m;
   mp_limb_t cc;
+  int rpn;
 
   ASSERT ((n <= an) && (an <= 3 * n));
   m = an - 2 * n;
@@ -640,11 +654,10 @@ mpn_fft_norm_modF (mp_ptr rp, mp_size_t n, mp_ptr ap, mp_size_t an)
    We must have nl <= 2*K*l.
 */
 static void
-mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, mp_size_t K, mp_size_t nprime,
-		       mp_srcptr n, mp_size_t nl, mp_size_t l, mp_size_t Mp,
-		       mp_ptr T)
+mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, int K, int nprime, mp_srcptr n,
+		       mp_size_t nl, int l, int Mp, mp_ptr T)
 {
-  mp_size_t i, j;
+  int i, j;
   mp_ptr tmp;
   mp_size_t Kl = K * l;
   TMP_DECL;
@@ -655,7 +668,7 @@ mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, mp_size_t K, mp_size_t nprime,
       mp_size_t dif = nl - Kl;
       mp_limb_signed_t cy;
 
-      tmp = TMP_BALLOC_LIMBS(Kl + 1);
+      tmp = TMP_ALLOC_LIMBS(Kl + 1);
 
       if (dif > Kl)
 	{
@@ -717,30 +730,48 @@ mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, mp_size_t K, mp_size_t nprime,
 }
 
 /* op <- n*m mod 2^N+1 with fft of size 2^k where N=pl*GMP_NUMB_BITS
-   op is pl limbs, its high bit is returned.
+   n and m have respectively nl and ml limbs
+   op must have space for pl+1 limbs if rec=1 (and pl limbs if rec=0).
    One must have pl = mpn_fft_next_size (pl, k).
    T must have space for 2 * (nprime + 1) limbs.
+
+   If rec=0, then store only the pl low limbs of the result, and return
+   the out carry.
 */
 
 static mp_limb_t
-mpn_mul_fft_internal (mp_ptr op, mp_size_t pl, int k,
-		      mp_ptr *Ap, mp_ptr *Bp, mp_ptr A, mp_ptr B,
+mpn_mul_fft_internal (mp_ptr op, mp_srcptr n, mp_srcptr m, mp_size_t pl,
+		      int k, int K,
+		      mp_ptr *Ap, mp_ptr *Bp,
+		      mp_ptr A, mp_ptr B,
 		      mp_size_t nprime, mp_size_t l, mp_size_t Mp,
-		      int **fft_l, mp_ptr T, int sqr)
+		      int **_fft_l,
+		      mp_ptr T, int rec)
 {
-  mp_size_t K, i, pla, lo, sh, j;
+  int i, sqr, pla, lo, sh, j;
   mp_ptr p;
   mp_limb_t cc;
 
-  K = (mp_size_t) 1 << k;
+  sqr = n == m;
+
+  TRACE (printf ("pl=%ld k=%d K=%d np=%ld l=%ld Mp=%ld rec=%d sqr=%d\n",
+		 pl,k,K,nprime,l,Mp,rec,sqr));
+
+  /* decomposition of inputs into arrays Ap[i] and Bp[i] */
+  if (rec)
+    {
+      mpn_mul_fft_decompose (A, Ap, K, nprime, n, K * l + 1, l, Mp, T);
+      if (!sqr)
+	mpn_mul_fft_decompose (B, Bp, K, nprime, m, K * l + 1, l, Mp, T);
+    }
 
   /* direct fft's */
-  mpn_fft_fft (Ap, K, fft_l + k, 2 * Mp, nprime, 1, T);
+  mpn_fft_fft (Ap, K, _fft_l + k, 2 * Mp, nprime, 1, T);
   if (!sqr)
-    mpn_fft_fft (Bp, K, fft_l + k, 2 * Mp, nprime, 1, T);
+    mpn_fft_fft (Bp, K, _fft_l + k, 2 * Mp, nprime, 1, T);
 
   /* term to term multiplications */
-  mpn_fft_mul_modF_K (Ap, sqr ? Ap : Bp, nprime, K);
+  mpn_fft_mul_modF_K (Ap, (sqr) ? Ap : Bp, nprime, K);
 
   /* inverse fft's */
   mpn_fft_fftinv (Ap, K, 2 * Mp, nprime, T);
@@ -804,14 +835,18 @@ mpn_mul_fft_internal (mp_ptr op, mp_size_t pl, int k,
   /* here p < 2^(2M) [K 2^(M(K-1)) + (K-1) 2^(M(K-2)) + ... ]
      < K 2^(2M) [2^(M(K-1)) + 2^(M(K-2)) + ... ]
      < K 2^(2M) 2^(M(K-1))*2 = 2^(M*K+M+k+1) */
-  return mpn_fft_norm_modF (op, pl, p, pla);
+  i = mpn_fft_norm_modF (op, pl, p, pla);
+  if (rec) /* store the carry out */
+    op[pl] = i;
+
+  return i;
 }
 
 /* return the lcm of a and 2^k */
-static mp_bitcnt_t
-mpn_mul_fft_lcm (mp_bitcnt_t a, int k)
+static unsigned long int
+mpn_mul_fft_lcm (unsigned long int a, unsigned int k)
 {
-  mp_bitcnt_t l = k;
+  unsigned long int l = k;
 
   while (a % 2 == 0 && k > 0)
     {
@@ -828,11 +863,10 @@ mpn_mul_fft (mp_ptr op, mp_size_t pl,
 	     mp_srcptr m, mp_size_t ml,
 	     int k)
 {
-  int i;
-  mp_size_t K, maxLK;
+  int K, maxLK, i;
   mp_size_t N, Nprime, nprime, M, Mp, l;
   mp_ptr *Ap, *Bp, A, T, B;
-  int **fft_l, *tmp;
+  int **_fft_l;
   int sqr = (n == m && nl == ml);
   mp_limb_t h;
   TMP_DECL;
@@ -842,72 +876,63 @@ mpn_mul_fft (mp_ptr op, mp_size_t pl,
 
   TMP_MARK;
   N = pl * GMP_NUMB_BITS;
-  fft_l = TMP_BALLOC_TYPE (k + 1, int *);
-  tmp = TMP_BALLOC_TYPE ((size_t) 2 << k, int);
+  _fft_l = TMP_ALLOC_TYPE (k + 1, int *);
   for (i = 0; i <= k; i++)
-    {
-      fft_l[i] = tmp;
-      tmp += (mp_size_t) 1 << i;
-    }
-
-  mpn_fft_initl (fft_l, k);
-  K = (mp_size_t) 1 << k;
+    _fft_l[i] = TMP_ALLOC_TYPE (1 << i, int);
+  mpn_fft_initl (_fft_l, k);
+  K = 1 << k;
   M = N >> k;	/* N = 2^k M */
   l = 1 + (M - 1) / GMP_NUMB_BITS;
-  maxLK = mpn_mul_fft_lcm (GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */
+  maxLK = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */
 
   Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK;
   /* Nprime = ceil((2*M+k+3)/maxLK)*maxLK; */
   nprime = Nprime / GMP_NUMB_BITS;
-  TRACE (printf ("N=%ld K=%ld, M=%ld, l=%ld, maxLK=%ld, Np=%ld, np=%ld\n",
+  TRACE (printf ("N=%ld K=%d, M=%ld, l=%ld, maxLK=%d, Np=%ld, np=%ld\n",
 		 N, K, M, l, maxLK, Nprime, nprime));
   /* we should ensure that recursively, nprime is a multiple of the next K */
   if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
     {
-      mp_size_t K2;
+      unsigned long K2;
       for (;;)
 	{
-	  K2 = (mp_size_t) 1 << mpn_fft_best_k (nprime, sqr);
+	  K2 = 1L << mpn_fft_best_k (nprime, sqr);
 	  if ((nprime & (K2 - 1)) == 0)
 	    break;
 	  nprime = (nprime + K2 - 1) & -K2;
 	  Nprime = nprime * GMP_LIMB_BITS;
 	  /* warning: since nprime changed, K2 may change too! */
 	}
-      TRACE (printf ("new maxLK=%ld, Np=%ld, np=%ld\n", maxLK, Nprime, nprime));
+      TRACE (printf ("new maxLK=%d, Np=%ld, np=%ld\n", maxLK, Nprime, nprime));
     }
   ASSERT_ALWAYS (nprime < pl); /* otherwise we'll loop */
 
-  T = TMP_BALLOC_LIMBS (2 * (nprime + 1));
+  T = TMP_ALLOC_LIMBS (2 * (nprime + 1));
   Mp = Nprime >> k;
 
-  TRACE (printf ("%ldx%ld limbs -> %ld times %ldx%ld limbs (%1.2f)\n",
+  TRACE (printf ("%ldx%ld limbs -> %d times %ldx%ld limbs (%1.2f)\n",
 		pl, pl, K, nprime, nprime, 2.0 * (double) N / Nprime / K);
 	 printf ("   temp space %ld\n", 2 * K * (nprime + 1)));
 
-  A = TMP_BALLOC_LIMBS (K * (nprime + 1));
-  Ap = TMP_BALLOC_MP_PTRS (K);
+  A = __GMP_ALLOCATE_FUNC_LIMBS (2 * K * (nprime + 1));
+  B = A + K * (nprime + 1);
+  Ap = TMP_ALLOC_MP_PTRS (K);
+  Bp = TMP_ALLOC_MP_PTRS (K);
+
+  /* special decomposition for main call */
+  /* nl is the number of significant limbs in n */
   mpn_mul_fft_decompose (A, Ap, K, nprime, n, nl, l, Mp, T);
-  if (sqr)
-    {
-      mp_size_t pla;
-      pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */
-      B = TMP_BALLOC_LIMBS (pla);
-      Bp = TMP_BALLOC_MP_PTRS (K);
-    }
-  else
-    {
-      B = TMP_BALLOC_LIMBS (K * (nprime + 1));
-      Bp = TMP_BALLOC_MP_PTRS (K);
-      mpn_mul_fft_decompose (B, Bp, K, nprime, m, ml, l, Mp, T);
-    }
-  h = mpn_mul_fft_internal (op, pl, k, Ap, Bp, A, B, nprime, l, Mp, fft_l, T, sqr);
+  if (n != m)
+    mpn_mul_fft_decompose (B, Bp, K, nprime, m, ml, l, Mp, T);
+
+  h = mpn_mul_fft_internal (op, n, m, pl, k, K, Ap, Bp, A, B, nprime, l, Mp, _fft_l, T, 0);
 
   TMP_FREE;
+  __GMP_FREE_FUNC_LIMBS (A, 2 * K * (nprime + 1));
+
   return h;
 }
 
-#if WANT_OLD_FFT_FULL
 /* multiply {n, nl} by {m, ml}, and put the result in {op, nl+ml} */
 void
 mpn_mul_fft_full (mp_ptr op,
@@ -916,9 +941,9 @@ mpn_mul_fft_full (mp_ptr op,
 {
   mp_ptr pad_op;
   mp_size_t pl, pl2, pl3, l;
-  mp_size_t cc, c2, oldcc;
   int k2, k3;
   int sqr = (n == m && nl == ml);
+  int cc, c2, oldcc;
 
   pl = nl + ml; /* total number of limbs of the result */
 
@@ -935,7 +960,7 @@ mpn_mul_fft_full (mp_ptr op,
   pl2 = (2 * pl - 1) / 5; /* ceil (2pl/5) - 1 */
   do
     {
-      pl2++;
+      pl2 ++;
       k2 = mpn_fft_best_k (pl2, sqr); /* best fft size for pl2 limbs */
       pl2 = mpn_fft_next_size (pl2, k2);
       pl3 = 3 * pl2 / 2; /* since k>=FFT_FIRST_K=4, pl2 is a multiple of 2^4,
@@ -949,23 +974,23 @@ mpn_mul_fft_full (mp_ptr op,
 
   ASSERT_ALWAYS(pl3 <= pl);
   cc = mpn_mul_fft (op, pl3, n, nl, m, ml, k3);     /* mu */
-  ASSERT(cc == 0);
+  ASSERT_ALWAYS(cc == 0);
   pad_op = __GMP_ALLOCATE_FUNC_LIMBS (pl2);
   cc = mpn_mul_fft (pad_op, pl2, n, nl, m, ml, k2); /* lambda */
   cc = -cc + mpn_sub_n (pad_op, pad_op, op, pl2);    /* lambda - low(mu) */
   /* 0 <= cc <= 1 */
-  ASSERT(0 <= cc && cc <= 1);
+  ASSERT_ALWAYS(0 <= cc && cc <= 1);
   l = pl3 - pl2; /* l = pl2 / 2 since pl3 = 3/2 * pl2 */
   c2 = mpn_add_n (pad_op, pad_op, op + pl2, l);
   cc = mpn_add_1 (pad_op + l, pad_op + l, l, (mp_limb_t) c2) - cc;
-  ASSERT(-1 <= cc && cc <= 1);
+  ASSERT_ALWAYS(-1 <= cc && cc <= 1);
   if (cc < 0)
     cc = mpn_add_1 (pad_op, pad_op, pl2, (mp_limb_t) -cc);
-  ASSERT(0 <= cc && cc <= 1);
+  ASSERT_ALWAYS(0 <= cc && cc <= 1);
   /* now lambda-mu = {pad_op, pl2} - cc mod 2^(pl2*GMP_NUMB_BITS)+1 */
   oldcc = cc;
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  c2 = mpn_add_n_sub_n (pad_op + l, pad_op, pad_op, pad_op + l, l);
+#if HAVE_NATIVE_mpn_addsub_n
+  c2 = mpn_addsub_n (pad_op + l, pad_op, pad_op, pad_op + l, l);
   /* c2 & 1 is the borrow, c2 & 2 is the carry */
   cc += c2 >> 1; /* carry out from high <- low + high */
   c2 = c2 & 1; /* borrow out from low <- low - high */
@@ -975,7 +1000,7 @@ mpn_mul_fft_full (mp_ptr op,
     TMP_DECL;
 
     TMP_MARK;
-    tmp = TMP_BALLOC_LIMBS (l);
+    tmp = TMP_ALLOC_LIMBS (l);
     MPN_COPY (tmp, pad_op, l);
     c2 = mpn_sub_n (pad_op,      pad_op, pad_op + l, l);
     cc += mpn_add_n (pad_op + l, tmp,    pad_op + l, l);
@@ -1011,4 +1036,3 @@ mpn_mul_fft_full (mp_ptr op,
   /* since the final result has at most pl limbs, no carry out below */
   mpn_add_1 (op + pl2, op + pl2, pl - pl2, (mp_limb_t) c2);
 }
-#endif
diff --git a/gmp/mpn/generic/mul_n.c b/gmp/mpn/generic/mul_n.c
index 5df8b16fa0..4aa25f9b58 100644
--- a/gmp/mpn/generic/mul_n.c
+++ b/gmp/mpn/generic/mul_n.c
@@ -1,38 +1,695 @@
-/* mpn_mul_n -- multiply natural numbers.
+/* mpn_mul_n and helper function -- Multiply/square natural numbers.
 
-Copyright 1991, 1993, 1994, 1996-2003, 2005, 2008, 2009 Free Software
-Foundation, Inc.
+   THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul_n) ARE
+   INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH
+   DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE
+   OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+2005 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
 
+
+/* Multiplies using 3 half-sized mults and so on recursively.
+ * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1].
+ * No overlap of p[...] with a[...] or b[...].
+ * ws is workspace.
+ */
+
+void
+mpn_kara_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws)
+{
+  mp_limb_t w, w0, w1;
+  mp_size_t n2;
+  mp_srcptr x, y;
+  mp_size_t i;
+  int sign;
+
+  n2 = n >> 1;
+  ASSERT (n2 > 0);
+
+  if ((n & 1) != 0)
+    {
+      /* Odd length. */
+      mp_size_t n1, n3, nm1;
+
+      n3 = n - n2;
+
+      sign = 0;
+      w = a[n2];
+      if (w != 0)
+	w -= mpn_sub_n (p, a, a + n3, n2);
+      else
+	{
+	  i = n2;
+	  do
+	    {
+	      --i;
+	      w0 = a[i];
+	      w1 = a[n3 + i];
+	    }
+	  while (w0 == w1 && i != 0);
+	  if (w0 < w1)
+	    {
+	      x = a + n3;
+	      y = a;
+	      sign = ~0;
+	    }
+	  else
+	    {
+	      x = a;
+	      y = a + n3;
+	    }
+	  mpn_sub_n (p, x, y, n2);
+	}
+      p[n2] = w;
+
+      w = b[n2];
+      if (w != 0)
+	w -= mpn_sub_n (p + n3, b, b + n3, n2);
+      else
+	{
+	  i = n2;
+	  do
+	    {
+	      --i;
+	      w0 = b[i];
+	      w1 = b[n3 + i];
+	    }
+	  while (w0 == w1 && i != 0);
+	  if (w0 < w1)
+	    {
+	      x = b + n3;
+	      y = b;
+	      sign = ~sign;
+	    }
+	  else
+	    {
+	      x = b;
+	      y = b + n3;
+	    }
+	  mpn_sub_n (p + n3, x, y, n2);
+	}
+      p[n] = w;
+
+      n1 = n + 1;
+      if (n2 < MUL_KARATSUBA_THRESHOLD)
+	{
+	  if (n3 < MUL_KARATSUBA_THRESHOLD)
+	    {
+	      mpn_mul_basecase (ws, p, n3, p + n3, n3);
+	      mpn_mul_basecase (p, a, n3, b, n3);
+	    }
+	  else
+	    {
+	      mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
+	      mpn_kara_mul_n (p, a, b, n3, ws + n1);
+	    }
+	  mpn_mul_basecase (p + n1, a + n3, n2, b + n3, n2);
+	}
+      else
+	{
+	  mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
+	  mpn_kara_mul_n (p, a, b, n3, ws + n1);
+	  mpn_kara_mul_n (p + n1, a + n3, b + n3, n2, ws + n1);
+	}
+
+      if (sign)
+	mpn_add_n (ws, p, ws, n1);
+      else
+	mpn_sub_n (ws, p, ws, n1);
+
+      nm1 = n - 1;
+      if (mpn_add_n (ws, p + n1, ws, nm1))
+	{
+	  mp_limb_t x = (ws[nm1] + 1) & GMP_NUMB_MASK;
+	  ws[nm1] = x;
+	  if (x == 0)
+	    ws[n] = (ws[n] + 1) & GMP_NUMB_MASK;
+	}
+      if (mpn_add_n (p + n3, p + n3, ws, n1))
+	{
+	  mpn_incr_u (p + n1 + n3, 1);
+	}
+    }
+  else
+    {
+      /* Even length. */
+      i = n2;
+      do
+	{
+	  --i;
+	  w0 = a[i];
+	  w1 = a[n2 + i];
+	}
+      while (w0 == w1 && i != 0);
+      sign = 0;
+      if (w0 < w1)
+	{
+	  x = a + n2;
+	  y = a;
+	  sign = ~0;
+	}
+      else
+	{
+	  x = a;
+	  y = a + n2;
+	}
+      mpn_sub_n (p, x, y, n2);
+
+      i = n2;
+      do
+	{
+	  --i;
+	  w0 = b[i];
+	  w1 = b[n2 + i];
+	}
+      while (w0 == w1 && i != 0);
+      if (w0 < w1)
+	{
+	  x = b + n2;
+	  y = b;
+	  sign = ~sign;
+	}
+      else
+	{
+	  x = b;
+	  y = b + n2;
+	}
+      mpn_sub_n (p + n2, x, y, n2);
+
+      /* Pointwise products. */
+      if (n2 < MUL_KARATSUBA_THRESHOLD)
+	{
+	  mpn_mul_basecase (ws, p, n2, p + n2, n2);
+	  mpn_mul_basecase (p, a, n2, b, n2);
+	  mpn_mul_basecase (p + n, a + n2, n2, b + n2, n2);
+	}
+      else
+	{
+	  mpn_kara_mul_n (ws, p, p + n2, n2, ws + n);
+	  mpn_kara_mul_n (p, a, b, n2, ws + n);
+	  mpn_kara_mul_n (p + n, a + n2, b + n2, n2, ws + n);
+	}
+
+      /* Interpolate. */
+      if (sign)
+	w = mpn_add_n (ws, p, ws, n);
+      else
+	w = -mpn_sub_n (ws, p, ws, n);
+      w += mpn_add_n (ws, p + n, ws, n);
+      w += mpn_add_n (p + n2, p + n2, ws, n);
+      MPN_INCR_U (p + n2 + n, 2 * n - (n2 + n), w);
+    }
+}
+
+void
+mpn_kara_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws)
+{
+  mp_limb_t w, w0, w1;
+  mp_size_t n2;
+  mp_srcptr x, y;
+  mp_size_t i;
+
+  n2 = n >> 1;
+  ASSERT (n2 > 0);
+
+  if ((n & 1) != 0)
+    {
+      /* Odd length. */
+      mp_size_t n1, n3, nm1;
+
+      n3 = n - n2;
+
+      w = a[n2];
+      if (w != 0)
+	w -= mpn_sub_n (p, a, a + n3, n2);
+      else
+	{
+	  i = n2;
+	  do
+	    {
+	      --i;
+	      w0 = a[i];
+	      w1 = a[n3 + i];
+	    }
+	  while (w0 == w1 && i != 0);
+	  if (w0 < w1)
+	    {
+	      x = a + n3;
+	      y = a;
+	    }
+	  else
+	    {
+	      x = a;
+	      y = a + n3;
+	    }
+	  mpn_sub_n (p, x, y, n2);
+	}
+      p[n2] = w;
+
+      n1 = n + 1;
+
+      /* n2 is always either n3 or n3-1 so maybe the two sets of tests here
+	 could be combined.  But that's not important, since the tests will
+	 take a minuscule amount of time compared to the function calls.  */
+      if (BELOW_THRESHOLD (n3, SQR_BASECASE_THRESHOLD))
+	{
+	  mpn_mul_basecase (ws, p, n3, p, n3);
+	  mpn_mul_basecase (p,  a, n3, a, n3);
+	}
+      else if (BELOW_THRESHOLD (n3, SQR_KARATSUBA_THRESHOLD))
+	{
+	  mpn_sqr_basecase (ws, p, n3);
+	  mpn_sqr_basecase (p,  a, n3);
+	}
+      else
+	{
+	  mpn_kara_sqr_n   (ws, p, n3, ws + n1);	 /* (x-y)^2 */
+	  mpn_kara_sqr_n   (p,  a, n3, ws + n1);	 /* x^2	    */
+	}
+      if (BELOW_THRESHOLD (n2, SQR_BASECASE_THRESHOLD))
+	mpn_mul_basecase (p + n1, a + n3, n2, a + n3, n2);
+      else if (BELOW_THRESHOLD (n2, SQR_KARATSUBA_THRESHOLD))
+	mpn_sqr_basecase (p + n1, a + n3, n2);
+      else
+	mpn_kara_sqr_n   (p + n1, a + n3, n2, ws + n1);	 /* y^2	    */
+
+      /* Since x^2+y^2-(x-y)^2 = 2xy >= 0 there's no need to track the
+	 borrow from mpn_sub_n.	 If it occurs then it'll be cancelled by a
+	 carry from ws[n].  Further, since 2xy fits in n1 limbs there won't
+	 be any carry out of ws[n] other than cancelling that borrow. */
+
+      mpn_sub_n (ws, p, ws, n1);	     /* x^2-(x-y)^2 */
+
+      nm1 = n - 1;
+      if (mpn_add_n (ws, p + n1, ws, nm1))   /* x^2+y^2-(x-y)^2 = 2xy */
+	{
+	  mp_limb_t x = (ws[nm1] + 1) & GMP_NUMB_MASK;
+	  ws[nm1] = x;
+	  if (x == 0)
+	    ws[n] = (ws[n] + 1) & GMP_NUMB_MASK;
+	}
+      if (mpn_add_n (p + n3, p + n3, ws, n1))
+	{
+	  mpn_incr_u (p + n1 + n3, 1);
+	}
+    }
+  else
+    {
+      /* Even length. */
+      i = n2;
+      do
+	{
+	  --i;
+	  w0 = a[i];
+	  w1 = a[n2 + i];
+	}
+      while (w0 == w1 && i != 0);
+      if (w0 < w1)
+	{
+	  x = a + n2;
+	  y = a;
+	}
+      else
+	{
+	  x = a;
+	  y = a + n2;
+	}
+      mpn_sub_n (p, x, y, n2);
+
+      /* Pointwise products. */
+      if (BELOW_THRESHOLD (n2, SQR_BASECASE_THRESHOLD))
+	{
+	  mpn_mul_basecase (ws,    p,      n2, p,      n2);
+	  mpn_mul_basecase (p,     a,      n2, a,      n2);
+	  mpn_mul_basecase (p + n, a + n2, n2, a + n2, n2);
+	}
+      else if (BELOW_THRESHOLD (n2, SQR_KARATSUBA_THRESHOLD))
+	{
+	  mpn_sqr_basecase (ws,    p,      n2);
+	  mpn_sqr_basecase (p,     a,      n2);
+	  mpn_sqr_basecase (p + n, a + n2, n2);
+	}
+      else
+	{
+	  mpn_kara_sqr_n (ws,    p,      n2, ws + n);
+	  mpn_kara_sqr_n (p,     a,      n2, ws + n);
+	  mpn_kara_sqr_n (p + n, a + n2, n2, ws + n);
+	}
+
+      /* Interpolate. */
+      w = -mpn_sub_n (ws, p, ws, n);
+      w += mpn_add_n (ws, p + n, ws, n);
+      w += mpn_add_n (p + n2, p + n2, ws, n);
+      MPN_INCR_U (p + n2 + n, 2 * n - (n2 + n), w);
+    }
+}
+
+/******************************************************************************
+ *                                                                            *
+ *              Toom 3-way multiplication and squaring                        *
+ *                                                                            *
+ *****************************************************************************/
+
+/* Starts from:
+   {v0,2k}    (stored in {c,2k})
+   {vm1,2k+1} (whose sign is sa, and absolute value is stored in {vm1,2k+1})
+   {v1,2k+1}  (stored in {c+2k,2k+1})
+   {v2,2k+1}
+   {vinf,twor}  (stored in {c+4k,twor}, except the first limb, saved in vinf0)
+
+   ws is temporary space, and should have at least twor limbs.
+
+   put in {c, 2n} where n = 2k+twor the value of {v0,2k} (already in place)
+   + B^k * {tm1, 2k+1}
+   + B^(2k) * {t1, 2k+1}
+   + B^(3k) * {t2, 2k+1}
+   + B^(4k) * {vinf,twor} (high twor-1 limbs already in place)
+   where {t1, 2k+1} = ({v1, 2k+1} + sa * {vm1, 2k+1} - 2*{v0,2k})/2 - {vinf,twor}
+	 {t2, 2k+1} = ({v2,2k+1}-sa*{vm1,2k+1}-3*({v1,2k+1}-{v0,2k}))/6-2*{vinf,twor}
+	 {tm1,2k+1} = ({v1,2k+1}-sa*{vm1,2k+1})/2-{t2,2k+1}
+
+   Exact sequence described in a comment in mpn_toom3_mul_n.
+   mpn_toom3_mul_n() and mpn_toom3_sqr_n() implement steps 1-2.
+   mpn_toom_interpolate_5pts() implements steps 3-4.
+
+   Reference: What About Toom-Cook Matrices Optimality? Marco Bodrato
+   and Alberto Zanoni, October 19, 2006, http://bodrato.it/papers/#CIVV2006
+
+   ************* saved note ****************
+   Think about:
+
+   The evaluated point a-b+c stands a good chance of having a zero carry
+   limb, a+b+c would have a 1/4 chance, and 4*a+2*b+c a 1/8 chance, roughly.
+   Perhaps this could be tested and stripped.  Doing so before recursing
+   would be better than stripping at the start of mpn_toom3_mul_n/sqr_n,
+   since then the recursion could be based on the new size.  Although in
+   truth the kara vs toom3 crossover is never so exact that one limb either
+   way makes a difference.
+
+   A small value like 1 or 2 for the carry could perhaps also be handled
+   with an add_n or addlsh1_n.  Would that be faster than an extra limb on a
+   (recursed) multiply/square?
+*/
+
+#define TOOM3_MUL_REC(p, a, b, n, ws) \
+  do {								\
+    if (MUL_TOOM3_THRESHOLD / 3 < MUL_KARATSUBA_THRESHOLD	\
+	&& BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))	\
+      mpn_mul_basecase (p, a, n, b, n);				\
+    else if (BELOW_THRESHOLD (n, MUL_TOOM3_THRESHOLD))		\
+      mpn_kara_mul_n (p, a, b, n, ws);				\
+    else							\
+      mpn_toom3_mul_n (p, a, b, n, ws);				\
+  } while (0)
+
+#define TOOM3_SQR_REC(p, a, n, ws)				\
+  do {								\
+    if (SQR_TOOM3_THRESHOLD / 3 < SQR_BASECASE_THRESHOLD	\
+	&& BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))		\
+      mpn_mul_basecase (p, a, n, a, n);				\
+    else if (SQR_TOOM3_THRESHOLD / 3 < SQR_KARATSUBA_THRESHOLD	\
+	&& BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))	\
+      mpn_sqr_basecase (p, a, n);				\
+    else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))		\
+      mpn_kara_sqr_n (p, a, n, ws);				\
+    else							\
+      mpn_toom3_sqr_n (p, a, n, ws);				\
+  } while (0)
+
+/* The necessary temporary space T(n) satisfies T(n)=0 for n < THRESHOLD,
+   and T(n) <= max(2n+2, 6k+3, 4k+3+T(k+1)) otherwise, where k = ceil(n/3).
+
+   Assuming T(n) >= 2n, 6k+3 <= 4k+3+T(k+1).
+   Similarly, 2n+2 <= 6k+2 <= 4k+3+T(k+1).
+
+   With T(n) = 2n+S(n), this simplifies to S(n) <= 9 + S(k+1).
+   Since THRESHOLD >= 17, we have n/(k+1) >= 19/8
+   thus S(n) <= S(n/(19/8)) + 9 thus S(n) <= 9*log(n)/log(19/8) <= 8*log2(n).
+*/
+
+void
+mpn_toom3_mul_n (mp_ptr c, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr t)
+{
+  mp_size_t k, k1, kk1, r, twok, twor;
+  mp_limb_t cy, cc, saved, vinf0;
+  mp_ptr trec;
+  int sa, sb;
+  mp_ptr c1, c2, c3, c4, c5;
+
+  ASSERT(GMP_NUMB_BITS >= 6);
+  ASSERT(n >= 17); /* so that r <> 0 and 5k+3 <= 2n */
+
+  /*
+  The algorithm is the following:
+
+  0. k = ceil(n/3), r = n - 2k, B = 2^(GMP_NUMB_BITS), t = B^k
+  1. split a and b in three parts each a0, a1, a2 and b0, b1, b2
+     with a0, a1, b0, b1 of k limbs, and a2, b2 of r limbs
+  2. Evaluation: vm1 may be negative, the other can not.
+     v0   <- a0*b0
+     v1   <- (a0+a1+a2)*(b0+b1+b2)
+     v2   <- (a0+2*a1+4*a2)*(b0+2*b1+4*b2)
+     vm1  <- (a0-a1+a2)*(b0-b1+b2)
+     vinf <- a2*b2
+  3. Interpolation: every result is positive, all divisions are exact
+     t2   <- (v2 - vm1)/3
+     tm1  <- (v1 - vm1)/2
+     t1   <- (v1 - v0)
+     t2   <- (t2 - t1)/2
+     t1   <- (t1 - tm1 - vinf)
+     t2   <- (t2 - 2*vinf)
+     tm1  <- (tm1 - t2)
+  4. result is c0+c1*t+c2*t^2+c3*t^3+c4*t^4 where
+     c0   <- v0
+     c1   <- tm1
+     c2   <- t1
+     c3   <- t2
+     c4   <- vinf
+  */
+
+  k = (n + 2) / 3; /* ceil(n/3) */
+  twok = 2 * k;
+  k1 = k + 1;
+  kk1 = k + k1;
+  r = n - twok;   /* last chunk */
+  twor = 2 * r;
+
+  c1 = c + k;
+  c2 = c1 + k;
+  c3 = c2 + k;
+  c4 = c3 + k;
+  c5 = c4 + k;
+
+  trec = t + 4 * k + 3; /* trec = v2 + (2k+2) */
+
+  /* put a0+a2 in {c, k+1}, and b0+b2 in {c+4k+2, k+1};
+     put a0+a1+a2 in {t, k+1} and b0+b1+b2 in {t+k+1,k+1}
+     [????requires 5k+3 <= 2n, ie. n >= 9] */
+  cy = mpn_add_n (c,      a, a + twok, r);
+  cc = mpn_add_n (c4 + 2, b, b + twok, r);
+  if (r < k)
+    {
+      __GMPN_ADD_1 (cy, c + r,      a + r, k - r, cy);
+      __GMPN_ADD_1 (cc, c4 + 2 + r, b + r, k - r, cc);
+    }
+
+  /* Put in {t, k+1} the sum
+   * (a_0+a_2) - stored in {c, k+1} -
+   * +
+   * a_1       - stored in {a+k, k} */
+  t[k] = (c1[0] = cy) + mpn_add_n (t, c, a + k, k);
+  /*          ^              ^
+   * carry of a_0 + a_2    carry of (a_0+a_2) + a_1
+
+   */
+
+  /* Put in {t+k+1, k+1} the sum of the two values
+   * (b_0+b_2) - stored in {c1+1, k+1} -
+   * +
+   * b_1       - stored in {b+k, k} */
+  t[kk1] = (c5[3] = cc) + mpn_add_n (t + k1, c4 + 2, b + k, k);
+  /*          ^              ^
+   * carry of b_0 + b_2    carry of (b_0+b_2) + b_1 */
+
+#define v2 (t+2*k+1)
+
+  /* compute v1 := (a0+a1+a2)*(b0+b1+b2) in {t, 2k+1};
+     since v1 < 9*B^(2k), v1 uses only 2k+1 words if GMP_NUMB_BITS >= 4 */
+  TOOM3_MUL_REC (c2, t, t + k1, k1, trec);
+
+  /*   c         c2    c4                 t
+     {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+		 v1                                            */
+
+  /* put |a0-a1+a2| in {c, k+1} and |b0-b1+b2| in {c+4k+2,k+1} */
+  /* (They're already there, actually)                         */
+
+  /* sa = sign(a0-a1+a2) */
+  sa   = (cy != 0) ? 1 : mpn_cmp (c, a + k, k);
+  c[k] = (sa >= 0) ? cy - mpn_sub_n (c, c, a + k, k)
+		   : mpn_sub_n (c, a + k, c, k);
+
+  sb    = (cc != 0) ? 1 : mpn_cmp (c4 + 2, b + k, k);
+  c5[2] = (sb >= 0) ? cc - mpn_sub_n (c4 + 2, c4 + 2, b + k, k)
+		    : mpn_sub_n (c4 + 2, b + k, c4 + 2, k);
+  sa *= sb; /* sign of vm1 */
+
+  /* compute vm1 := (a0-a1+a2)*(b0-b1+b2) in {t, 2k+1};
+     since |vm1| < 4*B^(2k), vm1 uses only 2k+1 limbs */
+  TOOM3_MUL_REC (t, c, c4 + 2, k1, trec);
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+		v1                      vm1
+  */
+
+  /* compute a0+2a1+4a2 in {c, k+1} and b0+2b1+4b2 in {c+4k+2, k+1}
+     [requires 5k+3 <= 2n, i.e. n >= 17] */
+#ifdef HAVE_NATIVE_mpn_addlsh1_n
+  c1[0] = mpn_addlsh1_n (c, a + k, a + twok, r);
+  c5[2] = mpn_addlsh1_n (c4 + 2, b + k, b + twok, r);
+  if (r < k)
+    {
+      __GMPN_ADD_1 (c1[0], c + r, a + k + r, k - r, c1[0]);
+      __GMPN_ADD_1 (c5[2], c4 + 2 + r, b + k + r, k - r, c5[2]);
+    }
+  c1[0] = 2 * c1[0] + mpn_addlsh1_n (c, a, c, k);
+  c5[2] = 2 * c5[2] + mpn_addlsh1_n (c4 + 2, b, c4 + 2, k);
+#else
+  c[r] = mpn_lshift (c, a + twok, r, 1);
+  c4[r + 2] = mpn_lshift (c4 + 2, b + twok, r, 1);
+  if (r < k)
+    {
+      MPN_ZERO(c + r + 1, k - r);
+      MPN_ZERO(c4 + r + 3, k - r);
+    }
+  c1[0] += mpn_add_n (c, c, a + k, k);
+  c5[2] += mpn_add_n (c4 + 2, c4 + 2, b + k, k);
+  mpn_lshift (c, c, k1, 1);
+  mpn_lshift (c4 + 2, c4 + 2, k1, 1);
+  c1[0] += mpn_add_n (c, c, a, k);
+  c5[2] += mpn_add_n (c4 + 2, c4 + 2, b, k);
+#endif
+
+  /* compute v2 := (a0+2a1+4a2)*(b0+2b1+4b2) in {t+2k+1, 2k+1}
+     v2 < 49*B^k so v2 uses at most 2k+1 limbs if GMP_NUMB_BITS >= 6 */
+  TOOM3_MUL_REC (v2, c, c4 + 2, k1, trec);
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+		v1                      vm1         v2
+  */
+
+  /* compute v0 := a0*b0 in {c, 2k} */
+  TOOM3_MUL_REC (c, a, b, k, trec);
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1                      vm1       v2                   */
+
+  /* compute vinf := a2*b2 in {t+4k+2, 2r}: in {c4, 2r} */
+
+  saved = c4[0];              /* Remember v1's highest byte (will be overwritten). */
+  TOOM3_MUL_REC (c4, a + twok, b + twok, r, trec);           /* Overwrites c4[0].  */
+  vinf0 = c4[0];              /* Remember vinf's lowest byte (will be overwritten).*/
+  c4[0] = saved;              /* Overwriting. Now v1 value is correct.             */
+
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       v1       vinf[1..]      vm1       v2               */
+
+  mpn_toom_interpolate_5pts (c, v2, t, k, 2*r, sa, vinf0, trec);
+
+#undef v2
+}
+
+void
+mpn_toom3_sqr_n (mp_ptr c, mp_srcptr a, mp_size_t n, mp_ptr t)
+{
+  mp_size_t k, k1, kk1, r, twok, twor;
+  mp_limb_t cy, saved, vinf0;
+  mp_ptr trec;
+  int sa;
+  mp_ptr c1, c2, c3, c4;
+
+  ASSERT(GMP_NUMB_BITS >= 6);
+  ASSERT(n >= 17); /* so that r <> 0 and 5k+3 <= 2n */
+  /* Toom-3 squaring of {a,n}; the result is built in c, t is scratch space.
+     The algorithm is the same as mpn_toom3_mul_n, with b=a: a is split as
+     a0 + a1*B^k + a2*B^(2k), with a0, a1 of k limbs and a2 of r limbs.  */
+  k = (n + 2) / 3; /* ceil(n/3) */
+  twok = 2 * k;
+  k1 = k + 1;      /* size of the evaluated operands: k limbs plus a carry limb */
+  kk1 = k + k1;    /* assigned but not referenced again in this function */
+  r = n - twok;   /* last chunk */
+  twor = 2 * r;    /* assigned but not referenced again in this function */
+
+  c1 = c + k;
+  c2 = c1 + k;
+  c3 = c2 + k;     /* only used to derive c4 */
+  c4 = c3 + k;
+
+  trec = t + 4 * k + 3; /* trec = v2 + (2k+2); scratch handed to every recursive call */
+  /* {c,k+1} := a0 + a2 (carry limb in c[k]); then {t,k+1} := a0 + a1 + a2.  */
+  cy = mpn_add_n (c, a, a + twok, r);
+  if (r < k)
+    __GMPN_ADD_1 (cy, c + r, a + r, k - r, cy);   /* carry through the upper k-r limbs of a0 */
+  t[k] = (c1[0] = cy) + mpn_add_n (t, c, a + k, k);
+
+#define v2 (t+2*k+1)
+  /* v1 := (a0+a1+a2)^2, stored at c2 = c + 2k.  */
+  TOOM3_SQR_REC (c2, t, k1, trec);
+  /* {c,k+1} := |a0-a1+a2|; sa (its sign) only selects the subtraction order.  */
+  sa = (cy != 0) ? 1 : mpn_cmp (c, a + k, k);
+  c[k] = (sa >= 0) ? cy - mpn_sub_n (c, c, a + k, k)
+    : mpn_sub_n (c, a + k, c, k);
+  /* vm1 := (a0-a1+a2)^2, stored at t.  */
+  TOOM3_SQR_REC (t, c, k1, trec);
+  /* {c,k+1} := a0 + 2*a1 + 4*a2, computed by one of two equivalent routes.  */
+#ifdef HAVE_NATIVE_mpn_addlsh1_n
+  c1[0] = mpn_addlsh1_n (c, a + k, a + twok, r);   /* low r limbs of a1 + 2*a2 */
+  if (r < k)
+    __GMPN_ADD_1 (c1[0], c + r, a + k + r, k - r, c1[0]);
+  c1[0] = 2 * c1[0] + mpn_addlsh1_n (c, a, c, k);  /* a0 + 2*(a1 + 2*a2) */
+#else
+  c[r] = mpn_lshift (c, a + twok, r, 1);           /* 2*a2 */
+  if (r < k)
+    MPN_ZERO(c + r + 1, k - r);                    /* clear the limbs above it, up to c[k] */
+  c1[0] += mpn_add_n (c, c, a + k, k);             /* a1 + 2*a2 */
+  mpn_lshift (c, c, k1, 1);                        /* 2*a1 + 4*a2 */
+  c1[0] += mpn_add_n (c, c, a, k);                 /* a0 + 2*a1 + 4*a2 */
+#endif
+  /* v2 := (a0+2*a1+4*a2)^2, stored at v2 = t + 2k + 1.  */
+  TOOM3_SQR_REC (v2, c, k1, trec);
+  /* v0 := a0^2 in {c, 2k}.  */
+  TOOM3_SQR_REC (c, a, k, trec);
+  /* vinf := a2^2 goes to c4, whose first limb is also the top limb of v1.  */
+  saved = c4[0];                          /* remember v1's top limb */
+  TOOM3_SQR_REC (c4, a + twok, r, trec);  /* overwrites c4[0] */
+  vinf0 = c4[0];                          /* vinf's low limb, passed separately below */
+  c4[0] = saved;                          /* v1 is intact again */
+  /* vm1 is a square, so the sign argument (sa in mpn_toom3_mul_n) is the constant 1.  */
+  mpn_toom_interpolate_5pts (c, v2, t, k, 2*r,  1, vinf0, trec);
+
+#undef v2
+}
+
 void
 mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
 {
@@ -40,28 +697,31 @@ mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
   ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
   ASSERT (! MPN_OVERLAP_P (p, 2 * n, b, n));
 
-  if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+  if (BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))
     {
       mpn_mul_basecase (p, a, n, b, n);
     }
-  else if (BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))
+  else if (BELOW_THRESHOLD (n, MUL_TOOM3_THRESHOLD))
     {
       /* Allocate workspace of fixed size on stack: fast! */
-      mp_limb_t ws[mpn_toom22_mul_itch (MUL_TOOM33_THRESHOLD_LIMIT-1,
-					MUL_TOOM33_THRESHOLD_LIMIT-1)];
-      ASSERT (MUL_TOOM33_THRESHOLD <= MUL_TOOM33_THRESHOLD_LIMIT);
-      mpn_toom22_mul (p, a, n, b, n, ws);
+      mp_limb_t ws[MPN_KARA_MUL_N_TSIZE (MUL_TOOM3_THRESHOLD_LIMIT-1)];
+      ASSERT (MUL_TOOM3_THRESHOLD <= MUL_TOOM3_THRESHOLD_LIMIT);
+      mpn_kara_mul_n (p, a, b, n, ws);
     }
   else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))
     {
       mp_ptr ws;
       TMP_SDECL;
       TMP_SMARK;
-      ws = TMP_SALLOC_LIMBS (mpn_toom33_mul_itch (n, n));
-      mpn_toom33_mul (p, a, n, b, n, ws);
+      ws = TMP_SALLOC_LIMBS (MPN_TOOM3_MUL_N_TSIZE (n));
+      mpn_toom3_mul_n (p, a, b, n, ws);
       TMP_SFREE;
     }
-  else if (BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+  else if (BELOW_THRESHOLD (n, MUL_FFT_THRESHOLD))
+#else
+  else if (BELOW_THRESHOLD (n, MPN_TOOM44_MAX_N))
+#endif
     {
       mp_ptr ws;
       TMP_SDECL;
@@ -70,28 +730,91 @@ mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
       mpn_toom44_mul (p, a, n, b, n, ws);
       TMP_SFREE;
     }
-  else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD))
+  else
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+    {
+      /* The current FFT code allocates its own space.  That should probably
+	 change.  */
+      mpn_mul_fft_full (p, a, n, b, n);
+    }
+#else
+    {
+      /* Toom4 for large operands.  */
+      mp_ptr ws;
+      TMP_DECL;
+      TMP_MARK;
+      ws = TMP_BALLOC_LIMBS (mpn_toom44_mul_itch (n, n));
+      mpn_toom44_mul (p, a, n, b, n, ws);
+      TMP_FREE;
+    }
+#endif
+}
+
+void
+mpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n)
+{
+  ASSERT (n >= 1);
+  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
+
+#if 0
+  /* FIXME: Can this be removed? */
+  if (n == 0)
+    return;
+#endif
+
+  if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+    { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
+      mpn_mul_basecase (p, a, n, a, n);
+    }
+  else if (BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))
+    {
+      mpn_sqr_basecase (p, a, n);
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
+    {
+      /* Allocate workspace of fixed size on stack: fast! */
+      mp_limb_t ws[MPN_KARA_SQR_N_TSIZE (SQR_TOOM3_THRESHOLD_LIMIT-1)];
+      ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
+      mpn_kara_sqr_n (p, a, n, ws);
+    }
+  else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))
     {
       mp_ptr ws;
       TMP_SDECL;
       TMP_SMARK;
-      ws = TMP_SALLOC_LIMBS (mpn_toom6_mul_n_itch (n));
-      mpn_toom6h_mul (p, a, n, b, n, ws);
+      ws = TMP_SALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (n));
+      mpn_toom3_sqr_n (p, a, n, ws);
       TMP_SFREE;
     }
-  else if (BELOW_THRESHOLD (n, MUL_FFT_THRESHOLD))
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+  else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
+#else
+  else if (BELOW_THRESHOLD (n, MPN_TOOM44_MAX_N))
+#endif
     {
       mp_ptr ws;
-      TMP_DECL;
-      TMP_MARK;
-      ws = TMP_ALLOC_LIMBS (mpn_toom8_mul_n_itch (n));
-      mpn_toom8h_mul (p, a, n, b, n, ws);
-      TMP_FREE;
+      TMP_SDECL;
+      TMP_SMARK;
+      ws = TMP_SALLOC_LIMBS (mpn_toom4_sqr_itch (n));
+      mpn_toom4_sqr (p, a, n, ws);
+      TMP_SFREE;
     }
   else
+#if WANT_FFT || TUNE_PROGRAM_BUILD
     {
       /* The current FFT code allocates its own space.  That should probably
 	 change.  */
-      mpn_fft_mul (p, a, n, b, n);
+      mpn_mul_fft_full (p, a, n, a, n);
+    }
+#else
+    {
+      /* Toom4 for large operands.  */
+      mp_ptr ws;
+      TMP_DECL;
+      TMP_MARK;
+      ws = TMP_BALLOC_LIMBS (mpn_toom4_sqr_itch (n));
+      mpn_toom4_sqr (p, a, n, ws);
+      TMP_FREE;
     }
+#endif
 }
diff --git a/gmp/mpn/generic/mullo_basecase.c b/gmp/mpn/generic/mullo_basecase.c
deleted file mode 100644
index 2120f44c3d..0000000000
--- a/gmp/mpn/generic/mullo_basecase.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/* mpn_mullo_basecase -- Internal routine to multiply two natural
-   numbers of length m and n and return the low part.
-
-   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-
-
-Copyright (C) 2000, 2002, 2004 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
-  FIXME: Should use mpn_addmul_2 (and higher).
-*/
-
-void
-mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
-{
-  mp_size_t i;
-
-  mpn_mul_1 (rp, up, n, vp[0]);
-
-  for (i = 1; i < n; i++)
-    mpn_addmul_1 (rp + i, up, n - i, vp[i]);
-}
diff --git a/gmp/mpn/generic/mullo_n.c b/gmp/mpn/generic/mullo_n.c
deleted file mode 100644
index dad75ee8f7..0000000000
--- a/gmp/mpn/generic/mullo_n.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/* mpn_mullo_n -- multiply two n-limb numbers and return the low n limbs
-   of their products.
-
-   Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-   THIS IS (FOR NOW) AN INTERNAL FUNCTION.  IT IS ONLY SAFE TO REACH THIS
-   FUNCTION THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED
-   THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2004, 2005, 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-#ifndef MULLO_BASECASE_THRESHOLD
-#define MULLO_BASECASE_THRESHOLD 0	/* never use mpn_mul_basecase */
-#endif
-
-#ifndef MULLO_DC_THRESHOLD
-#define MULLO_DC_THRESHOLD 3*MUL_TOOM22_THRESHOLD
-#endif
-
-#ifndef MULLO_MUL_N_THRESHOLD
-#define MULLO_MUL_N_THRESHOLD MUL_FFT_THRESHOLD
-#endif
-
-#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
-#define MAYBE_range_basecase 1
-#define MAYBE_range_toom22   1
-#else
-#define MAYBE_range_basecase                                           \
-  ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM22_THRESHOLD*36/(36-11))
-#define MAYBE_range_toom22                                             \
-  ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM33_THRESHOLD*36/(36-11) )
-#endif
-
-/*  THINK: The DC strategy uses different constants in different Toom's
-	 ranges. Something smoother?
-*/
-
-/*
-  Compute the least significant half of the product {xy,n}*{yp,n}, or
-  formally {rp,n} = {xy,n}*{yp,n} Mod (B^n).
-
-  Above the given threshold, the Divide and Conquer strategy is used.
-  The operands are split in two, and a full product plus two mullo
-  are used to obtain the final result. The more natural strategy is to
-  split in two halves, but this is far from optimal when a
-  sub-quadratic multiplication is used.
-
-  Mulders suggests an unbalanced split in favour of the full product,
-  split n = n1 + n2, where an = n1 <= n2 = (1-a)n; i.e. 0 < a <= 1/2.
-
-  To compute the value of a, we assume that the cost of mullo for a
-  given size ML(n) is a fraction of the cost of a full product with
-  same size M(n), and the cost M(n)=n^e for some exponent 1 < e <= 2;
-  then we can write:
-
-  ML(n) = 2*ML(an) + M((1-a)n) => k*M(n) = 2*k*M(n)*a^e + M(n)*(1-a)^e
-
-  Given a value for e, want to minimise the value of k, i.e. the
-  function k=(1-a)^e/(1-2*a^e).
-
-  With e=2, the exponent for schoolbook multiplication, the minimum is
-  given by the values a=1-a=1/2.
-
-  With e=log(3)/log(2), the exponent for Karatsuba (aka toom22),
-  Mulders compute (1-a) = 0.694... and we approximate a with 11/36.
-
-  Other possible approximations follow:
-  e=log(5)/log(3) [Toom-3] -> a ~= 9/40
-  e=log(7)/log(4) [Toom-4] -> a ~= 7/39
-  e=log(11)/log(6) [Toom-6] -> a ~= 1/8
-  e=log(15)/log(8) [Toom-8] -> a ~= 1/10
-
-  The values above where obtained with the following trivial commands
-  in the gp-pari shell:
-
-fun(e,a)=(1-a)^e/(1-2*a^e)
-mul(a,b,c)={local(m,x,p);if(b-c<1/10000,(b+c)/2,m=1;x=b;forstep(p=c,b,(b-c)/8,if(fun(a,p)<m,m=fun(a,p);x=p));mul(a,(b+x)/2,(c+x)/2))}
-contfracpnqn(contfrac(mul(log(2*2-1)/log(2),1/2,0),5))
-contfracpnqn(contfrac(mul(log(3*2-1)/log(3),1/2,0),5))
-contfracpnqn(contfrac(mul(log(4*2-1)/log(4),1/2,0),5))
-contfracpnqn(contfrac(mul(log(6*2-1)/log(6),1/2,0),3))
-contfracpnqn(contfrac(mul(log(8*2-1)/log(8),1/2,0),3))
-
-  ,
-  |\
-  | \
-  +----,
-  |    |
-  |    |
-  |    |\
-  |    | \
-  +----+--`
-  ^ n2 ^n1^
-
-  For an actual implementation, the assumption that M(n)=n^e is
-  incorrect, as a consequence also the assumption that ML(n)=k*M(n)
-  with a constant k is wrong.
-
-  But theory suggest us two things:
-  - the best the multiplication product is (lower e), the more k
-    approaches 1, and a approaches 0.
-
-  - A value for a smaller than optimal is probably less bad than a
-    bigger one: e.g. let e=log(3)/log(2), a=0.3058_ the optimal
-    value, and k(a)=0.808_ the mul/mullo speed ratio. We get
-    k(a+1/6)=0.929_ but k(a-1/6)=0.865_.
-*/
-
-static mp_size_t
-mpn_mullo_n_itch (mp_size_t n)
-{
-  return 2*n;
-}
-
-/*
-    mpn_dc_mullo_n requires a scratch space of 2*n limbs at tp.
-    It accepts tp == rp.
-*/
-static void
-mpn_dc_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n, mp_ptr tp)
-{
-  mp_size_t n2, n1;
-  ASSERT (n >= 2);
-  ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
-  ASSERT (MPN_SAME_OR_SEPARATE2_P(rp, n, tp, 2*n));
-
-  /* Divide-and-conquer */
-
-  /* We need fractional approximation of the value 0 < a <= 1/2
-     giving the minimum in the function k=(1-a)^e/(1-2*a^e).
-  */
-  if (MAYBE_range_basecase && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD*36/(36-11)))
-    n1 = n >> 1;
-  else if (MAYBE_range_toom22 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD*36/(36-11)))
-    n1 = n * 11 / (size_t) 36;	/* n1 ~= n*(1-.694...) */
-  else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD*40/(40-9)))
-    n1 = n * 9 / (size_t) 40;	/* n1 ~= n*(1-.775...) */
-  else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD*10/9))
-    n1 = n * 7 / (size_t) 39;	/* n1 ~= n*(1-.821...) */
-  /* n1 = n * 4 / (size_t) 31;	// n1 ~= n*(1-.871...) [TOOM66] */
-  else
-    n1 = n / (size_t) 10;		/* n1 ~= n*(1-.899...) [TOOM88] */
-
-  n2 = n - n1;
-
-  /* Split as x = x1 2^(n2 GMP_NUMB_BITS) + x0,
-	      y = y1 2^(n2 GMP_NUMB_BITS) + y0 */
-
-  /* x0 * y0 */
-  mpn_mul_n (tp, xp, yp, n2);
-  MPN_COPY (rp, tp, n2);
-
-  /* x1 * y0 * 2^(n2 GMP_NUMB_BITS) */
-  if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
-    mpn_mul_basecase (tp + n, xp + n2, n1, yp, n1);
-  else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
-    mpn_mullo_basecase (tp + n, xp + n2, yp, n1);
-  else
-    mpn_dc_mullo_n (tp + n, xp + n2, yp, n1, tp + n);
-  mpn_add_n (rp + n2, tp + n2, tp + n, n1);
-
-  /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
-  if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
-    mpn_mul_basecase (tp + n, xp, n1, yp + n2, n1);
-  else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
-    mpn_mullo_basecase (tp + n, xp, yp + n2, n1);
-  else
-    mpn_dc_mullo_n (tp + n, xp, yp + n2, n1, tp + n);
-  mpn_add_n (rp + n2, rp + n2, tp + n, n1);
-}
-
-/* Avoid zero allocations when MULLO_BASECASE_THRESHOLD is 0.  */
-#define MUL_BASECASE_ALLOC \
- (MULLO_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*MULLO_BASECASE_THRESHOLD_LIMIT)
-
-/* FIXME: This function should accept a temporary area; dc_mullow_n
-   accepts a pointer tp, and handle the case tp == rp, do the same here.
-   Maybe recombine the two functions.
-   THINK: If mpn_mul_basecase is always faster than mpn_mullo_basecase
-	  (typically thanks to mpn_addmul_2) should we unconditionally use
-	  mpn_mul_n?
-*/
-
-void
-mpn_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
-{
-  ASSERT (n >= 1);
-  ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
-
-  if (BELOW_THRESHOLD (n, MULLO_BASECASE_THRESHOLD))
-    {
-      /* Allocate workspace of fixed size on stack: fast! */
-      mp_limb_t tp[MUL_BASECASE_ALLOC];
-      mpn_mul_basecase (tp, xp, n, yp, n);
-      MPN_COPY (rp, tp, n);
-    }
-  else if (BELOW_THRESHOLD (n, MULLO_DC_THRESHOLD))
-    {
-      mpn_mullo_basecase (rp, xp, yp, n);
-    }
-  else
-    {
-      mp_ptr tp;
-      TMP_DECL;
-      TMP_MARK;
-      tp = TMP_ALLOC_LIMBS (mpn_mullo_n_itch (n));
-      if (BELOW_THRESHOLD (n, MULLO_MUL_N_THRESHOLD))
-	{
-	  mpn_dc_mullo_n (rp, xp, yp, n, tp);
-	}
-      else
-	{
-	  /* For really large operands, use plain mpn_mul_n but throw away upper n
-	     limbs of result.  */
-#if !TUNE_PROGRAM_BUILD && (MULLO_MUL_N_THRESHOLD > MUL_FFT_THRESHOLD)
-	  mpn_fft_mul (tp, xp, n, yp, n);
-#else
-	  mpn_mul_n (tp, xp, yp, n);
-#endif
-	  MPN_COPY (rp, tp, n);
-	}
-      TMP_FREE;
-    }
-}
diff --git a/gmp/mpn/generic/mullow_basecase.c b/gmp/mpn/generic/mullow_basecase.c
new file mode 100644
index 0000000000..72c48f65b4
--- /dev/null
+++ b/gmp/mpn/generic/mullow_basecase.c
@@ -0,0 +1,41 @@
+/* mpn_mullow_basecase -- Internal routine to multiply two natural
+   numbers of n limbs each and return the low n limbs of the product.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright (C) 2000, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+  FIXME: Should use mpn_addmul_2 (and higher).
+*/
+
+void
+mpn_mullow_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+  mp_size_t i;
+  /* {rp,n} = low n limbs of {up,n} * {vp,n}, built one row per limb of vp.  */
+  mpn_mul_1 (rp, up, n, vp[0]);	/* row 0 initialises {rp,n}; return value unused */
+  /* Row i starts at limb i, so only n-i limbs of up are multiplied in.  */
+  for (i = 1; i < n; i++)
+    mpn_addmul_1 (rp + i, up, n - i, vp[i]);	/* return value unused */
+}
diff --git a/gmp/mpn/generic/mullow_n.c b/gmp/mpn/generic/mullow_n.c
new file mode 100644
index 0000000000..e92a554616
--- /dev/null
+++ b/gmp/mpn/generic/mullow_n.c
@@ -0,0 +1,111 @@
+/* mpn_mullow_n -- multiply two n-limb numbers and return the low n limbs
+   of their product.
+
+   THIS IS (FOR NOW) AN INTERNAL FUNCTION.  IT IS ONLY SAFE TO REACH THIS
+   FUNCTION THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED
+   THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2004, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#ifndef MULLOW_BASECASE_THRESHOLD
+#define MULLOW_BASECASE_THRESHOLD 0	/* never use mpn_mul_basecase */
+#endif
+
+#ifndef MULLOW_DC_THRESHOLD
+#define MULLOW_DC_THRESHOLD 3*MUL_KARATSUBA_THRESHOLD
+#endif
+
+#ifndef MULLOW_MUL_N_THRESHOLD
+#define MULLOW_MUL_N_THRESHOLD 10*MULLOW_DC_THRESHOLD
+#endif
+
+/* Avoid zero allocations when MULLOW_BASECASE_THRESHOLD is 0.  */
+#define MUL_BASECASE_ALLOC \
+ (MULLOW_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*MULLOW_BASECASE_THRESHOLD_LIMIT)
+
+/*
+  FIXME: This function should accept a temporary area.
+  FIXME: Perhaps call mpn_kara_mul_n instead of mpn_mul_n?
+  THINK: If mpn_mul_basecase is always faster than mpn_mullow_basecase
+         (typically thanks to mpn_addmul_2) should we unconditionally use
+         mpn_mul_n?
+  FIXME: The recursive calls to mpn_mullow_n use sizes n/2 (one uses floor(n/2)
+         and the other ceil(n/2)).  Depending on the values of the various
+         _THRESHOLDs, this may never trigger MULLOW_BASECASE_THRESHOLD.
+	 Should we worry about this overhead?
+*/
+
+void
+mpn_mullow_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
+{				/* {rp,n} = low n limbs of {xp,n} * {yp,n} */
+  if (BELOW_THRESHOLD (n, MULLOW_BASECASE_THRESHOLD))
+    {
+      /* Allocate workspace of fixed size on stack: fast! */
+      mp_limb_t ws[MUL_BASECASE_ALLOC];
+      mpn_mul_basecase (ws, xp, n, yp, n);	/* full product into ws */
+      MPN_COPY (rp, ws, n);			/* keep only the low n limbs */
+    }
+  else if (BELOW_THRESHOLD (n, MULLOW_DC_THRESHOLD))
+    {
+      mpn_mullow_basecase (rp, xp, yp, n);
+    }
+  else if (BELOW_THRESHOLD (n, MULLOW_MUL_N_THRESHOLD))
+    {
+      /* Divide-and-conquer */
+      mp_size_t n2 = n >> 1;		/* floor(n/2) */
+      mp_size_t n1 = n - n2;		/* ceil(n/2) */
+      mp_ptr tp;
+      TMP_SDECL;
+      TMP_SMARK;
+      tp = TMP_SALLOC_LIMBS (n1);	/* holds one cross product, at most n1 limbs */
+
+      /* Split as x = x1 2^(n1 GMP_NUMB_BITS) + x0,
+                  y = y1 2^(n2 GMP_NUMB_BITS) + y0 */
+      /* so x0 and y1 have n1 limbs, x1 and y0 have n2 limbs.  */
+      /* x0 * y0 */
+      mpn_mul_n (rp, xp, yp, n2);	/* low n2 limbs of x0 times y0 -> {rp,2*n2} */
+      if (n1 != n2)			/* n odd: x0 has one more limb, xp[n2] */
+	rp[2 * n2] = mpn_addmul_1 (rp + n2, yp, n2, xp[n2]);
+      /* {rp,n} now holds x0 * y0.  */
+      /* x1 * y0 * 2^(n1 GMP_NUMB_BITS) */
+      mpn_mullow_n (tp, xp + n1, yp, n2);
+      mpn_add_n (rp + n1, rp + n1, tp, n2);	/* carry-out lies above limb n-1: unused */
+
+      /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
+      mpn_mullow_n (tp, yp + n2, xp, n1);
+      mpn_add_n (rp + n2, rp + n2, tp, n1);	/* carry-out lies above limb n-1: unused */
+      TMP_SFREE;
+    }
+  else
+    {
+      /* For really large operands, use plain mpn_mul_n but throw away upper n
+	 limbs of result.  */
+      mp_ptr tp;
+      TMP_DECL;
+      TMP_MARK;
+      tp = TMP_ALLOC_LIMBS (2 * n);
+
+      mpn_mul_n (tp, xp, yp, n);
+      MPN_COPY (rp, tp, n);
+      TMP_FREE;
+    }
+}
diff --git a/gmp/mpn/generic/mulmid.c b/gmp/mpn/generic/mulmid.c
deleted file mode 100644
index 6b4ea3253d..0000000000
--- a/gmp/mpn/generic/mulmid.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/* mpn_mulmid -- middle product
-
-   Contributed by David Harvey.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-#define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
-
-
-void
-mpn_mulmid (mp_ptr rp,
-            mp_srcptr ap, mp_size_t an,
-            mp_srcptr bp, mp_size_t bn)
-{
-  mp_size_t rn, k;
-  mp_ptr scratch, temp;
-
-  ASSERT (an >= bn);
-  ASSERT (bn >= 1);
-  ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
-  ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
-
-  if (bn < MULMID_TOOM42_THRESHOLD)
-    {
-      /* region not tall enough to make toom42 worthwhile for any portion */
-
-      if (an < CHUNK)
-	{
-	  /* region not too wide either, just call basecase directly */
-	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
-	  return;
-	}
-
-      /* Region quite wide. For better locality, use basecase on chunks:
-
-	 AAABBBCC..
-	 .AAABBBCC.
-	 ..AAABBBCC
-      */
-
-      k = CHUNK - bn + 1;    /* number of diagonals per chunk */
-
-      /* first chunk (marked A in the above diagram) */
-      mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
-
-      /* remaining chunks (B, C, etc) */
-      an -= k;
-
-      while (an >= CHUNK)
-	{
-	  mp_limb_t t0, t1, cy;
-	  ap += k, rp += k;
-	  t0 = rp[0], t1 = rp[1];
-	  mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
-	  ADDC_LIMB (cy, rp[0], rp[0], t0);    /* add back saved limbs */
-	  MPN_INCR_U (rp + 1, k + 1, t1 + cy);
-	  an -= k;
-	}
-
-      if (an >= bn)
-	{
-	  /* last remaining chunk */
-	  mp_limb_t t0, t1, cy;
-	  ap += k, rp += k;
-	  t0 = rp[0], t1 = rp[1];
-	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
-	  ADDC_LIMB (cy, rp[0], rp[0], t0);
-	  MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
-	}
-
-      return;
-    }
-
-  /* region is tall enough for toom42 */
-
-  rn = an - bn + 1;
-
-  if (rn < MULMID_TOOM42_THRESHOLD)
-    {
-      /* region not wide enough to make toom42 worthwhile for any portion */
-
-      TMP_DECL;
-
-      if (bn < CHUNK)
-	{
-	  /* region not too tall either, just call basecase directly */
-	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
-	  return;
-	}
-
-      /* Region quite tall. For better locality, use basecase on chunks:
-
-	 AAAAA....
-	 .AAAAA...
-	 ..BBBBB..
-	 ...BBBBB.
-	 ....CCCCC
-      */
-
-      TMP_MARK;
-
-      temp = TMP_ALLOC_LIMBS (rn + 2);
-
-      /* first chunk (marked A in the above diagram) */
-      bp += bn - CHUNK, an -= bn - CHUNK;
-      mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
-
-      /* remaining chunks (B, C, etc) */
-      bn -= CHUNK;
-
-      while (bn >= CHUNK)
-	{
-	  ap += CHUNK, bp -= CHUNK;
-	  mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
-	  mpn_add_n (rp, rp, temp, rn + 2);
-	  bn -= CHUNK;
-	}
-
-      if (bn)
-	{
-	  /* last remaining chunk */
-	  ap += CHUNK, bp -= bn;
-	  mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
-	  mpn_add_n (rp, rp, temp, rn + 2);
-	}
-
-      TMP_FREE;
-      return;
-    }
-
-  /* we're definitely going to use toom42 somewhere */
-
-  if (bn > rn)
-    {
-      /* slice region into chunks, use toom42 on all chunks except possibly
-	 the last:
-
-         AA....
-         .AA...
-         ..BB..
-         ...BB.
-         ....CC
-      */
-
-      TMP_DECL;
-      TMP_MARK;
-
-      temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
-      scratch = temp + rn + 2;
-
-      /* first chunk (marked A in the above diagram) */
-      bp += bn - rn;
-      mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
-
-      /* remaining chunks (B, C, etc) */
-      bn -= rn;
-
-      while (bn >= rn)
-        {
-          ap += rn, bp -= rn;
-	  mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
-          mpn_add_n (rp, rp, temp, rn + 2);
-          bn -= rn;
-        }
-
-      if (bn)
-        {
-          /* last remaining chunk */
-          ap += rn, bp -= bn;
-	  mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
-          mpn_add_n (rp, rp, temp, rn + 2);
-        }
-
-      TMP_FREE;
-    }
-  else
-    {
-      /* slice region into chunks, use toom42 on all chunks except possibly
-	 the last:
-
-         AAABBBCC..
-         .AAABBBCC.
-         ..AAABBBCC
-      */
-
-      TMP_DECL;
-      TMP_MARK;
-
-      scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
-
-      /* first chunk (marked A in the above diagram) */
-      mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
-
-      /* remaining chunks (B, C, etc) */
-      rn -= bn;
-
-      while (rn >= bn)
-        {
-	  mp_limb_t t0, t1, cy;
-          ap += bn, rp += bn;
-          t0 = rp[0], t1 = rp[1];
-          mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
-	  ADDC_LIMB (cy, rp[0], rp[0], t0);     /* add back saved limbs */
-	  MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
-	  rn -= bn;
-        }
-
-      TMP_FREE;
-
-      if (rn)
-        {
-          /* last remaining chunk */
-	  mp_limb_t t0, t1, cy;
-          ap += bn, rp += bn;
-          t0 = rp[0], t1 = rp[1];
-          mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
-	  ADDC_LIMB (cy, rp[0], rp[0], t0);
-	  MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
-        }
-    }
-}
diff --git a/gmp/mpn/generic/mulmid_basecase.c b/gmp/mpn/generic/mulmid_basecase.c
deleted file mode 100644
index 400e976424..0000000000
--- a/gmp/mpn/generic/mulmid_basecase.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/* mpn_mulmid_basecase -- classical middle product algorithm
-
-   Contributed by David Harvey.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Middle product of {up,un} and {vp,vn}, write result to {rp,un-vn+3}.
-   Must have un >= vn >= 1.
-
-   Neither input buffer may overlap with the output buffer. */
-
-void
-mpn_mulmid_basecase (mp_ptr rp,
-                     mp_srcptr up, mp_size_t un,
-                     mp_srcptr vp, mp_size_t vn)
-{
-  mp_limb_t lo, hi;  /* last two limbs of output */
-  mp_limb_t cy;
-
-  ASSERT (un >= vn);
-  ASSERT (vn >= 1);
-  ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, up, un));
-  ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, vp, vn));
-
-  up += vn - 1;
-  un -= vn - 1;
-
-  /* multiply by first limb, store result */
-  lo = mpn_mul_1 (rp, up, un, vp[0]);
-  hi = 0;
-
-  /* accumulate remaining rows */
-  for (vn--; vn; vn--)
-    {
-      up--, vp++;
-      cy = mpn_addmul_1 (rp, up, un, vp[0]);
-      add_ssaaaa (hi, lo, hi, lo, CNST_LIMB(0), cy);
-    }
-
-  /* store final limbs */
-#if GMP_NAIL_BITS != 0
-  hi = (hi << GMP_NAIL_BITS) + (lo >> GMP_NUMB_BITS);
-  lo &= GMP_NUMB_MASK;
-#endif
-
-  rp[un] = lo;
-  rp[un + 1] = hi;
-}
diff --git a/gmp/mpn/generic/mulmid_n.c b/gmp/mpn/generic/mulmid_n.c
deleted file mode 100644
index 2280ba3a36..0000000000
--- a/gmp/mpn/generic/mulmid_n.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* mpn_mulmid_n -- balanced middle product
-
-   Contributed by David Harvey.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-void
-mpn_mulmid_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
-  ASSERT (n >= 1);
-  ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
-  ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
-
-  if (n < MULMID_TOOM42_THRESHOLD)
-    {
-      mpn_mulmid_basecase (rp, ap, 2*n - 1, bp, n);
-    }
-  else
-    {
-      mp_ptr scratch;
-      TMP_DECL;
-      TMP_MARK;
-      scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (n));
-      mpn_toom42_mulmid (rp, ap, bp, n, scratch);
-      TMP_FREE;
-    }
-}
diff --git a/gmp/mpn/generic/mulmod_bnm1.c b/gmp/mpn/generic/mulmod_bnm1.c
deleted file mode 100644
index 8710324583..0000000000
--- a/gmp/mpn/generic/mulmod_bnm1.c
+++ /dev/null
@@ -1,355 +0,0 @@
-/* mulmod_bnm1.c -- multiplication mod B^n-1.
-
-   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
-   Marco Bodrato.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
-   mod B^rn - 1, and values are semi-normalised; zero is represented
-   as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
-   tp==rp is allowed. */
-void
-mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
-		    mp_ptr tp)
-{
-  mp_limb_t cy;
-
-  ASSERT (0 < rn);
-
-  mpn_mul_n (tp, ap, bp, rn);
-  cy = mpn_add_n (rp, tp, tp + rn, rn);
-  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
-   * be no overflow when adding in the carry. */
-  MPN_INCR_U (rp, rn, cy);
-}
-
-
-/* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
-   semi-normalised representation, computation is mod B^rn + 1. Needs
-   a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
-   Output is normalised. */
-static void
-mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
-		    mp_ptr tp)
-{
-  mp_limb_t cy;
-
-  ASSERT (0 < rn);
-
-  mpn_mul_n (tp, ap, bp, rn + 1);
-  ASSERT (tp[2*rn+1] == 0);
-  ASSERT (tp[2*rn] < GMP_NUMB_MAX);
-  cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
-  rp[rn] = 0;
-  MPN_INCR_U (rp, rn+1, cy );
-}
-
-
-/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
- *
- * The result is expected to be ZERO if and only if one of the operand
- * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
- * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
- * combine results and obtain a natural number when one knows in
- * advance that the final value is less than (B^rn-1).
- * Moreover it should not be a problem if mulmod_bnm1 is used to
- * compute the full product with an+bn <= rn, because this condition
- * implies (B^an-1)(B^bn-1) < (B^rn-1) .
- *
- * Requires 0 < bn <= an <= rn and an + bn > rn/2
- * Scratch need: rn + (need for recursive call OR rn + 4). This gives
- *
- * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
- */
-void
-mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
-{
-  ASSERT (0 < bn);
-  ASSERT (bn <= an);
-  ASSERT (an <= rn);
-
-  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
-    {
-      if (UNLIKELY (bn < rn))
-	{
-	  if (UNLIKELY (an + bn <= rn))
-	    {
-	      mpn_mul (rp, ap, an, bp, bn);
-	    }
-	  else
-	    {
-	      mp_limb_t cy;
-	      mpn_mul (tp, ap, an, bp, bn);
-	      cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
-	      MPN_INCR_U (rp, rn, cy);
-	    }
-	}
-      else
-	mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
-    }
-  else
-    {
-      mp_size_t n;
-      mp_limb_t cy;
-      mp_limb_t hi;
-
-      n = rn >> 1;
-
-      /* We need at least an + bn >= n, to be able to fit one of the
-	 recursive products at rp. Requiring strict inequality makes
-	 the coded slightly simpler. If desired, we could avoid this
-	 restriction by initially halving rn as long as rn is even and
-	 an + bn <= rn/2. */
-
-      ASSERT (an + bn > n);
-
-      /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
-	 and crt together as
-
-	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
-      */
-
-#define a0 ap
-#define a1 (ap + n)
-#define b0 bp
-#define b1 (bp + n)
-
-#define xp  tp	/* 2n + 2 */
-      /* am1  maybe in {xp, n} */
-      /* bm1  maybe in {xp + n, n} */
-#define sp1 (tp + 2*n + 2)
-      /* ap1  maybe in {sp1, n + 1} */
-      /* bp1  maybe in {sp1 + n + 1, n + 1} */
-
-      {
-	mp_srcptr am1, bm1;
-	mp_size_t anm, bnm;
-	mp_ptr so;
-
-	bm1 = b0;
-	bnm = bn;
-	if (LIKELY (an > n))
-	  {
-	    am1 = xp;
-	    cy = mpn_add (xp, a0, n, a1, an - n);
-	    MPN_INCR_U (xp, n, cy);
-	    anm = n;
-	    so = xp + n;
-	    if (LIKELY (bn > n))
-	      {
-		bm1 = so;
-		cy = mpn_add (so, b0, n, b1, bn - n);
-		MPN_INCR_U (so, n, cy);
-		bnm = n;
-		so += n;
-	      }
-	  }
-	else
-	  {
-	    so = xp;
-	    am1 = a0;
-	    anm = an;
-	  }
-
-	mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
-      }
-
-      {
-	int       k;
-	mp_srcptr ap1, bp1;
-	mp_size_t anp, bnp;
-
-	bp1 = b0;
-	bnp = bn;
-	if (LIKELY (an > n)) {
-	  ap1 = sp1;
-	  cy = mpn_sub (sp1, a0, n, a1, an - n);
-	  sp1[n] = 0;
-	  MPN_INCR_U (sp1, n + 1, cy);
-	  anp = n + ap1[n];
-	  if (LIKELY (bn > n)) {
-	    bp1 = sp1 + n + 1;
-	    cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
-	    sp1[2*n+1] = 0;
-	    MPN_INCR_U (sp1 + n + 1, n + 1, cy);
-	    bnp = n + bp1[n];
-	  }
-	} else {
-	  ap1 = a0;
-	  anp = an;
-	}
-
-	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
-	  k=0;
-	else
-	  {
-	    int mask;
-	    k = mpn_fft_best_k (n, 0);
-	    mask = (1<<k) - 1;
-	    while (n & mask) {k--; mask >>=1;};
-	  }
-	if (k >= FFT_FIRST_K)
-	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
-	else if (UNLIKELY (bp1 == b0))
-	  {
-	    ASSERT (anp + bnp <= 2*n+1);
-	    ASSERT (anp + bnp > n);
-	    ASSERT (anp >= bnp);
-	    mpn_mul (xp, ap1, anp, bp1, bnp);
-	    anp = anp + bnp - n;
-	    ASSERT (anp <= n || xp[2*n]==0);
-	    anp-= anp > n;
-	    cy = mpn_sub (xp, xp, n, xp + n, anp);
-	    xp[n] = 0;
-	    MPN_INCR_U (xp, n+1, cy);
-	  }
-	else
-	  mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
-      }
-
-      /* Here the CRT recomposition begins.
-
-	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
-	 Division by 2 is a bitwise rotation.
-
-	 Assumes xp normalised mod (B^n+1).
-
-	 The residue class [0] is represented by [B^n-1]; except when
-	 both input are ZERO.
-      */
-
-#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
-#if HAVE_NATIVE_mpn_rsh1add_nc
-      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
-      hi = cy << (GMP_NUMB_BITS - 1);
-      cy = 0;
-      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
-	 overflows, i.e. a further increment will not overflow again. */
-#else /* ! _nc */
-      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
-      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
-      cy >>= 1;
-      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
-	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
-#endif
-#if GMP_NAIL_BITS == 0
-      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
-#else
-      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
-      rp[n-1] ^= hi;
-#endif
-#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
-#if HAVE_NATIVE_mpn_add_nc
-      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
-#else /* ! _nc */
-      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
-#endif
-      cy += (rp[0]&1);
-      mpn_rshift(rp, rp, n, 1);
-      ASSERT (cy <= 2);
-      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
-      cy >>= 1;
-      /* We can have cy != 0 only if hi = 0... */
-      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
-      rp[n-1] |= hi;
-      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
-#endif
-      ASSERT (cy <= 1);
-      /* Next increment can not overflow, read the previous comments about cy. */
-      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
-      MPN_INCR_U(rp, n, cy);
-
-      /* Compute the highest half:
-	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
-       */
-      if (UNLIKELY (an + bn < rn))
-	{
-	  /* Note that in this case, the only way the result can equal
-	     zero mod B^{rn} - 1 is if one of the inputs is zero, and
-	     then the output of both the recursive calls and this CRT
-	     reconstruction is zero, not B^{rn} - 1. Which is good,
-	     since the latter representation doesn't fit in the output
-	     area.*/
-	  cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);
-
-	  /* FIXME: This subtraction of the high parts is not really
-	     necessary, we do it to get the carry out, and for sanity
-	     checking. */
-	  cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
-				   xp + an + bn - n, rn - (an + bn), cy);
-	  ASSERT (an + bn == rn - 1 ||
-		  mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
-	  cy = mpn_sub_1 (rp, rp, an + bn, cy);
-	  ASSERT (cy == (xp + an + bn - n)[0]);
-	}
-      else
-	{
-	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
-	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
-	     DECR will affect _at most_ the lowest n limbs. */
-	  MPN_DECR_U (rp, 2*n, cy);
-	}
-#undef a0
-#undef a1
-#undef b0
-#undef b1
-#undef xp
-#undef sp1
-    }
-}
-
-mp_size_t
-mpn_mulmod_bnm1_next_size (mp_size_t n)
-{
-  mp_size_t nh;
-
-  if (BELOW_THRESHOLD (n,     MULMOD_BNM1_THRESHOLD))
-    return n;
-  if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
-    return (n + (2-1)) & (-2);
-  if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
-    return (n + (4-1)) & (-4);
-
-  nh = (n + 1) >> 1;
-
-  if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
-    return (n + (8-1)) & (-8);
-
-  return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));
-}
diff --git a/gmp/mpn/generic/neg.c b/gmp/mpn/generic/neg.c
deleted file mode 100644
index 2d752e912d..0000000000
--- a/gmp/mpn/generic/neg.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* mpn_neg - negate an mpn.
-
-Copyright 2001, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define __GMP_FORCE_mpn_neg 1
-
-#include "gmp.h"
-#include "gmp-impl.h"
diff --git a/gmp/mpn/generic/neg_n.c b/gmp/mpn/generic/neg_n.c
new file mode 100644
index 0000000000..1609204c90
--- /dev/null
+++ b/gmp/mpn/generic/neg_n.c
@@ -0,0 +1,23 @@
+/* mpn_neg_n - negate an mpn.
+
+Copyright 2001, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define __GMP_FORCE_mpn_neg_n 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/gmp/mpn/generic/nussbaumer_mul.c b/gmp/mpn/generic/nussbaumer_mul.c
deleted file mode 100644
index d2bf19ad56..0000000000
--- a/gmp/mpn/generic/nussbaumer_mul.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/* mpn_nussbaumer_mul -- Multiply {ap,an} and {bp,bn} using
-   Nussbaumer's negacyclic convolution.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Multiply {ap,an} by {bp,bn}, and put the result in {pp, an+bn} */
-void
-mpn_nussbaumer_mul (mp_ptr pp,
-		    mp_srcptr ap, mp_size_t an,
-		    mp_srcptr bp, mp_size_t bn)
-{
-  mp_size_t rn;
-  mp_ptr tp;
-  TMP_DECL;
-
-  ASSERT (an >= bn);
-  ASSERT (bn > 0);
-
-  TMP_MARK;
-
-  if ((ap == bp) && (an == bn))
-    {
-      rn = mpn_sqrmod_bnm1_next_size (2*an);
-      tp = TMP_ALLOC_LIMBS (mpn_sqrmod_bnm1_itch (rn, an));
-      mpn_sqrmod_bnm1 (pp, rn, ap, an, tp);
-    }
-  else
-    {
-      rn = mpn_mulmod_bnm1_next_size (an + bn);
-      tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (rn, an, bn));
-      mpn_mulmod_bnm1 (pp, rn, ap, an, bp, bn, tp);
-    }
-
-  TMP_FREE;
-}
diff --git a/gmp/mpn/generic/perfpow.c b/gmp/mpn/generic/perfpow.c
deleted file mode 100644
index bbed6309d5..0000000000
--- a/gmp/mpn/generic/perfpow.c
+++ /dev/null
@@ -1,417 +0,0 @@
-/* mpn_perfect_power_p -- mpn perfect power detection.
-
-   Contributed to the GNU project by Martin Boij.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#define SMALL 20
-#define MEDIUM 100
-
-/* Return non-zero if {np,nn} == {xp,xn} ^ k.
-   Algorithm:
-       For s = 1, 2, 4, ..., s_max, compute the s least significant limbs of
-       {xp,xn}^k. Stop if they don't match the s least significant limbs of
-       {np,nn}.
-
-   FIXME: Low xn limbs can be expected to always match, if computed as a mod
-   B^{xn} root. So instead of using mpn_powlo, compute an approximation of the
-   most significant (normalized) limb of {xp,xn} ^ k (and an error bound), and
-   compare to {np, nn}. Or use an even cruder approximation based on fix-point
-   base 2 logarithm.  */
-static int
-pow_equals (mp_srcptr np, mp_size_t n,
-	    mp_srcptr xp,mp_size_t xn,
-	    mp_limb_t k, mp_bitcnt_t f,
-	    mp_ptr tp)
-{
-  mp_limb_t *tp2;
-  mp_bitcnt_t y, z;
-  mp_size_t i, bn;
-  int ans;
-  mp_limb_t h, l;
-  TMP_DECL;
-
-  ASSERT (n > 1 || (n == 1 && np[0] > 1));
-  ASSERT (np[n - 1] > 0);
-  ASSERT (xn > 0);
-
-  if (xn == 1 && xp[0] == 1)
-    return 0;
-
-  z = 1 + (n >> 1);
-  for (bn = 1; bn < z; bn <<= 1)
-    {
-      mpn_powlo (tp, xp, &k, 1, bn, tp + bn);
-      if (mpn_cmp (tp, np, bn) != 0)
-	return 0;
-    }
-
-  TMP_MARK;
-
-  /* Final check. Estimate the size of {xp,xn}^k before computing the power
-     with full precision.  Optimization: It might pay off to make a more
-     accurate estimation of the logarithm of {xp,xn}, rather than using the
-     index of the MSB.  */
-
-  MPN_SIZEINBASE_2EXP(y, xp, xn, 1);
-  y -= 1;  /* msb_index (xp, xn) */
-
-  umul_ppmm (h, l, k, y);
-  h -= l == 0;  l--;	/* two-limb decrement */
-
-  z = f - 1; /* msb_index (np, n) */
-  if (h == 0 && l <= z)
-    {
-      mp_limb_t size;
-      size = l + k;
-      ASSERT_ALWAYS (size >= k);
-
-      y = 2 + size / GMP_LIMB_BITS;
-      tp2 = TMP_ALLOC_LIMBS (y);
-
-      i = mpn_pow_1 (tp, xp, xn, k, tp2);
-      if (i == n && mpn_cmp (tp, np, n) == 0)
-	ans = 1;
-      else
-	ans = 0;
-    }
-  else
-    {
-      ans = 0;
-    }
-
-  TMP_FREE;
-  return ans;
-}
-
-
-/* Return non-zero if N = {np,n} is a kth power.
-   I = {ip,n} = N^(-1) mod B^n.  */
-static int
-is_kth_power (mp_ptr rp, mp_srcptr np,
-	      mp_limb_t k, mp_srcptr ip,
-	      mp_size_t n, mp_bitcnt_t f,
-	      mp_ptr tp)
-{
-  mp_bitcnt_t b;
-  mp_size_t rn, xn;
-
-  ASSERT (n > 0);
-  ASSERT ((k & 1) != 0 || k == 2);
-  ASSERT ((np[0] & 1) != 0);
-
-  if (k == 2)
-    {
-      b = (f + 1) >> 1;
-      rn = 1 + b / GMP_LIMB_BITS;
-      if (mpn_bsqrtinv (rp, ip, b, tp) != 0)
-	{
-	  rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
-	  xn = rn;
-	  MPN_NORMALIZE (rp, xn);
-	  if (pow_equals (np, n, rp, xn, k, f, tp) != 0)
-	    return 1;
-
-	  /* Check if (2^b - r)^2 == n */
-	  mpn_neg (rp, rp, rn);
-	  rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
-	  MPN_NORMALIZE (rp, rn);
-	  if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
-	    return 1;
-	}
-    }
-  else
-    {
-      b = 1 + (f - 1) / k;
-      rn = 1 + (b - 1) / GMP_LIMB_BITS;
-      mpn_brootinv (rp, ip, rn, k, tp);
-      if ((b % GMP_LIMB_BITS) != 0)
-	rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
-      MPN_NORMALIZE (rp, rn);
-      if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
-	return 1;
-    }
-  MPN_ZERO (rp, rn); /* Untrash rp */
-  return 0;
-}
-
-static int
-perfpow (mp_srcptr np, mp_size_t n,
-	 mp_limb_t ub, mp_limb_t g,
-	 mp_bitcnt_t f, int neg)
-{
-  mp_ptr ip, tp, rp;
-  mp_limb_t k;
-  int ans;
-  mp_bitcnt_t b;
-  gmp_primesieve_t ps;
-  TMP_DECL;
-
-  ASSERT (n > 0);
-  ASSERT ((np[0] & 1) != 0);
-  ASSERT (ub > 0);
-
-  TMP_MARK;
-  gmp_init_primesieve (&ps);
-  b = (f + 3) >> 1;
-
-  ip = TMP_ALLOC_LIMBS (n);
-  rp = TMP_ALLOC_LIMBS (n);
-  tp = TMP_ALLOC_LIMBS (5 * n);		/* FIXME */
-  MPN_ZERO (rp, n);
-
-  /* FIXME: It seems the inverse in ninv is needed only to get non-inverted
-     roots. I.e., is_kth_power computes n^{1/2} as (n^{-1})^{-1/2} and
-     similarly for nth roots. It should be more efficient to compute n^{1/2} as
-     n * n^{-1/2}, with a mullo instead of a binvert. And we can do something
-     similar for kth roots if we switch to an iteration converging to n^{1/k -
-     1}, and we can then eliminate this binvert call. */
-  mpn_binvert (ip, np, 1 + (b - 1) / GMP_LIMB_BITS, tp);
-  if (b % GMP_LIMB_BITS)
-    ip[(b - 1) / GMP_LIMB_BITS] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
-
-  if (neg)
-    gmp_nextprime (&ps);
-
-  ans = 0;
-  if (g > 0)
-    {
-      ub = MIN (ub, g + 1);
-      while ((k = gmp_nextprime (&ps)) < ub)
-	{
-	  if ((g % k) == 0)
-	    {
-	      if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
-		{
-		  ans = 1;
-		  goto ret;
-		}
-	    }
-	}
-    }
-  else
-    {
-      while ((k = gmp_nextprime (&ps)) < ub)
-	{
-	  if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
-	    {
-	      ans = 1;
-	      goto ret;
-	    }
-	}
-    }
- ret:
-  TMP_FREE;
-  return ans;
-}
-
-static const unsigned short nrtrial[] = { 100, 500, 1000 };
-
-/* Table of (log_{p_i} 2) values, where p_i is the (nrtrial[i] + 1)'th prime
-   number.  */
-static const double logs[] =
-  { 0.1099457228193620, 0.0847016403115322, 0.0772048195144415 };
-
-int
-mpn_perfect_power_p (mp_srcptr np, mp_size_t n)
-{
-  mp_size_t ncn, s, pn, xn;
-  mp_limb_t *nc, factor, g;
-  mp_limb_t exp, *prev, *next, d, l, r, c, *tp, cry;
-  mp_bitcnt_t twos, count;
-  int ans, where, neg, trial;
-  TMP_DECL;
-
-  nc = (mp_ptr) np;
-
-  neg = 0;
-  if (n < 0)
-    {
-      neg = 1;
-      n = -n;
-    }
-
-  if (n == 0 || (n == 1 && np[0] == 1))
-    return 1;
-
-  TMP_MARK;
-
-  g = 0;
-
-  ncn = n;
-  twos = mpn_scan1 (np, 0);
-  if (twos > 0)
-    {
-      if (twos == 1)
-	{
-	  ans = 0;
-	  goto ret;
-	}
-      s = twos / GMP_LIMB_BITS;
-      if (s + 1 == n && POW2_P (np[s]))
-	{
-	  ans = ! (neg && POW2_P (twos));
-	  goto ret;
-	}
-      count = twos % GMP_LIMB_BITS;
-      ncn = n - s;
-      nc = TMP_ALLOC_LIMBS (ncn);
-      if (count > 0)
-	{
-	  mpn_rshift (nc, np + s, ncn, count);
-	  ncn -= (nc[ncn - 1] == 0);
-	}
-      else
-	{
-	  MPN_COPY (nc, np + s, ncn);
-	}
-      g = twos;
-    }
-
-  if (ncn <= SMALL)
-    trial = 0;
-  else if (ncn <= MEDIUM)
-    trial = 1;
-  else
-    trial = 2;
-
-  where = 0;
-  factor = mpn_trialdiv (nc, ncn, nrtrial[trial], &where);
-
-  if (factor != 0)
-    {
-      if (twos == 0)
-	{
-	  nc = TMP_ALLOC_LIMBS (ncn);
-	  MPN_COPY (nc, np, ncn);
-	}
-
-      /* Remove factors found by trialdiv.  Optimization: Perhaps better to use
-	 the strategy in mpz_remove ().  */
-      prev = TMP_ALLOC_LIMBS (ncn + 2);
-      next = TMP_ALLOC_LIMBS (ncn + 2);
-      tp = TMP_ALLOC_LIMBS (4 * ncn);
-
-      do
-	{
-	  binvert_limb (d, factor);
-	  prev[0] = d;
-	  pn = 1;
-	  exp = 1;
-	  while (2 * pn - 1 <= ncn)
-	    {
-	      mpn_sqr (next, prev, pn);
-	      xn = 2 * pn;
-	      xn -= (next[xn - 1] == 0);
-
-	      if (mpn_divisible_p (nc, ncn, next, xn) == 0)
-		break;
-
-	      exp <<= 1;
-	      pn = xn;
-	      MP_PTR_SWAP (next, prev);
-	    }
-
-	  /* Binary search for the exponent */
-	  l = exp + 1;
-	  r = 2 * exp - 1;
-	  while (l <= r)
-	    {
-	      c = (l + r) >> 1;
-	      if (c - exp > 1)
-		{
-		  xn = mpn_pow_1 (tp, &d, 1, c - exp, next);
-		  if (pn + xn - 1 > ncn)
-		    {
-		      r = c - 1;
-		      continue;
-		    }
-		  mpn_mul (next, prev, pn, tp, xn);
-		  xn += pn;
-		  xn -= (next[xn - 1] == 0);
-		}
-	      else
-		{
-		  cry = mpn_mul_1 (next, prev, pn, d);
-		  next[pn] = cry;
-		  xn = pn + (cry != 0);
-		}
-
-	      if (mpn_divisible_p (nc, ncn, next, xn) == 0)
-		{
-		  r = c - 1;
-		}
-	      else
-		{
-		  exp = c;
-		  l = c + 1;
-		  MP_PTR_SWAP (next, prev);
-		  pn = xn;
-		}
-	    }
-
-	  if (g == 0)
-	    g = exp;
-	  else
-	    g = mpn_gcd_1 (&g, 1, exp);
-
-	  if (g == 1)
-	    {
-	      ans = 0;
-	      goto ret;
-	    }
-
-	  mpn_divexact (next, nc, ncn, prev, pn);
-	  ncn = ncn - pn;
-	  ncn += next[ncn] != 0;
-	  MPN_COPY (nc, next, ncn);
-
-	  if (ncn == 1 && nc[0] == 1)
-	    {
-	      ans = ! (neg && POW2_P (g));
-	      goto ret;
-	    }
-
-	  factor = mpn_trialdiv (nc, ncn, nrtrial[trial], &where);
-	}
-      while (factor != 0);
-    }
-
-  MPN_SIZEINBASE_2EXP(count, nc, ncn, 1);   /* log (nc) + 1 */
-  d = (mp_limb_t) (count * logs[trial] + 1e-9) + 1;
-  ans = perfpow (nc, ncn, d, g, count, neg);
-
- ret:
-  TMP_FREE;
-  return ans;
-}
diff --git a/gmp/mpn/generic/perfsqr.c b/gmp/mpn/generic/perfsqr.c
index bdd82ccd96..1995a944df 100644
--- a/gmp/mpn/generic/perfsqr.c
+++ b/gmp/mpn/generic/perfsqr.c
@@ -1,34 +1,23 @@
 /* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
    zero otherwise.
 
-Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2012 Free Software
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005 Free Software
 Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include <stdio.h> /* for NULL */
 #include "gmp.h"
@@ -113,20 +102,20 @@ see https://www.gnu.org/licenses/.  */
 /* FIXME: The %= here isn't good, and might destroy any savings from keeping
    the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).
    Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor
-   and a shift count, like mpn_preinv_divrem_1.  But mod_34lsub1 is our
-   normal case, so lets not worry too much about mod_1.  */
-#define PERFSQR_MOD_PP(r, up, usize)					\
-  do {									\
-    if (BELOW_THRESHOLD (usize, PREINV_MOD_1_TO_MOD_1_THRESHOLD))	\
-      {									\
-	(r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM,		\
-				PERFSQR_PP_INVERTED);			\
-	(r) %= PERFSQR_PP;						\
-      }									\
-    else								\
-      {									\
-	(r) = mpn_mod_1 (up, usize, PERFSQR_PP);			\
-      }									\
+   and a shift count, like mpn_preinv_divrem_1.	 But mod_34lsub1 is our
+   normal case, so lets not worry too much about mod_1.	 */
+#define PERFSQR_MOD_PP(r, up, usize)				\
+  do {								\
+    if (USE_PREINV_MOD_1)					\
+      {								\
+	(r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM,	\
+				PERFSQR_PP_INVERTED);		\
+	(r) %= PERFSQR_PP;					\
+      }								\
+    else							\
+      {								\
+	(r) = mpn_mod_1 (up, usize, PERFSQR_PP);		\
+      }								\
   } while (0)
 
 #define PERFSQR_MOD_IDX(idx, r, d, inv)				\
@@ -156,7 +145,7 @@ see https://www.gnu.org/licenses/.  */
   } while (0)
 
 /* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the
-   sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch.  */
+   sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch.	*/
 #define PERFSQR_MOD_2(r, d, inv, mhi, mlo)			\
   do {								\
     mp_limb_t  m;						\
@@ -196,7 +185,7 @@ mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
   /* Check that we have even multiplicity of 2, and then check that the rest is
      a possible perfect square.  Leave disabled until we can determine this
      really is an improvement.  It it is, it could completely replace the
-     simple probe above, since this should throw out more non-squares, but at
+     simple probe above, since this should throw out more non-squares, but at
      the expense of somewhat more cycles.  */
   {
     mp_limb_t lo;
@@ -229,7 +218,7 @@ mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
     TMP_DECL;
 
     TMP_MARK;
-    root_ptr = TMP_ALLOC_LIMBS ((usize + 1) / 2);
+    root_ptr = (mp_ptr) TMP_ALLOC ((usize + 1) / 2 * BYTES_PER_MP_LIMB);
 
     /* Iff mpn_sqrtrem returns zero, the square is perfect.  */
     res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);
diff --git a/gmp/mpn/generic/popham.c b/gmp/mpn/generic/popham.c
index 13e529b7cd..be7c525036 100644
--- a/gmp/mpn/generic/popham.c
+++ b/gmp/mpn/generic/popham.c
@@ -1,33 +1,21 @@
 /* mpn_popcount, mpn_hamdist -- mpn bit population count/hamming distance.
 
-Copyright 1994, 1996, 2000-2002, 2005, 2011, 2012 Free Software Foundation,
-Inc.
+Copyright 1994, 1996, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -42,14 +30,14 @@ see https://www.gnu.org/licenses/.  */
 #define POPHAM(u,v) u ^ v
 #endif
 
-mp_bitcnt_t
+unsigned long
 FNAME (mp_srcptr up,
 #if OPERATION_hamdist
        mp_srcptr vp,
 #endif
-       mp_size_t n) __GMP_NOTHROW
+       mp_size_t n)
 {
-  mp_bitcnt_t result = 0;
+  unsigned long result = 0;
   mp_limb_t p0, p1, p2, p3, x, p01, p23;
   mp_size_t i;
 
diff --git a/gmp/mpn/generic/pow_1.c b/gmp/mpn/generic/pow_1.c
index 2333206554..4bc9f434bc 100644
--- a/gmp/mpn/generic/pow_1.c
+++ b/gmp/mpn/generic/pow_1.c
@@ -4,33 +4,22 @@
    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
    FUTURE GNU MP RELEASES.
 
-Copyright 2002, 2014 Free Software Foundation, Inc.
+Copyright 2002 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 #include "gmp.h"
@@ -45,9 +34,6 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
   mp_size_t rn;
   int par;
 
-  ASSERT (bn >= 1);
-  /* FIXME: Add operand overlap criteria */
-
   if (exp <= 1)
     {
       if (exp == 0)
@@ -68,13 +54,11 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
      so much time that the slowness of this code will be negligible.  */
   par = 0;
   cnt = GMP_LIMB_BITS;
-  x = exp;
-  do
+  for (x = exp; x != 0; x >>= 1)
     {
-      par ^= x;
+      par ^= x & 1;
       cnt--;
-      x >>= 1;
-    } while (x != 0);
+    }
   exp <<= cnt;
 
   if (bn == 1)
@@ -84,7 +68,7 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
       if ((cnt & 1) != 0)
 	MP_PTR_SWAP (rp, tp);
 
-      mpn_sqr (rp, bp, bn);
+      mpn_sqr_n (rp, bp, bn);
       rn = 2 * bn; rn -= rp[rn - 1] == 0;
 
       for (i = GMP_LIMB_BITS - cnt - 1;;)
@@ -99,7 +83,7 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
 	  if (--i == 0)
 	    break;
 
-	  mpn_sqr (tp, rp, rn);
+	  mpn_sqr_n (tp, rp, rn);
 	  rn = 2 * rn; rn -= tp[rn - 1] == 0;
 	  MP_PTR_SWAP (rp, tp);
 	}
@@ -109,7 +93,7 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
       if (((par ^ cnt) & 1) == 0)
 	MP_PTR_SWAP (rp, tp);
 
-      mpn_sqr (rp, bp, bn);
+      mpn_sqr_n (rp, bp, bn);
       rn = 2 * bn; rn -= rp[rn - 1] == 0;
 
       for (i = GMP_LIMB_BITS - cnt - 1;;)
@@ -124,7 +108,7 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
 	  if (--i == 0)
 	    break;
 
-	  mpn_sqr (tp, rp, rn);
+	  mpn_sqr_n (tp, rp, rn);
 	  rn = 2 * rn; rn -= tp[rn - 1] == 0;
 	  MP_PTR_SWAP (rp, tp);
 	}
diff --git a/gmp/mpn/generic/powlo.c b/gmp/mpn/generic/powlo.c
index adcd96eb51..ca3e1e9448 100644
--- a/gmp/mpn/generic/powlo.c
+++ b/gmp/mpn/generic/powlo.c
@@ -1,32 +1,21 @@
-/* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base.
+/* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base.
 
-Copyright 2007-2009, 2012 Free Software Foundation, Inc.
+Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 #include "gmp.h"
@@ -38,7 +27,7 @@ see https://www.gnu.org/licenses/.  */
   ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
 
 static inline mp_limb_t
-getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
+getbits (const mp_limb_t *p, unsigned long bi, int nbits)
 {
   int nbits_in_r;
   mp_limb_t r;
@@ -51,10 +40,10 @@ getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
   else
     {
       bi -= nbits;			/* bit index of low bit to extract */
-      i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
-      bi %= GMP_NUMB_BITS;		/* bit index in low word */
+      i = bi / GMP_LIMB_BITS;		/* word index of low bit to extract */
+      bi %= GMP_LIMB_BITS;		/* bit index in low word */
       r = p[i] >> bi;			/* extract (low) bits */
-      nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
+      nbits_in_r = GMP_LIMB_BITS - bi;	/* number of bits now in r */
       if (nbits_in_r < nbits)		/* did we get enough bits? */
 	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
       return r & (((mp_limb_t ) 1 << nbits) - 1);
@@ -62,16 +51,16 @@ getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
 }
 
 static inline int
-win_size (mp_bitcnt_t eb)
+win_size (unsigned long eb)
 {
   int k;
-  static mp_bitcnt_t x[] = {1,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
+  static unsigned long x[] = {1,7,25,81,241,673,1793,4609,11521,28161,~0ul};
   for (k = 0; eb > x[k]; k++)
     ;
   return k;
 }
 
-/* rp[n-1..0] = bp[n-1..0] ^ ep[en-1..0] mod B^n, B is the limb base.
+/* rp[n-1..0] = bp[n-1..0] ^ ep[en-1..0] mod B^n, B is the limb base.
    Requires that ep[en-1] is non-zero.
    Uses scratch space tp[3n-1..0], i.e., 3n words.  */
 void
@@ -80,7 +69,7 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
 	   mp_size_t n, mp_ptr tp)
 {
   int cnt;
-  mp_bitcnt_t ebi;
+  long ebi;
   int windowsize, this_windowsize;
   mp_limb_t expbits;
   mp_limb_t *pp, *this_pp, *last_pp;
@@ -92,11 +81,12 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
 
   TMP_MARK;
 
-  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+  count_leading_zeros (cnt, ep[en - 1]);
+  ebi = en * GMP_LIMB_BITS - cnt;
 
   windowsize = win_size (ebi);
 
-  pp = TMP_ALLOC_LIMBS ((n << (windowsize - 1)) + n); /* + n is for mullo ign part */
+  pp = TMP_ALLOC_LIMBS ((n << (windowsize - 1)) + n); /* + n is for mullow ign part */
 
   this_pp = pp;
 
@@ -105,7 +95,7 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
   b2p = tp + 2*n;
 
   /* Store b^2 in b2.  */
-  mpn_sqr (tp, bp, n);	/* FIXME: Use "mpn_sqrlo" */
+  mpn_sqr_n (tp, bp, n);	/* FIXME: Use "mpn_sqrlo" */
   MPN_COPY (b2p, tp, n);
 
   /* Precompute odd powers of b and put them in the temporary area at pp.  */
@@ -113,14 +103,13 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
     {
       last_pp = this_pp;
       this_pp += n;
-      mpn_mullo_n (this_pp, last_pp, b2p, n);
+      mpn_mullow_n (this_pp, last_pp, b2p, n);
     }
 
   expbits = getbits (ep, ebi, windowsize);
-  if (ebi < windowsize)
+  ebi -= windowsize;
+  if (ebi < 0)
     ebi = 0;
-  else
-    ebi -= windowsize;
 
   count_trailing_zeros (cnt, expbits);
   ebi += cnt;
@@ -132,7 +121,7 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
     {
       while (getbit (ep, ebi) == 0)
 	{
-	  mpn_sqr (tp, rp, n);	/* FIXME: Use "mpn_sqrlo" */
+	  mpn_sqr_n (tp, rp, n);	/* FIXME: Use "mpn_sqrlo" */
 	  MPN_COPY (rp, tp, n);
 	  ebi--;
 	  if (ebi == 0)
@@ -143,14 +132,13 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
 	 bits <= windowsize, and such that the least significant bit is 1.  */
 
       expbits = getbits (ep, ebi, windowsize);
+      ebi -= windowsize;
       this_windowsize = windowsize;
-      if (ebi < windowsize)
+      if (ebi < 0)
 	{
-	  this_windowsize -= windowsize - ebi;
+	  this_windowsize += ebi;
 	  ebi = 0;
 	}
-      else
-	ebi -= windowsize;
 
       count_trailing_zeros (cnt, expbits);
       this_windowsize -= cnt;
@@ -159,13 +147,13 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
 
       do
 	{
-	  mpn_sqr (tp, rp, n);
+	  mpn_sqr_n (tp, rp, n);
 	  MPN_COPY (rp, tp, n);
 	  this_windowsize--;
 	}
       while (this_windowsize != 0);
 
-      mpn_mullo_n (tp, rp, pp + n * (expbits >> 1), n);
+      mpn_mullow_n (tp, rp, pp + n * (expbits >> 1), n);
       MPN_COPY (rp, tp, n);
     }
 
diff --git a/gmp/mpn/generic/powm.c b/gmp/mpn/generic/powm.c
index 9968116016..c057ec2156 100644
--- a/gmp/mpn/generic/powm.c
+++ b/gmp/mpn/generic/powm.c
@@ -1,51 +1,37 @@
 /* mpn_powm -- Compute R = U^E mod M.
 
-   Contributed to the GNU project by Torbjorn Granlund.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2007-2012 Free Software Foundation, Inc.
+Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 /*
-  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
+  BASIC ALGORITHM, Compute b^e mod n, where n is odd.
 
-  1. W <- U
+  1. w <- b
 
-  2. T <- (B^n * U) mod M                Convert to REDC form
+  2. While w^2 < n (and there are more bits in e)
+       w <- power left-to-right base-2 without reduction
 
-  3. Compute table U^1, U^3, U^5... of E-dependent size
+  3. t <- (B^n * b) mod n              Convert to REDC form
 
-  4. While there are more bits in E
-       W <- power left-to-right base-k
+  4. Compute power table of e-dependent size
+
+  5. While there are more bits in e
+       w <- power left-to-right base-k with reduction
 
 
   TODO:
@@ -54,64 +40,51 @@ see https://www.gnu.org/licenses/.  */
      That will simplify the code using getbits.  (Perhaps make getbits' sibling
      getbit then have similar form, for symmetry.)
 
-   * Write an itch function.  Or perhaps get rid of tp parameter since the huge
-     pp area is allocated locally anyway?
+   * Write an itch function.
 
    * Choose window size without looping.  (Superoptimize or think(tm).)
 
-   * Handle small bases with initial, reduction-free exponentiation.
+   * How do we handle small bases?
+
+   * This is slower than old mpz code, in particular if we base it on redc_1
+     (use: #undef HAVE_NATIVE_mpn_addmul_2).  Why?
+
+   * Make it sub-quadratic.
 
    * Call new division functions, not mpn_tdiv_qr.
 
+   * Is redc obsolete with improved SB division?
+
    * Consider special code for one-limb M.
 
-   * How should we handle the redc1/redc2/redc_n choice?
-     - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
-     - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
-     - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
+   * CRT for N = odd*2^t:
+      Using Newton's method and 2-adic arithmetic:
+        m1_inv_m2 = 1/odd mod 2^t
+      Plain 2-adic (REDC) modexp:
+        r1 = a ^ b mod odd
+      Mullo+sqrlo-based modexp:
+        r2 = a ^ b mod 2^t
+      mullo, mul, add:
+        r = ((r2 - r1) * m1_i_m2 mod 2^t) * odd + r1
+
+   * How should we handle the redc1/redc2/redc3/redc4/redc_subquad choice?
+     - redc1: T(binvert_1limb)  + e * (n)   * (T(mullo1x1) + n*T(addmul_1))
+     - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo2x2) + n*T(addmul_2))
+     - redc3: T(binvert_3limbs) + e * (n/3) * (T(mullo3x3) + n*T(addmul_3))
      This disregards the addmul_N constant term, but we could think of
-     that as part of the respective mullo.
-
-   * When U (the base) is small, we should start the exponentiation with plain
-     operations, then convert that partial result to REDC form.
-
-   * When U is just one limb, should it be handled without the k-ary tricks?
-     We could keep a factor of B^n in W, but use U' = BU as base.  After
-     multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
-     mod M.
+     that as part of the respective mulloNxN.
 */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
 
-#undef MPN_REDC_1
-#define MPN_REDC_1(rp, up, mp, n, invm)					\
-  do {									\
-    mp_limb_t cy;							\
-    cy = mpn_redc_1 (rp, up, mp, n, invm);				\
-    if (cy != 0)							\
-      mpn_sub_n (rp, rp, mp, n);					\
-  } while (0)
-
-#undef MPN_REDC_2
-#define MPN_REDC_2(rp, up, mp, n, mip)					\
-  do {									\
-    mp_limb_t cy;							\
-    cy = mpn_redc_2 (rp, up, mp, n, mip);				\
-    if (cy != 0)							\
-      mpn_sub_n (rp, rp, mp, n);					\
-  } while (0)
-
-#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
-#define WANT_REDC_2 1
-#endif
 
 #define getbit(p,bi) \
   ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
 
 static inline mp_limb_t
-getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
+getbits (const mp_limb_t *p, unsigned long bi, int nbits)
 {
   int nbits_in_r;
   mp_limb_t r;
@@ -124,27 +97,49 @@ getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
   else
     {
       bi -= nbits;			/* bit index of low bit to extract */
-      i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
-      bi %= GMP_NUMB_BITS;		/* bit index in low word */
+      i = bi / GMP_LIMB_BITS;		/* word index of low bit to extract */
+      bi %= GMP_LIMB_BITS;		/* bit index in low word */
       r = p[i] >> bi;			/* extract (low) bits */
-      nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
+      nbits_in_r = GMP_LIMB_BITS - bi;	/* number of bits now in r */
       if (nbits_in_r < nbits)		/* did we get enough bits? */
 	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
       return r & (((mp_limb_t ) 1 << nbits) - 1);
     }
 }
 
+#undef HAVE_NATIVE_mpn_addmul_2
+
+#ifndef HAVE_NATIVE_mpn_addmul_2
+#define REDC_2_THRESHOLD		MP_SIZE_T_MAX
+#endif
+
+#ifndef REDC_2_THRESHOLD
+#define REDC_2_THRESHOLD		4
+#endif
+
+static void mpn_redc_n () {ASSERT_ALWAYS(0);}
+
 static inline int
-win_size (mp_bitcnt_t eb)
+win_size (unsigned long eb)
 {
   int k;
-  static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
-  for (k = 1; eb > x[k]; k++)
+  static unsigned long x[] = {1,7,25,81,241,673,1793,4609,11521,28161,~0ul};
+  for (k = 0; eb > x[k]; k++)
     ;
   return k;
 }
 
-/* Convert U to REDC form, U_r = B^n * U mod M */
+#define MPN_REDC_X(rp, tp, mp, n, mip)					\
+  do {									\
+    if (redc_x == 1)							\
+      mpn_redc_1 (rp, tp, mp, n, mip[0]);				\
+    else if (redc_x == 2)						\
+      mpn_redc_2 (rp, tp, mp, n, mip);					\
+    else								\
+      mpn_redc_n (rp, tp, mp, n, mip);					\
+  } while (0)
+
+  /* Convert U to REDC form, U_r = B^n * U mod M */
 static void
 redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
 {
@@ -164,19 +159,21 @@ redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
    Requires that mp[n-1..0] is odd.
    Requires that ep[en-1..0] is > 1.
-   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
+   Uses scratch space tp[3n..0], i.e., 3n+1 words.  */
 void
 mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 	  mp_srcptr ep, mp_size_t en,
 	  mp_srcptr mp, mp_size_t n, mp_ptr tp)
 {
-  mp_limb_t ip[2], *mip;
+  mp_limb_t mip[2];
   int cnt;
-  mp_bitcnt_t ebi;
+  long ebi;
   int windowsize, this_windowsize;
   mp_limb_t expbits;
-  mp_ptr pp, this_pp;
+  mp_ptr pp, this_pp, last_pp;
+  mp_ptr b2p;
   long i;
+  int redc_x;
   TMP_DECL;
 
   ASSERT (en > 1 || (en == 1 && ep[0] > 1));
@@ -184,7 +181,8 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 
   TMP_MARK;
 
-  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+  count_leading_zeros (cnt, ep[en - 1]);
+  ebi = en * GMP_LIMB_BITS - cnt;
 
 #if 0
   if (bn < n)
@@ -193,7 +191,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 	 until the result is greater than the mod argument.  */
       for (;;)
 	{
-	  mpn_sqr (tp, this_pp, tn);
+	  mpn_sqr_n (tp, this_pp, tn);
 	  tn = tn * 2 - 1,  tn += tp[tn] != 0;
 	  if (getbit (ep, ebi) != 0)
 	    mpn_mul (..., tp, tn, bp, bn);
@@ -204,75 +202,49 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 
   windowsize = win_size (ebi);
 
-#if WANT_REDC_2
-  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+  if (BELOW_THRESHOLD (n, REDC_2_THRESHOLD))
     {
-      mip = ip;
       binvert_limb (mip[0], mp[0]);
       mip[0] = -mip[0];
+      redc_x = 1;
     }
-  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+#if defined (HAVE_NATIVE_mpn_addmul_2)
+  else
     {
-      mip = ip;
       mpn_binvert (mip, mp, 2, tp);
       mip[0] = -mip[0]; mip[1] = ~mip[1];
-    }
-#else
-  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
-    {
-      mip = ip;
-      binvert_limb (mip[0], mp[0]);
-      mip[0] = -mip[0];
+      redc_x = 2;
     }
 #endif
-  else
-    {
-      mip = TMP_ALLOC_LIMBS (n);
-      mpn_binvert (mip, mp, n, tp);
-    }
+#if 0
+  mpn_binvert (mip, mp, n, tp);
+  redc_x = 0;
+#endif
 
   pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
 
   this_pp = pp;
   redcify (this_pp, bp, bn, mp, n);
 
-  /* Store b^2 at rp.  */
-  mpn_sqr (tp, this_pp, n);
-#if WANT_REDC_2
-  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
-  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
-    MPN_REDC_2 (rp, tp, mp, n, mip);
-#else
-  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
-    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
-#endif
-  else
-    mpn_redc_n (rp, tp, mp, n, mip);
+  b2p = tp + 2*n;
+
+  /* Store b^2 in b2.  */
+  mpn_sqr_n (tp, this_pp, n);
+  MPN_REDC_X (b2p, tp, mp, n, mip);
 
   /* Precompute odd powers of b and put them in the temporary area at pp.  */
   for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
     {
-      mpn_mul_n (tp, this_pp, rp, n);
+      last_pp = this_pp;
       this_pp += n;
-#if WANT_REDC_2
-      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
-      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
-	MPN_REDC_2 (this_pp, tp, mp, n, mip);
-#else
-      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
-	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
-#endif
-      else
-	mpn_redc_n (this_pp, tp, mp, n, mip);
+      mpn_mul_n (tp, last_pp, b2p, n);
+      MPN_REDC_X (this_pp, tp, mp, n, mip);
     }
 
   expbits = getbits (ep, ebi, windowsize);
-  if (ebi < windowsize)
+  ebi -= windowsize;
+  if (ebi < 0)
     ebi = 0;
-  else
-    ebi -= windowsize;
 
   count_trailing_zeros (cnt, expbits);
   ebi += cnt;
@@ -280,311 +252,51 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
 
   MPN_COPY (rp, pp + n * (expbits >> 1), n);
 
-#define INNERLOOP							\
-  while (ebi != 0)							\
-    {									\
-      while (getbit (ep, ebi) == 0)					\
-	{								\
-	  MPN_SQR (tp, rp, n);						\
-	  MPN_REDUCE (rp, tp, mp, n, mip);				\
-	  ebi--;							\
-	  if (ebi == 0)							\
-	    goto done;							\
-	}								\
-									\
-      /* The next bit of the exponent is 1.  Now extract the largest	\
-	 block of bits <= windowsize, and such that the least		\
-	 significant bit is 1.  */					\
-									\
-      expbits = getbits (ep, ebi, windowsize);				\
-      this_windowsize = windowsize;					\
-      if (ebi < windowsize)						\
-	{								\
-	  this_windowsize -= windowsize - ebi;				\
-	  ebi = 0;							\
-	}								\
-      else								\
-        ebi -= windowsize;						\
-									\
-      count_trailing_zeros (cnt, expbits);				\
-      this_windowsize -= cnt;						\
-      ebi += cnt;							\
-      expbits >>= cnt;							\
-									\
-      do								\
-	{								\
-	  MPN_SQR (tp, rp, n);						\
-	  MPN_REDUCE (rp, tp, mp, n, mip);				\
-	  this_windowsize--;						\
-	}								\
-      while (this_windowsize != 0);					\
-									\
-      MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);			\
-      MPN_REDUCE (rp, tp, mp, n, mip);					\
-    }
-
-
-#if WANT_REDC_2
-  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
-    {
-      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-	{
-	  if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
-	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
-	    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
-	      INNERLOOP;
-	    }
-	  else
-	    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
-	      INNERLOOP;
-	    }
-	}
-      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
-	{
-	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
-	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
-	    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
-	      INNERLOOP;
-	    }
-	  else
-	    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
-	      INNERLOOP;
-	    }
-	}
-      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
-	{
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
-	  INNERLOOP;
-	}
-      else
-	{
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
-	  INNERLOOP;
-	}
-    }
-  else
+  while (ebi != 0)
     {
-      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+      while (getbit (ep, ebi) == 0)
 	{
-	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
-	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
-	    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
-	      INNERLOOP;
-	    }
-	  else
-	    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
-	      INNERLOOP;
-	    }
-	}
-      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-	{
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
-	  INNERLOOP;
-	}
-      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
-	{
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
-	  INNERLOOP;
-	}
-      else
-	{
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
-	  INNERLOOP;
+	  mpn_sqr_n (tp, rp, n);
+	  MPN_REDC_X (rp, tp, mp, n, mip);
+	  ebi--;
+	  if (ebi == 0)
+	    goto done;
 	}
-    }
 
-#else  /* WANT_REDC_2 */
+      /* The next bit of the exponent is 1.  Now extract the largest block of
+	 bits <= windowsize, and such that the least significant bit is 1.  */
 
-  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
-    {
-      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
-	{
-	  if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
-	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
-	    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
-	      INNERLOOP;
-	    }
-	  else
-	    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
-	      INNERLOOP;
-	    }
-	}
-      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+      expbits = getbits (ep, ebi, windowsize);
+      ebi -= windowsize;
+      this_windowsize = windowsize;
+      if (ebi < 0)
 	{
-	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
-	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
-	    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
-	      INNERLOOP;
-	    }
-	  else
-	    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
-	      INNERLOOP;
-	    }
+	  this_windowsize += ebi;
+	  ebi = 0;
 	}
-      else
-	{
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
-	  INNERLOOP;
-	}
-    }
-  else
-    {
-      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
-	{
-	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
-	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
-	    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
-	      INNERLOOP;
-	    }
-	  else
-	    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
-	      INNERLOOP;
-	    }
-	}
-      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
-	{
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
-	  INNERLOOP;
-	}
-      else
+
+      count_trailing_zeros (cnt, expbits);
+      this_windowsize -= cnt;
+      ebi += cnt;
+      expbits >>= cnt;
+
+      do
 	{
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
-	  INNERLOOP;
+	  mpn_sqr_n (tp, rp, n);
+	  MPN_REDC_X (rp, tp, mp, n, mip);
+	  this_windowsize--;
 	}
+      while (this_windowsize != 0);
+
+      mpn_mul_n (tp, rp, pp + n * (expbits >> 1), n);
+      MPN_REDC_X (rp, tp, mp, n, mip);
     }
-#endif  /* WANT_REDC_2 */
 
  done:
-
   MPN_COPY (tp, rp, n);
   MPN_ZERO (tp + n, n);
-
-#if WANT_REDC_2
-  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
-  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
-    MPN_REDC_2 (rp, tp, mp, n, mip);
-#else
-  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
-    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
-#endif
-  else
-    mpn_redc_n (rp, tp, mp, n, mip);
-
+  MPN_REDC_X (rp, tp, mp, n, mip);
   if (mpn_cmp (rp, mp, n) >= 0)
     mpn_sub_n (rp, rp, mp, n);
-
   TMP_FREE;
 }
diff --git a/gmp/mpn/generic/powm_sec.c b/gmp/mpn/generic/powm_sec.c
new file mode 100644
index 0000000000..26d77b5c81
--- /dev/null
+++ b/gmp/mpn/generic/powm_sec.c
@@ -0,0 +1,272 @@
+/* mpn_powm_sec -- Compute R = U^E mod M.  Safe variant, not leaking time info.
+
+Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/*
+  BASIC ALGORITHM, Compute b^e mod n, where n is odd.
+
+  1. w <- b
+
+  2. While w^2 < n (and there are more bits in e)
+       w <- power left-to-right base-2 without reduction
+
+  3. t <- (B^n * b) / n                Convert to REDC form
+
+  4. Compute power table of e-dependent size
+
+  5. While there are more bits in e
+       w <- power left-to-right base-k with reduction
+
+
+  TODO:
+
+   * Make getbits a macro, thereby allowing it to update the index operand.
+     That will simplify the code using getbits.  (Perhaps make getbits' sibling
+     getbit then have similar form, for symmetry.)
+
+   * Write an itch function.
+
+   * Choose window size without looping.  (Superoptimize or think(tm).)
+
+   * Make it sub-quadratic.
+
+   * Call new division functions, not mpn_tdiv_qr.
+
+   * Is redc obsolete with improved SB division?
+
+   * Consider special code for one-limb M.
+
+   * Handle even M (in mpz_powm_sec) with two modexps and CRT.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define WANT_CACHE_SECURITY 1
+
+
+#define getbit(p,bi)	/* bit number bi of limb array p; bi counts from 1 */ \
+  (((p)[((bi) - 1) / GMP_LIMB_BITS] >> ((bi) - 1) % GMP_LIMB_BITS) & 1)
+/* Return bits [bi-nbits, bi) of p; only the low bi bits if bi < nbits.  */
+static inline mp_limb_t
+getbits (const mp_limb_t *p, unsigned long bi, int nbits)
+{
+  int nbits_in_r;
+  mp_limb_t r;
+  mp_size_t i;
+
+  if (bi < nbits)			/* fewer than nbits bits are left */
+    {
+      return p[0] & (((mp_limb_t) 1 << bi) - 1);
+    }
+  else
+    {
+      bi -= nbits;			/* bit index of low bit to extract */
+      i = bi / GMP_LIMB_BITS;		/* word index of low bit to extract */
+      bi %= GMP_LIMB_BITS;		/* bit index in low word */
+      r = p[i] >> bi;			/* extract (low) bits */
+      nbits_in_r = GMP_LIMB_BITS - bi;	/* number of bits now in r */
+      if (nbits_in_r < nbits)		/* did we get enough bits? */
+	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
+      return r & (((mp_limb_t ) 1 << nbits) - 1);
+    }
+}
+
+#undef HAVE_NATIVE_mpn_addmul_2	/* forces the fallback just below */
+
+#ifndef HAVE_NATIVE_mpn_addmul_2	/* always true after the #undef above */
+#define REDC_2_THRESHOLD		MP_SIZE_T_MAX
+#endif
+
+#ifndef REDC_2_THRESHOLD
+#define REDC_2_THRESHOLD		4
+#endif
+
+static void mpn_redc_n () {ASSERT_ALWAYS(0);}	/* stub: only trips the assert */
+/* Window size for an eb-bit exponent: the first w with eb <= limit[w].  */
+static inline int
+win_size (unsigned long eb)
+{
+  static unsigned long limit[] = {1,4,27,100,325,1026,2905,7848,20457,51670,~0ul};
+  int w = 0;
+  while (eb > limit[w])		/* the ~0ul sentinel always ends the scan */
+    w++;
+  return w;
+}
+/* Reduce tp to rp with mpn_redc_1/_2/_n, as chosen by `redc_x' at the use site.  */
+#define MPN_REDC_X(rp, tp, mp, n, mip)					\
+  do {									\
+    if (redc_x == 1)							\
+      mpn_redc_1 (rp, tp, mp, n, mip[0]);				\
+    else if (redc_x == 2)						\
+      mpn_redc_2 (rp, tp, mp, n, mip);					\
+    else								\
+      mpn_redc_n (rp, tp, mp, n, mip);					\
+  } while (0)
+
+  /* Convert U = {up,un} to REDC form: {rp,n} = B^n * U mod M, M = {mp,n}.  */
+static void
+redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
+{
+  mp_ptr tp, qp;
+  TMP_DECL;
+  TMP_MARK;
+
+  tp = TMP_ALLOC_LIMBS (un + n);	/* room for U with n zero limbs below it */
+  qp = TMP_ALLOC_LIMBS (un + 1);	/* FIXME: Put at tp+? */
+
+  MPN_ZERO (tp, n);
+  MPN_COPY (tp + n, up, un);
+  mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);	/* qp is not used afterwards */
+  TMP_FREE;
+}
+
+/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
+   Requires that mp[n-1..0] is odd.
+   Requires that ep[en-1..0] is > 1.
+   Uses scratch space tp[3n..0], i.e., 3n+1 words.  */
+void
+mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
+	      mp_srcptr ep, mp_size_t en,
+	      mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+  mp_limb_t mip[2];
+  int cnt;
+  long ebi;
+  int windowsize, this_windowsize;
+  mp_limb_t expbits;
+  mp_ptr pp, this_pp, last_pp;
+  long i;
+  int redc_x;
+  TMP_DECL;
+
+  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+  ASSERT (n >= 1 && ((mp[0] & 1) != 0));
+
+  TMP_MARK;
+
+  count_leading_zeros (cnt, ep[en - 1]);
+  ebi = en * GMP_LIMB_BITS - cnt;	/* exponent bits still to be consumed */
+
+  windowsize = win_size (ebi);
+
+  if (BELOW_THRESHOLD (n, REDC_2_THRESHOLD))
+    {
+      binvert_limb (mip[0], mp[0]);
+      mip[0] = -mip[0];
+      redc_x = 1;
+    }
+#if defined (HAVE_NATIVE_mpn_addmul_2)
+  else
+    {
+      mpn_binvert (mip, mp, 2, tp);
+      mip[0] = -mip[0]; mip[1] = ~mip[1];
+      redc_x = 2;
+    }
+#endif
+#if 0	/* disabled: redc_x = 0 would make MPN_REDC_X call mpn_redc_n */
+  mpn_binvert (mip, mp, n, tp);
+  redc_x = 0;
+#endif
+
+  pp = TMP_ALLOC_LIMBS (n << windowsize);	/* 2^windowsize entries, n limbs each */
+
+  this_pp = pp;
+  this_pp[n] = 1;
+  redcify (this_pp, this_pp + n, 1, mp, n);	/* entry 0: 1 in REDC form */
+  this_pp += n;
+  redcify (this_pp, bp, bn, mp, n);		/* entry 1: b in REDC form */
+
+  /* Precompute powers of b and put them in the temporary area at pp.  */
+  for (i = (1 << windowsize) - 2; i > 0; i--)
+    {
+      last_pp = this_pp;
+      this_pp += n;
+      mpn_mul_n (tp, last_pp, pp + n, n);	/* previous entry times entry 1 */
+      MPN_REDC_X (this_pp, tp, mp, n, mip);
+    }
+
+  expbits = getbits (ep, ebi, windowsize);
+  ebi -= windowsize;
+  if (ebi < 0)
+    ebi = 0;
+
+  MPN_COPY (rp, pp + n * expbits, n);	/* start from the entry for the top window */
+
+  while (ebi != 0)
+    {
+      expbits = getbits (ep, ebi, windowsize);
+      ebi -= windowsize;
+      this_windowsize = windowsize;
+      if (ebi < 0)
+	{
+	  this_windowsize += ebi;	/* last window is shorter than windowsize */
+	  ebi = 0;
+	}
+
+      do
+	{
+	  mpn_sqr_n (tp, rp, n);
+	  MPN_REDC_X (rp, tp, mp, n, mip);
+	  this_windowsize--;
+	}
+      while (this_windowsize != 0);
+
+#if WANT_CACHE_SECURITY	/* copy the entry out with mpn_tabselect first */
+      mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);
+      mpn_mul_n (tp, rp, tp + 2*n, n);
+#else
+      mpn_mul_n (tp, rp, pp + n * expbits, n);
+#endif
+      MPN_REDC_X (rp, tp, mp, n, mip);
+    }
+  /* One more reduction of rp, extended with n zero limbs, then a final fix-up.  */
+  MPN_COPY (tp, rp, n);
+  MPN_ZERO (tp + n, n);
+  MPN_REDC_X (rp, tp, mp, n, mip);
+  if (mpn_cmp (rp, mp, n) >= 0)
+    mpn_sub_n (rp, rp, mp, n);
+  TMP_FREE;
+}
+
+#if ! HAVE_NATIVE_mpn_tabselect
+/* Select entry `which' from table `tab', which has nents entries, each `n'
+   limbs.  Store the selected entry at rp.  Reads entire table to avoid
+   sideband information leaks.  O(n*nents).  If `which' is not in
+   [0, nents), no entry matches and rp keeps its previous contents.  */
+void
+mpn_tabselect (volatile mp_limb_t *rp, volatile mp_limb_t *tab, mp_size_t n,
+	       mp_size_t nents, mp_size_t which)
+{
+  mp_size_t k, i;
+  mp_limb_t mask;
+  volatile mp_limb_t *tp;
+
+  for (k = 0; k < nents; k++)
+    {
+      mask = -(mp_limb_t) (which == k);	/* all ones for the wanted entry, else 0 */
+      tp = tab + n * k;
+      for (i = 0; i < n; i++)
+	{
+	  rp[i] = (rp[i] & ~mask) | (tp[i] & mask);	/* overwrite only when mask is set */
+	}
+    }
+}
+#endif
diff --git a/gmp/mpn/generic/pre_divrem_1.c b/gmp/mpn/generic/pre_divrem_1.c
index 8027f0216e..6badf63192 100644
--- a/gmp/mpn/generic/pre_divrem_1.c
+++ b/gmp/mpn/generic/pre_divrem_1.c
@@ -4,33 +4,22 @@
    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
    FUTURE GNU MP RELEASES.
 
-Copyright 2000-2003 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -43,8 +32,8 @@ see https://www.gnu.org/licenses/.  */
 /* Same test here for skipping one divide step as in mpn_divrem_1.
 
    The main reason for a separate shift==0 case is that not all CPUs give
-   zero for "n0 >> GMP_LIMB_BITS" which would arise in the general case
-   code used on shift==0.  shift==0 is also reasonably common in mp_bases
+   zero for "n0 >> BITS_PER_MP_LIMB" which would arise in the general case
+   code used on shift==0.  shift==0 is also reasonably common in __mp_bases
    big_base, for instance base==10 on a 64-bit limb.
 
    Under shift!=0 it would be possible to call mpn_lshift to adjust the
@@ -117,14 +106,14 @@ mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t xsize,
 	}
 
       n1 = ap[size-1];
-      r |= n1 >> (GMP_LIMB_BITS - shift);
+      r |= n1 >> (BITS_PER_MP_LIMB - shift);
 
       for (i = size-2; i >= 0; i--)
 	{
 	  ASSERT (r < d);
 	  n0 = ap[i];
 	  udiv_qrnnd_preinv (*qp, r, r,
-			     ((n1 << shift) | (n0 >> (GMP_LIMB_BITS - shift))),
+			     ((n1 << shift) | (n0 >> (BITS_PER_MP_LIMB - shift))),
 			     d, dinv);
 	  qp--;
 	  n1 = n0;
diff --git a/gmp/mpn/generic/pre_mod_1.c b/gmp/mpn/generic/pre_mod_1.c
index cb38f4a48f..961733ba34 100644
--- a/gmp/mpn/generic/pre_mod_1.c
+++ b/gmp/mpn/generic/pre_mod_1.c
@@ -2,34 +2,23 @@
    DINV should be 2^(2*GMP_LIMB_BITS) / D - 2^GMP_LIMB_BITS.
    Return the single-limb remainder.
 
-Copyright 1991, 1993, 1994, 2000-2002, 2004, 2005 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2004, 2005 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -45,6 +34,7 @@ mpn_preinv_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d, mp_limb_t dinv)
 {
   mp_size_t i;
   mp_limb_t n0, r;
+  mp_limb_t dummy;
 
   ASSERT (un >= 1);
   ASSERT (d & GMP_LIMB_HIGHBIT);
@@ -56,7 +46,7 @@ mpn_preinv_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d, mp_limb_t dinv)
   for (i = un - 2; i >= 0; i--)
     {
       n0 = up[i];
-      udiv_rnnd_preinv (r, r, n0, d, dinv);
+      udiv_qrnnd_preinv (dummy, r, r, n0, d, dinv);
     }
   return r;
 }
diff --git a/gmp/mpn/generic/random.c b/gmp/mpn/generic/random.c
index 5489becf4d..c0b85ea075 100644
--- a/gmp/mpn/generic/random.c
+++ b/gmp/mpn/generic/random.c
@@ -5,28 +5,17 @@ Copyright 2001, 2002 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/mpn/generic/random2.c b/gmp/mpn/generic/random2.c
index 980b15367f..e29238c514 100644
--- a/gmp/mpn/generic/random2.c
+++ b/gmp/mpn/generic/random2.c
@@ -1,38 +1,28 @@
 /* mpn_random2 -- Generate random numbers with relatively long strings
    of ones and zeroes.  Suitable for border testing.
 
-Copyright 1992-1994, 1996, 2000-2002, 2004, 2012 Free Software Foundation, Inc.
+Copyright 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 
-static void gmp_rrandomb (mp_ptr, gmp_randstate_t, mp_bitcnt_t);
+static void gmp_rrandomb __GMP_PROTO ((mp_ptr, gmp_randstate_t, unsigned long int));
 
 /* Ask _gmp_rand for 32 bits per call unless that's more than a limb can hold.
    Thus, we get the same random number sequence in the common cases.
@@ -64,15 +54,15 @@ mpn_random2 (mp_ptr rp, mp_size_t n)
 }
 
 static void
-gmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, mp_bitcnt_t nbits)
+gmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, unsigned long int nbits)
 {
-  mp_bitcnt_t bi;
+  unsigned long int bi;
   mp_limb_t ranm;		/* buffer for random bits */
   unsigned cap_chunksize, chunksize;
   mp_size_t i;
 
   /* Set entire result to 111..1  */
-  i = BITS_TO_LIMBS (nbits) - 1;
+  i = (nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS - 1;
   rp[i] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - (nbits % GMP_NUMB_BITS)) % GMP_NUMB_BITS;
   for (i = i - 1; i >= 0; i--)
     rp[i] = GMP_NUMB_MAX;
diff --git a/gmp/mpn/generic/redc_1.c b/gmp/mpn/generic/redc_1.c
index 0d33421f63..47bee8220b 100644
--- a/gmp/mpn/generic/redc_1.c
+++ b/gmp/mpn/generic/redc_1.c
@@ -1,57 +1,43 @@
-/* mpn_redc_1.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
+/* mpn_redc_1.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
    mp[] is n limbs; up[] is 2n limbs.
 
    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
 
-Copyright (C) 2000-2002, 2004, 2008, 2009, 2012 Free Software Foundation, Inc.
+Copyright (C) 2000, 2001, 2002, 2004, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 
-mp_limb_t
+void
 mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
 {
   mp_size_t j;
   mp_limb_t cy;
 
-  ASSERT (n > 0);
   ASSERT_MPN (up, 2*n);
 
   for (j = n - 1; j >= 0; j--)
     {
-      cy = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
-      ASSERT (up[0] == 0);
-      up[0] = cy;
+      up[0] = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
       up++;
     }
-
   cy = mpn_add_n (rp, up, up - n, n);
-  return cy;
+  if (cy != 0)
+    mpn_sub_n (rp, rp, mp, n);
 }
diff --git a/gmp/mpn/generic/redc_2.c b/gmp/mpn/generic/redc_2.c
index 07d90fa20d..0efbd9d4c7 100644
--- a/gmp/mpn/generic/redc_2.c
+++ b/gmp/mpn/generic/redc_2.c
@@ -1,36 +1,25 @@
-/* mpn_redc_2.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
+/* mpn_redc_2.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
    mp[] is n limbs; up[] is 2n limbs.
 
    THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
 
-Copyright (C) 2000-2002, 2004, 2008, 2012 Free Software Foundation, Inc.
+Copyright (C) 2000, 2001, 2002, 2004, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -44,8 +33,7 @@ you lose
 /* For testing purposes, define our own mpn_addmul_2 if there is none already
    available.  */
 #ifndef HAVE_NATIVE_mpn_addmul_2
-#undef mpn_addmul_2
-static mp_limb_t
+mp_limb_t
 mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
 {
   rp[n] = mpn_addmul_1 (rp, up, n, vp[0]);
@@ -53,7 +41,7 @@ mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
 }
 #endif
 
-#if defined (__GNUC__) && defined (__ia64) && W_TYPE_SIZE == 64
+#if defined (__ia64) && W_TYPE_SIZE == 64
 #define umul2low(ph, pl, uh, ul, vh, vl) \
   do {									\
     mp_limb_t _ph, _pl;							\
@@ -78,7 +66,7 @@ mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
   } while (0)
 #endif
 
-mp_limb_t
+void
 mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
 {
   mp_limb_t q[2];
@@ -86,7 +74,6 @@ mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
   mp_limb_t upn;
   mp_limb_t cy;
 
-  ASSERT (n > 0);
   ASSERT_MPN (up, 2*n);
 
   if ((n & 1) != 0)
@@ -104,7 +91,7 @@ mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
       up[n] = upn;
       up += 2;
     }
-
   cy = mpn_add_n (rp, up, up - n, n);
-  return cy;
+  if (cy != 0)
+    mpn_sub_n (rp, rp, mp, n);
 }
diff --git a/gmp/mpn/generic/redc_n.c b/gmp/mpn/generic/redc_n.c
deleted file mode 100644
index c3d0cfe7fa..0000000000
--- a/gmp/mpn/generic/redc_n.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/* mpn_redc_n.  Set rp[] <- up[]/R^n mod mp[].  Clobber up[].
-   mp[] is n limbs; up[] is 2n limbs, the inverse ip[] is n limbs.
-
-   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-
-Copyright 2009, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
-  TODO
-
-  * We assume mpn_mulmod_bnm1 is always faster than plain mpn_mul_n (or a
-    future mpn_mulhi) for the range we will be called.  Follow up that
-    assumption.
-
-  * Decrease scratch usage.
-
-  * Consider removing the residue canonicalisation.
-*/
-
-void
-mpn_redc_n (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr ip)
-{
-  mp_ptr xp, yp, scratch;
-  mp_limb_t cy;
-  mp_size_t rn;
-  TMP_DECL;
-  TMP_MARK;
-
-  ASSERT (n > 8);
-
-  rn = mpn_mulmod_bnm1_next_size (n);
-
-  scratch = TMP_ALLOC_LIMBS (n + rn + mpn_mulmod_bnm1_itch (rn, n, n));
-
-  xp = scratch;
-  mpn_mullo_n (xp, up, ip, n);
-
-  yp = scratch + n;
-  mpn_mulmod_bnm1 (yp, rn, xp, n, mp, n, scratch + n + rn);
-
-  ASSERT_ALWAYS (2 * n > rn);				/* could handle this */
-
-  cy = mpn_sub_n (yp + rn, yp, up, 2*n - rn);		/* undo wrap around */
-  MPN_DECR_U (yp + 2*n - rn, rn, cy);
-
-  cy = mpn_sub_n (rp, up + n, yp + n, n);
-  if (cy != 0)
-    mpn_add_n (rp, rp, mp, n);
-
-  TMP_FREE;
-}
diff --git a/gmp/mpn/generic/remove.c b/gmp/mpn/generic/remove.c
deleted file mode 100644
index ef1a06ea14..0000000000
--- a/gmp/mpn/generic/remove.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/* mpn_remove -- divide out all multiples of odd mpn number from another mpn
-   number.
-
-   Contributed to the GNU project by Torbjorn Granlund.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2009, 2012, 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#if GMP_LIMB_BITS > 50
-#define LOG 50
-#else
-#define LOG GMP_LIMB_BITS
-#endif
-
-
-/* Input: U = {up,un}, V = {vp,vn} must be odd, cap
-   Ouput  W = {wp,*wn} allocation need is exactly *wn
-
-   Set W = U / V^k, where k is the largest integer <= cap such that the
-   division yields an integer.
-
-   FIXME: We currently allow any operand overlap.  This is quite non mpn-ish
-   and might be changed, since it cost significant temporary space.
-   * If we require W to have space for un + 1 limbs, we could save qp or qp2
-     (but we will still need to copy things into wp 50% of the time).
-   * If we allow ourselves to clobber U, we could save the other of qp and qp2,
-     and the initial COPY (but also here we would need un + 1 limbs).
-*/
-
-/* FIXME: We need to wrap mpn_bdiv_qr due to the itch interface.  This need
-   indicates a flaw in the current itch mechanism: Which operands not greater
-   than un,un will incur the worst itch?  We need a parallel foo_maxitch set
-   of functions.  */
-static void
-mpn_bdiv_qr_wrap (mp_ptr qp, mp_ptr rp,
-		  mp_srcptr np, mp_size_t nn,
-		  mp_srcptr dp, mp_size_t dn)
-{
-  mp_ptr scratch_out;
-  TMP_DECL;
-
-  TMP_MARK;
-  scratch_out = TMP_ALLOC_LIMBS (mpn_bdiv_qr_itch (nn, dn));
-  mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch_out);
-
-  TMP_FREE;
-}
-
-mp_bitcnt_t
-mpn_remove (mp_ptr wp, mp_size_t *wn,
-	    mp_ptr up, mp_size_t un, mp_ptr vp, mp_size_t vn,
-	    mp_bitcnt_t cap)
-{
-  mp_ptr    pwpsp[LOG];
-  mp_size_t pwpsn[LOG];
-  mp_size_t npowers;
-  mp_ptr tp, qp, np, pp, qp2;
-  mp_size_t pn, nn, qn, i;
-  mp_bitcnt_t pwr;
-  TMP_DECL;
-
-  ASSERT (un > 0);
-  ASSERT (vn > 0);
-  ASSERT (vp[0] % 2 != 0);	/* 2-adic division wants odd numbers */
-  ASSERT (vn > 1 || vp[0] > 1);	/* else we would loop indefinitely */
-
-  TMP_MARK;
-
-  tp = TMP_ALLOC_LIMBS ((un + 1 + vn) / 2); /* remainder */
-  qp = TMP_ALLOC_LIMBS (un + 1);	/* quotient, alternating */
-  qp2 = TMP_ALLOC_LIMBS (un + 1);	/* quotient, alternating */
-  pp = vp;
-  pn = vn;
-
-  MPN_COPY (qp, up, un);
-  qn = un;
-
-  npowers = 0;
-  while (qn >= pn)
-    {
-      qp[qn] = 0;
-      mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pp, pn);
-      if (!mpn_zero_p (tp, pn))
-	break;			/* could not divide by V^npowers */
-
-      MP_PTR_SWAP (qp, qp2);
-      qn = qn - pn;
-      qn += qp[qn] != 0;
-
-      pwpsp[npowers] = pp;
-      pwpsn[npowers] = pn;
-      npowers++;
-
-      if (((mp_bitcnt_t) 2 << npowers) - 1 > cap)
-	break;
-
-      nn = 2 * pn - 1;		/* next power will be at least this large */
-      if (nn > qn)
-	break;			/* next power would be overlarge */
-
-      if (npowers == 1)		/* Alloc once, but only if it's needed */
-	np = TMP_ALLOC_LIMBS (qn + LOG);	/* powers of V */
-      else
-	np += pn;
-
-      mpn_sqr (np, pp, pn);
-      pn = nn + (np[nn] != 0);
-      pp = np;
-    }
-
-  pwr = ((mp_bitcnt_t) 1 << npowers) - 1;
-
-  for (i = npowers - 1; i >= 0; i--)
-    {
-      pn = pwpsn[i];
-      if (qn < pn)
-	continue;
-
-      if (pwr + ((mp_bitcnt_t) 1 << i) > cap)
-	continue;		/* V^i would bring us past cap */
-
-      qp[qn] = 0;
-      mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pwpsp[i], pn);
-      if (!mpn_zero_p (tp, pn))
-	continue;		/* could not divide by V^i */
-
-      MP_PTR_SWAP (qp, qp2);
-      qn = qn - pn;
-      qn += qp[qn] != 0;
-
-      pwr += (mp_bitcnt_t) 1 << i;
-    }
-
-  MPN_COPY (wp, qp, qn);
-  *wn = qn;
-
-  TMP_FREE;
-
-  return pwr;
-}
diff --git a/gmp/mpn/generic/rootrem.c b/gmp/mpn/generic/rootrem.c
index 2edc74baa3..657e543ab3 100644
--- a/gmp/mpn/generic/rootrem.c
+++ b/gmp/mpn/generic/rootrem.c
@@ -8,37 +8,29 @@
    ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT'S ALMOST
    GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2002, 2005, 2009-2012 Free Software Foundation, Inc.
+Copyright 2002, 2005, 2009 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* FIXME:
-     This implementation is not optimal when remp == NULL, since the complexity
-     is M(n), whereas it should be M(n/k) on average.
+   (a) Once there is a native mpn_tdiv_q function in GMP (division without
+       remainder), replace the quick-and-dirty implementation below by it.
+   (b) The implementation below is not optimal when remp == NULL, since the
+       complexity is M(n) where n is the input size, whereas it should be
+       only M(n/k) on average.
 */
 
 #include <stdio.h>		/* for NULL */
@@ -49,6 +41,8 @@ see https://www.gnu.org/licenses/.  */
 
 static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
 				       mp_limb_t, int);
+static void mpn_tdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t,
+			mp_srcptr, mp_size_t);
 
 #define MPN_RSHIFT(cy,rp,up,un,cnt) \
   do {									\
@@ -90,15 +84,14 @@ mp_size_t
 mpn_rootrem (mp_ptr rootp, mp_ptr remp,
 	     mp_srcptr up, mp_size_t un, mp_limb_t k)
 {
-  mp_size_t m;
   ASSERT (un > 0);
   ASSERT (up[un - 1] != 0);
   ASSERT (k > 1);
 
-  m = (un - 1) / k;		/* ceil(un/k) - 1 */
-  if (remp == NULL && m > 2)
-    /* Pad {up,un} with k zero limbs.  This will produce an approximate root
-       with one more limb, allowing us to compute the exact integral result. */
+  if ((remp == NULL) && (un / k > 2))
+    /* Call mpn_rootrem_internal on {up,un} padded with k zero limbs.  This
+       produces an approximate root with one more limb, from which the
+       integral root can be deduced in most cases. */
     {
       mp_ptr sp, wp;
       mp_size_t rn, sn, wn;
@@ -106,21 +99,21 @@ mpn_rootrem (mp_ptr rootp, mp_ptr remp,
       TMP_MARK;
       wn = un + k;
       wp = TMP_ALLOC_LIMBS (wn); /* will contain the padded input */
-      sn = m + 2; /* ceil(un/k) + 1 */
+      sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
       sp = TMP_ALLOC_LIMBS (sn); /* approximate root of padded input */
       MPN_COPY (wp + k, up, un);
       MPN_ZERO (wp, k);
       rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
-      /* The approximate root S = {sp,sn} is either the correct root of
-	 {sp,sn}, or 1 too large.  Thus unless the least significant limb of
-	 S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
-	 limb.  (In case sp[0]=1, we can deduce the root, but not decide
+      /* The approximate root S = {sp,sn} is either the correct root of
+	 {wp,wn}, or one too large.  Thus unless the least significant limb
+	 of S is 0 or 1, we can deduce the root of {up,un} is S truncated by
+	 one limb.  (In case sp[0]=1, we can deduce the root, but not decide
+	 whether it is exact or not.) */
       MPN_COPY (rootp, sp + 1, sn - 1);
       TMP_FREE;
       return rn;
     }
-  else
+  else /* remp != NULL, or un / k <= 2 */
     {
       return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
     }
@@ -131,11 +124,12 @@ static mp_size_t
 mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
 		      mp_limb_t k, int approx)
 {
-  mp_ptr qp, rp, sp, wp, scratch;
+  mp_ptr qp, rp, sp, wp;
   mp_size_t qn, rn, sn, wn, nl, bn;
   mp_limb_t save, save2, cy;
   unsigned long int unb; /* number of significant bits of {up,un} */
   unsigned long int xnb; /* number of significant bits of the result */
+  unsigned int cnt;
   unsigned long b, kk;
   unsigned long sizes[GMP_NUMB_BITS + 1];
   int ni, i;
@@ -145,19 +139,25 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
 
   TMP_MARK;
 
+  /* qp and wp need enough space to store S'^k where S' is an approximate
+     root. Since S' can be as large as S+2, the worst case is when S=2 and
+     S'=4. But then since we know the number of bits of S in advance, S'
+     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
+     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
+     fits in un limbs, the number of extra limbs needed is bounded by
+     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
+#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
+  qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
+					of R/(k*S^(k-1)), and S^k */
   if (remp == NULL)
-    {
-      rp = TMP_ALLOC_LIMBS (un + 1);     /* will contain the remainder */
-      scratch = rp;			 /* used by mpn_div_q */
-    }
+    rp = TMP_ALLOC_LIMBS (un);     /* will contain the remainder */
   else
-    {
-      scratch = TMP_ALLOC_LIMBS (un + 1); /* used by mpn_div_q */
-      rp = remp;
-    }
+    rp = remp;
   sp = rootp;
-
-  MPN_SIZEINBASE_2EXP(unb, up, un, 1);
+  wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
+					and temporary for mpn_pow_1 */
+  count_leading_zeros (cnt, up[un - 1]);
+  unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;
   /* unb is the number of bits of the input U */
 
   xnb = (unb - 1) / k + 1;	/* ceil (unb / k) */
@@ -216,19 +216,6 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
      Newton iteration will first compute sizes[ni-1] extra bits,
      then sizes[ni-2], ..., then sizes[0] = b. */
 
-  /* qp and wp need enough space to store S'^k where S' is an approximate
-     root. Since S' can be as large as S+2, the worst case is when S=2 and
-     S'=4. But then since we know the number of bits of S in advance, S'
-     can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
-     So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
-     fits in un limbs, the number of extra limbs needed is bounded by
-     ceil(k*log2(3/2)/GMP_NUMB_BITS). */
-#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
-  qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
-					of R/(k*S^(k-1)), and S^k */
-  wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
-					and temporary for mpn_pow_1 */
-
   wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
   wn = 1;
   for (i = ni; i != 0; i--)
@@ -304,8 +291,13 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
 	}
       else
 	{
+	  mp_ptr tp;
 	  qn = rn - wn; /* expected quotient size */
-	  mpn_div_q (qp, rp, rn, wp, wn, scratch);
+	  /* tp must have space for wn limbs.
+	     The quotient needs rn-wn+1 limbs, thus quotient+remainder
+	     need altogether rn+1 limbs. */
+	  tp = qp + qn + 1;	/* put remainder in Q buffer */
+	  mpn_tdiv_q (qp, tp, 0, rp, rn, wp, wn);
 	  qn += qp[qn] != 0;
 	}
 
@@ -400,7 +392,7 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
       ASSERT_ALWAYS (rn >= qn);
 
       /* R = R - Q = floor(U/2^kk) - S^k */
-      if (i > 1 || approx == 0)
+      if ((i > 1) || (approx == 0))
 	{
 	  mpn_sub (rp, rp, rn, qp, qn);
 	  MPN_NORMALIZE (rp, rn);
@@ -413,3 +405,47 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
   TMP_FREE;
   return rn;
 }
+
+/* Compute only the quotient of {np, nn} by {dp, dn} into qp; rp is scratch. */
+static void
+mpn_tdiv_q (mp_ptr qp, mp_ptr rp, mp_size_t qxn, mp_srcptr np, mp_size_t nn,
+	    mp_srcptr dp, mp_size_t dn)
+{
+  mp_size_t qn = nn - dn; /* expected quotient size is qn+1 */
+  mp_size_t cut;
+
+  ASSERT_ALWAYS (qxn == 0);
+  if (dn <= qn + 3)
+    {
+      mpn_tdiv_qr (qp, rp, 0, np, nn, dp, dn);
+    }
+  else
+    {
+      mp_ptr tp;
+      TMP_DECL;
+      TMP_MARK;
+      tp = TMP_ALLOC_LIMBS (qn + 2);
+      cut = dn - (qn + 3);
+      /* perform a first division with divisor cut to dn-cut=qn+3 limbs
+	 and dividend to nn-(cut-1) limbs, i.e. the quotient will be one
+	 limb more than the final quotient.
+	 The quotient will have qn+2 < dn-cut limbs,
+	 and the remainder dn-cut = qn+3 limbs. */
+      mpn_tdiv_qr (tp, rp, 0, np + cut - 1, nn - cut + 1, dp + cut, dn - cut);
+      /* Let Q' be the quotient of B * {np, nn} by {dp, dn} [qn+2 limbs]
+	 and T be the approximation of Q' computed above, where
+	 B = 2^GMP_NUMB_BITS.
+	 We have Q' <= T <= Q'+1, and since floor(Q'/B) = Q, we have
+	 Q = floor(T/B), unless the least significant limb of T is zero. */
+      if (tp[0] != 0)
+	{
+	  /* simply truncate one limb of T */
+	  MPN_COPY (qp, tp + 1, qn + 1);
+	}
+      else /* too bad: perform the expensive division */
+	{
+	  mpn_tdiv_qr (qp, rp, 0, np, nn, dp, dn);
+	}
+      TMP_FREE;
+    }
+}
diff --git a/gmp/mpn/generic/rshift.c b/gmp/mpn/generic/rshift.c
index ec61f2f7e2..62256656de 100644
--- a/gmp/mpn/generic/rshift.c
+++ b/gmp/mpn/generic/rshift.c
@@ -1,32 +1,22 @@
 /* mpn_rshift -- Shift right low level.
 
-Copyright 1991, 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/mpn/generic/sb_bdiv_q.c b/gmp/mpn/generic/sb_bdiv_q.c
new file mode 100644
index 0000000000..474c804d48
--- /dev/null
+++ b/gmp/mpn/generic/sb_bdiv_q.c
@@ -0,0 +1,91 @@
+/* mpn_sb_bdiv_q -- schoolbook Hensel division with precomputed inverse,
+   returning quotient only.
+
+   Contributed to the GNU project by Niels Möller.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
+   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Computes Q = N / D mod B^nn, storing the nn quotient limbs at qp.
+   Clobbers N = {np,nn}.
+
+   D must be odd. dinv is (-D)^-1 mod B.
+
+
+   The straightforward way to compute Q is to cancel one limb at a time, using
+
+     qp[i] = D^{-1} * np[i] (mod B)
+     N -= B^i * qp[i] * D
+
+   But we prefer addition to subtraction, since mpn_addmul_1 is often faster
+   than mpn_submul_1.  Q = - N / D can be computed by iterating
+
+     qp[i] = (-D)^{-1} * np[i] (mod B)
+     N += B^i * qp[i] * D
+
+   And then we flip the sign, -Q = (not Q) + 1.
+*/
+
+void
+mpn_sb_bdiv_q (mp_ptr qp,
+	       mp_ptr np, mp_size_t nn,
+	       mp_srcptr dp, mp_size_t dn,
+	       mp_limb_t dinv)
+{
+  mp_size_t i;
+  mp_limb_t qh;
+
+  ASSERT (nn > 0);
+  ASSERT (dn > 0);
+  ASSERT (nn >= dn);
+  ASSERT (dp[0] & 1);
+
+  for (i = 0; i < nn - dn; i++)
+    {
+      mp_limb_t cy;
+      mp_limb_t q;
+
+      q = dinv * np[i];
+      qp[i] = ~q;
+      cy = mpn_addmul_1 (np + i, dp, dn, q);
+      mpn_add_1 (np + i + dn, np + i + dn, nn - i - dn, cy);
+      ASSERT (np[i] == 0);
+    }
+
+  for (; i < nn - 1; i++)
+    {
+      mp_limb_t q;
+
+      q = dinv * np[i];
+      qp[i] = ~q;
+      mpn_addmul_1 (np + i, dp, nn - i, q);
+
+      ASSERT (np[i] == 0);
+    }
+
+  /* Final limb */
+  qp[nn - 1] = ~(dinv * np[nn - 1]);
+  qh = mpn_add_1 (qp, qp, nn, 1); /* FIXME: can we get carry? */
+}
diff --git a/gmp/mpn/generic/sbpi1_bdiv_qr.c b/gmp/mpn/generic/sb_bdiv_qr.c
index 0e56f58148..d1cd0dee32 100644
--- a/gmp/mpn/generic/sbpi1_bdiv_qr.c
+++ b/gmp/mpn/generic/sb_bdiv_qr.c
@@ -1,39 +1,27 @@
-/* mpn_sbpi1_bdiv_qr -- schoolbook Hensel division with precomputed inverse,
+/* mpn_sb_bdiv_qr -- schoolbook Hensel division with precomputed inverse,
    returning quotient and remainder.
 
-   Contributed to the GNU project by Niels Möller.
+   Contributed to the GNU project by Niels Möller.
 
    THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
-   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
-   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
 
-Copyright 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
+Copyright 2006 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -52,20 +40,19 @@ see https://www.gnu.org/licenses/.  */
    D must be odd. dinv is (-D)^-1 mod B. */
 
 mp_limb_t
-mpn_sbpi1_bdiv_qr (mp_ptr qp,
-		   mp_ptr np, mp_size_t nn,
-		   mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+mpn_sb_bdiv_qr (mp_ptr qp,
+		mp_ptr np, mp_size_t nn,
+		mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
 {
   mp_size_t qn;
   mp_size_t i;
   mp_limb_t rh;
   mp_limb_t ql;
 
+  ASSERT (nn > 0);
   ASSERT (dn > 0);
   ASSERT (nn > dn);
-  ASSERT ((dp[0] & 1) != 0);
-  /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
-     but some over N/Q overlaps will not work.  */
+  ASSERT (dp[0] & 1);
 
   qn = nn - dn;
 
@@ -80,8 +67,9 @@ mpn_sbpi1_bdiv_qr (mp_ptr qp,
 	  mp_limb_t q;
 
 	  q = dinv * np[i];
-	  np[i] = mpn_addmul_1 (np + i, dp, dn, q);
 	  qp[i] = ~q;
+
+	  np[i] = mpn_addmul_1 (np + i, dp, dn, q);
 	}
       rh += mpn_add (np + dn, np + dn, qn, np, dn);
       ql = mpn_add_1 (qp, qp, dn, ql);
@@ -95,8 +83,9 @@ mpn_sbpi1_bdiv_qr (mp_ptr qp,
       mp_limb_t q;
 
       q = dinv * np[i];
-      np[i] = mpn_addmul_1 (np + i, dp, dn, q);
       qp[i] = ~q;
+
+      np[i] = mpn_addmul_1 (np + i, dp, dn, q);
     }
 
   rh += mpn_add_n (np + dn, np + dn, np, qn);
diff --git a/gmp/mpn/generic/sb_div_q.c b/gmp/mpn/generic/sb_div_q.c
new file mode 100644
index 0000000000..609c4ae7f2
--- /dev/null
+++ b/gmp/mpn/generic/sb_div_q.c
@@ -0,0 +1,240 @@
+/* mpn_sb_div_q -- schoolbook division with 2-limb sloppy non-greater
+   precomputed inverse, returning an accurate quotient.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/*
+  CAVEATS:
+  1. Should it demand normalized operands like now, or normalize on-the-fly?
+  2. Overwrites {np,nn}.
+  3. Uses mpn_submul_1.  It would be nice to somehow make it use mpn_addmul_1
+     instead.  (That would open for mpn_addmul_2 straightforwardly.)
+*/
+
+mp_limb_t
+mpn_sb_div_q (mp_ptr qp,
+	      mp_ptr np, mp_size_t nn,
+	      mp_srcptr dp, mp_size_t dn,
+	      mp_srcptr dip)
+{
+  mp_limb_t q, q10, q01a, q00a, q01b, q00b;
+  mp_limb_t cy;
+  mp_size_t i;
+  mp_limb_t qh;
+  mp_limb_t di1, di0;
+  mp_size_t qn;
+
+  mp_size_t dn_orig = dn;
+  mp_srcptr dp_orig = dp;
+  mp_ptr np_orig = np;
+
+  ASSERT (dn > 0);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, dn);
+
+  np += nn;
+  qn = nn - dn;
+  if (qn + 1 < dn)
+    {
+      dp += dn - (qn + 1);
+      dn = qn + 1;
+    }
+
+  qh = mpn_cmp (np - dn, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (np - dn, np - dn, dp, dn);
+
+  qp += qn;
+  di1 = dip[1]; di0 = dip[0];
+  for (i = qn; i >= dn; i--)
+    {
+      np--;
+      umul_ppmm (q, q10, np[0], di1);
+      umul_ppmm (q01a, q00a, np[-1], di1);
+      add_ssaaaa (q, q10, q, q10, np[0], q01a);
+      umul_ppmm (q01b, q00b, np[0], di0);
+      add_ssaaaa (q, q10, q, q10, 0, q01b);
+      add_ssaaaa (q, q10, q, q10, 0, np[-1]);
+
+      cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+      if (UNLIKELY (np[0] > cy || mpn_cmp (np - dn, dp, dn) >= 0))
+	{
+	  q = q + 1;
+	  mpn_sub_n (np - dn, np - dn, dp, dn);
+	}
+
+      *--qp = q;
+    }
+
+  for (i = dn - 1; i > 0; i--)
+    {
+      np--;
+      umul_ppmm (q, q10, np[0], di1);
+      umul_ppmm (q01a, q00a, np[-1], di1);
+      add_ssaaaa (q, q10, q, q10, np[0], q01a);
+      umul_ppmm (q01b, q00b, np[0], di0);
+      add_ssaaaa (q, q10, q, q10, 0, q01b);
+      add_ssaaaa (q, q10, q, q10, 0, np[-1]);
+
+      cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+      if (UNLIKELY (np[0] > cy || mpn_cmp (np - dn, dp, dn) >= 0))
+	{
+	  q = q + 1;
+	  if (q == 0)
+	    q = GMP_NUMB_MAX;
+	  else
+	    mpn_sub_n (np - dn, np - dn, dp, dn);
+	}
+
+      *--qp = q;
+
+      /* Truncate operands.  */
+      dn--;
+      dp++;
+
+      /* The partial remainder might be equal to the truncated divisor,
+	 thus non-canonical.  When that happens, the rest of the quotient
+	 should be all ones.  */
+      if (UNLIKELY (mpn_cmp (np - dn, dp, dn) == 0))
+	{
+	  while (--i)
+	    *--qp = GMP_NUMB_MAX;
+	  break;
+	}
+    }
+
+  dn = dn_orig;
+  if (UNLIKELY (np[-1] < dn))
+    {
+      mp_limb_t q, x;
+
+      /* The quotient may be too large if the remainder is small.  Recompute
+	 for above ignored operand parts, until the remainder spills.
+
+	 FIXME: The quality of this code isn't the same as the code above.
+	 1. We don't compute things in an optimal order, high-to-low, in order
+	    to terminate as quickly as possible.
+	 2. We mess with pointers and sizes, adding and subtracting and
+	    adjusting to get things right.  It surely could be streamlined.
+	 3. The only termination criteria are that we determine that the
+	    quotient needs to be adjusted, or that we have recomputed
+	    everything.  We should stop when the remainder is so large
+	    that no additional subtracting could make it spill.
+	 4. If nothing else, we should not do two loops of submul_1 over the
+	    data, instead handle both the triangularization and chopping at
+	    once.  */
+
+      x = np[-1];
+
+      if (dn > 2)
+	{
+	  /* Compensate for triangularization.  */
+	  mp_limb_t y;
+
+	  dp = dp_orig;
+	  if (qn + 1 < dn)
+	    {
+	      dp += dn - (qn + 1);
+	      dn = qn + 1;
+	    }
+
+	  y = np[-2];
+
+	  for (i = dn - 3; i >= 0; i--)
+	    {
+	      q = qp[i];
+	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
+
+	      if (y < cy)
+		{
+		  if (x == 0)
+		    {
+		      cy = mpn_sub_1 (qp, qp, qn, 1);
+		      ASSERT_ALWAYS (cy == 0);
+		      return qh - cy;
+		    }
+		  x--;
+		}
+	      y -= cy;
+	    }
+	  np[-2] = y;
+	}
+
+      dn = dn_orig;
+      if (qn + 1 < dn)
+	{
+	  /* Compensate for ignored dividend and divisor tails.  */
+
+	  if (qn == 0)
+	    return qh;
+
+	  dp = dp_orig;
+	  np = np_orig;
+
+	  if (qh != 0)
+	    {
+	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
+	      if (cy != 0)
+		{
+		  if (x == 0)
+		    {
+		      cy = mpn_sub_1 (qp, qp, qn, 1);
+		      return qh - cy;
+		    }
+		  x--;
+		}
+	    }
+
+	  for (i = dn - qn - 2; i >= 0; i--)
+	    {
+	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
+	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
+	      if (cy != 0)
+		{
+		  if (x == 0)
+		    {
+		      cy = mpn_sub_1 (qp, qp, qn, 1);
+		      ASSERT_ALWAYS (cy == 0);
+		      return qh - cy;
+		    }
+		  x--;
+		}
+	    }
+	}
+    }
+
+  return qh;
+}
diff --git a/gmp/mpn/generic/sb_div_qr.c b/gmp/mpn/generic/sb_div_qr.c
new file mode 100644
index 0000000000..40e4442e21
--- /dev/null
+++ b/gmp/mpn/generic/sb_div_qr.c
@@ -0,0 +1,91 @@
+/* mpn_sb_div_qr -- schoolbook division with 2-limb sloppy non-greater
+   precomputed inverse, returning quotient and remainder.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/*
+  CAVEATS:
+  1. Should it demand normalized operands like now, or normalize on-the-fly?
+  2. Overwrites {np,nn} instead of writing remainder to a designated area.
+  3. Uses mpn_submul_1.  It would be nice to somehow make it use mpn_addmul_1
+     instead.  (That would open for mpn_addmul_2 straightforwardly.)
+*/
+
+mp_limb_t
+mpn_sb_div_qr (mp_ptr qp,
+	       mp_ptr np, mp_size_t nn,
+	       mp_srcptr dp, mp_size_t dn,
+	       mp_srcptr dip)
+{
+  mp_limb_t q, q10, q01a, q00a, q01b, q00b;
+  mp_limb_t cy;
+  mp_size_t i;
+  mp_limb_t qh;
+  mp_limb_t di1, di0;
+
+  ASSERT (dn > 0);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, dn);
+
+  np += nn;
+
+  qh = mpn_cmp (np - dn, dp, dn) >= 0;
+  if (qh != 0)
+    mpn_sub_n (np - dn, np - dn, dp, dn);
+
+  qp += nn - dn;
+  di1 = dip[1]; di0 = dip[0];
+  for (i = nn - dn; i > 0; i--)
+    {
+      np--;
+      umul_ppmm (q, q10, np[0], di1);
+      umul_ppmm (q01a, q00a, np[-1], di1);
+      add_ssaaaa (q, q10, q, q10, np[0], q01a);
+      umul_ppmm (q01b, q00b, np[0], di0);
+      add_ssaaaa (q, q10, q, q10, 0, q01b);
+      add_ssaaaa (q, q10, q, q10, 0, np[-1]);
+
+      cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+      if (UNLIKELY (np[0] > cy || mpn_cmp (np - dn, dp, dn) >= 0))
+	{
+	  q = q + 1;
+	  mpn_sub_n (np - dn, np - dn, dp, dn);
+	}
+
+      *--qp = q;
+    }
+
+  return qh;
+}
diff --git a/gmp/mpn/generic/sb_divappr_q.c b/gmp/mpn/generic/sb_divappr_q.c
new file mode 100644
index 0000000000..42a39be009
--- /dev/null
+++ b/gmp/mpn/generic/sb_divappr_q.c
@@ -0,0 +1,136 @@
+/* mpn_sb_divappr_q -- schoolbook division with 2-limb sloppy non-greater
+   precomputed inverse, returning approximate quotient.
+
+   Contributed to the GNU project by Torbjörn Granlund.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE.  IT IS
+   ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
+   ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+   RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/*
+  CAVEATS:
+  1. Should it demand normalized operands like now, or normalize on-the-fly?
+  2. Overwrites {np,nn}.
+  3. Uses mpn_submul_1.  It would be nice to somehow make it use mpn_addmul_1
+     instead.  (That would open for mpn_addmul_2 straightforwardly.)
+*/
+
+mp_limb_t
+mpn_sb_divappr_q (mp_ptr qp,  /* out: nn-dn quotient limbs, written high to low */
+		  mp_ptr np, mp_size_t nn,  /* dividend; overwritten in place */
+		  mp_srcptr dp, mp_size_t dn,  /* divisor; top bit of dp[dn-1] must be set */
+		  mp_srcptr dip)  /* dip[0], dip[1] are read as the 2-limb inverse */
+{
+  mp_limb_t q, q10, q01a, q00a, q01b, q00b;
+  mp_limb_t cy;
+  mp_size_t i;
+  mp_limb_t qh;  /* return value, 0 or 1 */
+  mp_limb_t di1, di0;
+  mp_size_t qn;  /* nn - dn, computed before dn is shortened */
+
+  ASSERT (dn > 0);
+  ASSERT (nn >= dn);
+  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, dn);
+
+  np += nn;  /* np now points one past the most significant limb */
+  qn = nn - dn;
+  if (qn + 1 < dn)  /* divisor longer than qn+1 limbs: use only its high qn+1 limbs */
+    {
+      dp += dn - (qn + 1);
+      dn = qn + 1;
+    }
+
+  qh = mpn_cmp (np - dn, dp, dn) >= 0;  /* 1 iff the top dn limbs are >= the divisor */
+  if (qh != 0)
+    mpn_sub_n (np - dn, np - dn, dp, dn);  /* in that case reduce them by the divisor */
+
+  qp += qn;  /* quotient limbs are stored with *--qp below */
+  di1 = dip[1]; di0 = dip[0];
+  for (i = qn; i >= dn; i--)  /* steps that use all dn divisor limbs */
+    {
+      np--;  /* np[0] is now the limb just above the dn-limb window */
+      umul_ppmm (q, q10, np[0], di1);  /* per longlong.h: (q,q10) = np[0] * di1 */
+      umul_ppmm (q01a, q00a, np[-1], di1);  /* low half q00a is never read */
+      add_ssaaaa (q, q10, q, q10, np[0], q01a);  /* (q,q10) += (np[0],q01a) */
+      umul_ppmm (q01b, q00b, np[0], di0);  /* low half q00b is never read */
+      add_ssaaaa (q, q10, q, q10, 0, q01b);  /* (q,q10) += q01b */
+      add_ssaaaa (q, q10, q, q10, 0, np[-1]);  /* (q,q10) += np[-1]; q is the estimate */
+
+      cy = mpn_submul_1 (np - dn, dp, dn, q);  /* subtract q * divisor from the window */
+      /* Window limb above cy, or window >= divisor: bump q, subtract once more.  */
+      if (UNLIKELY (np[0] > cy || mpn_cmp (np - dn, dp, dn) >= 0))
+	{
+	  q = q + 1;
+	  mpn_sub_n (np - dn, np - dn, dp, dn);
+	}
+
+      *--qp = q;
+    }
+  /* Remaining dn-1 limbs: the divisor loses its low limb after every step.  */
+  for (i = dn - 1; i > 0; i--)
+    {
+      np--;
+      umul_ppmm (q, q10, np[0], di1);  /* same estimate sequence as the loop above */
+      umul_ppmm (q01a, q00a, np[-1], di1);
+      add_ssaaaa (q, q10, q, q10, np[0], q01a);
+      umul_ppmm (q01b, q00b, np[0], di0);
+      add_ssaaaa (q, q10, q, q10, 0, q01b);
+      add_ssaaaa (q, q10, q, q10, 0, np[-1]);
+
+      cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+      if (UNLIKELY (np[0] > cy || mpn_cmp (np - dn, dp, dn) >= 0))
+	{
+	  q = q + 1;
+	  if (q == 0)  /* the increment wrapped around */
+	    q = GMP_NUMB_MAX;  /* saturate, and skip the extra subtraction */
+	  else
+	    mpn_sub_n (np - dn, np - dn, dp, dn);
+	}
+
+      *--qp = q;
+
+      /* Truncate operands.  */
+      dn--;
+      dp++;
+
+      /* The partial remainder might be equal to the truncated divisor,
+	 thus non-canonical.  When that happens, the rest of the quotient
+	 should be all ones.  */
+      if (UNLIKELY (mpn_cmp (np - dn, dp, dn) == 0))
+	{
+	  while (--i)  /* fill the i-1 quotient limbs still outstanding */
+	    *--qp = GMP_NUMB_MAX;
+	  break;
+	}
+    }
+
+  return qh;
+}
diff --git a/gmp/mpn/generic/sb_divrem_mn.c b/gmp/mpn/generic/sb_divrem_mn.c
new file mode 100644
index 0000000000..06e2f4ca0d
--- /dev/null
+++ b/gmp/mpn/generic/sb_divrem_mn.c
@@ -0,0 +1,205 @@
+/* mpn_sb_divrem_mn -- Divide natural numbers, producing both remainder and
+   quotient.
+
+   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
+   INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+   IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
+   FUTURE GNU MP RELEASE.
+
+
+Copyright 1993, 1994, 1995, 1996, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+   meaning the quotient size where that should happen, the quotient size
+   being how many udiv divisions will be done.
+
+   The default is to use preinv always, CPUs where this doesn't suit have
+   tuned thresholds.  Note in particular that preinv should certainly be
+   used if that's the only division available (USE_PREINV_ALWAYS).  */
+
+#ifndef DIV_SB_PREINV_THRESHOLD
+#define DIV_SB_PREINV_THRESHOLD  0
+#endif
+
+
+/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
+   the NSIZE-DSIZE least significant quotient limbs at QP
+   and the DSIZE long remainder at NP.
+   Return the most significant limb of the quotient, this is always 0 or 1.
+
+   Preconditions:
+   0. NSIZE >= DSIZE.
+   1. The most significant bit of the divisor must be set.
+   2. QP must either not overlap with the input operands at all, or
+      QP + DSIZE >= NP must hold true.  (This means that it's
+      possible to put the quotient in the high part of NUM, right after the
+      remainder in NUM.)
+   3. NSIZE >= DSIZE.
+   4. DSIZE > 2.  */
+
+
+mp_limb_t
+mpn_sb_divrem_mn (mp_ptr qp,  /* out: nn-dn quotient limbs, stored as qp[i] */
+		  mp_ptr np, mp_size_t nn,  /* dividend; overwritten in place */
+		  mp_srcptr dp, mp_size_t dn)  /* divisor; top bit of dp[dn-1] must be set */
+{
+  mp_limb_t most_significant_q_limb = 0;  /* return value; set to 1 below at most */
+  mp_size_t qn = nn - dn;
+  mp_size_t i;
+  mp_limb_t dx, d1, n0;
+  mp_limb_t dxinv;  /* only assigned and used when use_preinv is nonzero */
+  int use_preinv;
+
+  ASSERT (dn > 2);
+  ASSERT (nn >= dn);
+  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
+  ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, dn);
+
+  np += qn;  /* np now addresses the top dn limbs of the dividend */
+  dx = dp[dn - 1];  /* most significant divisor limb */
+  d1 = dp[dn - 2];  /* second most significant divisor limb */
+  n0 = np[dn - 1];  /* most significant dividend limb */
+  /* If the top dn dividend limbs are >= the divisor, subtract it once.  */
+  if (n0 >= dx)
+    {
+      if (n0 > dx || mpn_cmp (np, dp, dn - 1) >= 0)
+	{
+	  mpn_sub_n (np, np, dp, dn);
+	  most_significant_q_limb = 1;
+	}
+    }
+
+  use_preinv = ABOVE_THRESHOLD (qn, DIV_SB_PREINV_THRESHOLD);
+  if (use_preinv)
+    invert_limb (dxinv, dx);  /* dxinv is passed to udiv_qrnnd_preinv in the loop */
+
+  for (i = qn - 1; i >= 0; i--)  /* one quotient limb per iteration, high to low */
+    {
+      mp_limb_t q;
+      mp_limb_t nx;
+      mp_limb_t cy_limb;
+
+      nx = np[dn - 1];		/* FIXME: could get value from r1 */
+      np--;  /* slide the window down one limb; nx is now np[dn] */
+
+      if (nx == dx)
+	{
+	  /* This might over-estimate q, but it's probably not worth
+	     the extra code here to find out.  */
+	  q = GMP_NUMB_MASK;
+
+#if 1
+	  cy_limb = mpn_submul_1 (np, dp, dn, q);
+#else
+	  /* This should be faster on many machines.  NOTE(review): `cy' is not declared here.  */
+	  cy_limb = mpn_sub_n (np + 1, np + 1, dp, dn);
+	  cy = mpn_add_n (np, np, dp, dn);
+	  np[dn] += cy;
+#endif
+
+	  if (nx != cy_limb)  /* mismatch: add the divisor back once and lower q */
+	    {
+	      mpn_add_n (np, np, dp, dn);
+	      q--;
+	    }
+
+	  qp[i] = q;
+	}
+      else
+	{
+	  mp_limb_t rx, r1, r0, p1, p0;
+
+	  /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register usage
+	     when np[dn-1] is used in an asm statement like umul_ppmm in
+	     udiv_qrnnd_preinv.  The symptom is seg faults due to registers
+	     being clobbered.  gcc 2.95 i386 doesn't have the problem. */
+	  {
+	    mp_limb_t  workaround = np[dn - 1];
+	    if (CACHED_ABOVE_THRESHOLD (use_preinv, DIV_SB_PREINV_THRESHOLD))
+	      udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);  /* per longlong.h: q, r1 = (nx,workaround) / dx */
+	    else
+	      {
+		udiv_qrnnd (q, r1, nx, workaround << GMP_NAIL_BITS,
+			    dx << GMP_NAIL_BITS);
+		r1 >>= GMP_NAIL_BITS;
+	      }
+	  }
+	  umul_ppmm (p1, p0, d1, q << GMP_NAIL_BITS);  /* (p1,p0) = d1 * q, see diagram below the loop */
+	  p0 >>= GMP_NAIL_BITS;
+
+	  r0 = np[dn - 2];
+	  rx = 0;
+	  if (r1 < p1 || (r1 == p1 && r0 < p0))  /* (r1,r0) < (p1,p0): lower q by one */
+	    {
+	      p1 -= p0 < d1;
+	      p0 = (p0 - d1) & GMP_NUMB_MASK;
+	      q--;
+	      r1 = (r1 + dx) & GMP_NUMB_MASK;
+	      rx = r1 < dx;  /* 1 if the addition of dx wrapped */
+	    }
+
+	  p1 += r0 < p0;	/* cannot carry! */
+	  rx -= r1 < p1;	/* may become 11..1 if q is still too large */
+	  r1 = (r1 - p1) & GMP_NUMB_MASK;
+	  r0 = (r0 - p0) & GMP_NUMB_MASK;
+
+	  cy_limb = mpn_submul_1 (np, dp, dn - 2, q);  /* low dn-2 limbs only; the top two live in r1, r0 */
+
+	  /* Check if we've over-estimated q, and adjust as needed.  */
+	  {
+	    mp_limb_t cy1, cy2;
+	    cy1 = r0 < cy_limb;
+	    r0 = (r0 - cy_limb) & GMP_NUMB_MASK;
+	    cy2 = r1 < cy1;
+	    r1 -= cy1;
+	    np[dn - 1] = r1;  /* write the two high remainder limbs back */
+	    np[dn - 2] = r0;
+	    if (cy2 != rx)  /* final borrow does not match rx: add the divisor back, lower q */
+	      {
+		mpn_add_n (np, np, dp, dn);
+		q--;
+	      }
+	  }
+	  qp[i] = q;
+	}
+    }
+
+  /* ______ ______ ______
+    |__rx__|__r1__|__r0__|		partial remainder
+	    ______ ______
+	 - |__p1__|__p0__|		partial product to subtract
+	    ______ ______
+	 - |______|cylimb|
+
+     rx is -1, 0 or 1.  If rx=1, then q is correct (it should match
+     carry out).  If rx=-1 then q is too large.  If rx=0, then q might
+     be too large, but it is most likely correct.
+  */
+
+  return most_significant_q_limb;
+}
diff --git a/gmp/mpn/generic/sbpi1_bdiv_q.c b/gmp/mpn/generic/sbpi1_bdiv_q.c
deleted file mode 100644
index 645b1d9b6a..0000000000
--- a/gmp/mpn/generic/sbpi1_bdiv_q.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/* mpn_sbpi1_bdiv_q -- schoolbook Hensel division with precomputed inverse,
-   returning quotient only.
-
-   Contributed to the GNU project by Niels Möller.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
-   IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS
-   ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Computes Q = N / D mod B^nn, destroys N.
-
-   D must be odd. dinv is (-D)^-1 mod B.
-
-
-   The straightforward way to compute Q is to cancel one limb at a time, using
-
-     qp[i] = D^{-1} * np[i] (mod B)
-     N -= B^i * qp[i] * D
-
-   But we prefer addition to subtraction, since mpn_addmul_1 is often faster
-   than mpn_submul_1.  Q = - N / D can be computed by iterating
-
-     qp[i] = (-D)^{-1} * np[i] (mod B)
-     N += B^i * qp[i] * D
-
-   And then we flip the sign, -Q = (not Q) + 1. */
-
-void
-mpn_sbpi1_bdiv_q (mp_ptr qp,
-		  mp_ptr np, mp_size_t nn,
-		  mp_srcptr dp, mp_size_t dn,
-		  mp_limb_t dinv)
-{
-  mp_size_t i;
-  mp_limb_t cy, q;
-
-  ASSERT (dn > 0);
-  ASSERT (nn >= dn);
-  ASSERT ((dp[0] & 1) != 0);
-  /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
-     but some over N/Q overlaps will not work.  */
-
-  for (i = nn - dn; i > 0; i--)
-    {
-      q = dinv * np[0];
-      cy = mpn_addmul_1 (np, dp, dn, q);
-      mpn_add_1 (np + dn, np + dn, i, cy);
-      ASSERT (np[0] == 0);
-      qp[0] = ~q;
-      qp++;
-      np++;
-    }
-
-  for (i = dn; i > 1; i--)
-    {
-      q = dinv * np[0];
-      mpn_addmul_1 (np, dp, i, q);
-      ASSERT (np[0] == 0);
-      qp[0] = ~q;
-      qp++;
-      np++;
-    }
-
-  /* Final limb */
-  q = dinv * np[0];
-  qp[0] = ~q;
-  mpn_add_1 (qp - nn + 1, qp - nn + 1, nn, 1);
-}
diff --git a/gmp/mpn/generic/sbpi1_div_q.c b/gmp/mpn/generic/sbpi1_div_q.c
deleted file mode 100644
index 3abbd57933..0000000000
--- a/gmp/mpn/generic/sbpi1_div_q.c
+++ /dev/null
@@ -1,303 +0,0 @@
-/* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
-   division algorithm.
-
-   Contributed to the GNU project by Torbjorn Granlund.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2007, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-mp_limb_t
-mpn_sbpi1_div_q (mp_ptr qp,
-		 mp_ptr np, mp_size_t nn,
-		 mp_srcptr dp, mp_size_t dn,
-		 mp_limb_t dinv)
-{
-  mp_limb_t qh;
-  mp_size_t qn, i;
-  mp_limb_t n1, n0;
-  mp_limb_t d1, d0;
-  mp_limb_t cy, cy1;
-  mp_limb_t q;
-  mp_limb_t flag;
-
-  mp_size_t dn_orig = dn;
-  mp_srcptr dp_orig = dp;
-  mp_ptr np_orig = np;
-
-  ASSERT (dn > 2);
-  ASSERT (nn >= dn);
-  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
-
-  np += nn;
-
-  qn = nn - dn;
-  if (qn + 1 < dn)
-    {
-      dp += dn - (qn + 1);
-      dn = qn + 1;
-    }
-
-  qh = mpn_cmp (np - dn, dp, dn) >= 0;
-  if (qh != 0)
-    mpn_sub_n (np - dn, np - dn, dp, dn);
-
-  qp += qn;
-
-  dn -= 2;			/* offset dn by 2 for main division loops,
-				   saving two iterations in mpn_submul_1.  */
-  d1 = dp[dn + 1];
-  d0 = dp[dn + 0];
-
-  np -= 2;
-
-  n1 = np[1];
-
-  for (i = qn - (dn + 2); i >= 0; i--)
-    {
-      np--;
-      if (UNLIKELY (n1 == d1) && np[1] == d0)
-	{
-	  q = GMP_NUMB_MASK;
-	  mpn_submul_1 (np - dn, dp, dn + 2, q);
-	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
-	}
-      else
-	{
-	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
-	  cy = mpn_submul_1 (np - dn, dp, dn, q);
-
-	  cy1 = n0 < cy;
-	  n0 = (n0 - cy) & GMP_NUMB_MASK;
-	  cy = n1 < cy1;
-	  n1 -= cy1;
-	  np[0] = n0;
-
-	  if (UNLIKELY (cy != 0))
-	    {
-	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
-	      q--;
-	    }
-	}
-
-      *--qp = q;
-    }
-
-  flag = ~CNST_LIMB(0);
-
-  if (dn >= 0)
-    {
-      for (i = dn; i > 0; i--)
-	{
-	  np--;
-	  if (UNLIKELY (n1 >= (d1 & flag)))
-	    {
-	      q = GMP_NUMB_MASK;
-	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
-
-	      if (UNLIKELY (n1 != cy))
-		{
-		  if (n1 < (cy & flag))
-		    {
-		      q--;
-		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
-		    }
-		  else
-		    flag = 0;
-		}
-	      n1 = np[1];
-	    }
-	  else
-	    {
-	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
-	      cy = mpn_submul_1 (np - dn, dp, dn, q);
-
-	      cy1 = n0 < cy;
-	      n0 = (n0 - cy) & GMP_NUMB_MASK;
-	      cy = n1 < cy1;
-	      n1 -= cy1;
-	      np[0] = n0;
-
-	      if (UNLIKELY (cy != 0))
-		{
-		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
-		  q--;
-		}
-	    }
-
-	  *--qp = q;
-
-	  /* Truncate operands.  */
-	  dn--;
-	  dp++;
-	}
-
-      np--;
-      if (UNLIKELY (n1 >= (d1 & flag)))
-	{
-	  q = GMP_NUMB_MASK;
-	  cy = mpn_submul_1 (np, dp, 2, q);
-
-	  if (UNLIKELY (n1 != cy))
-	    {
-	      if (n1 < (cy & flag))
-		{
-		  q--;
-		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
-		}
-	      else
-		flag = 0;
-	    }
-	  n1 = np[1];
-	}
-      else
-	{
-	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
-	  np[0] = n0;
-	  np[1] = n1;
-	}
-
-      *--qp = q;
-    }
-  ASSERT_ALWAYS (np[1] == n1);
-  np += 2;
-
-
-  dn = dn_orig;
-  if (UNLIKELY (n1 < (dn & flag)))
-    {
-      mp_limb_t q, x;
-
-      /* The quotient may be too large if the remainder is small.  Recompute
-	 for above ignored operand parts, until the remainder spills.
-
-	 FIXME: The quality of this code isn't the same as the code above.
-	 1. We don't compute things in an optimal order, high-to-low, in order
-	    to terminate as quickly as possible.
-	 2. We mess with pointers and sizes, adding and subtracting and
-	    adjusting to get things right.  It surely could be streamlined.
-	 3. The only termination criteria are that we determine that the
-	    quotient needs to be adjusted, or that we have recomputed
-	    everything.  We should stop when the remainder is so large
-	    that no additional subtracting could make it spill.
-	 4. If nothing else, we should not do two loops of submul_1 over the
-	    data, instead handle both the triangularization and chopping at
-	    once.  */
-
-      x = n1;
-
-      if (dn > 2)
-	{
-	  /* Compensate for triangularization.  */
-	  mp_limb_t y;
-
-	  dp = dp_orig;
-	  if (qn + 1 < dn)
-	    {
-	      dp += dn - (qn + 1);
-	      dn = qn + 1;
-	    }
-
-	  y = np[-2];
-
-	  for (i = dn - 3; i >= 0; i--)
-	    {
-	      q = qp[i];
-	      cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
-
-	      if (y < cy)
-		{
-		  if (x == 0)
-		    {
-		      cy = mpn_sub_1 (qp, qp, qn, 1);
-		      ASSERT_ALWAYS (cy == 0);
-		      return qh - cy;
-		    }
-		  x--;
-		}
-	      y -= cy;
-	    }
-	  np[-2] = y;
-	}
-
-      dn = dn_orig;
-      if (qn + 1 < dn)
-	{
-	  /* Compensate for ignored dividend and divisor tails.  */
-
-	  dp = dp_orig;
-	  np = np_orig;
-
-	  if (qh != 0)
-	    {
-	      cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
-	      if (cy != 0)
-		{
-		  if (x == 0)
-		    {
-		      if (qn != 0)
-			cy = mpn_sub_1 (qp, qp, qn, 1);
-		      return qh - cy;
-		    }
-		  x--;
-		}
-	    }
-
-	  if (qn == 0)
-	    return qh;
-
-	  for (i = dn - qn - 2; i >= 0; i--)
-	    {
-	      cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
-	      cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
-	      if (cy != 0)
-		{
-		  if (x == 0)
-		    {
-		      cy = mpn_sub_1 (qp, qp, qn, 1);
-		      return qh;
-		    }
-		  x--;
-		}
-	    }
-	}
-    }
-
-  return qh;
-}
diff --git a/gmp/mpn/generic/sbpi1_div_qr.c b/gmp/mpn/generic/sbpi1_div_qr.c
deleted file mode 100644
index 0c3e4cb729..0000000000
--- a/gmp/mpn/generic/sbpi1_div_qr.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/* mpn_sbpi1_div_qr -- Schoolbook division using the Möller-Granlund 3/2
-   division algorithm.
-
-   Contributed to the GNU project by Torbjorn Granlund.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2007, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-mp_limb_t
-mpn_sbpi1_div_qr (mp_ptr qp,
-		  mp_ptr np, mp_size_t nn,
-		  mp_srcptr dp, mp_size_t dn,
-		  mp_limb_t dinv)
-{
-  mp_limb_t qh;
-  mp_size_t i;
-  mp_limb_t n1, n0;
-  mp_limb_t d1, d0;
-  mp_limb_t cy, cy1;
-  mp_limb_t q;
-
-  ASSERT (dn > 2);
-  ASSERT (nn >= dn);
-  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
-
-  np += nn;
-
-  qh = mpn_cmp (np - dn, dp, dn) >= 0;
-  if (qh != 0)
-    mpn_sub_n (np - dn, np - dn, dp, dn);
-
-  qp += nn - dn;
-
-  dn -= 2;			/* offset dn by 2 for main division loops,
-				   saving two iterations in mpn_submul_1.  */
-  d1 = dp[dn + 1];
-  d0 = dp[dn + 0];
-
-  np -= 2;
-
-  n1 = np[1];
-
-  for (i = nn - (dn + 2); i > 0; i--)
-    {
-      np--;
-      if (UNLIKELY (n1 == d1) && np[1] == d0)
-	{
-	  q = GMP_NUMB_MASK;
-	  mpn_submul_1 (np - dn, dp, dn + 2, q);
-	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
-	}
-      else
-	{
-	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
-	  cy = mpn_submul_1 (np - dn, dp, dn, q);
-
-	  cy1 = n0 < cy;
-	  n0 = (n0 - cy) & GMP_NUMB_MASK;
-	  cy = n1 < cy1;
-	  n1 = (n1 - cy1) & GMP_NUMB_MASK;
-	  np[0] = n0;
-
-	  if (UNLIKELY (cy != 0))
-	    {
-	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
-	      q--;
-	    }
-	}
-
-      *--qp = q;
-    }
-  np[1] = n1;
-
-  return qh;
-}
diff --git a/gmp/mpn/generic/sbpi1_divappr_q.c b/gmp/mpn/generic/sbpi1_divappr_q.c
deleted file mode 100644
index 3e7cf91ba6..0000000000
--- a/gmp/mpn/generic/sbpi1_divappr_q.c
+++ /dev/null
@@ -1,199 +0,0 @@
-/* mpn_sbpi1_divappr_q -- Schoolbook division using the Möller-Granlund 3/2
-   division algorithm, returning approximate quotient.  The quotient returned
-   is either correct, or one too large.
-
-   Contributed to the GNU project by Torbjorn Granlund.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2007, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-mp_limb_t
-mpn_sbpi1_divappr_q (mp_ptr qp,
-		     mp_ptr np, mp_size_t nn,
-		     mp_srcptr dp, mp_size_t dn,
-		     mp_limb_t dinv)
-{
-  mp_limb_t qh;
-  mp_size_t qn, i;
-  mp_limb_t n1, n0;
-  mp_limb_t d1, d0;
-  mp_limb_t cy, cy1;
-  mp_limb_t q;
-  mp_limb_t flag;
-
-  ASSERT (dn > 2);
-  ASSERT (nn >= dn);
-  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
-
-  np += nn;
-
-  qn = nn - dn;
-  if (qn + 1 < dn)
-    {
-      dp += dn - (qn + 1);
-      dn = qn + 1;
-    }
-
-  qh = mpn_cmp (np - dn, dp, dn) >= 0;
-  if (qh != 0)
-    mpn_sub_n (np - dn, np - dn, dp, dn);
-
-  qp += qn;
-
-  dn -= 2;			/* offset dn by 2 for main division loops,
-				   saving two iterations in mpn_submul_1.  */
-  d1 = dp[dn + 1];
-  d0 = dp[dn + 0];
-
-  np -= 2;
-
-  n1 = np[1];
-
-  for (i = qn - (dn + 2); i >= 0; i--)
-    {
-      np--;
-      if (UNLIKELY (n1 == d1) && np[1] == d0)
-	{
-	  q = GMP_NUMB_MASK;
-	  mpn_submul_1 (np - dn, dp, dn + 2, q);
-	  n1 = np[1];		/* update n1, last loop's value will now be invalid */
-	}
-      else
-	{
-	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
-	  cy = mpn_submul_1 (np - dn, dp, dn, q);
-
-	  cy1 = n0 < cy;
-	  n0 = (n0 - cy) & GMP_NUMB_MASK;
-	  cy = n1 < cy1;
-	  n1 -= cy1;
-	  np[0] = n0;
-
-	  if (UNLIKELY (cy != 0))
-	    {
-	      n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
-	      q--;
-	    }
-	}
-
-      *--qp = q;
-    }
-
-  flag = ~CNST_LIMB(0);
-
-  if (dn >= 0)
-    {
-      for (i = dn; i > 0; i--)
-	{
-	  np--;
-	  if (UNLIKELY (n1 >= (d1 & flag)))
-	    {
-	      q = GMP_NUMB_MASK;
-	      cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
-
-	      if (UNLIKELY (n1 != cy))
-		{
-		  if (n1 < (cy & flag))
-		    {
-		      q--;
-		      mpn_add_n (np - dn, np - dn, dp, dn + 2);
-		    }
-		  else
-		    flag = 0;
-		}
-	      n1 = np[1];
-	    }
-	  else
-	    {
-	      udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
-	      cy = mpn_submul_1 (np - dn, dp, dn, q);
-
-	      cy1 = n0 < cy;
-	      n0 = (n0 - cy) & GMP_NUMB_MASK;
-	      cy = n1 < cy1;
-	      n1 -= cy1;
-	      np[0] = n0;
-
-	      if (UNLIKELY (cy != 0))
-		{
-		  n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
-		  q--;
-		}
-	    }
-
-	  *--qp = q;
-
-	  /* Truncate operands.  */
-	  dn--;
-	  dp++;
-	}
-
-      np--;
-      if (UNLIKELY (n1 >= (d1 & flag)))
-	{
-	  q = GMP_NUMB_MASK;
-	  cy = mpn_submul_1 (np, dp, 2, q);
-
-	  if (UNLIKELY (n1 != cy))
-	    {
-	      if (n1 < (cy & flag))
-		{
-		  q--;
-		  add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
-		}
-	      else
-		flag = 0;
-	    }
-	  n1 = np[1];
-	}
-      else
-	{
-	  udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
-	  np[1] = n1;
-	  np[0] = n0;
-	}
-
-      *--qp = q;
-    }
-
-  ASSERT_ALWAYS (np[1] == n1);
-
-  return qh;
-}
diff --git a/gmp/mpn/generic/scan0.c b/gmp/mpn/generic/scan0.c
index 8171fd5afe..2e9f3a43da 100644
--- a/gmp/mpn/generic/scan0.c
+++ b/gmp/mpn/generic/scan0.c
@@ -5,28 +5,17 @@ Copyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -36,8 +25,9 @@ see https://www.gnu.org/licenses/.  */
    1. U must sooner or later have a limb with a clear bit.
  */
 
-mp_bitcnt_t
-mpn_scan0 (mp_srcptr up, mp_bitcnt_t starting_bit)
+unsigned long int
+mpn_scan0 (register mp_srcptr up,
+	   register unsigned long int starting_bit)
 {
   mp_size_t starting_word;
   mp_limb_t alimb;
diff --git a/gmp/mpn/generic/scan1.c b/gmp/mpn/generic/scan1.c
index e22ad5d827..d0d9a3feea 100644
--- a/gmp/mpn/generic/scan1.c
+++ b/gmp/mpn/generic/scan1.c
@@ -5,28 +5,17 @@ Copyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -36,8 +25,9 @@ see https://www.gnu.org/licenses/.  */
    1. U must sooner or later have a limb != 0.
  */
 
-mp_bitcnt_t
-mpn_scan1 (mp_srcptr up, mp_bitcnt_t starting_bit)
+unsigned long int
+mpn_scan1 (register mp_srcptr up,
+	   register unsigned long int starting_bit)
 {
   mp_size_t starting_word;
   mp_limb_t alimb;
diff --git a/gmp/mpn/generic/sec_aors_1.c b/gmp/mpn/generic/sec_aors_1.c
deleted file mode 100644
index d789a5792e..0000000000
--- a/gmp/mpn/generic/sec_aors_1.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/* mpn_sec_add_1, mpn_sec_sub_1
-
-   Contributed to the GNU project by Niels Möller
-
-Copyright 2013, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#if OPERATION_sec_add_1
-#define FNAME mpn_sec_add_1
-#define FNAME_itch mpn_sec_add_1_itch
-#define OP_N mpn_add_n
-#endif
-#if OPERATION_sec_sub_1
-#define FNAME mpn_sec_sub_1
-#define FNAME_itch mpn_sec_sub_1_itch
-#define OP_N mpn_sub_n
-#endif
-
-/* It's annoying to that we need scratch space */
-mp_size_t
-FNAME_itch (mp_size_t n)
-{
-  return n;
-}
-
-mp_limb_t
-FNAME (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_ptr scratch)
-{
-  scratch[0] = b;
-  MPN_ZERO (scratch + 1, n-1);
-  return OP_N (rp, ap, scratch, n);
-}
diff --git a/gmp/mpn/generic/sec_div.c b/gmp/mpn/generic/sec_div.c
deleted file mode 100644
index 483b118d0d..0000000000
--- a/gmp/mpn/generic/sec_div.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/* mpn_sec_div_qr, mpn_sec_div_r -- Compute Q = floor(U / V), U = U mod V.
-   Side-channel silent under the assumption that the used instructions are
-   side-channel silent.
-
-   Contributed to the GNU project by Torbjörn Granlund.
-
-Copyright 2011-2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#if OPERATION_sec_div_qr
-#define FNAME mpn_sec_div_qr
-#define FNAME_itch mpn_sec_div_qr_itch
-#define Q(q) q,
-#define RETTYPE mp_limb_t
-#endif
-#if OPERATION_sec_div_r
-#define FNAME mpn_sec_div_r
-#define FNAME_itch mpn_sec_div_r_itch
-#define Q(q)
-#define RETTYPE void
-#endif
-
-mp_size_t
-FNAME_itch (mp_size_t nn, mp_size_t dn)
-{
-#if OPERATION_sec_div_qr
-/* Needs (nn + dn + 1) + mpn_sec_pi1_div_qr's needs of (2nn' - dn + 1) for a
-   total of 3nn + 4 limbs at tp.  Note that mpn_sec_pi1_div_qr's nn is one
-   greater than ours, therefore +4 and not just +2.  */
-  return 3 * nn + 4;
-#endif
-#if OPERATION_sec_div_r
-/* Needs (nn + dn + 1) + mpn_sec_pi1_div_r's needs of (dn + 1) for a total of
-   nn + 2dn + 2 limbs at tp.  */
-  return nn + 2 * dn + 2;
-#endif
-}
-
-RETTYPE
-FNAME (Q(mp_ptr qp)
-       mp_ptr np, mp_size_t nn,
-       mp_srcptr dp, mp_size_t dn,
-       mp_ptr tp)
-{
-  mp_limb_t d1, d0;
-  unsigned int cnt;
-  gmp_pi1_t dinv;
-  mp_limb_t inv32;
-
-  ASSERT (dn >= 1);
-  ASSERT (nn >= dn);
-  ASSERT (dp[dn - 1] != 0);
-
-  d1 = dp[dn - 1];
-  count_leading_zeros (cnt, d1);
-
-  if (cnt != 0)
-    {
-      mp_limb_t qh, cy;
-      mp_ptr np2, dp2;
-      dp2 = tp;					/* dn limbs */
-      mpn_lshift (dp2, dp, dn, cnt);
-
-      np2 = tp + dn;				/* (nn + 1) limbs */
-      cy = mpn_lshift (np2, np, nn, cnt);
-      np2[nn++] = cy;
-
-      d0 = dp2[dn - 1];
-      d0 += (~d0 != 0);
-      invert_limb (inv32, d0);
-
-      /* We add nn + dn to tp here, not nn + 1 + dn, as expected.  This is
-	 since nn here will have been incremented.  */
-#if OPERATION_sec_div_qr
-      qh = mpn_sec_pi1_div_qr (np2 + dn, np2, nn, dp2, dn, inv32, tp + nn + dn);
-      ASSERT (qh == 0);		/* FIXME: this indicates inefficiency! */
-      MPN_COPY (qp, np2 + dn, nn - dn - 1);
-      qh = np2[nn - 1];
-#else
-      mpn_sec_pi1_div_r (np2, nn, dp2, dn, inv32, tp + nn + dn);
-#endif
-
-      mpn_rshift (np, np2, dn, cnt);
-
-#if OPERATION_sec_div_qr
-      return qh;
-#endif
-    }
-  else
-    {
-      /* FIXME: Consider copying np => np2 here, adding a 0-limb at the top.
-	 That would simplify the underlying pi1 function, since then it could
-	 assume nn > dn.  */
-      d0 = dp[dn - 1];
-      d0 += (~d0 != 0);
-      invert_limb (inv32, d0);
-
-#if OPERATION_sec_div_qr
-      return mpn_sec_pi1_div_qr (qp, np, nn, dp, dn, inv32, tp);
-#else
-      mpn_sec_pi1_div_r (np, nn, dp, dn, inv32, tp);
-#endif
-    }
-}
diff --git a/gmp/mpn/generic/sec_invert.c b/gmp/mpn/generic/sec_invert.c
deleted file mode 100644
index 43a578b2a1..0000000000
--- a/gmp/mpn/generic/sec_invert.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/* mpn_sec_invert
-
-   Contributed to the GNU project by Niels Möller
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#if 0
-/* Currently unused. Should be resurrected once mpn_cnd_neg is
-   advertised. */
-static mp_size_t
-mpn_cnd_neg_itch (mp_size_t n)
-{
-  return n;
-}
-#endif
-
-/* FIXME: Ought to return carry */
-static void
-mpn_cnd_neg (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n,
-	     mp_ptr scratch)
-{
-  mpn_lshift (scratch, ap, n, 1);
-  mpn_cnd_sub_n (cnd, rp, ap, scratch, n);
-}
-
-static void
-mpn_cnd_swap (int cnd, volatile mp_limb_t *ap, volatile mp_limb_t *bp,
-	      mp_size_t n)
-{
-  volatile mp_limb_t mask = - (mp_limb_t) (cnd != 0);
-  mp_size_t i;
-  for (i = 0; i < n; i++)
-    {
-      mp_limb_t a, b, t;
-      a = ap[i];
-      b = bp[i];
-      t = (a ^ b) & mask;
-      ap[i] = a ^ t;
-      bp[i] = b ^ t;
-    }
-}
-
-static int
-mpn_sec_eq_ui (mp_srcptr ap, mp_size_t n, mp_limb_t b)
-{
-  mp_limb_t d;
-  ASSERT (n > 0);
-
-  d = ap[0] ^ b;
-
-  while (--n > 0)
-    d |= ap[n];
-
-  return d == 0;
-}
-
-mp_size_t
-mpn_sec_invert_itch (mp_size_t n)
-{
-  return 4*n;
-}
-
-/* Compute V <-- A^{-1} (mod M), in data-independent time. M must be
-   odd. Returns 1 on success, and 0 on failure (i.e., if gcd (A, m) !=
-   1). Inputs and outputs of size n, and no overlap allowed. The {ap,
-   n} area is destroyed. For arbitrary inputs, bit_size should be
-   2*n*GMP_NUMB_BITS, but if A or M are known to be smaller, e.g., if
-   M = 2^521 - 1 and A < M, bit_size can be any bound on the sum of
-   the bit sizes of A and M. */
-int
-mpn_sec_invert (mp_ptr vp, mp_ptr ap, mp_srcptr mp,
-		mp_size_t n, mp_bitcnt_t bit_size,
-		mp_ptr scratch)
-{
-  ASSERT (n > 0);
-  ASSERT (bit_size > 0);
-  ASSERT (mp[0] & 1);
-  ASSERT (! MPN_OVERLAP_P (ap, n, vp, n));
-#define bp (scratch + n)
-#define up (scratch + 2*n)
-#define m1hp (scratch + 3*n)
-
-  /* Maintain
-
-       a = u * orig_a (mod m)
-       b = v * orig_a (mod m)
-
-     and b odd at all times. Initially,
-
-       a = a_orig, u = 1
-       b = m,      v = 0
-     */
-
-
-  up[0] = 1;
-  mpn_zero (up+1, n - 1);
-  mpn_copyi (bp, mp, n);
-  mpn_zero (vp, n);
-
-  ASSERT_CARRY (mpn_rshift (m1hp, mp, n, 1));
-  ASSERT_NOCARRY (mpn_sec_add_1 (m1hp, m1hp, n, 1, scratch));
-
-  while (bit_size-- > 0)
-    {
-      mp_limb_t odd, swap, cy;
-
-      /* Always maintain b odd. The logic of the iteration is as
-	 follows. For a, b:
-
-	   odd = a & 1
-	   a -= odd * b
-	   if (underflow from a-b)
-	     {
-	       b += a, assigns old a
-	       a = B^n-a
-	     }
-
-	   a /= 2
-
-	 For u, v:
-
-	   if (underflow from a - b)
-	     swap u, v
-	   u -= odd * v
-	   if (underflow from u - v)
-	     u += m
-
-	   u /= 2
-	   if (a one bit was shifted out)
-	     u += (m+1)/2
-
-	 As long as a > 0, the quantity
-
-	   (bitsize of a) + (bitsize of b)
-
-	 is reduced by at least one bit per iteration, hence after (bit_size of
-	 orig_a) + (bit_size of m) - 1 iterations we surely have a = 0. Then b
-	 = gcd(orig_a, m) and if b = 1 then also v = orig_a^{-1} (mod m).
-      */
-
-      ASSERT (bp[0] & 1);
-      odd = ap[0] & 1;
-
-      swap = mpn_cnd_sub_n (odd, ap, ap, bp, n);
-      mpn_cnd_add_n (swap, bp, bp, ap, n);
-      mpn_cnd_neg (swap, ap, ap, n, scratch);
-
-      mpn_cnd_swap (swap, up, vp, n);
-      cy = mpn_cnd_sub_n (odd, up, up, vp, n);
-      cy -= mpn_cnd_add_n (cy, up, up, mp, n);
-      ASSERT (cy == 0);
-
-      cy = mpn_rshift (ap, ap, n, 1);
-      ASSERT (cy == 0);
-      cy = mpn_rshift (up, up, n, 1);
-      cy = mpn_cnd_add_n (cy, up, up, m1hp, n);
-      ASSERT (cy == 0);
-    }
-  /* Should be all zeros, but check only extreme limbs */
-  ASSERT ( (ap[0] | ap[n-1]) == 0);
-  /* Check if indeed gcd == 1. */
-  return mpn_sec_eq_ui (bp, n, 1);
-#undef bp
-#undef up
-#undef m1hp
-}
diff --git a/gmp/mpn/generic/sec_mul.c b/gmp/mpn/generic/sec_mul.c
deleted file mode 100644
index 2cd87fab1d..0000000000
--- a/gmp/mpn/generic/sec_mul.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/* mpn_sec_mul.
-
-   Contributed to the GNU project by Torbjörn Granlund.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_sec_mul (mp_ptr rp,
-	     mp_srcptr ap, mp_size_t an,
-	     mp_srcptr bp, mp_size_t bn,
-	     mp_ptr tp)
-{
-  mpn_mul_basecase (rp, ap, an, bp, bn);
-}
-
-mp_size_t
-mpn_sec_mul_itch (mp_size_t an, mp_size_t bn)
-{
-  return 0;
-}
diff --git a/gmp/mpn/generic/sec_pi1_div.c b/gmp/mpn/generic/sec_pi1_div.c
deleted file mode 100644
index 1e075daf73..0000000000
--- a/gmp/mpn/generic/sec_pi1_div.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/* mpn_sec_pi1_div_qr, mpn_sec_pi1_div_r -- Compute Q = floor(U / V), U = U
-   mod V.  Side-channel silent under the assumption that the used instructions
-   are side-channel silent.
-
-   Contributed to the GNU project by Torbjörn Granlund.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011-2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* This side-channel silent division algorithm reduces the partial remainder by
-   GMP_NUMB_BITS/2 bits at a time, compared to GMP_NUMB_BITS for the main
-   division algorithm.  We actually do not insist on reducing by exactly
-   GMP_NUMB_BITS/2, but may leave a partial remainder that is D*B^i to 3D*B^i
-   too large (B is the limb base, D is the divisor, and i is the induction
-   variable); the subsequent step will handle the extra partial remainder bits.
-
-   With that partial remainder reduction, each step generates a quotient "half
-   limb".  The outer loop generates two quotient half limbs, an upper (q1h) and
-   a lower (q0h) which are stored sparsely in separate limb arrays.  These
-   arrays are added at the end; using separate arrays avoids data-dependent
-   carry propagation which could else pose a side-channel leakage problem.
-
-   The quotient half limbs may be between -3 to 0 from the accurate value
-   ("accurate" being the one which corresponds to a reduction to a principal
-   partial remainder).  Too small quotient half limbs correspond to too large
-   remainders, which we reduce later, as described above.
-
-   In order to keep quotients from getting too big, corresponding to a negative
-   partial remainder, we use an inverse which is slightly smaller than usually.
-*/
-
-#if OPERATION_sec_pi1_div_qr
-/* Needs (dn + 1) + (nn - dn) + (nn - dn) = 2nn - dn + 1 limbs at tp. */
-#define FNAME mpn_sec_pi1_div_qr
-#define Q(q) q,
-#define RETTYPE mp_limb_t
-#endif
-#if OPERATION_sec_pi1_div_r
-/* Needs (dn + 1) limbs at tp.  */
-#define FNAME mpn_sec_pi1_div_r
-#define Q(q)
-#define RETTYPE void
-#endif
-
-RETTYPE
-FNAME (Q(mp_ptr qp)
-       mp_ptr np, mp_size_t nn,
-       mp_srcptr dp, mp_size_t dn,
-       mp_limb_t dinv,
-       mp_ptr tp)
-{
-  mp_limb_t nh, cy, q1h, q0h, dummy, cnd;
-  mp_size_t i;
-  mp_ptr hp;
-#if OPERATION_sec_pi1_div_qr
-  mp_limb_t qh;
-  mp_ptr qlp, qhp;
-#endif
-
-  ASSERT (dn >= 1);
-  ASSERT (nn >= dn);
-  ASSERT ((dp[dn - 1] & GMP_NUMB_HIGHBIT) != 0);
-
-  if (nn == dn)
-    {
-      cy = mpn_sub_n (np, np, dp, dn);
-      mpn_cnd_add_n (cy, np, np, dp, dn);
-#if OPERATION_sec_pi1_div_qr
-      return 1 - cy;
-#else
-      return;
-#endif
-    }
-
-  /* Create a divisor copy shifted half a limb.  */
-  hp = tp;					/* (dn + 1) limbs */
-  hp[dn] = mpn_lshift (hp, dp, dn, GMP_NUMB_BITS / 2);
-
-#if OPERATION_sec_pi1_div_qr
-  qlp = tp + (dn + 1);				/* (nn - dn) limbs */
-  qhp = tp + (nn + 1);				/* (nn - dn) limbs */
-#endif
-
-  np += nn - dn;
-  nh = 0;
-
-  for (i = nn - dn - 1; i >= 0; i--)
-    {
-      np--;
-
-      nh = (nh << GMP_NUMB_BITS/2) + (np[dn] >> GMP_NUMB_BITS/2);
-      umul_ppmm (q1h, dummy, nh, dinv);
-      q1h += nh;
-#if OPERATION_sec_pi1_div_qr
-      qhp[i] = q1h;
-#endif
-      mpn_submul_1 (np, hp, dn + 1, q1h);
-
-      nh = np[dn];
-      umul_ppmm (q0h, dummy, nh, dinv);
-      q0h += nh;
-#if OPERATION_sec_pi1_div_qr
-      qlp[i] = q0h;
-#endif
-      nh -= mpn_submul_1 (np, dp, dn, q0h);
-    }
-
-  /* 1st adjustment depends on extra high remainder limb.  */
-  cnd = nh != 0;				/* FIXME: cmp-to-int */
-#if OPERATION_sec_pi1_div_qr
-  qlp[0] += cnd;
-#endif
-  nh -= mpn_cnd_sub_n (cnd, np, np, dp, dn);
-
-  /* 2nd adjustment depends on remainder/divisor comparison as well as whether
-     extra remainder limb was nullified by previous subtract.  */
-  cy = mpn_sub_n (np, np, dp, dn);
-  cy = cy - nh;
-#if OPERATION_sec_pi1_div_qr
-  qlp[0] += 1 - cy;
-#endif
-  mpn_cnd_add_n (cy, np, np, dp, dn);
-
-  /* 3rd adjustment depends on remainder/divisor comparison.  */
-  cy = mpn_sub_n (np, np, dp, dn);
-#if OPERATION_sec_pi1_div_qr
-  qlp[0] += 1 - cy;
-#endif
-  mpn_cnd_add_n (cy, np, np, dp, dn);
-
-#if OPERATION_sec_pi1_div_qr
-  /* Combine quotient halves into final quotient.  */
-  qh = mpn_lshift (qhp, qhp, nn - dn, GMP_NUMB_BITS/2);
-  qh += mpn_add_n (qp, qhp, qlp, nn - dn);
-
-  return qh;
-#else
-  return;
-#endif
-}
diff --git a/gmp/mpn/generic/sec_powm.c b/gmp/mpn/generic/sec_powm.c
deleted file mode 100644
index 67de44e10a..0000000000
--- a/gmp/mpn/generic/sec_powm.c
+++ /dev/null
@@ -1,438 +0,0 @@
-/* mpn_sec_powm -- Compute R = U^E mod M.  Secure variant, side-channel silent
-   under the assumption that the multiply instruction is side channel silent.
-
-   Contributed to the GNU project by Torbjörn Granlund.
-
-Copyright 2007-2009, 2011, 2012, 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-/*
-  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
-
-  1. T <- (B^n * U) mod M                Convert to REDC form
-
-  2. Compute table U^0, U^1, U^2... of E-dependent size
-
-  3. While there are more bits in E
-       W <- power left-to-right base-k
-
-
-  TODO:
-
-   * Make getbits a macro, thereby allowing it to update the index operand.
-     That will simplify the code using getbits.  (Perhaps make getbits' sibling
-     getbit then have similar form, for symmetry.)
-
-   * Choose window size without looping.  (Superoptimize or think(tm).)
-
-   * REDC_1_TO_REDC_2_THRESHOLD might actually represent the cutoff between
-     redc_1 and redc_n.  On such systems, we will switch to redc_2 causing
-     slowdown.
-*/
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#undef MPN_REDC_1_SEC
-#define MPN_REDC_1_SEC(rp, up, mp, n, invm)				\
-  do {									\
-    mp_limb_t cy;							\
-    cy = mpn_redc_1 (rp, up, mp, n, invm);				\
-    mpn_cnd_sub_n (cy, rp, rp, mp, n);					\
-  } while (0)
-
-#undef MPN_REDC_2_SEC
-#define MPN_REDC_2_SEC(rp, up, mp, n, mip)				\
-  do {									\
-    mp_limb_t cy;							\
-    cy = mpn_redc_2 (rp, up, mp, n, mip);				\
-    mpn_cnd_sub_n (cy, rp, rp, mp, n);					\
-  } while (0)
-
-#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
-#define WANT_REDC_2 1
-#endif
-
-/* Define our own mpn squaring function.  We do this since we cannot use a
-   native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
-   SQR_TOOM2_THRESHOLD.  This is so because of fixed size stack allocations
-   made inside mpn_sqr_basecase.  */
-
-#if HAVE_NATIVE_mpn_sqr_diagonal
-#define MPN_SQR_DIAGONAL(rp, up, n)					\
-  mpn_sqr_diagonal (rp, up, n)
-#else
-#define MPN_SQR_DIAGONAL(rp, up, n)					\
-  do {									\
-    mp_size_t _i;							\
-    for (_i = 0; _i < (n); _i++)					\
-      {									\
-	mp_limb_t ul, lpl;						\
-	ul = (up)[_i];							\
-	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
-	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
-      }									\
-  } while (0)
-#endif
-
-
-#if ! HAVE_NATIVE_mpn_sqr_basecase
-/* The limit of the generic code is SQR_TOOM2_THRESHOLD.  */
-#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
-#endif
-
-#if HAVE_NATIVE_mpn_sqr_basecase
-#ifdef TUNE_SQR_TOOM2_MAX
-/* We slightly abuse TUNE_SQR_TOOM2_MAX here.  If it is set for an assembly
-   mpn_sqr_basecase, it comes from SQR_TOOM2_THRESHOLD_MAX in the assembly
-   file.  An assembly mpn_sqr_basecase that does not define it, should allow
-   any size.  */
-#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
-#endif
-#endif
-
-#ifdef WANT_FAT_BINARY
-/* For fat builds, we use SQR_TOOM2_THRESHOLD which will expand to a read from
-   __gmpn_cpuvec.  Perhaps any possible sqr_basecase.asm allow any size, and we
-   limit the use unnecessarily.  We cannot tell, so play it safe.  FIXME.  */
-#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
-#endif
-
-#ifndef SQR_BASECASE_LIM
-/* If SQR_BASECASE_LIM is now not defined, use mpn_sqr_basecase for any operand
-   size.  */
-#define mpn_local_sqr(rp,up,n,tp) mpn_sqr_basecase(rp,up,n)
-#else
-/* Define our own squaring function, which uses mpn_sqr_basecase for its
-   allowed sizes, but its own code for larger sizes.  */
-static void
-mpn_local_sqr (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr tp)
-{
-  mp_size_t i;
-
-  ASSERT (n >= 1);
-  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
-
-  if (BELOW_THRESHOLD (n, SQR_BASECASE_LIM))
-    {
-      mpn_sqr_basecase (rp, up, n);
-      return;
-    }
-
-  {
-    mp_limb_t ul, lpl;
-    ul = up[0];
-    umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
-    rp[0] = lpl >> GMP_NAIL_BITS;
-  }
-  if (n > 1)
-    {
-      mp_limb_t cy;
-
-      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
-      tp[n - 1] = cy;
-      for (i = 2; i < n; i++)
-	{
-	  mp_limb_t cy;
-	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
-	  tp[n + i - 2] = cy;
-	}
-      MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);
-
-      {
-	mp_limb_t cy;
-#if HAVE_NATIVE_mpn_addlsh1_n
-	cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#else
-	cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
-	cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#endif
-	rp[2 * n - 1] += cy;
-      }
-    }
-}
-#endif
-
-#define getbit(p,bi) \
-  ((p[(bi - 1) / GMP_NUMB_BITS] >> (bi - 1) % GMP_NUMB_BITS) & 1)
-
-/* FIXME: Maybe some things would get simpler if all callers ensure
-   that bi >= nbits. As far as I understand, with the current code bi
-   < nbits can happen only for the final iteration. */
-static inline mp_limb_t
-getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
-{
-  int nbits_in_r;
-  mp_limb_t r;
-  mp_size_t i;
-
-  if (bi < nbits)
-    {
-      return p[0] & (((mp_limb_t) 1 << bi) - 1);
-    }
-  else
-    {
-      bi -= nbits;			/* bit index of low bit to extract */
-      i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
-      bi %= GMP_NUMB_BITS;		/* bit index in low word */
-      r = p[i] >> bi;			/* extract (low) bits */
-      nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
-      if (nbits_in_r < nbits)		/* did we get enough bits? */
-	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
-      return r & (((mp_limb_t ) 1 << nbits) - 1);
-    }
-}
-
-#ifndef POWM_SEC_TABLE
-#if GMP_NUMB_BITS < 50
-#define POWM_SEC_TABLE  2,33,96,780,2741
-#else
-#define POWM_SEC_TABLE  2,130,524,2578
-#endif
-#endif
-
-#if TUNE_PROGRAM_BUILD
-extern int win_size (mp_bitcnt_t);
-#else
-static inline int
-win_size (mp_bitcnt_t enb)
-{
-  int k;
-  /* Find k, such that x[k-1] < enb <= x[k].
-
-     We require that x[k] >= k, then it follows that enb > x[k-1] >=
-     k-1, which implies k <= enb.
-  */
-  static const mp_bitcnt_t x[] = {0,POWM_SEC_TABLE,~(mp_bitcnt_t)0};
-  for (k = 1; enb > x[k]; k++)
-    ;
-  ASSERT (k <= enb);
-  return k;
-}
-#endif
-
-/* Convert U to REDC form, U_r = B^n * U mod M.
-   Uses scratch space at tp of size 2un + n + 1.  */
-static void
-redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n, mp_ptr tp)
-{
-  MPN_ZERO (tp, n);
-  MPN_COPY (tp + n, up, un);
-
-  mpn_sec_div_r (tp, un + n, mp, n, tp + un + n);
-  MPN_COPY (rp, tp, n);
-}
-
-/* {rp, n} <-- {bp, bn} ^ {ep, en} mod {mp, n},
-   where en = ceil (enb / GMP_NUMB_BITS)
-   Requires that {mp, n} is odd (and hence also mp[0] odd).
-   Uses scratch space at tp as defined by mpn_sec_powm_itch.  */
-void
-mpn_sec_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
-	      mp_srcptr ep, mp_bitcnt_t enb,
-	      mp_srcptr mp, mp_size_t n, mp_ptr tp)
-{
-  mp_limb_t ip[2], *mip;
-  int windowsize, this_windowsize;
-  mp_limb_t expbits;
-  mp_ptr pp, this_pp;
-  long i;
-  int cnd;
-
-  ASSERT (enb > 0);
-  ASSERT (n > 0);
-  /* The code works for bn = 0, but the defined scratch space is 2 limbs
-     greater than we supply, when converting 1 to redc form .  */
-  ASSERT (bn > 0);
-  ASSERT ((mp[0] & 1) != 0);
-
-  windowsize = win_size (enb);
-
-#if WANT_REDC_2
-  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-    {
-      mip = ip;
-      binvert_limb (mip[0], mp[0]);
-      mip[0] = -mip[0];
-    }
-  else
-    {
-      mip = ip;
-      mpn_binvert (mip, mp, 2, tp);
-      mip[0] = -mip[0]; mip[1] = ~mip[1];
-    }
-#else
-  mip = ip;
-  binvert_limb (mip[0], mp[0]);
-  mip[0] = -mip[0];
-#endif
-
-  pp = tp;
-  tp += (n << windowsize);	/* put tp after power table */
-
-  /* Compute pp[0] table entry */
-  /* scratch: |   n   | 1 |   n+2    |  */
-  /*          | pp[0] | 1 | redcify  |  */
-  this_pp = pp;
-  this_pp[n] = 1;
-  redcify (this_pp, this_pp + n, 1, mp, n, this_pp + n + 1);
-  this_pp += n;
-
-  /* Compute pp[1] table entry.  To avoid excessive scratch usage in the
-     degenerate situation where B >> M, we let redcify use scratch space which
-     will later be used by the pp table (element 2 and up).  */
-  /* scratch: |   n   |   n   |  bn + n + 1  |  */
-  /*          | pp[0] | pp[1] |   redcify    |  */
-  redcify (this_pp, bp, bn, mp, n, this_pp + n);
-
-  /* Precompute powers of b and put them in the temporary area at pp.  */
-  /* scratch: |   n   |   n   | ...  |                    |   2n      |  */
-  /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  product  |  */
-  for (i = (1 << windowsize) - 2; i > 0; i--)
-    {
-      mpn_mul_basecase (tp, this_pp, n, pp + n, n);
-      this_pp += n;
-#if WANT_REDC_2
-      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-	MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
-      else
-	MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
-#else
-      MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
-#endif
-    }
-
-  expbits = getbits (ep, enb, windowsize);
-  ASSERT_ALWAYS (enb >= windowsize);
-  enb -= windowsize;
-
-  mpn_sec_tabselect (rp, pp, n, 1 << windowsize, expbits);
-
-  /* Main exponentiation loop.  */
-  /* scratch: |   n   |   n   | ...  |                    |     3n-4n     |  */
-  /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  loop scratch |  */
-
-#define INNERLOOP							\
-  while (enb != 0)							\
-    {									\
-      expbits = getbits (ep, enb, windowsize);				\
-      this_windowsize = windowsize;					\
-      if (enb < windowsize)						\
-	{								\
-	  this_windowsize -= windowsize - enb;				\
-	  enb = 0;							\
-	}								\
-      else								\
-	enb -= windowsize;						\
-									\
-      do								\
-	{								\
-	  mpn_local_sqr (tp, rp, n, tp + 2 * n);			\
-	  MPN_REDUCE (rp, tp, mp, n, mip);				\
-	  this_windowsize--;						\
-	}								\
-      while (this_windowsize != 0);					\
-									\
-      mpn_sec_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);	\
-      mpn_mul_basecase (tp, rp, n, tp + 2*n, n);			\
-									\
-      MPN_REDUCE (rp, tp, mp, n, mip);					\
-    }
-
-#if WANT_REDC_2
-  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
-      INNERLOOP;
-    }
-  else
-    {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2_SEC (rp, tp, mp, n, mip)
-      INNERLOOP;
-    }
-#else
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
-  INNERLOOP;
-#endif
-
-  MPN_COPY (tp, rp, n);
-  MPN_ZERO (tp + n, n);
-
-#if WANT_REDC_2
-  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
-    MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
-  else
-    MPN_REDC_2_SEC (rp, tp, mp, n, mip);
-#else
-  MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
-#endif
-  cnd = mpn_sub_n (tp, rp, mp, n);	/* we need just retval */
-  mpn_cnd_sub_n (!cnd, rp, rp, mp, n);
-}
-
-mp_size_t
-mpn_sec_powm_itch (mp_size_t bn, mp_bitcnt_t enb, mp_size_t n)
-{
-  int windowsize;
-  mp_size_t redcify_itch, itch;
-
-  /* The top scratch usage will either be when reducing B in the 2nd redcify
-     call, or more typically n*2^windowsize + 3n or 4n, in the main loop.  (It
-     is 3n or 4n depending on if we use mpn_local_sqr or a native
-     mpn_sqr_basecase.  We assume 4n always for now.) */
-
-  windowsize = win_size (enb);
-
-  /* The 2n term is due to pp[0] and pp[1] at the time of the 2nd redcify call,
-     the (bn + n) term is due to redcify's own usage, and the rest is due to
-     mpn_sec_div_r's usage when called from redcify.  */
-  redcify_itch = (2 * n) + (bn + n) + ((bn + n) + 2 * n + 2);
-
-  /* The n * 2^windowsize term is due to the power table, the 4n term is due to
-     scratch needs of squaring/multiplication in the exponentiation loop.  */
-  itch = (n << windowsize) + (4 * n);
-
-  return MAX (itch, redcify_itch);
-}
diff --git a/gmp/mpn/generic/sec_sqr.c b/gmp/mpn/generic/sec_sqr.c
deleted file mode 100644
index 736924cc22..0000000000
--- a/gmp/mpn/generic/sec_sqr.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* mpn_sec_sqr.
-
-   Contributed to the GNU project by Torbjörn Granlund.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_sec_sqr (mp_ptr rp,
-	     mp_srcptr ap, mp_size_t an,
-	     mp_ptr tp)
-{
-  mpn_sqr_basecase (rp, ap, an);
-}
-
-mp_size_t
-mpn_sec_sqr_itch (mp_size_t an)
-{
-  return 0;
-}
diff --git a/gmp/mpn/generic/sec_tabselect.c b/gmp/mpn/generic/sec_tabselect.c
deleted file mode 100644
index a79c73a575..0000000000
--- a/gmp/mpn/generic/sec_tabselect.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/* mpn_sec_tabselect.
-
-Copyright 2007-2009, 2011, 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Select entry `which' from table `tab', which has nents entries, each `n'
-   limbs.  Store the selected entry at rp.  Reads entire table to avoid
-   side-channel information leaks.  O(n*nents).  */
-void
-mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *tab,
-		   mp_size_t n, mp_size_t nents, mp_size_t which)
-{
-  mp_size_t k, i;
-  mp_limb_t mask;
-  volatile mp_limb_t *tp;
-
-  for (k = 0; k < nents; k++)
-    {
-      mask = -(mp_limb_t) (which == k);
-      tp = tab + n * k;
-      for (i = 0; i < n; i++)
-	{
-	  rp[i] = (rp[i] & ~mask) | (tp[i] & mask);
-	}
-    }
-}
diff --git a/gmp/mpn/generic/set_str.c b/gmp/mpn/generic/set_str.c
index 71034e34bf..975cfb0dad 100644
--- a/gmp/mpn/generic/set_str.c
+++ b/gmp/mpn/generic/set_str.c
@@ -9,34 +9,23 @@
    FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE
    GNU MP RELEASE.
 
-Copyright 1991-1994, 1996, 2000-2002, 2004, 2006-2008, 2012, 2013 Free
-Software Foundation, Inc.
+Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,
+2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 /* TODO:
@@ -80,7 +69,7 @@ mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
       int next_bitpos;
       mp_limb_t res_digit;
       mp_size_t size;
-      int bits_per_indigit = mp_bases[base].big_base;
+      int bits_per_indigit = __mp_bases[base].big_base;
 
       size = 0;
       res_digit = 0;
@@ -118,7 +107,7 @@ mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
 
       TMP_MARK;
 
-      chars_per_limb = mp_bases[base].chars_per_limb;
+      chars_per_limb = __mp_bases[base].chars_per_limb;
 
       un = str_len / chars_per_limb + 1;
 
@@ -142,15 +131,18 @@ mpn_set_str_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, i
   long i, pi;
   mp_size_t n;
   mp_ptr p, t;
-  mp_limb_t big_base;
+  unsigned normalization_steps;
+  mp_limb_t big_base, big_base_inverted;
   int chars_per_limb;
   size_t digits_in_base;
   mp_size_t shift;
 
   powtab_mem_ptr = powtab_mem;
 
-  chars_per_limb = mp_bases[base].chars_per_limb;
-  big_base = mp_bases[base].big_base;
+  chars_per_limb = __mp_bases[base].chars_per_limb;
+  big_base = __mp_bases[base].big_base;
+  big_base_inverted = __mp_bases[base].big_base_inverted;
+  count_leading_zeros (normalization_steps, big_base);
 
   p = powtab_mem_ptr;
   powtab_mem_ptr += 1;
@@ -177,7 +169,7 @@ mpn_set_str_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, i
 
       ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_set_str_powtab_alloc (un));
 
-      mpn_sqr (t, p, n);
+      mpn_sqr_n (t, p, n);
       n = 2 * n - 1; n += t[n] != 0;
       digits_in_base *= 2;
 #if 1
@@ -247,9 +239,7 @@ mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len,
 
   if (hn == 0)
     {
-      /* Zero +1 limb here, to avoid reading an allocated but uninitialised
-	 limb in mpn_incr_u below.  */
-      MPN_ZERO (rp, powtab->n + sn + 1);
+      MPN_ZERO (rp, powtab->n + sn);
     }
   else
     {
@@ -288,11 +278,11 @@ mpn_bc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
   mp_limb_t res_digit;
 
   ASSERT (base >= 2);
-  ASSERT (base < numberof (mp_bases));
+  ASSERT (base < numberof (__mp_bases));
   ASSERT (str_len >= 1);
 
-  big_base = mp_bases[base].big_base;
-  chars_per_limb = mp_bases[base].chars_per_limb;
+  big_base = __mp_bases[base].big_base;
+  chars_per_limb = __mp_bases[base].chars_per_limb;
 
   size = 0;
   for (i = chars_per_limb; i < str_len; i += chars_per_limb)
diff --git a/gmp/mpn/generic/sizeinbase.c b/gmp/mpn/generic/sizeinbase.c
index 16633569ec..edd10b544e 100644
--- a/gmp/mpn/generic/sizeinbase.c
+++ b/gmp/mpn/generic/sizeinbase.c
@@ -4,34 +4,22 @@
    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
    FUTURE GNU MP RELEASES.
 
-Copyright 1991, 1993-1995, 2001, 2002, 2011, 2012 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -44,7 +32,27 @@ see https://www.gnu.org/licenses/.  */
 size_t
 mpn_sizeinbase (mp_srcptr xp, mp_size_t xsize, int base)
 {
-  size_t  result;
-  MPN_SIZEINBASE (result, xp, xsize, base);
-  return result;
+  int lb_base, cnt;
+  mp_size_t totbits;
+
+  ASSERT (xsize >= 0);
+  ASSERT (base >= 2);
+  ASSERT (base < numberof (__mp_bases));
+
+  /* Special case for X == 0.  */
+  if (xsize == 0)
+    return 1;
+
+  /* Calculate the total number of significant bits of X.  */
+  count_leading_zeros (cnt, xp[xsize-1]);
+  totbits = xsize * BITS_PER_MP_LIMB - cnt;
+
+  if (POW2_P (base))
+    {
+      /* Special case for powers of 2, giving exact result.  */
+      lb_base = __mp_bases[base].big_base;
+      return (totbits + lb_base - 1) / lb_base;
+    }
+  else
+    return (size_t) (totbits * __mp_bases[base].chars_per_bit_exactly) + 1;
 }
diff --git a/gmp/mpn/generic/sqr.c b/gmp/mpn/generic/sqr.c
deleted file mode 100644
index 3743761f78..0000000000
--- a/gmp/mpn/generic/sqr.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/* mpn_sqr -- square natural numbers.
-
-Copyright 1991, 1993, 1994, 1996-2003, 2005, 2008, 2009 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-void
-mpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n)
-{
-  ASSERT (n >= 1);
-  ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
-
-  if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
-    { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
-      mpn_mul_basecase (p, a, n, a, n);
-    }
-  else if (BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))
-    {
-      mpn_sqr_basecase (p, a, n);
-    }
-  else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
-    {
-      /* Allocate workspace of fixed size on stack: fast! */
-      mp_limb_t ws[mpn_toom2_sqr_itch (SQR_TOOM3_THRESHOLD_LIMIT-1)];
-      ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
-      mpn_toom2_sqr (p, a, n, ws);
-    }
-  else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))
-    {
-      mp_ptr ws;
-      TMP_SDECL;
-      TMP_SMARK;
-      ws = TMP_SALLOC_LIMBS (mpn_toom3_sqr_itch (n));
-      mpn_toom3_sqr (p, a, n, ws);
-      TMP_SFREE;
-    }
-  else if (BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))
-    {
-      mp_ptr ws;
-      TMP_SDECL;
-      TMP_SMARK;
-      ws = TMP_SALLOC_LIMBS (mpn_toom4_sqr_itch (n));
-      mpn_toom4_sqr (p, a, n, ws);
-      TMP_SFREE;
-    }
-  else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))
-    {
-      mp_ptr ws;
-      TMP_SDECL;
-      TMP_SMARK;
-      ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n));
-      mpn_toom6_sqr (p, a, n, ws);
-      TMP_SFREE;
-    }
-  else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
-    {
-      mp_ptr ws;
-      TMP_DECL;
-      TMP_MARK;
-      ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n));
-      mpn_toom8_sqr (p, a, n, ws);
-      TMP_FREE;
-    }
-  else
-    {
-      /* The current FFT code allocates its own space.  That should probably
-	 change.  */
-      mpn_fft_mul (p, a, n, a, n);
-    }
-}
diff --git a/gmp/mpn/generic/sqr_basecase.c b/gmp/mpn/generic/sqr_basecase.c
index fc6a043a94..56d22216f6 100644
--- a/gmp/mpn/generic/sqr_basecase.c
+++ b/gmp/mpn/generic/sqr_basecase.c
@@ -5,34 +5,23 @@
    SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
 
 
-Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011 Free Software
-Foundation, Inc.
+Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
+2005, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -56,30 +45,6 @@ see https://www.gnu.org/licenses/.  */
   } while (0)
 #endif
 
-#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
-#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
-  mpn_sqr_diag_addlsh1 (rp, tp, up, n)
-#else
-#if HAVE_NATIVE_mpn_addlsh1_n
-#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
-  do {									\
-    mp_limb_t cy;							\
-    MPN_SQR_DIAGONAL (rp, up, n);					\
-    cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
-    rp[2 * n - 1] += cy;						\
-  } while (0)
-#else
-#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n)				\
-  do {									\
-    mp_limb_t cy;							\
-    MPN_SQR_DIAGONAL (rp, up, n);					\
-    cy = mpn_lshift (tp, tp, 2 * n - 2, 1);				\
-    cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);			\
-    rp[2 * n - 1] += cy;						\
-  } while (0)
-#endif
-#endif
-
 
 #undef READY_WITH_mpn_sqr_basecase
 
@@ -89,12 +54,12 @@ void
 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
 {
   mp_size_t i;
-  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+  mp_limb_t tarr[2 * SQR_KARATSUBA_THRESHOLD];
   mp_ptr tp = tarr;
   mp_limb_t cy;
 
   /* must fit 2*n limbs in tarr */
-  ASSERT (n <= SQR_TOOM2_THRESHOLD);
+  ASSERT (n <= SQR_KARATSUBA_THRESHOLD);
 
   if ((n & 1) != 0)
     {
@@ -119,13 +84,9 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
     {
       if (n == 2)
 	{
-#if HAVE_NATIVE_mpn_mul_2
-	  rp[3] = mpn_mul_2 (rp, up, 2, up);
-#else
 	  rp[0] = 0;
 	  rp[1] = 0;
 	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
-#endif
 	  return;
 	}
 
@@ -140,7 +101,15 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
       tp[2 * n - 3] = cy;
     }
 
-  MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
+  MPN_SQR_DIAGONAL (rp, up, n);
+
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#else
+  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
+  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#endif
+  rp[2 * n - 1] += cy;
 }
 #define READY_WITH_mpn_sqr_basecase
 #endif
@@ -167,12 +136,12 @@ void
 mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
 {
   mp_size_t i;
-  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+  mp_limb_t tarr[2 * SQR_KARATSUBA_THRESHOLD];
   mp_ptr tp = tarr;
   mp_limb_t cy;
 
   /* must fit 2*n limbs in tarr */
-  ASSERT (n <= SQR_TOOM2_THRESHOLD);
+  ASSERT (n <= SQR_KARATSUBA_THRESHOLD);
 
   if ((n & 1) != 0)
     {
@@ -225,13 +194,9 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
 
       if (n == 2)
 	{
-#if HAVE_NATIVE_mpn_mul_2
-	  rp[3] = mpn_mul_2 (rp, up, 2, up);
-#else
 	  rp[0] = 0;
 	  rp[1] = 0;
 	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
-#endif
 	  return;
 	}
 
@@ -303,12 +268,12 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
   }
   if (n > 1)
     {
-      mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+      mp_limb_t tarr[2 * SQR_KARATSUBA_THRESHOLD];
       mp_ptr tp = tarr;
       mp_limb_t cy;
 
       /* must fit 2*n limbs in tarr */
-      ASSERT (n <= SQR_TOOM2_THRESHOLD);
+      ASSERT (n <= SQR_KARATSUBA_THRESHOLD);
 
       cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
       tp[n - 1] = cy;
@@ -318,8 +283,18 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
 	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
 	  tp[n + i - 2] = cy;
 	}
+      MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);
 
-      MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
+      {
+	mp_limb_t cy;
+#if HAVE_NATIVE_mpn_addlsh1_n
+	cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#else
+	cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
+	cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#endif
+	rp[2 * n - 1] += cy;
+      }
     }
 }
 #endif
diff --git a/gmp/mpn/generic/sqrmod_bnm1.c b/gmp/mpn/generic/sqrmod_bnm1.c
deleted file mode 100644
index fd0868b90b..0000000000
--- a/gmp/mpn/generic/sqrmod_bnm1.c
+++ /dev/null
@@ -1,313 +0,0 @@
-/* sqrmod_bnm1.c -- squaring mod B^n-1.
-
-   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
-   Marco Bodrato.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Input is {ap,rn}; output is {rp,rn}, computation is
-   mod B^rn - 1, and values are semi-normalised; zero is represented
-   as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
-   tp==rp is allowed. */
-static void
-mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
-{
-  mp_limb_t cy;
-
-  ASSERT (0 < rn);
-
-  mpn_sqr (tp, ap, rn);
-  cy = mpn_add_n (rp, tp, tp + rn, rn);
-  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
-   * be no overflow when adding in the carry. */
-  MPN_INCR_U (rp, rn, cy);
-}
-
-
-/* Input is {ap,rn+1}; output is {rp,rn+1}, in
-   semi-normalised representation, computation is mod B^rn + 1. Needs
-   a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
-   Output is normalised. */
-static void
-mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
-{
-  mp_limb_t cy;
-
-  ASSERT (0 < rn);
-
-  mpn_sqr (tp, ap, rn + 1);
-  ASSERT (tp[2*rn+1] == 0);
-  ASSERT (tp[2*rn] < GMP_NUMB_MAX);
-  cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
-  rp[rn] = 0;
-  MPN_INCR_U (rp, rn+1, cy );
-}
-
-
-/* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1)
- *
- * The result is expected to be ZERO if and only if the operand
- * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
- * B^rn-1.
- * It should not be a problem if sqrmod_bnm1 is used to
- * compute the full square with an <= 2*rn, because this condition
- * implies (B^an-1)^2 < (B^rn-1) .
- *
- * Requires rn/4 < an <= rn
- * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives
- *
- * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4
- */
-void
-mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
-{
-  ASSERT (0 < an);
-  ASSERT (an <= rn);
-
-  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
-    {
-      if (UNLIKELY (an < rn))
-	{
-	  if (UNLIKELY (2*an <= rn))
-	    {
-	      mpn_sqr (rp, ap, an);
-	    }
-	  else
-	    {
-	      mp_limb_t cy;
-	      mpn_sqr (tp, ap, an);
-	      cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
-	      MPN_INCR_U (rp, rn, cy);
-	    }
-	}
-      else
-	mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
-    }
-  else
-    {
-      mp_size_t n;
-      mp_limb_t cy;
-      mp_limb_t hi;
-
-      n = rn >> 1;
-
-      ASSERT (2*an > n);
-
-      /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
-	 and crt together as
-
-	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
-      */
-
-#define a0 ap
-#define a1 (ap + n)
-
-#define xp  tp	/* 2n + 2 */
-      /* am1  maybe in {xp, n} */
-#define sp1 (tp + 2*n + 2)
-      /* ap1  maybe in {sp1, n + 1} */
-
-      {
-	mp_srcptr am1;
-	mp_size_t anm;
-	mp_ptr so;
-
-	if (LIKELY (an > n))
-	  {
-	    so = xp + n;
-	    am1 = xp;
-	    cy = mpn_add (xp, a0, n, a1, an - n);
-	    MPN_INCR_U (xp, n, cy);
-	    anm = n;
-	  }
-	else
-	  {
-	    so = xp;
-	    am1 = a0;
-	    anm = an;
-	  }
-
-	mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
-      }
-
-      {
-	int       k;
-	mp_srcptr ap1;
-	mp_size_t anp;
-
-	if (LIKELY (an > n)) {
-	  ap1 = sp1;
-	  cy = mpn_sub (sp1, a0, n, a1, an - n);
-	  sp1[n] = 0;
-	  MPN_INCR_U (sp1, n + 1, cy);
-	  anp = n + ap1[n];
-	} else {
-	  ap1 = a0;
-	  anp = an;
-	}
-
-	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
-	  k=0;
-	else
-	  {
-	    int mask;
-	    k = mpn_fft_best_k (n, 1);
-	    mask = (1<<k) -1;
-	    while (n & mask) {k--; mask >>=1;};
-	  }
-	if (k >= FFT_FIRST_K)
-	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
-	else if (UNLIKELY (ap1 == a0))
-	  {
-	    ASSERT (anp <= n);
-	    ASSERT (2*anp > n);
-	    mpn_sqr (xp, a0, an);
-	    anp = 2*an - n;
-	    cy = mpn_sub (xp, xp, n, xp + n, anp);
-	    xp[n] = 0;
-	    MPN_INCR_U (xp, n+1, cy);
-	  }
-	else
-	  mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
-      }
-
-      /* Here the CRT recomposition begins.
-
-	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
-	 Division by 2 is a bitwise rotation.
-
-	 Assumes xp normalised mod (B^n+1).
-
-	 The residue class [0] is represented by [B^n-1]; except when
-	 both input are ZERO.
-      */
-
-#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
-#if HAVE_NATIVE_mpn_rsh1add_nc
-      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
-      hi = cy << (GMP_NUMB_BITS - 1);
-      cy = 0;
-      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
-	 overflows, i.e. a further increment will not overflow again. */
-#else /* ! _nc */
-      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
-      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
-      cy >>= 1;
-      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
-	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
-#endif
-#if GMP_NAIL_BITS == 0
-      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], CNST_LIMB(0), hi);
-#else
-      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
-      rp[n-1] ^= hi;
-#endif
-#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
-#if HAVE_NATIVE_mpn_add_nc
-      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
-#else /* ! _nc */
-      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
-#endif
-      cy += (rp[0]&1);
-      mpn_rshift(rp, rp, n, 1);
-      ASSERT (cy <= 2);
-      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
-      cy >>= 1;
-      /* We can have cy != 0 only if hi = 0... */
-      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
-      rp[n-1] |= hi;
-      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
-#endif
-      ASSERT (cy <= 1);
-      /* Next increment can not overflow, read the previous comments about cy. */
-      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
-      MPN_INCR_U(rp, n, cy);
-
-      /* Compute the highest half:
-	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
-       */
-      if (UNLIKELY (2*an < rn))
-	{
-	  /* Note that in this case, the only way the result can equal
-	     zero mod B^{rn} - 1 is if the input is zero, and
-	     then the output of both the recursive calls and this CRT
-	     reconstruction is zero, not B^{rn} - 1. */
-	  cy = mpn_sub_n (rp + n, rp, xp, 2*an - n);
-
-	  /* FIXME: This subtraction of the high parts is not really
-	     necessary, we do it to get the carry out, and for sanity
-	     checking. */
-	  cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n,
-				   xp + 2*an - n, rn - 2*an, cy);
-	  ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an));
-	  cy = mpn_sub_1 (rp, rp, 2*an, cy);
-	  ASSERT (cy == (xp + 2*an - n)[0]);
-	}
-      else
-	{
-	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
-	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
-	     DECR will affect _at most_ the lowest n limbs. */
-	  MPN_DECR_U (rp, 2*n, cy);
-	}
-#undef a0
-#undef a1
-#undef xp
-#undef sp1
-    }
-}
-
-mp_size_t
-mpn_sqrmod_bnm1_next_size (mp_size_t n)
-{
-  mp_size_t nh;
-
-  if (BELOW_THRESHOLD (n,     SQRMOD_BNM1_THRESHOLD))
-    return n;
-  if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
-    return (n + (2-1)) & (-2);
-  if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
-    return (n + (4-1)) & (-4);
-
-  nh = (n + 1) >> 1;
-
-  if (BELOW_THRESHOLD (nh, SQR_FFT_MODF_THRESHOLD))
-    return (n + (8-1)) & (-8);
-
-  return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 1));
-}
diff --git a/gmp/mpn/generic/sqrtrem.c b/gmp/mpn/generic/sqrtrem.c
index 7d0f120001..ac878c5083 100644
--- a/gmp/mpn/generic/sqrtrem.c
+++ b/gmp/mpn/generic/sqrtrem.c
@@ -8,34 +8,23 @@
    INTERFACES.  IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR
    DISAPPEAR IN A FUTURE GMP RELEASE.
 
-Copyright 1999-2002, 2004, 2005, 2008, 2010, 2012 Free Software Foundation,
+Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2008 Free Software Foundation,
 Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 /* See "Karatsuba Square Root", reference in gmp.texi.  */
@@ -48,64 +37,64 @@ see https://www.gnu.org/licenses/.  */
 #include "gmp-impl.h"
 #include "longlong.h"
 
-static const unsigned char invsqrttab[384] = /* The common 0x100 was removed */
+static const unsigned short invsqrttab[384] =
 {
-  0xff,0xfd,0xfb,0xf9,0xf7,0xf5,0xf3,0xf2, /* sqrt(1/80)..sqrt(1/87) */
-  0xf0,0xee,0xec,0xea,0xe9,0xe7,0xe5,0xe4, /* sqrt(1/88)..sqrt(1/8f) */
-  0xe2,0xe0,0xdf,0xdd,0xdb,0xda,0xd8,0xd7, /* sqrt(1/90)..sqrt(1/97) */
-  0xd5,0xd4,0xd2,0xd1,0xcf,0xce,0xcc,0xcb, /* sqrt(1/98)..sqrt(1/9f) */
-  0xc9,0xc8,0xc6,0xc5,0xc4,0xc2,0xc1,0xc0, /* sqrt(1/a0)..sqrt(1/a7) */
-  0xbe,0xbd,0xbc,0xba,0xb9,0xb8,0xb7,0xb5, /* sqrt(1/a8)..sqrt(1/af) */
-  0xb4,0xb3,0xb2,0xb0,0xaf,0xae,0xad,0xac, /* sqrt(1/b0)..sqrt(1/b7) */
-  0xaa,0xa9,0xa8,0xa7,0xa6,0xa5,0xa4,0xa3, /* sqrt(1/b8)..sqrt(1/bf) */
-  0xa2,0xa0,0x9f,0x9e,0x9d,0x9c,0x9b,0x9a, /* sqrt(1/c0)..sqrt(1/c7) */
-  0x99,0x98,0x97,0x96,0x95,0x94,0x93,0x92, /* sqrt(1/c8)..sqrt(1/cf) */
-  0x91,0x90,0x8f,0x8e,0x8d,0x8c,0x8c,0x8b, /* sqrt(1/d0)..sqrt(1/d7) */
-  0x8a,0x89,0x88,0x87,0x86,0x85,0x84,0x83, /* sqrt(1/d8)..sqrt(1/df) */
-  0x83,0x82,0x81,0x80,0x7f,0x7e,0x7e,0x7d, /* sqrt(1/e0)..sqrt(1/e7) */
-  0x7c,0x7b,0x7a,0x79,0x79,0x78,0x77,0x76, /* sqrt(1/e8)..sqrt(1/ef) */
-  0x76,0x75,0x74,0x73,0x72,0x72,0x71,0x70, /* sqrt(1/f0)..sqrt(1/f7) */
-  0x6f,0x6f,0x6e,0x6d,0x6d,0x6c,0x6b,0x6a, /* sqrt(1/f8)..sqrt(1/ff) */
-  0x6a,0x69,0x68,0x68,0x67,0x66,0x66,0x65, /* sqrt(1/100)..sqrt(1/107) */
-  0x64,0x64,0x63,0x62,0x62,0x61,0x60,0x60, /* sqrt(1/108)..sqrt(1/10f) */
-  0x5f,0x5e,0x5e,0x5d,0x5c,0x5c,0x5b,0x5a, /* sqrt(1/110)..sqrt(1/117) */
-  0x5a,0x59,0x59,0x58,0x57,0x57,0x56,0x56, /* sqrt(1/118)..sqrt(1/11f) */
-  0x55,0x54,0x54,0x53,0x53,0x52,0x52,0x51, /* sqrt(1/120)..sqrt(1/127) */
-  0x50,0x50,0x4f,0x4f,0x4e,0x4e,0x4d,0x4d, /* sqrt(1/128)..sqrt(1/12f) */
-  0x4c,0x4b,0x4b,0x4a,0x4a,0x49,0x49,0x48, /* sqrt(1/130)..sqrt(1/137) */
-  0x48,0x47,0x47,0x46,0x46,0x45,0x45,0x44, /* sqrt(1/138)..sqrt(1/13f) */
-  0x44,0x43,0x43,0x42,0x42,0x41,0x41,0x40, /* sqrt(1/140)..sqrt(1/147) */
-  0x40,0x3f,0x3f,0x3e,0x3e,0x3d,0x3d,0x3c, /* sqrt(1/148)..sqrt(1/14f) */
-  0x3c,0x3b,0x3b,0x3a,0x3a,0x39,0x39,0x39, /* sqrt(1/150)..sqrt(1/157) */
-  0x38,0x38,0x37,0x37,0x36,0x36,0x35,0x35, /* sqrt(1/158)..sqrt(1/15f) */
-  0x35,0x34,0x34,0x33,0x33,0x32,0x32,0x32, /* sqrt(1/160)..sqrt(1/167) */
-  0x31,0x31,0x30,0x30,0x2f,0x2f,0x2f,0x2e, /* sqrt(1/168)..sqrt(1/16f) */
-  0x2e,0x2d,0x2d,0x2d,0x2c,0x2c,0x2b,0x2b, /* sqrt(1/170)..sqrt(1/177) */
-  0x2b,0x2a,0x2a,0x29,0x29,0x29,0x28,0x28, /* sqrt(1/178)..sqrt(1/17f) */
-  0x27,0x27,0x27,0x26,0x26,0x26,0x25,0x25, /* sqrt(1/180)..sqrt(1/187) */
-  0x24,0x24,0x24,0x23,0x23,0x23,0x22,0x22, /* sqrt(1/188)..sqrt(1/18f) */
-  0x21,0x21,0x21,0x20,0x20,0x20,0x1f,0x1f, /* sqrt(1/190)..sqrt(1/197) */
-  0x1f,0x1e,0x1e,0x1e,0x1d,0x1d,0x1d,0x1c, /* sqrt(1/198)..sqrt(1/19f) */
-  0x1c,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x19, /* sqrt(1/1a0)..sqrt(1/1a7) */
-  0x19,0x19,0x18,0x18,0x18,0x18,0x17,0x17, /* sqrt(1/1a8)..sqrt(1/1af) */
-  0x17,0x16,0x16,0x16,0x15,0x15,0x15,0x14, /* sqrt(1/1b0)..sqrt(1/1b7) */
-  0x14,0x14,0x13,0x13,0x13,0x12,0x12,0x12, /* sqrt(1/1b8)..sqrt(1/1bf) */
-  0x12,0x11,0x11,0x11,0x10,0x10,0x10,0x0f, /* sqrt(1/1c0)..sqrt(1/1c7) */
-  0x0f,0x0f,0x0f,0x0e,0x0e,0x0e,0x0d,0x0d, /* sqrt(1/1c8)..sqrt(1/1cf) */
-  0x0d,0x0c,0x0c,0x0c,0x0c,0x0b,0x0b,0x0b, /* sqrt(1/1d0)..sqrt(1/1d7) */
-  0x0a,0x0a,0x0a,0x0a,0x09,0x09,0x09,0x09, /* sqrt(1/1d8)..sqrt(1/1df) */
-  0x08,0x08,0x08,0x07,0x07,0x07,0x07,0x06, /* sqrt(1/1e0)..sqrt(1/1e7) */
-  0x06,0x06,0x06,0x05,0x05,0x05,0x04,0x04, /* sqrt(1/1e8)..sqrt(1/1ef) */
-  0x04,0x04,0x03,0x03,0x03,0x03,0x02,0x02, /* sqrt(1/1f0)..sqrt(1/1f7) */
-  0x02,0x02,0x01,0x01,0x01,0x01,0x00,0x00  /* sqrt(1/1f8)..sqrt(1/1ff) */
+  0x1ff,0x1fd,0x1fb,0x1f9,0x1f7,0x1f5,0x1f3,0x1f2, /* sqrt(1/80)..sqrt(1/87) */
+  0x1f0,0x1ee,0x1ec,0x1ea,0x1e9,0x1e7,0x1e5,0x1e4, /* sqrt(1/88)..sqrt(1/8f) */
+  0x1e2,0x1e0,0x1df,0x1dd,0x1db,0x1da,0x1d8,0x1d7, /* sqrt(1/90)..sqrt(1/97) */
+  0x1d5,0x1d4,0x1d2,0x1d1,0x1cf,0x1ce,0x1cc,0x1cb, /* sqrt(1/98)..sqrt(1/9f) */
+  0x1c9,0x1c8,0x1c6,0x1c5,0x1c4,0x1c2,0x1c1,0x1c0, /* sqrt(1/a0)..sqrt(1/a7) */
+  0x1be,0x1bd,0x1bc,0x1ba,0x1b9,0x1b8,0x1b7,0x1b5, /* sqrt(1/a8)..sqrt(1/af) */
+  0x1b4,0x1b3,0x1b2,0x1b0,0x1af,0x1ae,0x1ad,0x1ac, /* sqrt(1/b0)..sqrt(1/b7) */
+  0x1aa,0x1a9,0x1a8,0x1a7,0x1a6,0x1a5,0x1a4,0x1a3, /* sqrt(1/b8)..sqrt(1/bf) */
+  0x1a2,0x1a0,0x19f,0x19e,0x19d,0x19c,0x19b,0x19a, /* sqrt(1/c0)..sqrt(1/c7) */
+  0x199,0x198,0x197,0x196,0x195,0x194,0x193,0x192, /* sqrt(1/c8)..sqrt(1/cf) */
+  0x191,0x190,0x18f,0x18e,0x18d,0x18c,0x18c,0x18b, /* sqrt(1/d0)..sqrt(1/d7) */
+  0x18a,0x189,0x188,0x187,0x186,0x185,0x184,0x183, /* sqrt(1/d8)..sqrt(1/df) */
+  0x183,0x182,0x181,0x180,0x17f,0x17e,0x17e,0x17d, /* sqrt(1/e0)..sqrt(1/e7) */
+  0x17c,0x17b,0x17a,0x179,0x179,0x178,0x177,0x176, /* sqrt(1/e8)..sqrt(1/ef) */
+  0x176,0x175,0x174,0x173,0x172,0x172,0x171,0x170, /* sqrt(1/f0)..sqrt(1/f7) */
+  0x16f,0x16f,0x16e,0x16d,0x16d,0x16c,0x16b,0x16a, /* sqrt(1/f8)..sqrt(1/ff) */
+  0x16a,0x169,0x168,0x168,0x167,0x166,0x166,0x165, /* sqrt(1/100)..sqrt(1/107) */
+  0x164,0x164,0x163,0x162,0x162,0x161,0x160,0x160, /* sqrt(1/108)..sqrt(1/10f) */
+  0x15f,0x15e,0x15e,0x15d,0x15c,0x15c,0x15b,0x15a, /* sqrt(1/110)..sqrt(1/117) */
+  0x15a,0x159,0x159,0x158,0x157,0x157,0x156,0x156, /* sqrt(1/118)..sqrt(1/11f) */
+  0x155,0x154,0x154,0x153,0x153,0x152,0x152,0x151, /* sqrt(1/120)..sqrt(1/127) */
+  0x150,0x150,0x14f,0x14f,0x14e,0x14e,0x14d,0x14d, /* sqrt(1/128)..sqrt(1/12f) */
+  0x14c,0x14b,0x14b,0x14a,0x14a,0x149,0x149,0x148, /* sqrt(1/130)..sqrt(1/137) */
+  0x148,0x147,0x147,0x146,0x146,0x145,0x145,0x144, /* sqrt(1/138)..sqrt(1/13f) */
+  0x144,0x143,0x143,0x142,0x142,0x141,0x141,0x140, /* sqrt(1/140)..sqrt(1/147) */
+  0x140,0x13f,0x13f,0x13e,0x13e,0x13d,0x13d,0x13c, /* sqrt(1/148)..sqrt(1/14f) */
+  0x13c,0x13b,0x13b,0x13a,0x13a,0x139,0x139,0x139, /* sqrt(1/150)..sqrt(1/157) */
+  0x138,0x138,0x137,0x137,0x136,0x136,0x135,0x135, /* sqrt(1/158)..sqrt(1/15f) */
+  0x135,0x134,0x134,0x133,0x133,0x132,0x132,0x132, /* sqrt(1/160)..sqrt(1/167) */
+  0x131,0x131,0x130,0x130,0x12f,0x12f,0x12f,0x12e, /* sqrt(1/168)..sqrt(1/16f) */
+  0x12e,0x12d,0x12d,0x12d,0x12c,0x12c,0x12b,0x12b, /* sqrt(1/170)..sqrt(1/177) */
+  0x12b,0x12a,0x12a,0x129,0x129,0x129,0x128,0x128, /* sqrt(1/178)..sqrt(1/17f) */
+  0x127,0x127,0x127,0x126,0x126,0x126,0x125,0x125, /* sqrt(1/180)..sqrt(1/187) */
+  0x124,0x124,0x124,0x123,0x123,0x123,0x122,0x122, /* sqrt(1/188)..sqrt(1/18f) */
+  0x121,0x121,0x121,0x120,0x120,0x120,0x11f,0x11f, /* sqrt(1/190)..sqrt(1/197) */
+  0x11f,0x11e,0x11e,0x11e,0x11d,0x11d,0x11d,0x11c, /* sqrt(1/198)..sqrt(1/19f) */
+  0x11c,0x11b,0x11b,0x11b,0x11a,0x11a,0x11a,0x119, /* sqrt(1/1a0)..sqrt(1/1a7) */
+  0x119,0x119,0x118,0x118,0x118,0x118,0x117,0x117, /* sqrt(1/1a8)..sqrt(1/1af) */
+  0x117,0x116,0x116,0x116,0x115,0x115,0x115,0x114, /* sqrt(1/1b0)..sqrt(1/1b7) */
+  0x114,0x114,0x113,0x113,0x113,0x112,0x112,0x112, /* sqrt(1/1b8)..sqrt(1/1bf) */
+  0x112,0x111,0x111,0x111,0x110,0x110,0x110,0x10f, /* sqrt(1/1c0)..sqrt(1/1c7) */
+  0x10f,0x10f,0x10f,0x10e,0x10e,0x10e,0x10d,0x10d, /* sqrt(1/1c8)..sqrt(1/1cf) */
+  0x10d,0x10c,0x10c,0x10c,0x10c,0x10b,0x10b,0x10b, /* sqrt(1/1d0)..sqrt(1/1d7) */
+  0x10a,0x10a,0x10a,0x10a,0x109,0x109,0x109,0x109, /* sqrt(1/1d8)..sqrt(1/1df) */
+  0x108,0x108,0x108,0x107,0x107,0x107,0x107,0x106, /* sqrt(1/1e0)..sqrt(1/1e7) */
+  0x106,0x106,0x106,0x105,0x105,0x105,0x104,0x104, /* sqrt(1/1e8)..sqrt(1/1ef) */
+  0x104,0x104,0x103,0x103,0x103,0x103,0x102,0x102, /* sqrt(1/1f0)..sqrt(1/1f7) */
+  0x102,0x102,0x101,0x101,0x101,0x101,0x100,0x100  /* sqrt(1/1f8)..sqrt(1/1ff) */
 };
 
 /* Compute s = floor(sqrt(a0)), and *rp = a0 - s^2.  */
 
 #if GMP_NUMB_BITS > 32
-#define MAGIC CNST_LIMB(0x10000000000)	/* 0xffe7debbfc < MAGIC < 0x232b1850f410 */
+#define MAGIC 0x10000000000	/* 0xffe7debbfc < MAGIC < 0x232b1850f410 */
 #else
-#define MAGIC CNST_LIMB(0x100000)		/* 0xfee6f < MAGIC < 0x29cbc8 */
+#define MAGIC 0x100000		/* 0xfee6f < MAGIC < 0x29cbc8 */
 #endif
 
 static mp_limb_t
@@ -126,16 +115,16 @@ mpn_sqrtrem1 (mp_ptr rp, mp_limb_t a0)
      iteration convert from 1/sqrt(a) to sqrt(a).  */
 
   abits = a0 >> (GMP_LIMB_BITS - 1 - 8);	/* extract bits for table lookup */
-  x0 = 0x100 | invsqrttab[abits - 0x80];	/* initial 1/sqrt(a) */
+  x0 = invsqrttab[abits - 0x80];		/* initial 1/sqrt(a) */
 
   /* x0 is now an 8 bits approximation of 1/sqrt(a0) */
 
 #if GMP_NUMB_BITS > 32
   a1 = a0 >> (GMP_LIMB_BITS - 1 - 32);
-  t = (mp_limb_signed_t) (CNST_LIMB(0x2000000000000) - 0x30000  - a1 * x0 * x0) >> 16;
+  t = (mp_limb_signed_t) (0x2000000000000l - 0x30000  - a1 * x0 * x0) >> 16;
   x0 = (x0 << 16) + ((mp_limb_signed_t) (x0 * t) >> (16+2));
 
-  /* x0 is now a 16 bits approximation of 1/sqrt(a0) */
+  /* x0 is now a 16-bit approximation of 1/sqrt(a0) */
 
   t2 = x0 * (a0 >> (32-8));
   t = t2 >> 25;
@@ -250,18 +239,14 @@ mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)
       q >>= 1;
       if (c != 0)
 	c = mpn_add_n (np + l, np + l, sp + l, h);
-      mpn_sqr (np + n, sp, l);
+      mpn_sqr_n (np + n, sp, l);
       b = q + mpn_sub_n (np, np, np + n, 2 * l);
       c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b);
       q = mpn_add_1 (sp + l, sp + l, h, q);
 
       if (c < 0)
 	{
-#if HAVE_NATIVE_mpn_addlsh1_n
-	  c += mpn_addlsh1_n (np, np, sp, n) + 2 * q;
-#else
 	  c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;
-#endif
 	  c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));
 	  q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));
 	}
diff --git a/gmp/mpn/generic/sub.c b/gmp/mpn/generic/sub.c
index 3fbcbbe98b..ada3e91b83 100644
--- a/gmp/mpn/generic/sub.c
+++ b/gmp/mpn/generic/sub.c
@@ -5,28 +5,17 @@ Copyright 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define __GMP_FORCE_mpn_sub 1
 
diff --git a/gmp/mpn/generic/sub_1.c b/gmp/mpn/generic/sub_1.c
index db2e6f948f..4ed2eabccb 100644
--- a/gmp/mpn/generic/sub_1.c
+++ b/gmp/mpn/generic/sub_1.c
@@ -5,28 +5,17 @@ Copyright 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define __GMP_FORCE_mpn_sub_1 1
 
diff --git a/gmp/mpn/generic/sub_err1_n.c b/gmp/mpn/generic/sub_err1_n.c
deleted file mode 100644
index 340313a323..0000000000
--- a/gmp/mpn/generic/sub_err1_n.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/* mpn_sub_err1_n -- sub_n with one error term
-
-   Contributed by David Harvey.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
-  Computes:
-
-  (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
-  return value is borrow out.
-
-  (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
-  Computes c[1]*yp[n-1] + ... + c[n]*yp[0], stores two-limb result at ep.
-
-  Requires n >= 1.
-
-  None of the outputs may overlap each other or any of the inputs, except
-  that {rp,n} may be equal to {up,n} or {vp,n}.
-*/
-mp_limb_t
-mpn_sub_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
-		mp_ptr ep, mp_srcptr yp,
-                mp_size_t n, mp_limb_t cy)
-{
-  mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;
-
-  ASSERT (n >= 1);
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
-
-  yp += n - 1;
-  el = eh = 0;
-
-  do
-    {
-      yl = *yp--;
-      ul = *up++;
-      vl = *vp++;
-
-      /* ordinary sub_n */
-      SUBC_LIMB (cy1, sl, ul, vl);
-      SUBC_LIMB (cy2, rl, sl, cy);
-      cy = cy1 | cy2;
-      *rp++ = rl;
-
-      /* update (eh:el) */
-      zl = (-cy) & yl;
-      el += zl;
-      eh += el < zl;
-    }
-  while (--n);
-
-#if GMP_NAIL_BITS != 0
-  eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);
-  el &= GMP_NUMB_MASK;
-#endif
-
-  ep[0] = el;
-  ep[1] = eh;
-
-  return cy;
-}
diff --git a/gmp/mpn/generic/sub_err2_n.c b/gmp/mpn/generic/sub_err2_n.c
deleted file mode 100644
index 63ea2451b4..0000000000
--- a/gmp/mpn/generic/sub_err2_n.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/* mpn_sub_err2_n -- sub_n with two error terms
-
-   Contributed by David Harvey.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
-  Computes:
-
-  (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
-  return value is borrow out.
-
-  (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
-  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
-           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
-  stores two-limb results at {ep,2} and {ep+2,2} respectively.
-
-  Requires n >= 1.
-
-  None of the outputs may overlap each other or any of the inputs, except
-  that {rp,n} may be equal to {up,n} or {vp,n}.
-*/
-mp_limb_t
-mpn_sub_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
-                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
-                mp_size_t n, mp_limb_t cy)
-{
-  mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;
-
-  ASSERT (n >= 1);
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
-
-  yp1 += n - 1;
-  yp2 += n - 1;
-  el1 = eh1 = 0;
-  el2 = eh2 = 0;
-
-  do
-    {
-      yl1 = *yp1--;
-      yl2 = *yp2--;
-      ul = *up++;
-      vl = *vp++;
-
-      /* ordinary sub_n */
-      SUBC_LIMB (cy1, sl, ul, vl);
-      SUBC_LIMB (cy2, rl, sl, cy);
-      cy = cy1 | cy2;
-      *rp++ = rl;
-
-      /* update (eh1:el1) */
-      zl1 = (-cy) & yl1;
-      el1 += zl1;
-      eh1 += el1 < zl1;
-
-      /* update (eh2:el2) */
-      zl2 = (-cy) & yl2;
-      el2 += zl2;
-      eh2 += el2 < zl2;
-    }
-  while (--n);
-
-#if GMP_NAIL_BITS != 0
-  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
-  el1 &= GMP_NUMB_MASK;
-  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
-  el2 &= GMP_NUMB_MASK;
-#endif
-
-  ep[0] = el1;
-  ep[1] = eh1;
-  ep[2] = el2;
-  ep[3] = eh2;
-
-  return cy;
-}
diff --git a/gmp/mpn/generic/sub_err3_n.c b/gmp/mpn/generic/sub_err3_n.c
deleted file mode 100644
index a80e05d0d9..0000000000
--- a/gmp/mpn/generic/sub_err3_n.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/* mpn_sub_err3_n -- sub_n with three error terms
-
-   Contributed by David Harvey.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
-  Computes:
-
-  (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
-  return value is borrow out.
-
-  (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
-  Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
-           c[1]*yp2[n-1] + ... + c[n]*yp2[0],
-           c[1]*yp3[n-1] + ... + c[n]*yp3[0],
-  stores two-limb results at {ep,2}, {ep+2,2} and {ep+4,2} respectively.
-
-  Requires n >= 1.
-
-  None of the outputs may overlap each other or any of the inputs, except
-  that {rp,n} may be equal to {up,n} or {vp,n}.
-*/
-mp_limb_t
-mpn_sub_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
-                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
-                mp_size_t n, mp_limb_t cy)
-{
-  mp_limb_t el1, eh1, el2, eh2, el3, eh3, ul, vl, yl1, yl2, yl3, zl1, zl2, zl3, rl, sl, cy1, cy2;
-
-  ASSERT (n >= 1);
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
-  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
-  ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
-  ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
-
-  yp1 += n - 1;
-  yp2 += n - 1;
-  yp3 += n - 1;
-  el1 = eh1 = 0;
-  el2 = eh2 = 0;
-  el3 = eh3 = 0;
-
-  do
-    {
-      yl1 = *yp1--;
-      yl2 = *yp2--;
-      yl3 = *yp3--;
-      ul = *up++;
-      vl = *vp++;
-
-      /* ordinary sub_n */
-      SUBC_LIMB (cy1, sl, ul, vl);
-      SUBC_LIMB (cy2, rl, sl, cy);
-      cy = cy1 | cy2;
-      *rp++ = rl;
-
-      /* update (eh1:el1) */
-      zl1 = (-cy) & yl1;
-      el1 += zl1;
-      eh1 += el1 < zl1;
-
-      /* update (eh2:el2) */
-      zl2 = (-cy) & yl2;
-      el2 += zl2;
-      eh2 += el2 < zl2;
-
-      /* update (eh3:el3) */
-      zl3 = (-cy) & yl3;
-      el3 += zl3;
-      eh3 += el3 < zl3;
-    }
-  while (--n);
-
-#if GMP_NAIL_BITS != 0
-  eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
-  el1 &= GMP_NUMB_MASK;
-  eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
-  el2 &= GMP_NUMB_MASK;
-  eh3 = (eh3 << GMP_NAIL_BITS) + (el3 >> GMP_NUMB_BITS);
-  el3 &= GMP_NUMB_MASK;
-#endif
-
-  ep[0] = el1;
-  ep[1] = eh1;
-  ep[2] = el2;
-  ep[3] = eh2;
-  ep[4] = el3;
-  ep[5] = eh3;
-
-  return cy;
-}
diff --git a/gmp/mpn/generic/sub_n.c b/gmp/mpn/generic/sub_n.c
index 29de2d2d89..d33668fa86 100644
--- a/gmp/mpn/generic/sub_n.c
+++ b/gmp/mpn/generic/sub_n.c
@@ -1,32 +1,21 @@
 /* mpn_sub_n -- Subtract equal length limb vectors.
 
-Copyright 1992-1994, 1996, 2000, 2002, 2009 Free Software Foundation, Inc.
+Copyright 1992, 1993, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -40,8 +29,8 @@ mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
   mp_limb_t ul, vl, sl, rl, cy, cy1, cy2;
 
   ASSERT (n >= 1);
-  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
-  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
 
   cy = 0;
   do
@@ -70,8 +59,8 @@ mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
   mp_limb_t ul, vl, rl, cy;
 
   ASSERT (n >= 1);
-  ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
-  ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
 
   cy = 0;
   do
diff --git a/gmp/mpn/generic/subcnd_n.c b/gmp/mpn/generic/subcnd_n.c
new file mode 100644
index 0000000000..0dcc45641d
--- /dev/null
+++ b/gmp/mpn/generic/subcnd_n.c
@@ -0,0 +1,85 @@
+/* mpn_subcnd_n -- Compute R = U - V if CND != 0 or R = U if CND == 0.
+
+   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
+   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_subcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)	/* variant built when GMP_NAIL_BITS == 0 */
+{
+  mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
+
+  ASSERT (n >= 1);	/* the do/while below always processes at least one limb */
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+  mask = -(mp_limb_t) (cnd != 0);	/* all ones if cnd != 0, all zeros otherwise */
+  cy = 0;	/* no incoming borrow for the first limb */
+  do
+    {
+      ul = *up++;
+      vl = *vp++ & mask;	/* becomes 0 when cnd == 0, so the result is just U */
+      sl = ul - vl;
+      cy1 = sl > ul;	/* unsigned wrap-around: ul - vl borrowed */
+      rl = sl - cy;	/* now subtract the borrow from the previous limb */
+      cy2 = rl > sl;	/* borrow produced by subtracting the incoming borrow */
+      cy = cy1 | cy2;	/* borrow passed on to the next limb */
+      *rp++ = rl;
+    }
+  while (--n != 0);
+
+  return cy;	/* borrow left over after the last limb (0 or 1) */
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+mp_limb_t
+mpn_subcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)	/* variant built when GMP_NAIL_BITS >= 1 */
+{
+  mp_limb_t ul, vl, rl, cy, mask;
+
+  ASSERT (n >= 1);	/* the do/while below always processes at least one limb */
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+  mask = -(mp_limb_t) (cnd != 0);	/* all ones if cnd != 0, all zeros otherwise */
+  cy = 0;	/* no incoming borrow for the first limb */
+  do
+    {
+      ul = *up++;
+      vl = *vp++ & mask;	/* becomes 0 when cnd == 0, so the result is just U */
+      rl = ul - vl - cy;	/* subtract the limb and the incoming borrow in one step */
+      cy = rl >> (GMP_LIMB_BITS - 1);	/* top bit of the difference is taken as the borrow; presumably relies on the inputs having zero nail bits -- confirm */
+      *rp++ = rl & GMP_NUMB_MASK;	/* store only the bits selected by GMP_NUMB_MASK */
+    }
+  while (--n != 0);
+
+  return cy;	/* borrow left over after the last limb */
+}
+
+#endif
diff --git a/gmp/mpn/generic/submul_1.c b/gmp/mpn/generic/submul_1.c
index fbc3501389..3e8e74302d 100644
--- a/gmp/mpn/generic/submul_1.c
+++ b/gmp/mpn/generic/submul_1.c
@@ -3,33 +3,23 @@
    vector pointed to by RP.  Return the most significant limb of the
    product, adjusted for carry-out from the subtraction.
 
-Copyright 1992-1994, 1996, 2000, 2002, 2004 Free Software Foundation, Inc.
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2004 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/mpn/generic/tdiv_qr.c b/gmp/mpn/generic/tdiv_qr.c
index be213b0467..8ac4d38813 100644
--- a/gmp/mpn/generic/tdiv_qr.c
+++ b/gmp/mpn/generic/tdiv_qr.c
@@ -1,43 +1,33 @@
 /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
    write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp.  If
    qxn is non-zero, generate that many fraction limbs and append them after the
-   other quotient limbs, and update the remainder accordingly.  The input
+   other quotient limbs, and update the remainder accordingly.  The input
    operands are unaffected.
 
    Preconditions:
    1. The most significant limb of of the divisor must be non-zero.
-   2. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
+   2. No argument overlap is permitted.  (??? relax this ???)
+   3. nn >= dn, even if qxn is non-zero.  (??? relax this ???)
 
    The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
    complexity of multiplication.
 
-Copyright 1997, 2000-2002, 2005, 2009 Free Software Foundation, Inc.
+Copyright 1997, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -48,8 +38,13 @@ void
 mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
 	     mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
 {
+  /* FIXME:
+     1. qxn
+     2. pass allocated storage in additional parameter?
+  */
   ASSERT_ALWAYS (qxn == 0);
 
+  ASSERT (qxn >= 0);
   ASSERT (nn >= 0);
   ASSERT (dn >= 0);
   ASSERT (dn == 0 || dp[dn - 1] != 0);
@@ -63,7 +58,7 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
 
     case 1:
       {
-	rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
+	rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
 	return;
       }
 
@@ -82,7 +77,7 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
 	    d2p = dtmp;
 	    d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
 	    d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
-	    n2p = TMP_ALLOC_LIMBS (nn + 1);
+	    n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
 	    cy = mpn_lshift (n2p, np, nn, cnt);
 	    n2p[nn] = cy;
 	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
@@ -95,7 +90,7 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
 	else
 	  {
 	    d2p = (mp_ptr) dp;
-	    n2p = TMP_ALLOC_LIMBS (nn);
+	    n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
 	    MPN_COPY (n2p, np, nn);
 	    qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
 	    qp[nn - 2] = qhl;	/* always store nn-2+1 quotient limbs */
@@ -109,13 +104,12 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
     default:
       {
 	int adjust;
-	gmp_pi1_t dinv;
 	TMP_DECL;
 	TMP_MARK;
 	adjust = np[nn - 1] >= dp[dn - 1];	/* conservative tests for quotient size */
 	if (nn + adjust >= 2 * dn)
 	  {
-	    mp_ptr n2p, d2p;
+	    mp_ptr n2p, d2p, q2p;
 	    mp_limb_t cy;
 	    int cnt;
 
@@ -124,9 +118,9 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
 	      {
 		count_leading_zeros (cnt, dp[dn - 1]);
 		cnt -= GMP_NAIL_BITS;
-		d2p = TMP_ALLOC_LIMBS (dn);
+		d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
 		mpn_lshift (d2p, dp, dn, cnt);
-		n2p = TMP_ALLOC_LIMBS (nn + 1);
+		n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
 		cy = mpn_lshift (n2p, np, nn, cnt);
 		n2p[nn] = cy;
 		nn += adjust;
@@ -135,28 +129,51 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
 	      {
 		cnt = 0;
 		d2p = (mp_ptr) dp;
-		n2p = TMP_ALLOC_LIMBS (nn + 1);
+		n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
 		MPN_COPY (n2p, np, nn);
 		n2p[nn] = 0;
 		nn += adjust;
 	      }
 
-	    invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
-	    if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
-	      mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
-	    else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) ||   /* fast condition */
-		     BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
-		     (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
-		     + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn)    /* ...condition */
-	      mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
+	    if (dn < DIV_DC_THRESHOLD)
+	      mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
 	    else
 	      {
-		mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
-		mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
-		mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
-		n2p = rp;
+		/* Divide 2*dn / dn limbs as long as the limbs in np last.  */
+		q2p = qp + nn - dn;
+		n2p += nn - dn;
+		do
+		  {
+		    q2p -= dn;  n2p -= dn;
+		    mpn_dc_divrem_n (q2p, n2p, d2p, dn);
+		    nn -= dn;
+		  }
+		while (nn >= 2 * dn);
+
+		if (nn != dn)
+		  {
+		    mp_limb_t ql;
+		    n2p -= nn - dn;
+
+		    /* We have now dn < nn < 2dn.  Make a recursive call,
+		       since falling out to the code below isn't pretty.
+		       Unfortunately, mpn_tdiv_qr returns nn-dn+1 quotient
+		       limbs, which would overwrite one already generated
+		       quotient limb.  Preserve it with an ugly hack.  */
+		    /* FIXME: This suggests that we should have an
+		       mpn_tdiv_qr_internal that instead returns the most
+		       significant quotient limb and move the meat of this
+		       function there.  */
+		    /* FIXME: Perhaps call mpn_sb_divrem_mn here for certain
+		       operand ranges, to decrease overhead for small
+		       operands?  */
+		    ql = qp[nn - dn]; /* preserve quotient limb... */
+		    mpn_tdiv_qr (qp, n2p, 0L, n2p, nn, d2p, dn);
+		    qp[nn - dn] = ql; /* ...restore it again */
+		  }
 	      }
 
+
 	    if (cnt != 0)
 	      mpn_rshift (rp, n2p, dn, cnt);
 	    else
@@ -229,11 +246,11 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
 		count_leading_zeros (cnt, dp[dn - 1]);
 		cnt -= GMP_NAIL_BITS;
 
-		d2p = TMP_ALLOC_LIMBS (qn);
+		d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
 		mpn_lshift (d2p, dp + in, qn, cnt);
 		d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
 
-		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
+		n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
 		cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
 		if (adjust)
 		  {
@@ -250,7 +267,7 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
 		cnt = 0;
 		d2p = (mp_ptr) dp + in;
 
-		n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
+		n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
 		MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
 		if (adjust)
 		  {
@@ -263,30 +280,25 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
 	    if (qn == 1)
 	      {
 		mp_limb_t q0, r0;
-		udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
-		n2p[0] = r0 >> GMP_NAIL_BITS;
+		mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
+		/* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
+		   temps here.  This doesn't hurt code quality on any machines
+		   so we do it unconditionally.  */
+		gcc272bug_n1 = n2p[1];
+		gcc272bug_n0 = n2p[0];
+		gcc272bug_d0 = d2p[0];
+		udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0 << GMP_NAIL_BITS,
+			    gcc272bug_d0 << GMP_NAIL_BITS);
+		r0 >>= GMP_NAIL_BITS;
+		n2p[0] = r0;
 		qp[0] = q0;
 	      }
 	    else if (qn == 2)
-	      mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
+	      mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
+	    else if (qn < DIV_DC_THRESHOLD)
+	      mpn_sb_divrem_mn (qp, n2p, 2 * qn, d2p, qn);
 	    else
-	      {
-		invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
-		if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
-		  mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
-		else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
-		  mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
-		else
-		  {
-		    mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
-		    mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
-		    mp_ptr r2p = rp;
-		    if (np == r2p)	/* If N and R share space, put ... */
-		      r2p += nn - qn;	/* intermediate remainder at N's upper end. */
-		    mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
-		    MPN_COPY (n2p, r2p, qn);
-		  }
-	      }
+	      mpn_dc_divrem_n (qp, n2p, d2p, qn);
 
 	    rn = qn;
 	    /* Multiply the first ignored divisor limb by the most significant
@@ -304,7 +316,7 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
 		dl = dp[in - 2];
 
 #if GMP_NAIL_BITS == 0
-	      x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
+	      x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % BITS_PER_MP_LIMB));
 #else
 	      x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
 	      if (cnt != 0)
@@ -354,7 +366,7 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
 	      }
 	    /* True: partial remainder now is neutral, i.e., it is not shifted up.  */
 
-	    tp = TMP_ALLOC_LIMBS (dn);
+	    tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
 
 	    if (in < qn)
 	      {
diff --git a/gmp/mpn/generic/toom22_mul.c b/gmp/mpn/generic/toom22_mul.c
index 36ac29b72d..6407bbeb96 100644
--- a/gmp/mpn/generic/toom22_mul.c
+++ b/gmp/mpn/generic/toom22_mul.c
@@ -7,33 +7,22 @@
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2006-2010, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 #include "gmp.h"
@@ -52,7 +41,7 @@ see https://www.gnu.org/licenses/.  */
   vinf=      a1 *     b1   # A(inf)*B(inf)
 */
 
-#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#if TUNE_PROGRAM_BUILD
 #define MAYBE_mul_toom22   1
 #else
 #define MAYBE_mul_toom22						\
@@ -62,36 +51,18 @@ see https://www.gnu.org/licenses/.  */
 #define TOOM22_MUL_N_REC(p, a, b, n, ws)				\
   do {									\
     if (! MAYBE_mul_toom22						\
-	|| BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
+	|| BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))		\
       mpn_mul_basecase (p, a, n, b, n);					\
     else								\
       mpn_toom22_mul (p, a, n, b, n, ws);				\
   } while (0)
 
-/* Normally, this calls mul_basecase or toom22_mul.  But when when the fraction
-   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small
-   relative unbalance will become a larger and larger relative unbalance with
-   each recursion (the difference s-t will be invariant over recursive calls).
-   Therefore, we need to call toom32_mul.  FIXME: Suppress depending on
-   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD.  */
-#define TOOM22_MUL_REC(p, a, an, b, bn, ws)				\
-  do {									\
-    if (! MAYBE_mul_toom22						\
-	|| BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD))			\
-      mpn_mul_basecase (p, a, an, b, bn);				\
-    else if (4 * an < 5 * bn)						\
-      mpn_toom22_mul (p, a, an, b, bn, ws);				\
-    else								\
-      mpn_toom32_mul (p, a, an, b, bn, ws);				\
-  } while (0)
-
 void
 mpn_toom22_mul (mp_ptr pp,
 		mp_srcptr ap, mp_size_t an,
 		mp_srcptr bp, mp_size_t bn,
 		mp_ptr scratch)
 {
-  const int __gmpn_cpuvec_initialized = 1;
   mp_size_t n, s, t;
   int vm1_neg;
   mp_limb_t cy, cy2;
@@ -179,8 +150,8 @@ mpn_toom22_mul (mp_ptr pp,
   /* vm1, 2n limbs */
   TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
 
-  if (s > t)  TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out);
-  else        TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out);
+  /* vinf, s+t limbs */
+  mpn_mul (vinf, a1, s, b1, t);
 
   /* v0, 2n limbs */
   TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out);
diff --git a/gmp/mpn/generic/toom2_sqr.c b/gmp/mpn/generic/toom2_sqr.c
index 2f2fdaee6f..445cff8f5d 100644
--- a/gmp/mpn/generic/toom2_sqr.c
+++ b/gmp/mpn/generic/toom2_sqr.c
@@ -6,33 +6,22 @@
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2006-2010, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 #include "gmp.h"
@@ -43,23 +32,25 @@ see https://www.gnu.org/licenses/.  */
   <-s--><--n-->
    ____ ______
   |_a1_|___a0_|
+   |b1_|___b0_|
+   <-t-><--n-->
 
-  v0  =  a0     ^2  #   A(0)^2
-  vm1 = (a0- a1)^2  #  A(-1)^2
-  vinf=      a1 ^2  # A(inf)^2
+  v0  =  a0     * b0       #   A(0)*B(0)
+  vm1 = (a0- a1)*(b0- b1)  #  A(-1)*B(-1)
+  vinf=      a1 *     b1   # A(inf)*B(inf)
 */
 
-#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#if TUNE_PROGRAM_BUILD
 #define MAYBE_sqr_toom2   1
 #else
 #define MAYBE_sqr_toom2							\
   (SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
 #endif
 
-#define TOOM2_SQR_REC(p, a, n, ws)					\
+#define TOOM2_SQR_N_REC(p, a, n, ws)					\
   do {									\
     if (! MAYBE_sqr_toom2						\
-	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
+	|| BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))		\
       mpn_sqr_basecase (p, a, n);					\
     else								\
       mpn_toom2_sqr (p, a, n, ws);					\
@@ -70,7 +61,6 @@ mpn_toom2_sqr (mp_ptr pp,
 	       mp_srcptr ap, mp_size_t an,
 	       mp_ptr scratch)
 {
-  const int __gmpn_cpuvec_initialized = 1;
   mp_size_t n, s;
   mp_limb_t cy, cy2;
   mp_ptr asm1;
@@ -113,16 +103,15 @@ mpn_toom2_sqr (mp_ptr pp,
 #define v0	pp				/* 2n */
 #define vinf	(pp + 2 * n)			/* s+s */
 #define vm1	scratch				/* 2n */
-#define scratch_out	scratch + 2 * n
 
   /* vm1, 2n limbs */
-  TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
+  TOOM2_SQR_N_REC (vm1, asm1, n, scratch);
 
   /* vinf, s+s limbs */
-  TOOM2_SQR_REC (vinf, a1, s, scratch_out);
+  TOOM2_SQR_N_REC (vinf, a1, s, scratch);
 
   /* v0, 2n limbs */
-  TOOM2_SQR_REC (v0, ap, n, scratch_out);
+  TOOM2_SQR_N_REC (v0, ap, n, scratch);
 
   /* H(v0) + L(vinf) */
   cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
diff --git a/gmp/mpn/generic/toom32_mul.c b/gmp/mpn/generic/toom32_mul.c
index 0b05669cc4..7bdd688a53 100644
--- a/gmp/mpn/generic/toom32_mul.c
+++ b/gmp/mpn/generic/toom32_mul.c
@@ -2,7 +2,6 @@
    times as large as bn.  Or more accurately, bn < an < 3bn.
 
    Contributed to the GNU project by Torbjorn Granlund.
-   Improvements by Marco Bodrato and Niels Möller.
 
    The idea of applying toom to unbalanced multiplication is due to Marco
    Bodrato and Alberto Zanoni.
@@ -11,34 +10,32 @@
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2006-2010 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-or both in parallel, as here.
 
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+/*
+  Things to work on:
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+  1. Trim allocation.  The allocations for as1, asm1, bs1, and bsm1 could be
+     avoided by instead reusing the pp area and the scratch allocation.
 
+  2. Apply optimizations also to mul_toom42.c.
+*/
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -57,9 +54,20 @@ see https://www.gnu.org/licenses/.  */
   vinf=          a2 *     b1  # A(inf)*B(inf)
 */
 
-#define TOOM32_MUL_N_REC(p, a, b, n, ws)				\
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_toom22   1
+#else
+#define MAYBE_mul_toom22						\
+  (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
+#endif
+
+#define TOOM22_MUL_N_REC(p, a, b, n, ws)				\
   do {									\
-    mpn_mul_n (p, a, b, n);						\
+    if (! MAYBE_mul_toom22						\
+	|| BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))		\
+      mpn_mul_basecase (p, a, n, b, n);					\
+    else								\
+      mpn_toom22_mul (p, a, n, b, n, ws);				\
   } while (0)
 
 void
@@ -70,9 +78,15 @@ mpn_toom32_mul (mp_ptr pp,
 {
   mp_size_t n, s, t;
   int vm1_neg;
+#if HAVE_NATIVE_mpn_add_nc
   mp_limb_t cy;
-  mp_limb_signed_t hi;
-  mp_limb_t ap1_hi, bp1_hi;
+#else
+  mp_limb_t cy, cy2;
+#endif
+  mp_ptr a0_a2;
+  mp_ptr as1, asm1;
+  mp_ptr bs1, bsm1;
+  TMP_DECL;
 
 #define a0  ap
 #define a1  (ap + n)
@@ -80,9 +94,6 @@ mpn_toom32_mul (mp_ptr pp,
 #define b0  bp
 #define b1  (bp + n)
 
-  /* Required, to ensure that s + t >= n. */
-  ASSERT (bn + 2 <= an && an + 6 <= 3*bn);
-
   n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
 
   s = an - 2 * n;
@@ -90,234 +101,191 @@ mpn_toom32_mul (mp_ptr pp,
 
   ASSERT (0 < s && s <= n);
   ASSERT (0 < t && t <= n);
-  ASSERT (s + t >= n);
 
-  /* Product area of size an + bn = 3*n + s + t >= 4*n + 2. */
-#define ap1 (pp)		/* n, most significant limb in ap1_hi */
-#define bp1 (pp + n)		/* n, most significant bit in bp1_hi */
-#define am1 (pp + 2*n)		/* n, most significant bit in hi */
-#define bm1 (pp + 3*n)		/* n */
-#define v1 (scratch)		/* 2n + 1 */
-#define vm1 (pp)		/* 2n + 1 */
-#define scratch_out (scratch + 2*n + 1) /* Currently unused. */
+  TMP_MARK;
+
+  as1 = TMP_SALLOC_LIMBS (n + 1);
+  asm1 = TMP_SALLOC_LIMBS (n + 1);
 
-  /* Scratch need: 2*n + 1 + scratch for the recursive multiplications. */
+  bs1 = TMP_SALLOC_LIMBS (n + 1);
+  bsm1 = TMP_SALLOC_LIMBS (n);
 
-  /* FIXME: Keep v1[2*n] and vm1[2*n] in scalar variables? */
+  a0_a2 = pp;
 
-  /* Compute ap1 = a0 + a1 + a3, am1 = a0 - a1 + a3 */
-  ap1_hi = mpn_add (ap1, a0, n, a2, s);
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
+  /* Compute as1 and asm1.  */
+  a0_a2[n] = mpn_add (a0_a2, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_addsub_n
+  if (a0_a2[n] == 0 && mpn_cmp (a0_a2, a1, n) < 0)
     {
-      ap1_hi = mpn_add_n_sub_n (ap1, am1, a1, ap1, n) >> 1;
-      hi = 0;
+      cy = mpn_addsub_n (as1, asm1, a1, a0_a2, n);
+      as1[n] = cy >> 1;
+      asm1[n] = 0;
       vm1_neg = 1;
     }
   else
     {
-      cy = mpn_add_n_sub_n (ap1, am1, ap1, a1, n);
-      hi = ap1_hi - (cy & 1);
-      ap1_hi += (cy >> 1);
+      cy = mpn_addsub_n (as1, asm1, a0_a2, a1, n);
+      as1[n] = a0_a2[n] + (cy >> 1);
+      asm1[n] = a0_a2[n] - (cy & 1);
       vm1_neg = 0;
     }
 #else
-  if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
+  as1[n] = a0_a2[n] + mpn_add_n (as1, a0_a2, a1, n);
+  if (a0_a2[n] == 0 && mpn_cmp (a0_a2, a1, n) < 0)
     {
-      ASSERT_NOCARRY (mpn_sub_n (am1, a1, ap1, n));
-      hi = 0;
+      mpn_sub_n (asm1, a1, a0_a2, n);
+      asm1[n] = 0;
       vm1_neg = 1;
     }
   else
     {
-      hi = ap1_hi - mpn_sub_n (am1, ap1, a1, n);
+      cy = mpn_sub_n (asm1, a0_a2, a1, n);
+      asm1[n] = a0_a2[n] - cy;
       vm1_neg = 0;
     }
-  ap1_hi += mpn_add_n (ap1, ap1, a1, n);
 #endif
 
-  /* Compute bp1 = b0 + b1 and bm1 = b0 - b1. */
+  /* Compute bs1 and bsm1.  */
   if (t == n)
     {
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
       if (mpn_cmp (b0, b1, n) < 0)
 	{
-	  cy = mpn_add_n_sub_n (bp1, bm1, b1, b0, n);
+	  cy = mpn_addsub_n (bs1, bsm1, b1, b0, n);
 	  vm1_neg ^= 1;
 	}
       else
 	{
-	  cy = mpn_add_n_sub_n (bp1, bm1, b0, b1, n);
+	  cy = mpn_addsub_n (bs1, bsm1, b0, b1, n);
 	}
-      bp1_hi = cy >> 1;
+      bs1[n] = cy >> 1;
 #else
-      bp1_hi = mpn_add_n (bp1, b0, b1, n);
+      bs1[n] = mpn_add_n (bs1, b0, b1, n);
 
       if (mpn_cmp (b0, b1, n) < 0)
 	{
-	  ASSERT_NOCARRY (mpn_sub_n (bm1, b1, b0, n));
+	  mpn_sub_n (bsm1, b1, b0, n);
 	  vm1_neg ^= 1;
 	}
       else
 	{
-	  ASSERT_NOCARRY (mpn_sub_n (bm1, b0, b1, n));
+	  mpn_sub_n (bsm1, b0, b1, n);
 	}
 #endif
     }
   else
     {
-      /* FIXME: Should still use mpn_add_n_sub_n for the main part. */
-      bp1_hi = mpn_add (bp1, b0, n, b1, t);
+      bs1[n] = mpn_add (bs1, b0, n, b1, t);
 
       if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
 	{
-	  ASSERT_NOCARRY (mpn_sub_n (bm1, b1, b0, t));
-	  MPN_ZERO (bm1 + t, n - t);
+	  mpn_sub_n (bsm1, b1, b0, t);
+	  MPN_ZERO (bsm1 + t, n - t);
 	  vm1_neg ^= 1;
 	}
       else
 	{
-	  ASSERT_NOCARRY (mpn_sub (bm1, b0, n, b1, t));
+	  mpn_sub (bsm1, b0, n, b1, t);
 	}
     }
 
-  TOOM32_MUL_N_REC (v1, ap1, bp1, n, scratch_out);
-  if (ap1_hi == 1)
+  ASSERT (as1[n] <= 2);
+  ASSERT (bs1[n] <= 1);
+  ASSERT (asm1[n] <= 1);
+/*ASSERT (bsm1[n] == 0); */
+
+#define v0    pp				/* 2n */
+#define v1    (scratch)				/* 2n+1 */
+#define vinf  (pp + 3 * n)			/* s+t */
+#define vm1   (scratch + 2 * n + 1)		/* 2n+1 */
+#define scratch_out	scratch + 4 * n + 2
+
+  /* vm1, 2n+1 limbs */
+  TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+  cy = 0;
+  if (asm1[n] != 0)
+    cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+  vm1[2 * n] = cy;
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a2, s, b1, t);
+  else        mpn_mul (vinf, b1, t, a2, s);
+
+  /* v1, 2n+1 limbs */
+  TOOM22_MUL_N_REC (v1, as1, bs1, n, scratch_out);
+  if (as1[n] == 1)
     {
-      cy = bp1_hi + mpn_add_n (v1 + n, v1 + n, bp1, n);
+      cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
     }
-  else if (ap1_hi == 2)
+  else if (as1[n] == 2)
     {
 #if HAVE_NATIVE_mpn_addlsh1_n
-      cy = 2 * bp1_hi + mpn_addlsh1_n (v1 + n, v1 + n, bp1, n);
+      cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
 #else
-      cy = 2 * bp1_hi + mpn_addmul_1 (v1 + n, bp1, n, CNST_LIMB(2));
+      cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
 #endif
     }
   else
     cy = 0;
-  if (bp1_hi != 0)
-    cy += mpn_add_n (v1 + n, v1 + n, ap1, n);
+  if (bs1[n] != 0)
+    cy += mpn_add_n (v1 + n, v1 + n, as1, n);
   v1[2 * n] = cy;
 
-  TOOM32_MUL_N_REC (vm1, am1, bm1, n, scratch_out);
-  if (hi)
-    hi = mpn_add_n (vm1+n, vm1+n, bm1, n);
+  mpn_mul_n (v0, ap, bp, n);                    /* v0, 2n limbs */
 
-  vm1[2*n] = hi;
+  /* Interpolate */
 
-  /* v1 <-- (v1 + vm1) / 2 = x0 + x2 */
   if (vm1_neg)
     {
-#if HAVE_NATIVE_mpn_rsh1sub_n
-      mpn_rsh1sub_n (v1, v1, vm1, 2*n+1);
+#if HAVE_NATIVE_mpn_rsh1add_n
+      mpn_rsh1add_n (vm1, v1, vm1, 2 * n + 1);
 #else
-      mpn_sub_n (v1, v1, vm1, 2*n+1);
-      ASSERT_NOCARRY (mpn_rshift (v1, v1, 2*n+1, 1));
+      mpn_add_n (vm1, v1, vm1, 2 * n + 1);
+      mpn_rshift (vm1, vm1, 2 * n + 1, 1);
 #endif
     }
   else
     {
-#if HAVE_NATIVE_mpn_rsh1add_n
-      mpn_rsh1add_n (v1, v1, vm1, 2*n+1);
+#if HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (vm1, v1, vm1, 2 * n + 1);
 #else
-      mpn_add_n (v1, v1, vm1, 2*n+1);
-      ASSERT_NOCARRY (mpn_rshift (v1, v1, 2*n+1, 1));
+      mpn_sub_n (vm1, v1, vm1, 2 * n + 1);
+      mpn_rshift (vm1, vm1, 2 * n + 1, 1);
 #endif
     }
 
-  /* We get x1 + x3 = (x0 + x2) - (x0 - x1 + x2 - x3), and hence
-
-     y = x1 + x3 + (x0 + x2) * B
-       = (x0 + x2) * B + (x0 + x2) - vm1.
-
-     y is 3*n + 1 limbs, y = y0 + y1 B + y2 B^2. We store them as
-     follows: y0 at scratch, y1 at pp + 2*n, and y2 at scratch + n
-     (already in place, except for carry propagation).
+  mpn_sub_n (v1, v1, vm1, 2 * n + 1);
+  v1[2 * n] -= mpn_sub_n (v1, v1, v0, 2 * n);
 
-     We thus add
+  /*
+    pp[] prior to operations:
+     |_H vinf|_L vinf|_______|_______|_______|
 
-   B^3  B^2   B    1
-    |    |    |    |
-   +-----+----+
- + |  x0 + x2 |
-   +----+-----+----+
- +      |  x0 + x2 |
-	+----------+
- -      |  vm1     |
- --+----++----+----+-
-   | y2  | y1 | y0 |
-   +-----+----+----+
-
-  Since we store y0 at the same location as the low half of x0 + x2, we
-  need to do the middle sum first. */
-
-  hi = vm1[2*n];
-  cy = mpn_add_n (pp + 2*n, v1, v1 + n, n);
-  MPN_INCR_U (v1 + n, n + 1, cy + v1[2*n]);
-
-  /* FIXME: Can we get rid of this second vm1_neg conditional by
-     swapping the location of +1 and -1 values? */
-  if (vm1_neg)
-    {
-      cy = mpn_add_n (v1, v1, vm1, n);
-      hi += mpn_add_nc (pp + 2*n, pp + 2*n, vm1 + n, n, cy);
-      MPN_INCR_U (v1 + n, n+1, hi);
-    }
-  else
-    {
-      cy = mpn_sub_n (v1, v1, vm1, n);
-      hi += mpn_sub_nc (pp + 2*n, pp + 2*n, vm1 + n, n, cy);
-      MPN_DECR_U (v1 + n, n+1, hi);
-    }
-
-  TOOM32_MUL_N_REC (pp, a0, b0, n, scratch_out);
-  /* vinf, s+t limbs.  Use mpn_mul for now, to handle unbalanced operands */
-  if (s > t)  mpn_mul (pp+3*n, a2, s, b1, t);
-  else        mpn_mul (pp+3*n, b1, t, a2, s);
-
-  /* Remaining interpolation.
-
-     y * B + x0 + x3 B^3 - x0 B^2 - x3 B
-     = (x1 + x3) B + (x0 + x2) B^2 + x0 + x3 B^3 - x0 B^2 - x3 B
-     = y0 B + y1 B^2 + y3 B^3 + Lx0 + H x0 B
-       + L x3 B^3 + H x3 B^4 - Lx0 B^2 - H x0 B^3 - L x3 B - H x3 B^2
-     = L x0 + (y0 + H x0 - L x3) B + (y1 - L x0 - H x3) B^2
-       + (y2 - (H x0 - L x3)) B^3 + H x3 B^4
-
-	  B^4       B^3       B^2        B         1
- |         |         |         |         |         |
-   +-------+                   +---------+---------+
-   |  Hx3  |                   | Hx0-Lx3 |    Lx0  |
-   +------+----------+---------+---------+---------+
-	  |    y2    |  y1     |   y0    |
-	  ++---------+---------+---------+
-	  -| Hx0-Lx3 | - Lx0   |
-	   +---------+---------+
-		      | - Hx3  |
-		      +--------+
-
-    We must take into account the carry from Hx0 - Lx3.
+    summation scheme for remaining operations:
+     |_______|_______|_______|_______|_______|
+     |_Hvinf_|_Lvinf_|       |_H v0__|_L v0__|
+		     | H vm1 | L vm1 |
+		     |-H vinf|-L vinf|
+	     | H v1  | L v1  |
   */
 
-  cy = mpn_sub_n (pp + n, pp + n, pp+3*n, n);
-  hi = scratch[2*n] + cy;
-
-  cy = mpn_sub_nc (pp + 2*n, pp + 2*n, pp, n, cy);
-  hi -= mpn_sub_nc (pp + 3*n, scratch + n, pp + n, n, cy);
-
-  hi += mpn_add (pp + n, pp + n, 3*n, scratch, n);
-
-  /* FIXME: Is support for s + t == n needed? */
-  if (LIKELY (s + t > n))
-    {
-      hi -= mpn_sub (pp + 2*n, pp + 2*n, 2*n, pp + 4*n, s+t-n);
+  mpn_sub (vm1, vm1, 2 * n + 1, vinf, s + t);
+#if HAVE_NATIVE_mpn_add_nc
+  cy = mpn_add_n (pp + n, pp + n, vm1, n);
+  cy = mpn_add_nc (pp + 2 * n, v1, vm1 + n, n, cy);
+  cy = mpn_add_nc (pp + 3 * n, pp + 3 * n, v1 + n, n, cy);
+  mpn_incr_u (pp + 3 * n, vm1[2 * n]);
+  if (LIKELY (n != s + t))  /* FIXME: Limit operand range to avoid condition */
+    mpn_incr_u (pp + 4 * n, cy + v1[2 * n]);
+#else
+  cy2 = mpn_add_n (pp + n, pp + n, vm1, n);
+  cy = mpn_add_n (pp + 2 * n, v1, vm1 + n, n);
+  mpn_incr_u (pp + 2 * n, cy2);
+  mpn_incr_u (pp + 3 * n, cy + vm1[2 * n]);
+  cy = mpn_add_n (pp + 3 * n, pp + 3 * n, v1 + n,  n);
+  if (LIKELY (n != s + t))  /* FIXME: Limit operand range to avoid condition */
+    mpn_incr_u (pp + 4 * n, cy + v1[2 * n]);
+#endif
 
-      if (hi < 0)
-	MPN_DECR_U (pp + 4*n, s+t-n, -hi);
-      else
-	MPN_INCR_U (pp + 4*n, s+t-n, hi);
-    }
-  else
-    ASSERT (hi == 0);
+  TMP_FREE;
 }
diff --git a/gmp/mpn/generic/toom33_mul.c b/gmp/mpn/generic/toom33_mul.c
index 655355c39a..5fa2813c31 100644
--- a/gmp/mpn/generic/toom33_mul.c
+++ b/gmp/mpn/generic/toom33_mul.c
@@ -1,52 +1,48 @@
-/* mpn_toom33_mul -- Multiply {ap,an} and {p,bn} where an and bn are close in
+/* mpn_toom33_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
    size.  Or more accurately, bn <= an < (3/2)bn.
 
    Contributed to the GNU project by Torbjorn Granlund.
-   Additional improvements by Marco Bodrato.
 
    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2006-2008, 2010, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
 
+/*
+  Things to work on:
+
+  1. Trim allocation.  The allocations for as1, asm1, bs1, and bsm1 could be
+     avoided by instead reusing the pp area and the scratch area.
+  2. Use new toom functions for the recursive calls.
+*/
 
 #include "gmp.h"
 #include "gmp-impl.h"
 
 /* Evaluate in: -1, 0, +1, +2, +inf
 
-  <-s--><--n--><--n-->
-   ____ ______ ______
-  |_a2_|___a1_|___a0_|
-   |b2_|___b1_|___b0_|
-   <-t-><--n--><--n-->
+  <-s-><--n--><--n--><--n-->
+   ___ ______ ______ ______
+  |a3_|___a2_|___a1_|___a0_|
+	       |_b1_|___b0_|
+	       <-t--><--n-->
 
   v0  =  a0         * b0          #   A(0)*B(0)
   v1  = (a0+ a1+ a2)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 2  bh <= 2
@@ -55,33 +51,26 @@ see https://www.gnu.org/licenses/.  */
   vinf=          a2 *         b2  # A(inf)*B(inf)
 */
 
-#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#if TUNE_PROGRAM_BUILD
 #define MAYBE_mul_basecase 1
 #define MAYBE_mul_toom33   1
 #else
 #define MAYBE_mul_basecase						\
-  (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
+  (MUL_TOOM33_THRESHOLD < 3 * MUL_KARATSUBA_THRESHOLD)
 #define MAYBE_mul_toom33						\
   (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
 #endif
 
-/* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
-   multiplication at the infinity point. We may have
-   MAYBE_mul_basecase == 0, and still get s just below
-   MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
-   s == 1 and mpn_toom22_mul will crash.
-*/
-
 #define TOOM33_MUL_N_REC(p, a, b, n, ws)				\
   do {									\
     if (MAYBE_mul_basecase						\
-	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
+	&& BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))		\
       mpn_mul_basecase (p, a, n, b, n);					\
     else if (! MAYBE_mul_toom33						\
 	     || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))		\
-      mpn_toom22_mul (p, a, n, b, n, ws);				\
+      mpn_kara_mul_n (p, a, b, n, ws);					\
     else								\
-      mpn_toom33_mul (p, a, n, b, n, ws);				\
+      mpn_toom3_mul_n (p, a, b, n, ws);					\
   } while (0)
 
 void
@@ -90,13 +79,13 @@ mpn_toom33_mul (mp_ptr pp,
 		mp_srcptr bp, mp_size_t bn,
 		mp_ptr scratch)
 {
-  const int __gmpn_cpuvec_initialized = 1;
   mp_size_t n, s, t;
   int vm1_neg;
   mp_limb_t cy, vinf0;
   mp_ptr gp;
   mp_ptr as1, asm1, as2;
   mp_ptr bs1, bsm1, bs2;
+  TMP_DECL;
 
 #define a0  ap
 #define a1  (ap + n)
@@ -115,34 +104,35 @@ mpn_toom33_mul (mp_ptr pp,
   ASSERT (0 < s && s <= n);
   ASSERT (0 < t && t <= n);
 
-  as1  = scratch + 4 * n + 4;
-  asm1 = scratch + 2 * n + 2;
-  as2 = pp + n + 1;
+  TMP_MARK;
+
+  as1 = TMP_SALLOC_LIMBS (n + 1);
+  asm1 = TMP_SALLOC_LIMBS (n + 1);
+  as2 = TMP_SALLOC_LIMBS (n + 1);
 
-  bs1 = pp;
-  bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
-  bs2 = pp + 2 * n + 2;
+  bs1 = TMP_SALLOC_LIMBS (n + 1);
+  bsm1 = TMP_SALLOC_LIMBS (n + 1);
+  bs2 = TMP_SALLOC_LIMBS (n + 1);
 
-  gp = scratch;
+  gp = pp;
 
   vm1_neg = 0;
 
   /* Compute as1 and asm1.  */
   cy = mpn_add (gp, a0, n, a2, s);
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
   if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
     {
-      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
-      as1[n] = cy >> 1;
+      cy = mpn_addsub_n (as1, asm1, a1, gp, n);
+      as1[n] = 0;
       asm1[n] = 0;
       vm1_neg = 1;
     }
   else
     {
-      mp_limb_t cy2;
-      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
+      cy2 = mpn_addsub_n (as1, asm1, gp, a1, n);
       as1[n] = cy + (cy2 >> 1);
-      asm1[n] = cy - (cy2 & 1);
+      asm1[n] = cy - (cy & 1);
     }
 #else
   as1[n] = cy + mpn_add_n (as1, gp, a1, n);
@@ -160,45 +150,36 @@ mpn_toom33_mul (mp_ptr pp,
 #endif
 
   /* Compute as2.  */
-#if HAVE_NATIVE_mpn_rsblsh1_n
-  cy = mpn_add_n (as2, a2, as1, s);
-  if (s != n)
-    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
-  cy += as1[n];
-  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
-#else
 #if HAVE_NATIVE_mpn_addlsh1_n
   cy  = mpn_addlsh1_n (as2, a1, a2, s);
   if (s != n)
     cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
   cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
 #else
-  cy = mpn_add_n (as2, a2, as1, s);
+  cy  = mpn_lshift (as2, a2, s, 1);
+  cy += mpn_add_n (as2, a1, as2, s);
   if (s != n)
-    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
-  cy += as1[n];
+    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
   cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
-  cy -= mpn_sub_n (as2, as2, a0, n);
-#endif
+  cy += mpn_add_n (as2, a0, as2, n);
 #endif
   as2[n] = cy;
 
   /* Compute bs1 and bsm1.  */
   cy = mpn_add (gp, b0, n, b2, t);
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
   if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
     {
-      cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
-      bs1[n] = cy >> 1;
+      cy = mpn_addsub_n (bs1, bsm1, b1, gp, n);
+      bs1[n] = 0;
       bsm1[n] = 0;
       vm1_neg ^= 1;
     }
   else
     {
-      mp_limb_t cy2;
-      cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
+      cy2 = mpn_addsub_n (bs1, bsm1, gp, b1, n);
       bs1[n] = cy + (cy2 >> 1);
-      bsm1[n] = cy - (cy2 & 1);
+      bsm1[n] = cy - (cy & 1);
     }
 #else
   bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
@@ -216,26 +197,18 @@ mpn_toom33_mul (mp_ptr pp,
 #endif
 
   /* Compute bs2.  */
-#if HAVE_NATIVE_mpn_rsblsh1_n
-  cy = mpn_add_n (bs2, b2, bs1, t);
-  if (t != n)
-    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
-  cy += bs1[n];
-  cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
-#else
 #if HAVE_NATIVE_mpn_addlsh1_n
   cy  = mpn_addlsh1_n (bs2, b1, b2, t);
   if (t != n)
     cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
   cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
 #else
-  cy  = mpn_add_n (bs2, bs1, b2, t);
+  cy  = mpn_lshift (bs2, b2, t, 1);
+  cy += mpn_add_n (bs2, b1, bs2, t);
   if (t != n)
-    cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
-  cy += bs1[n];
+    cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
   cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
-  cy -= mpn_sub_n (bs2, bs2, b0, n);
-#endif
+  cy += mpn_add_n (bs2, b0, bs2, n);
 #endif
   bs2[n] = cy;
 
@@ -251,7 +224,7 @@ mpn_toom33_mul (mp_ptr pp,
 #define vinf  (pp + 4 * n)			/* s+t */
 #define vm1   scratch				/* 2n+1 */
 #define v2    (scratch + 2 * n + 1)		/* 2n+2 */
-#define scratch_out  (scratch + 5 * n + 5)
+#define scratch_out  (scratch + 4 * n + 4)
 
   /* vm1, 2n+1 limbs */
 #ifdef SMALLER_RECURSION
@@ -312,5 +285,7 @@ mpn_toom33_mul (mp_ptr pp,
 
   TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out);	/* v0, 2n limbs */
 
-  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
+  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, 1^vm1_neg, vinf0, scratch_out);
+
+  TMP_FREE;
 }
diff --git a/gmp/mpn/generic/toom3_sqr.c b/gmp/mpn/generic/toom3_sqr.c
index 6117c67ca6..0c8a4ff74d 100644
--- a/gmp/mpn/generic/toom3_sqr.c
+++ b/gmp/mpn/generic/toom3_sqr.c
@@ -1,77 +1,75 @@
 /* mpn_toom3_sqr -- Square {ap,an}.
 
    Contributed to the GNU project by Torbjorn Granlund.
-   Additional improvements by Marco Bodrato.
 
    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2006-2010, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
+/*
+  Things to work on:
+
+  1. Trim allocation.  The allocations for as1 and asm1 could be
+     avoided by instead reusing the pp area and the scratch area.
+  2. Use new toom functions for the recursive calls.
+*/
+
 #include "gmp.h"
 #include "gmp-impl.h"
 
 /* Evaluate in: -1, 0, +1, +2, +inf
 
-  <-s--><--n--><--n-->
-   ____ ______ ______
-  |_a2_|___a1_|___a0_|
-
-  v0  =  a0         ^2 #   A(0)^2
-  v1  = (a0+ a1+ a2)^2 #   A(1)^2    ah  <= 2
-  vm1 = (a0- a1+ a2)^2 #  A(-1)^2   |ah| <= 1
-  v2  = (a0+2a1+4a2)^2 #   A(2)^2    ah  <= 6
-  vinf=          a2 ^2 # A(inf)^2
+  <-s--><--n--><--n-->
+   ____ ______ ______
+  |_a2_|___a1_|___a0_|
+
+  Squaring, so there is a single operand and no b side:
+
+  v0  =  a0         ^2 #   A(0)^2
+  v1  = (a0+ a1+ a2)^2 #   A(1)^2    ah  <= 2
+  vm1 = (a0- a1+ a2)^2 #  A(-1)^2   |ah| <= 1
+  v2  = (a0+2a1+4a2)^2 #   A(2)^2    ah  <= 6
+  vinf=          a2 ^2 # A(inf)^2
 */
 
-#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#if TUNE_PROGRAM_BUILD
 #define MAYBE_sqr_basecase 1
 #define MAYBE_sqr_toom3   1
 #else
 #define MAYBE_sqr_basecase						\
-  (SQR_TOOM3_THRESHOLD < 3 * SQR_TOOM2_THRESHOLD)
+  (SQR_TOOM3_THRESHOLD < 3 * SQR_KARATSUBA_THRESHOLD)
 #define MAYBE_sqr_toom3							\
   (SQR_TOOM4_THRESHOLD >= 3 * SQR_TOOM3_THRESHOLD)
 #endif
 
-#define TOOM3_SQR_REC(p, a, n, ws)					\
+#define TOOM3_SQR_N_REC(p, a, n, ws)					\
   do {									\
     if (MAYBE_sqr_basecase						\
-	&& BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
+	&& BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))		\
       mpn_sqr_basecase (p, a, n);					\
     else if (! MAYBE_sqr_toom3						\
 	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))		\
-      mpn_toom2_sqr (p, a, n, ws);					\
+      mpn_kara_sqr_n (p, a, n, ws);					\
     else								\
-      mpn_toom3_sqr (p, a, n, ws);					\
+      mpn_toom3_sqr_n (p, a, n, ws);					\
   } while (0)
 
 void
@@ -79,11 +77,11 @@ mpn_toom3_sqr (mp_ptr pp,
 	       mp_srcptr ap, mp_size_t an,
 	       mp_ptr scratch)
 {
-  const int __gmpn_cpuvec_initialized = 1;
   mp_size_t n, s;
   mp_limb_t cy, vinf0;
   mp_ptr gp;
   mp_ptr as1, asm1, as2;
+  TMP_DECL;
 
 #define a0  ap
 #define a1  (ap + n)
@@ -95,27 +93,28 @@ mpn_toom3_sqr (mp_ptr pp,
 
   ASSERT (0 < s && s <= n);
 
-  as1 = scratch + 4 * n + 4;
-  asm1 = scratch + 2 * n + 2;
-  as2 = pp + n + 1;
+  TMP_MARK;
+
+  as1 = TMP_SALLOC_LIMBS (n + 1);
+  asm1 = TMP_SALLOC_LIMBS (n + 1);
+  as2 = TMP_SALLOC_LIMBS (n + 1);
 
-  gp = scratch;
+  gp = pp;
 
   /* Compute as1 and asm1.  */
   cy = mpn_add (gp, a0, n, a2, s);
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
   if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
     {
-      cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
-      as1[n] = cy >> 1;
+      cy = mpn_addsub_n (as1, asm1, a1, gp, n);
+      as1[n] = 0;
       asm1[n] = 0;
     }
   else
     {
-      mp_limb_t cy2;
-      cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
+      cy2 = mpn_addsub_n (as1, asm1, gp, a1, n);
       as1[n] = cy + (cy2 >> 1);
-      asm1[n] = cy - (cy2 & 1);
+      asm1[n] = cy - (cy & 1);
     }
 #else
   as1[n] = cy + mpn_add_n (as1, gp, a1, n);
@@ -132,26 +131,18 @@ mpn_toom3_sqr (mp_ptr pp,
 #endif
 
   /* Compute as2.  */
-#if HAVE_NATIVE_mpn_rsblsh1_n
-  cy = mpn_add_n (as2, a2, as1, s);
-  if (s != n)
-    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
-  cy += as1[n];
-  cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
-#else
 #if HAVE_NATIVE_mpn_addlsh1_n
   cy  = mpn_addlsh1_n (as2, a1, a2, s);
   if (s != n)
     cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
   cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
 #else
-  cy = mpn_add_n (as2, a2, as1, s);
+  cy  = mpn_lshift (as2, a2, s, 1);
+  cy += mpn_add_n (as2, a1, as2, s);
   if (s != n)
-    cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
-  cy += as1[n];
+    cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
   cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
-  cy -= mpn_sub_n (as2, as2, a0, n);
-#endif
+  cy += mpn_add_n (as2, a0, as2, n);
 #endif
   as2[n] = cy;
 
@@ -163,11 +154,11 @@ mpn_toom3_sqr (mp_ptr pp,
 #define vinf  (pp + 4 * n)			/* s+s */
 #define vm1   scratch				/* 2n+1 */
 #define v2    (scratch + 2 * n + 1)		/* 2n+2 */
-#define scratch_out  (scratch + 5 * n + 5)
+#define scratch_out  (scratch + 4 * n + 4)
 
   /* vm1, 2n+1 limbs */
 #ifdef SMALLER_RECURSION
-  TOOM3_SQR_REC (vm1, asm1, n, scratch_out);
+  TOOM3_SQR_N_REC (vm1, asm1, n, scratch_out);
   cy = 0;
   if (asm1[n] != 0)
     cy = asm1[n] + mpn_add_n (vm1 + n, vm1 + n, asm1, n);
@@ -175,18 +166,18 @@ mpn_toom3_sqr (mp_ptr pp,
     cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
   vm1[2 * n] = cy;
 #else
-  TOOM3_SQR_REC (vm1, asm1, n + 1, scratch_out);
+  TOOM3_SQR_N_REC (vm1, asm1, n + 1, scratch_out);
 #endif
 
-  TOOM3_SQR_REC (v2, as2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
+  TOOM3_SQR_N_REC (v2, as2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
 
-  TOOM3_SQR_REC (vinf, a2, s, scratch_out);	/* vinf, s+s limbs */
+  TOOM3_SQR_N_REC (vinf, a2, s, scratch_out);		/* vinf, s+s limbs */
 
   vinf0 = vinf[0];				/* v1 overlaps with this */
 
 #ifdef SMALLER_RECURSION
   /* v1, 2n+1 limbs */
-  TOOM3_SQR_REC (v1, as1, n, scratch_out);
+  TOOM3_SQR_N_REC (v1, as1, n, scratch_out);
   if (as1[n] == 1)
     {
       cy = as1[n] + mpn_add_n (v1 + n, v1 + n, as1, n);
@@ -216,11 +207,13 @@ mpn_toom3_sqr (mp_ptr pp,
   v1[2 * n] = cy;
 #else
   cy = vinf[1];
-  TOOM3_SQR_REC (v1, as1, n + 1, scratch_out);
+  TOOM3_SQR_N_REC (v1, as1, n + 1, scratch_out);
   vinf[1] = cy;
 #endif
 
-  TOOM3_SQR_REC (v0, ap, n, scratch_out);	/* v0, 2n limbs */
+  TOOM3_SQR_N_REC (v0, ap, n, scratch_out);	/* v0, 2n limbs */
+
+  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 1, vinf0, scratch_out);
 
-  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 0, vinf0);
+  TMP_FREE;
 }
diff --git a/gmp/mpn/generic/toom42_mul.c b/gmp/mpn/generic/toom42_mul.c
index 9b1e7d491b..981b45df83 100644
--- a/gmp/mpn/generic/toom42_mul.c
+++ b/gmp/mpn/generic/toom42_mul.c
@@ -11,34 +11,32 @@
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2006-2008, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-or both in parallel, as here.
 
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+/*
+  Things to work on:
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+  1. Trim allocation.  The allocations for as1, asm1, bs1, and bsm1 could be
+     avoided by instead reusing the pp area and the scratch allocation.
 
+  2. Apply optimizations also to toom32_mul.c.
+*/
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -58,9 +56,20 @@ see https://www.gnu.org/licenses/.  */
   vinf=              a3 *     b1  # A(inf)*B(inf)
 */
 
-#define TOOM42_MUL_N_REC(p, a, b, n, ws)				\
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_toom22   1
+#else
+#define MAYBE_mul_toom22						\
+  (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
+#endif
+
+#define TOOM22_MUL_N_REC(p, a, b, n, ws)				\
   do {									\
-    mpn_mul_n (p, a, b, n);						\
+    if (! MAYBE_mul_toom22						\
+	|| BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))		\
+      mpn_mul_basecase (p, a, n, b, n);					\
+    else								\
+      mpn_toom22_mul (p, a, n, b, n, ws);				\
   } while (0)
 
 void
@@ -72,7 +81,7 @@ mpn_toom42_mul (mp_ptr pp,
   mp_size_t n, s, t;
   int vm1_neg;
   mp_limb_t cy, vinf0;
-  mp_ptr a0_a2;
+  mp_ptr a0_a2, a1_a3;
   mp_ptr as1, asm1, as2;
   mp_ptr bs1, bsm1, bs2;
   TMP_DECL;
@@ -103,9 +112,35 @@ mpn_toom42_mul (mp_ptr pp,
   bs2 = TMP_SALLOC_LIMBS (n + 1);
 
   a0_a2 = pp;
+  a1_a3 = pp + n + 1;
 
   /* Compute as1 and asm1.  */
-  vm1_neg = mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0_a2) & 1;
+  a0_a2[n] = mpn_add_n (a0_a2, a0, a2, n);
+  a1_a3[n] = mpn_add (a1_a3, a1, n, a3, s);
+#if HAVE_NATIVE_mpn_addsub_n
+  if (mpn_cmp (a0_a2, a1_a3, n + 1) < 0)
+    {
+      mpn_addsub_n (as1, asm1, a1_a3, a0_a2, n + 1);
+      vm1_neg = 1;
+    }
+  else
+    {
+      mpn_addsub_n (as1, asm1, a0_a2, a1_a3, n + 1);
+      vm1_neg = 0;
+    }
+#else
+  mpn_add_n (as1, a0_a2, a1_a3, n + 1);
+  if (mpn_cmp (a0_a2, a1_a3, n + 1) < 0)
+    {
+      mpn_sub_n (asm1, a1_a3, a0_a2, n + 1);
+      vm1_neg = 1;
+    }
+  else
+    {
+      mpn_sub_n (asm1, a0_a2, a1_a3, n + 1);
+      vm1_neg = 0;
+    }
+#endif
 
   /* Compute as2.  */
 #if HAVE_NATIVE_mpn_addlsh1_n
@@ -129,15 +164,15 @@ mpn_toom42_mul (mp_ptr pp,
   /* Compute bs1 and bsm1.  */
   if (t == n)
     {
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
       if (mpn_cmp (b0, b1, n) < 0)
 	{
-	  cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+	  cy = mpn_addsub_n (bs1, bsm1, b1, b0, n);
 	  vm1_neg ^= 1;
 	}
       else
 	{
-	  cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+	  cy = mpn_addsub_n (bs1, bsm1, b0, b1, n);
 	}
       bs1[n] = cy >> 1;
 #else
@@ -185,16 +220,16 @@ mpn_toom42_mul (mp_ptr pp,
 #define vinf  (pp + 4 * n)			/* s+t */
 #define vm1   scratch				/* 2n+1 */
 #define v2    (scratch + 2 * n + 1)		/* 2n+2 */
-#define scratch_out	scratch + 4 * n + 4	/* Currently unused. */
+#define scratch_out	scratch + 4 * n + 4
 
   /* vm1, 2n+1 limbs */
-  TOOM42_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+  TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
   cy = 0;
   if (asm1[n] != 0)
     cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
   vm1[2 * n] = cy;
 
-  TOOM42_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
+  TOOM22_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out);	/* v2, 2n+1 limbs */
 
   /* vinf, s+t limbs */
   if (s > t)  mpn_mul (vinf, a3, s, b1, t);
@@ -203,7 +238,7 @@ mpn_toom42_mul (mp_ptr pp,
   vinf0 = vinf[0];				/* v1 overlaps with this */
 
   /* v1, 2n+1 limbs */
-  TOOM42_MUL_N_REC (v1, as1, bs1, n, scratch_out);
+  TOOM22_MUL_N_REC (v1, as1, bs1, n, scratch_out);
   if (as1[n] == 1)
     {
       cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
@@ -226,9 +261,9 @@ mpn_toom42_mul (mp_ptr pp,
     cy += mpn_add_n (v1 + n, v1 + n, as1, n);
   v1[2 * n] = cy;
 
-  TOOM42_MUL_N_REC (v0, ap, bp, n, scratch_out);	/* v0, 2n limbs */
+  TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out);	/* v0, 2n limbs */
 
-  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
+  mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, 1^vm1_neg, vinf0, scratch + 4 * n + 4);
 
   TMP_FREE;
 }
diff --git a/gmp/mpn/generic/toom42_mulmid.c b/gmp/mpn/generic/toom42_mulmid.c
deleted file mode 100644
index 0251a6d7ed..0000000000
--- a/gmp/mpn/generic/toom42_mulmid.c
+++ /dev/null
@@ -1,238 +0,0 @@
-/* mpn_toom42_mulmid -- toom42 middle product
-
-   Contributed by David Harvey.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-
-/*
-  Middle product of {ap,2n-1} and {bp,n}, output written to {rp,n+2}.
-
-  Neither ap nor bp may overlap rp.
-
-  Must have n >= 4.
-
-  Amount of scratch space required is given by mpn_toom42_mulmid_itch().
-
-  FIXME: this code assumes that n is small compared to GMP_NUMB_MAX. The exact
-  requirements should be clarified.
-*/
-void
-mpn_toom42_mulmid (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n,
-                   mp_ptr scratch)
-{
-  mp_limb_t cy, e[12], zh, zl;
-  mp_size_t m;
-  int neg;
-
-  ASSERT (n >= 4);
-  ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
-  ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
-
-  ap += n & 1;   /* handle odd row and diagonal later */
-  m = n / 2;
-
-  /* (e0h:e0l) etc are correction terms, in 2's complement */
-#define e0l (e[0])
-#define e0h (e[1])
-#define e1l (e[2])
-#define e1h (e[3])
-#define e2l (e[4])
-#define e2h (e[5])
-#define e3l (e[6])
-#define e3h (e[7])
-#define e4l (e[8])
-#define e4h (e[9])
-#define e5l (e[10])
-#define e5h (e[11])
-
-#define s (scratch + 2)
-#define t (rp + m + 2)
-#define p0 rp
-#define p1 scratch
-#define p2 (rp + m)
-#define next_scratch (scratch + 3*m + 1)
-
-  /*
-            rp                            scratch
-  |---------|-----------|    |---------|---------|----------|
-  0         m         2m+2   0         m         2m        3m+1
-            <----p2---->       <-------------s------------->
-  <----p0----><---t---->     <----p1---->
-  */
-
-  /* compute {s,3m-1} = {a,3m-1} + {a+m,3m-1} and error terms e0, e1, e2, e3 */
-  cy = mpn_add_err1_n (s, ap, ap + m, &e0l, bp + m, m - 1, 0);
-  cy = mpn_add_err2_n (s + m - 1, ap + m - 1, ap + 2*m - 1, &e1l,
-		       bp + m, bp, m, cy);
-  mpn_add_err1_n (s + 2*m - 1, ap + 2*m - 1, ap + 3*m - 1, &e3l, bp, m, cy);
-
-  /* compute t = (-1)^neg * ({b,m} - {b+m,m}) and error terms e4, e5 */
-  if (mpn_cmp (bp + m, bp, m) < 0)
-    {
-      ASSERT_NOCARRY (mpn_sub_err2_n (t, bp, bp + m, &e4l,
-				      ap + m - 1, ap + 2*m - 1, m, 0));
-      neg = 1;
-    }
-  else
-    {
-      ASSERT_NOCARRY (mpn_sub_err2_n (t, bp + m, bp, &e4l,
-				      ap + m - 1, ap + 2*m - 1, m, 0));
-      neg = 0;
-    }
-
-  /* recursive middle products. The picture is:
-
-      b[2m-1]   A   A   A   B   B   B   -   -   -   -   -
-      ...       -   A   A   A   B   B   B   -   -   -   -
-      b[m]      -   -   A   A   A   B   B   B   -   -   -
-      b[m-1]    -   -   -   C   C   C   D   D   D   -   -
-      ...       -   -   -   -   C   C   C   D   D   D   -
-      b[0]      -   -   -   -   -   C   C   C   D   D   D
-               a[0]   ...  a[m]  ...  a[2m]    ...    a[4m-2]
-  */
-
-  if (m < MULMID_TOOM42_THRESHOLD)
-    {
-      /* A + B */
-      mpn_mulmid_basecase (p0, s, 2*m - 1, bp + m, m);
-      /* accumulate high limbs of p0 into e1 */
-      ADDC_LIMB (cy, e1l, e1l, p0[m]);
-      e1h += p0[m + 1] + cy;
-      /* (-1)^neg * (B - C)   (overwrites first m limbs of s) */
-      mpn_mulmid_basecase (p1, ap + m, 2*m - 1, t, m);
-      /* C + D   (overwrites t) */
-      mpn_mulmid_basecase (p2, s + m, 2*m - 1, bp, m);
-    }
-  else
-    {
-      /* as above, but use toom42 instead */
-      mpn_toom42_mulmid (p0, s, bp + m, m, next_scratch);
-      ADDC_LIMB (cy, e1l, e1l, p0[m]);
-      e1h += p0[m + 1] + cy;
-      mpn_toom42_mulmid (p1, ap + m, t, m, next_scratch);
-      mpn_toom42_mulmid (p2, s + m, bp, m, next_scratch);
-    }
-
-  /* apply error terms */
-
-  /* -e0 at rp[0] */
-  SUBC_LIMB (cy, rp[0], rp[0], e0l);
-  SUBC_LIMB (cy, rp[1], rp[1], e0h + cy);
-  if (UNLIKELY (cy))
-    {
-      cy = (m > 2) ? mpn_sub_1 (rp + 2, rp + 2, m - 2, 1) : 1;
-      SUBC_LIMB (cy, e1l, e1l, cy);
-      e1h -= cy;
-    }
-
-  /* z = e1 - e2 + high(p0) */
-  SUBC_LIMB (cy, zl, e1l, e2l);
-  zh = e1h - e2h - cy;
-
-  /* z at rp[m] */
-  ADDC_LIMB (cy, rp[m], rp[m], zl);
-  zh = (zh + cy) & GMP_NUMB_MASK;
-  ADDC_LIMB (cy, rp[m + 1], rp[m + 1], zh);
-  cy -= (zh >> (GMP_NUMB_BITS - 1));
-  if (UNLIKELY (cy))
-    {
-      if (cy == 1)
-	mpn_add_1 (rp + m + 2, rp + m + 2, m, 1);
-      else /* cy == -1 */
-	mpn_sub_1 (rp + m + 2, rp + m + 2, m, 1);
-    }
-
-  /* e3 at rp[2*m] */
-  ADDC_LIMB (cy, rp[2*m], rp[2*m], e3l);
-  rp[2*m + 1] = (rp[2*m + 1] + e3h + cy) & GMP_NUMB_MASK;
-
-  /* e4 at p1[0] */
-  ADDC_LIMB (cy, p1[0], p1[0], e4l);
-  ADDC_LIMB (cy, p1[1], p1[1], e4h + cy);
-  if (UNLIKELY (cy))
-    mpn_add_1 (p1 + 2, p1 + 2, m, 1);
-
-  /* -e5 at p1[m] */
-  SUBC_LIMB (cy, p1[m], p1[m], e5l);
-  p1[m + 1] = (p1[m + 1] - e5h - cy) & GMP_NUMB_MASK;
-
-  /* adjustment if p1 ends up negative */
-  cy = (p1[m + 1] >> (GMP_NUMB_BITS - 1));
-
-  /* add (-1)^neg * (p1 - B^m * p1) to output */
-  if (neg)
-    {
-      mpn_sub_1 (rp + m + 2, rp + m + 2, m, cy);
-      mpn_add (rp, rp, 2*m + 2, p1, m + 2);             /* A + C */
-      mpn_sub_n (rp + m, rp + m, p1, m + 2);            /* B + D */
-    }
-  else
-    {
-      mpn_add_1 (rp + m + 2, rp + m + 2, m, cy);
-      mpn_sub (rp, rp, 2*m + 2, p1, m + 2);             /* A + C */
-      mpn_add_n (rp + m, rp + m, p1, m + 2);            /* B + D */
-    }
-
-  /* odd row and diagonal */
-  if (n & 1)
-    {
-      /*
-        Products marked E are already done. We need to do products marked O.
-
-        OOOOO----
-        -EEEEO---
-        --EEEEO--
-        ---EEEEO-
-        ----EEEEO
-       */
-
-      /* first row of O's */
-      cy = mpn_addmul_1 (rp, ap - 1, n, bp[n - 1]);
-      ADDC_LIMB (rp[n + 1], rp[n], rp[n], cy);
-
-      /* O's on diagonal */
-      /* FIXME: should probably define an interface "mpn_mulmid_diag_1"
-         that can handle the sum below. Currently we're relying on
-         mulmid_basecase being pretty fast for a diagonal sum like this,
-	 which is true at least for the K8 asm version, but surely false
-	 for the generic version. */
-      mpn_mulmid_basecase (e, ap + n - 1, n - 1, bp, n - 1);
-      mpn_add_n (rp + n - 1, rp + n - 1, e, 3);
-    }
-}
diff --git a/gmp/mpn/generic/toom43_mul.c b/gmp/mpn/generic/toom43_mul.c
deleted file mode 100644
index 59d45576b8..0000000000
--- a/gmp/mpn/generic/toom43_mul.c
+++ /dev/null
@@ -1,234 +0,0 @@
-/* mpn_toom43_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
-   times as large as bn.  Or more accurately, bn < an < 2 bn.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   The idea of applying toom to unbalanced multiplication is due to Marco
-   Bodrato and Alberto Zanoni.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Evaluate in: -2, -1, 0, +1, +2, +inf
-
-  <-s-><--n--><--n--><--n-->
-   ___ ______ ______ ______
-  |a3_|___a2_|___a1_|___a0_|
-	|_b2_|___b1_|___b0_|
-	<-t--><--n--><--n-->
-
-  v0  =  a0             * b0          #   A(0)*B(0)
-  v1  = (a0+ a1+ a2+ a3)*(b0+ b1+ b2) #   A(1)*B(1)      ah  <= 3  bh <= 2
-  vm1 = (a0- a1+ a2- a3)*(b0- b1+ b2) #  A(-1)*B(-1)    |ah| <= 1 |bh|<= 1
-  v2  = (a0+2a1+4a2+8a3)*(b0+2b1+4b2) #   A(2)*B(2)      ah  <= 14 bh <= 6
-  vm2 = (a0-2a1+4a2-8a3)*(b0-2b1+4b2) #  A(-2)*B(-2)    |ah| <= 9 |bh|<= 4
-  vinf=              a3 *         b2  # A(inf)*B(inf)
-*/
-
-void
-mpn_toom43_mul (mp_ptr pp,
-		mp_srcptr ap, mp_size_t an,
-		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
-{
-  mp_size_t n, s, t;
-  enum toom6_flags flags;
-  mp_limb_t cy;
-
-#define a0  ap
-#define a1  (ap + n)
-#define a2  (ap + 2 * n)
-#define a3  (ap + 3 * n)
-#define b0  bp
-#define b1  (bp + n)
-#define b2  (bp + 2 * n)
-
-  n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
-
-  s = an - 3 * n;
-  t = bn - 2 * n;
-
-  ASSERT (0 < s && s <= n);
-  ASSERT (0 < t && t <= n);
-
-  /* This is true whenever an >= 25 or bn >= 19, I think. It
-     guarantees that we can fit 5 values of size n+1 in the product
-     area. */
-  ASSERT (s+t >= 5);
-
-#define v0    pp				/* 2n */
-#define vm1   (scratch)				/* 2n+1 */
-#define v1    (pp + 2*n)			/* 2n+1 */
-#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
-#define v2    (scratch + 4 * n + 2)		/* 2n+1 */
-#define vinf  (pp + 5 * n)			/* s+t */
-#define bs1    pp				/* n+1 */
-#define bsm1  (scratch + 2 * n + 2)		/* n+1 */
-#define asm1  (scratch + 3 * n + 3)		/* n+1 */
-#define asm2  (scratch + 4 * n + 4)		/* n+1 */
-#define bsm2  (pp + n + 1)			/* n+1 */
-#define bs2   (pp + 2 * n + 2)			/* n+1 */
-#define as2   (pp + 3 * n + 3)			/* n+1 */
-#define as1   (pp + 4 * n + 4)			/* n+1 */
-
-  /* Total sccratch need is 6 * n + 3 + 1; we allocate one extra
-     limb, because products will overwrite 2n+2 limbs. */
-
-#define a0a2  scratch
-#define b0b2  scratch
-#define a1a3  asm1
-#define b1d   bsm1
-
-  /* Compute as2 and asm2.  */
-  flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3));
-
-  /* Compute bs2 and bsm2.  */
-  b1d[n] = mpn_lshift (b1d, b1, n, 1);			/*       2b1      */
-  cy  = mpn_lshift (b0b2, b2, t, 2);			/*  4b2           */
-  cy += mpn_add_n (b0b2, b0b2, b0, t);			/*  4b2      + b0 */
-  if (t != n)
-    cy = mpn_add_1 (b0b2 + t, b0 + t, n - t, cy);
-  b0b2[n] = cy;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  if (mpn_cmp (b0b2, b1d, n+1) < 0)
-    {
-      mpn_add_n_sub_n (bs2, bsm2, b1d, b0b2, n+1);
-      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
-    }
-  else
-    {
-      mpn_add_n_sub_n (bs2, bsm2, b0b2, b1d, n+1);
-    }
-#else
-  mpn_add_n (bs2, b0b2, b1d, n+1);
-  if (mpn_cmp (b0b2, b1d, n+1) < 0)
-    {
-      mpn_sub_n (bsm2, b1d, b0b2, n+1);
-      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
-    }
-  else
-    {
-      mpn_sub_n (bsm2, b0b2, b1d, n+1);
-    }
-#endif
-
-  /* Compute as1 and asm1.  */
-  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2));
-
-  /* Compute bs1 and bsm1.  */
-  bsm1[n] = mpn_add (bsm1, b0, n, b2, t);
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
-    {
-      cy = mpn_add_n_sub_n (bs1, bsm1, b1, bsm1, n);
-      bs1[n] = cy >> 1;
-      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
-    }
-  else
-    {
-      cy = mpn_add_n_sub_n (bs1, bsm1, bsm1, b1, n);
-      bs1[n] = bsm1[n] + (cy >> 1);
-      bsm1[n]-= cy & 1;
-    }
-#else
-  bs1[n] = bsm1[n] + mpn_add_n (bs1, bsm1, b1, n);
-  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
-    {
-      mpn_sub_n (bsm1, b1, bsm1, n);
-      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
-    }
-  else
-    {
-      bsm1[n] -= mpn_sub_n (bsm1, bsm1, b1, n);
-    }
-#endif
-
-  ASSERT (as1[n] <= 3);
-  ASSERT (bs1[n] <= 2);
-  ASSERT (asm1[n] <= 1);
-  ASSERT (bsm1[n] <= 1);
-  ASSERT (as2[n] <=14);
-  ASSERT (bs2[n] <= 6);
-  ASSERT (asm2[n] <= 9);
-  ASSERT (bsm2[n] <= 4);
-
-  /* vm1, 2n+1 limbs */
-  mpn_mul_n (vm1, asm1, bsm1, n+1);  /* W4 */
-
-  /* vm2, 2n+1 limbs */
-  mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */
-
-  /* v2, 2n+1 limbs */
-  mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */
-
-  /* v1, 2n+1 limbs */
-  mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */
-
-  /* vinf, s+t limbs */   /* W0 */
-  if (s > t)  mpn_mul (vinf, a3, s, b2, t);
-  else        mpn_mul (vinf, b2, t, a3, s);
-
-  /* v0, 2n limbs */
-  mpn_mul_n (v0, ap, bp, n);  /* W5 */
-
-  mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
-
-#undef v0
-#undef vm1
-#undef v1
-#undef vm2
-#undef v2
-#undef vinf
-#undef bs1
-#undef bs2
-#undef bsm1
-#undef bsm2
-#undef asm1
-#undef asm2
-/* #undef as1 */
-/* #undef as2 */
-#undef a0a2
-#undef b0b2
-#undef a1a3
-#undef b1d
-#undef a0
-#undef a1
-#undef a2
-#undef a3
-#undef b0
-#undef b1
-#undef b2
-}
diff --git a/gmp/mpn/generic/toom44_mul.c b/gmp/mpn/generic/toom44_mul.c
index 5abf2d14a9..37ff45279d 100644
--- a/gmp/mpn/generic/toom44_mul.c
+++ b/gmp/mpn/generic/toom44_mul.c
@@ -7,39 +7,36 @@
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2006-2008, 2013 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
 
+/*
+  Things to work on:
+
+  1. Trim allocation.  The allocations for as1, asm1, bs1, and bsm1 could be
+     avoided by instead reusing the pp area and the scratch area.
+  2. Use new toom functions for the recursive calls.
+*/
 
 #include "gmp.h"
 #include "gmp-impl.h"
 
-/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf
+/* Evaluate in: -1, -1/2, 0, +1/2, +1, +2, +inf
 
   <-s--><--n--><--n--><--n-->
    ____ ______ ______ ______
@@ -51,8 +48,8 @@ see https://www.gnu.org/licenses/.  */
   v1  = ( a0+ a1+ a2+ a3)*( b0+ b1+ b2+ b3) #    A(1)*B(1)      ah  <= 3   bh  <= 3
   vm1 = ( a0- a1+ a2- a3)*( b0- b1+ b2- b3) #   A(-1)*B(-1)    |ah| <= 1  |bh| <= 1
   v2  = ( a0+2a1+4a2+8a3)*( b0+2b1+4b2+8b3) #    A(2)*B(2)      ah  <= 14  bh  <= 14
-  vm2 = ( a0-2a1+4a2-8a3)*( b0-2b1+4b2-8b3) #    A(2)*B(2)      ah  <= 9  |bh| <= 9
   vh  = (8a0+4a1+2a2+ a3)*(8b0+4b1+2b2+ b3) #  A(1/2)*B(1/2)    ah  <= 14  bh  <= 14
+  vmh = (8a0-4a1+2a2- a3)*(8b0-4b1+2b2- b3) # A(-1/2)*B(-1/2)  -4<=ah<=9  -4<=bh<=9
   vinf=               a3 *          b2      #  A(inf)*B(inf)
 */
 
@@ -62,51 +59,28 @@ see https://www.gnu.org/licenses/.  */
 #define MAYBE_mul_toom44   1
 #else
 #define MAYBE_mul_basecase						\
-  (MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM22_THRESHOLD)
+  (MUL_TOOM44_THRESHOLD < 4 * MUL_KARATSUBA_THRESHOLD)
 #define MAYBE_mul_toom22						\
   (MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM33_THRESHOLD)
 #define MAYBE_mul_toom44						\
-  (MUL_TOOM6H_THRESHOLD >= 4 * MUL_TOOM44_THRESHOLD)
+  (MUL_FFT_THRESHOLD >= 4 * MUL_TOOM44_THRESHOLD)
 #endif
 
 #define TOOM44_MUL_N_REC(p, a, b, n, ws)				\
   do {									\
     if (MAYBE_mul_basecase						\
-	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
+	&& BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))		\
       mpn_mul_basecase (p, a, n, b, n);					\
     else if (MAYBE_mul_toom22						\
 	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))		\
-      mpn_toom22_mul (p, a, n, b, n, ws);				\
+      mpn_kara_mul_n (p, a, b, n, ws);					\
     else if (! MAYBE_mul_toom44						\
 	     || BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))		\
-      mpn_toom33_mul (p, a, n, b, n, ws);				\
+      mpn_toom3_mul_n (p, a, b, n, ws);					\
     else								\
       mpn_toom44_mul (p, a, n, b, n, ws);				\
   } while (0)
 
-/* Use of scratch space. In the product area, we store
-
-      ___________________
-     |vinf|____|_v1_|_v0_|
-      s+t  2n-1 2n+1  2n
-
-   The other recursive products, vm1, v2, vm2, vh are stored in the
-   scratch area. When computing them, we use the product area for
-   intermediate values.
-
-   Next, we compute v1. We can store the intermediate factors at v0
-   and at vh + 2n + 2.
-
-   Finally, for v0 and vinf, factors are parts of the input operands,
-   and we need scratch space only for the recursive multiplication.
-
-   In all, if S(an) is the scratch need, the needed space is bounded by
-
-     S(an) <= 4 (2*ceil(an/4) + 1) + 1 + S(ceil(an/4) + 1)
-
-   which should give S(n) = 8 n/3 + c log(n) for some constant c.
-*/
-
 void
 mpn_toom44_mul (mp_ptr pp,
 		mp_srcptr ap, mp_size_t an,
@@ -115,7 +89,11 @@ mpn_toom44_mul (mp_ptr pp,
 {
   mp_size_t n, s, t;
   mp_limb_t cy;
-  enum toom7_flags flags;
+  mp_ptr gp, hp;
+  mp_ptr as1, asm1, as2, ash, asmh;
+  mp_ptr bs1, bsm1, bs2, bsh, bsmh;
+  enum toom4_flags flags;
+  TMP_DECL;
 
 #define a0  ap
 #define a1  (ap + n)
@@ -126,111 +104,227 @@ mpn_toom44_mul (mp_ptr pp,
 #define b2  (bp + 2*n)
 #define b3  (bp + 3*n)
 
-  ASSERT (an >= bn);
-
   n = (an + 3) >> 2;
 
   s = an - 3 * n;
   t = bn - 3 * n;
 
+  ASSERT (an >= bn);
+
   ASSERT (0 < s && s <= n);
   ASSERT (0 < t && t <= n);
-  ASSERT (s >= t);
-
-  /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrites the
-   * following limb, so these must be computed in order, and we need a
-   * one limb gap to tp. */
-#define v0    pp				/* 2n */
-#define v1    (pp + 2 * n)			/* 2n+1 */
-#define vinf  (pp + 6 * n)			/* s+t */
-#define v2    scratch				/* 2n+1 */
-#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
-#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
-#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
-#define tp (scratch + 8*n + 5)
-
-  /* apx and bpx must not overlap with v1 */
-#define apx   pp				/* n+1 */
-#define amx   (pp + n + 1)			/* n+1 */
-#define bmx   (pp + 2*n + 2)			/* n+1 */
-#define bpx   (pp + 4*n + 2)			/* n+1 */
 
-  /* Total scratch need: 8*n + 5 + scratch for recursive calls. This
-     gives roughly 32 n/3 + log term. */
-
-  /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3.  */
-  flags = (enum toom7_flags) (toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp));
-
-  /* Compute bpx = b0 + 2 b1 + 4 b2 + 8 b3 and bmx = b0 - 2 b1 + 4 b2 - 8 b3.  */
-  flags = (enum toom7_flags) (flags ^ toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (bpx, bmx, bp, n, t, tp));
+  TMP_MARK;
+
+  as1  = TMP_ALLOC_LIMBS (10 * n + 10);
+  asm1 = as1  + n + 1;
+  as2  = asm1 + n + 1;
+  ash  = as2  + n + 1;
+  asmh = ash  + n + 1;
+  bs1  = asmh + n + 1;
+  bsm1 = bs1  + n + 1;
+  bs2  = bsm1 + n + 1;
+  bsh  = bs2  + n + 1;
+  bsmh = bsh  + n + 1;
+
+  gp = pp;
+  hp = pp + n + 1;
+
+  flags = 0;
+
+  /* Compute as1 and asm1.  */
+  gp[n]  = mpn_add_n (gp, a0, a2, n);
+  hp[n]  = mpn_add (hp, a1, n, a3, s);
+#if HAVE_NATIVE_mpn_addsub_n
+  if (mpn_cmp (gp, hp, n + 1) < 0)
+    {
+      mpn_addsub_n (as1, asm1, hp, gp, n + 1);
+      flags ^= toom4_w3_neg;
+    }
+  else
+    {
+      mpn_addsub_n (as1, asm1, gp, hp, n + 1);
+    }
+#else
+  mpn_add_n (as1, gp, hp, n + 1);
+  if (mpn_cmp (gp, hp, n + 1) < 0)
+    {
+      mpn_sub_n (asm1, hp, gp, n + 1);
+      flags ^= toom4_w3_neg;
+    }
+  else
+    {
+      mpn_sub_n (asm1, gp, hp, n + 1);
+    }
+#endif
 
-  TOOM44_MUL_N_REC (v2, apx, bpx, n + 1, tp);	/* v2,  2n+1 limbs */
-  TOOM44_MUL_N_REC (vm2, amx, bmx, n + 1, tp);	/* vm2,  2n+1 limbs */
+  /* Compute as2.  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (as2, a2, a3, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+  cy  = mpn_lshift (as2, a3, s, 1);
+  cy += mpn_add_n (as2, a2, as2, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a1, as2, n);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a0, as2, n);
+#endif
+  as2[n] = cy;
 
-  /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = (((2*a0 + a1) * 2 + a2) * 2 + a3 */
+  /* Compute ash and asmh.  */
+  cy  = mpn_lshift (gp, a0, n, 3);			/*  8a0             */
 #if HAVE_NATIVE_mpn_addlsh1_n
-  cy = mpn_addlsh1_n (apx, a1, a0, n);
-  cy = 2*cy + mpn_addlsh1_n (apx, a2, apx, n);
-  if (s < n)
+  gp[n] = cy + mpn_addlsh1_n (gp, gp, a2, n);		/*  8a0 + 2a2       */
+#else
+  cy += mpn_lshift (hp, a2, n, 1);			/*        2a2       */
+  gp[n] = cy + mpn_add_n (gp, gp, hp, n);		/*  8a0 + 2a2       */
+#endif
+  cy = mpn_lshift (hp, a1, n, 2);			/*  4a1             */
+  hp[n] = cy + mpn_add (hp, hp, n, a3, s);		/*  4a1 +  a3       */
+#if HAVE_NATIVE_mpn_addsub_n
+  if (mpn_cmp (gp, hp, n + 1) < 0)
     {
-      mp_limb_t cy2;
-      cy2 = mpn_addlsh1_n (apx, a3, apx, s);
-      apx[n] = 2*cy + mpn_lshift (apx + s, apx + s, n - s, 1);
-      MPN_INCR_U (apx + s, n+1-s, cy2);
+      mpn_addsub_n (ash, asmh, hp, gp, n + 1);
+      flags ^= toom4_w1_neg;
     }
   else
-    apx[n] = 2*cy + mpn_addlsh1_n (apx, a3, apx, n);
+    {
+      mpn_addsub_n (ash, asmh, gp, hp, n + 1);
+    }
 #else
-  cy = mpn_lshift (apx, a0, n, 1);
-  cy += mpn_add_n (apx, apx, a1, n);
-  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
-  cy += mpn_add_n (apx, apx, a2, n);
-  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
-  apx[n] = cy + mpn_add (apx, apx, n, a3, s);
+  mpn_add_n (ash, gp, hp, n + 1);
+  if (mpn_cmp (gp, hp, n + 1) < 0)
+    {
+      mpn_sub_n (asmh, hp, gp, n + 1);
+      flags ^= toom4_w1_neg;
+    }
+  else
+    {
+      mpn_sub_n (asmh, gp, hp, n + 1);
+    }
+#endif
+
+  /* Compute bs1 and bsm1.  */
+  gp[n]  = mpn_add_n (gp, b0, b2, n);
+  hp[n]  = mpn_add (hp, b1, n, b3, t);
+#if HAVE_NATIVE_mpn_addsub_n
+  if (mpn_cmp (gp, hp, n + 1) < 0)
+    {
+      mpn_addsub_n (bs1, bsm1, hp, gp, n + 1);
+      flags ^= toom4_w3_neg;
+    }
+  else
+    {
+      mpn_addsub_n (bs1, bsm1, gp, hp, n + 1);
+    }
+#else
+  mpn_add_n (bs1, gp, hp, n + 1);
+  if (mpn_cmp (gp, hp, n + 1) < 0)
+    {
+      mpn_sub_n (bsm1, hp, gp, n + 1);
+      flags ^= toom4_w3_neg;
+    }
+  else
+    {
+      mpn_sub_n (bsm1, gp, hp, n + 1);
+    }
 #endif
 
-  /* Compute bpx = 8 b0 + 4 b1 + 2 b2 + b3 = (((2*b0 + b1) * 2 + b2) * 2 + b3 */
+  /* Compute bs2.  */
 #if HAVE_NATIVE_mpn_addlsh1_n
-  cy = mpn_addlsh1_n (bpx, b1, b0, n);
-  cy = 2*cy + mpn_addlsh1_n (bpx, b2, bpx, n);
-  if (t < n)
+  cy  = mpn_addlsh1_n (bs2, b2, b3, t);
+  if (t != n)
+    cy = mpn_add_1 (bs2 + t, b2 + t, n - t, cy);
+  cy = 2 * cy + mpn_addlsh1_n (bs2, b1, bs2, n);
+  cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
+#else
+  cy  = mpn_lshift (bs2, b3, t, 1);
+  cy += mpn_add_n (bs2, b2, bs2, t);
+  if (t != n)
+    cy = mpn_add_1 (bs2 + t, b2 + t, n - t, cy);
+  cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
+  cy += mpn_add_n (bs2, b1, bs2, n);
+  cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
+  cy += mpn_add_n (bs2, b0, bs2, n);
+#endif
+  bs2[n] = cy;
+
+  /* Compute bsh and bsmh.  */
+  cy  = mpn_lshift (gp, b0, n, 3);			/*  8b0             */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  gp[n] = cy + mpn_addlsh1_n (gp, gp, b2, n);		/*  8b0 + 2b2       */
+#else
+  cy += mpn_lshift (hp, b2, n, 1);			/*        2b2       */
+  gp[n] = cy + mpn_add_n (gp, gp, hp, n);		/*  8b0 + 2b2       */
+#endif
+  cy = mpn_lshift (hp, b1, n, 2);			/*  4b1             */
+  hp[n] = cy + mpn_add (hp, hp, n, b3, t);		/*  4b1 +  b3       */
+#if HAVE_NATIVE_mpn_addsub_n
+  if (mpn_cmp (gp, hp, n + 1) < 0)
     {
-      mp_limb_t cy2;
-      cy2 = mpn_addlsh1_n (bpx, b3, bpx, t);
-      bpx[n] = 2*cy + mpn_lshift (bpx + t, bpx + t, n - t, 1);
-      MPN_INCR_U (bpx + t, n+1-t, cy2);
+      mpn_addsub_n (bsh, bsmh, hp, gp, n + 1);
+      flags ^= toom4_w1_neg;
     }
   else
-    bpx[n] = 2*cy + mpn_addlsh1_n (bpx, b3, bpx, n);
+    {
+      mpn_addsub_n (bsh, bsmh, gp, hp, n + 1);
+    }
 #else
-  cy = mpn_lshift (bpx, b0, n, 1);
-  cy += mpn_add_n (bpx, bpx, b1, n);
-  cy = 2*cy + mpn_lshift (bpx, bpx, n, 1);
-  cy += mpn_add_n (bpx, bpx, b2, n);
-  cy = 2*cy + mpn_lshift (bpx, bpx, n, 1);
-  bpx[n] = cy + mpn_add (bpx, bpx, n, b3, t);
+  mpn_add_n (bsh, gp, hp, n + 1);
+  if (mpn_cmp (gp, hp, n + 1) < 0)
+    {
+      mpn_sub_n (bsmh, hp, gp, n + 1);
+      flags ^= toom4_w1_neg;
+    }
+  else
+    {
+      mpn_sub_n (bsmh, gp, hp, n + 1);
+    }
 #endif
 
-  ASSERT (apx[n] < 15);
-  ASSERT (bpx[n] < 15);
+  ASSERT (as1[n] <= 3);
+  ASSERT (bs1[n] <= 3);
+  ASSERT (asm1[n] <= 1);
+  ASSERT (bsm1[n] <= 1);
+  ASSERT (as2[n] <= 14);
+  ASSERT (bs2[n] <= 14);
+  ASSERT (ash[n] <= 14);
+  ASSERT (bsh[n] <= 14);
+  ASSERT (asmh[n] <= 9);
+  ASSERT (bsmh[n] <= 9);
+
+#define v0    pp				/* 2n */
+#define v1    (scratch + 6 * n + 6)		/* 2n+1 */
+#define vm1   scratch				/* 2n+1 */
+#define v2    (scratch + 2 * n + 2)		/* 2n+1 */
+#define vinf  (pp + 6 * n)			/* s+t */
+#define vh    (pp + 2 * n)			/* 2n+1 */
+#define vmh   (scratch + 4 * n + 4)
+#define scratch_out  (scratch + 8 * n + 8)
+
+  /* vm1, 2n+1 limbs */
+  TOOM44_MUL_N_REC (vm1, asm1, bsm1, n + 1, scratch_out);	/* vm1, 2n+1 limbs */
 
-  TOOM44_MUL_N_REC (vh, apx, bpx, n + 1, tp);	/* vh,  2n+1 limbs */
+  TOOM44_MUL_N_REC (v2 , as2 , bs2 , n + 1, scratch_out);	/* v2,  2n+1 limbs */
 
-  /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3.  */
-  flags = (enum toom7_flags) (flags | toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp));
+  if (s > t)  mpn_mul (vinf, a3, s, b3, t);
+  else   TOOM44_MUL_N_REC (vinf, a3, b3, s, scratch_out);	/* vinf, s+t limbs */
 
-  /* Compute bpx = b0 + b1 + b2 + b3 bnd bmx = b0 - b1 + b2 - b3.  */
-  flags = (enum toom7_flags) (flags ^ toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (bpx, bmx, bp, n, t, tp));
+  TOOM44_MUL_N_REC (v1 , as1 , bs1 , n + 1, scratch_out);	/* v1,  2n+1 limbs */
 
-  TOOM44_MUL_N_REC (vm1, amx, bmx, n + 1, tp);	/* vm1,  2n+1 limbs */
-  /* Clobbers amx, bmx. */
-  TOOM44_MUL_N_REC (v1, apx, bpx, n + 1, tp);	/* v1,  2n+1 limbs */
+  TOOM44_MUL_N_REC (vh , ash , bsh , n + 1, scratch_out);
 
-  TOOM44_MUL_N_REC (v0, a0, b0, n, tp);
-  if (s > t)
-    mpn_mul (vinf, a3, s, b3, t);
-  else
-    TOOM44_MUL_N_REC (vinf, a3, b3, s, tp);	/* vinf, s+t limbs */
+  TOOM44_MUL_N_REC (vmh, asmh, bsmh, n + 1, scratch_out);
+
+  TOOM44_MUL_N_REC (v0 , ap  , bp  , n    , scratch_out);	/* v0,  2n limbs */
+
+  mpn_toom_interpolate_7pts (pp, n, flags, vmh, vm1, v1, v2, s + t, scratch_out);
 
-  mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t, tp);
+  TMP_FREE;
 }
diff --git a/gmp/mpn/generic/toom4_sqr.c b/gmp/mpn/generic/toom4_sqr.c
index b4154ba83f..911b5548d7 100644
--- a/gmp/mpn/generic/toom4_sqr.c
+++ b/gmp/mpn/generic/toom4_sqr.c
@@ -6,34 +6,31 @@
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2006-2010, 2013 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
 
+/*
+  Things to work on:
+
+  1. Trim allocation.  The allocations for as1, asm1, bs1, and bsm1 could be
+     avoided by instead reusing the pp area and the scratch area.
+  2. Use new toom functions for the recursive calls.
+*/
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -43,14 +40,16 @@ see https://www.gnu.org/licenses/.  */
   <-s--><--n--><--n--><--n-->
    ____ ______ ______ ______
   |_a3_|___a2_|___a1_|___a0_|
-
-  v0  =   a0             ^2 #    A(0)^2
-  v1  = ( a0+ a1+ a2+ a3)^2 #    A(1)^2   ah  <= 3
-  vm1 = ( a0- a1+ a2- a3)^2 #   A(-1)^2  |ah| <= 1
-  v2  = ( a0+2a1+4a2+8a3)^2 #    A(2)^2   ah  <= 14
-  vh  = (8a0+4a1+2a2+ a3)^2 #  A(1/2)^2   ah  <= 14
-  vmh = (8a0-4a1+2a2- a3)^2 # A(-1/2)^2  -4<=ah<=9
-  vinf=               a3 ^2 #  A(inf)^2
+   |b3_|___b2_|___b1_|___b0_|
+   <-t-><--n--><--n--><--n-->
+
+  v0  =   a0             *  b0              #    A(0)*B(0)
+  v1  = ( a0+ a1+ a2+ a3)*( b0+ b1+ b2+ b3) #    A(1)*B(1)      ah  <= 3   bh  <= 3
+  vm1 = ( a0- a1+ a2- a3)*( b0- b1+ b2- b3) #   A(-1)*B(-1)    |ah| <= 1  |bh| <= 1
+  v2  = ( a0+2a1+4a2+8a3)*( b0+2b1+4b2+8b3) #    A(2)*B(2)      ah  <= 14  bh  <= 14
+  vh  = (8a0+4a1+2a2+ a3)*(8b0+4b1+2b2+ b3) #  A(1/2)*B(1/2)    ah  <= 14  bh  <= 14
+  vmh = (8a0-4a1+2a2- a3)*(8b0-4b1+2b2- b3) # A(-1/2)*B(-1/2)  -4<=ah<=9  -4<=bh<=9
+  vinf=               a3 *          b2      #  A(inf)*B(inf)
 */
 
 #if TUNE_PROGRAM_BUILD
@@ -59,24 +58,24 @@ see https://www.gnu.org/licenses/.  */
 #define MAYBE_sqr_toom4   1
 #else
 #define MAYBE_sqr_basecase						\
-  (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM2_THRESHOLD)
+  (SQR_TOOM4_THRESHOLD < 4 * SQR_KARATSUBA_THRESHOLD)
 #define MAYBE_sqr_toom2							\
   (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM3_THRESHOLD)
 #define MAYBE_sqr_toom4							\
-  (SQR_TOOM6_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD)
+  (SQR_FFT_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD)
 #endif
 
-#define TOOM4_SQR_REC(p, a, n, ws)					\
+#define TOOM4_SQR_N_REC(p, a, n, ws)					\
   do {									\
     if (MAYBE_sqr_basecase						\
-	&& BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))			\
+	&& BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))		\
       mpn_sqr_basecase (p, a, n);					\
     else if (MAYBE_sqr_toom2						\
 	     && BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))		\
-      mpn_toom2_sqr (p, a, n, ws);					\
+      mpn_kara_sqr_n (p, a, n, ws);					\
     else if (! MAYBE_sqr_toom4						\
 	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))		\
-      mpn_toom3_sqr (p, a, n, ws);					\
+      mpn_toom3_sqr_n (p, a, n, ws);					\
     else								\
       mpn_toom4_sqr (p, a, n, ws);					\
   } while (0)
@@ -88,6 +87,9 @@ mpn_toom4_sqr (mp_ptr pp,
 {
   mp_size_t n, s;
   mp_limb_t cy;
+  mp_ptr gp, hp;
+  mp_ptr as1, asm1, as2, ash, asmh;
+  TMP_DECL;
 
 #define a0  ap
 #define a1  (ap + n)
@@ -100,65 +102,122 @@ mpn_toom4_sqr (mp_ptr pp,
 
   ASSERT (0 < s && s <= n);
 
-  /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrites the
-   * following limb, so these must be computed in order, and we need a
-   * one limb gap to tp. */
-#define v0    pp				/* 2n */
-#define v1    (pp + 2 * n)			/* 2n+1 */
-#define vinf  (pp + 6 * n)			/* s+t */
-#define v2    scratch				/* 2n+1 */
-#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
-#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
-#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
-#define tp (scratch + 8*n + 5)
+  TMP_MARK;
 
-  /* No overlap with v1 */
-#define apx   pp				/* n+1 */
-#define amx   (pp + 4*n + 2)			/* n+1 */
+  as1  = TMP_SALLOC_LIMBS (n + 1);
+  asm1 = TMP_SALLOC_LIMBS (n + 1);
+  as2  = TMP_SALLOC_LIMBS (n + 1);
+  ash  = TMP_SALLOC_LIMBS (n + 1);
+  asmh = TMP_SALLOC_LIMBS (n + 1);
 
-  /* Total scratch need: 8*n + 5 + scratch for recursive calls. This
-     gives roughly 32 n/3 + log term. */
+  gp = pp;
+  hp = pp + n + 1;
 
-  /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3.  */
-  mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp);
+  /* Compute as1 and asm1.  */
+  gp[n]  = mpn_add_n (gp, a0, a2, n);
+  hp[n]  = mpn_add (hp, a1, n, a3, s);
+#if HAVE_NATIVE_mpn_addsub_n
+  if (mpn_cmp (gp, hp, n + 1) < 0)
+    {
+      mpn_addsub_n (as1, asm1, hp, gp, n + 1);
+    }
+  else
+    {
+      mpn_addsub_n (as1, asm1, gp, hp, n + 1);
+    }
+#else
+  mpn_add_n (as1, gp, hp, n + 1);
+  if (mpn_cmp (gp, hp, n + 1) < 0)
+    {
+      mpn_sub_n (asm1, hp, gp, n + 1);
+    }
+  else
+    {
+      mpn_sub_n (asm1, gp, hp, n + 1);
+    }
+#endif
 
-  TOOM4_SQR_REC (v2, apx, n + 1, tp);	/* v2,  2n+1 limbs */
-  TOOM4_SQR_REC (vm2, amx, n + 1, tp);	/* vm2,  2n+1 limbs */
+  /* Compute as2.  */
+#if HAVE_NATIVE_mpn_addlsh1_n
+  cy  = mpn_addlsh1_n (as2, a2, a3, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+  cy  = mpn_lshift (as2, a3, s, 1);
+  cy += mpn_add_n (as2, a2, as2, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a1, as2, n);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a0, as2, n);
+#endif
+  as2[n] = cy;
 
-  /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = (((2*a0 + a1) * 2 + a2) * 2 + a3 */
+  /* Compute ash and asmh.  */
+  cy  = mpn_lshift (gp, a0, n, 3);			/*  8a0             */
 #if HAVE_NATIVE_mpn_addlsh1_n
-  cy = mpn_addlsh1_n (apx, a1, a0, n);
-  cy = 2*cy + mpn_addlsh1_n (apx, a2, apx, n);
-  if (s < n)
+  gp[n] = cy + mpn_addlsh1_n (gp, gp, a2, n);		/*  8a0 + 2a2       */
+#else
+  cy += mpn_lshift (hp, a2, n, 1);			/*        2a2       */
+  gp[n] = cy + mpn_add_n (gp, gp, hp, n);		/*  8a0 + 2a2       */
+#endif
+  cy = mpn_lshift (hp, a1, n, 2);			/*  4a1             */
+  hp[n] = cy + mpn_add (hp, hp, n, a3, s);		/*  4a1 +  a3       */
+#if HAVE_NATIVE_mpn_addsub_n
+  if (mpn_cmp (gp, hp, n + 1) < 0)
     {
-      mp_limb_t cy2;
-      cy2 = mpn_addlsh1_n (apx, a3, apx, s);
-      apx[n] = 2*cy + mpn_lshift (apx + s, apx + s, n - s, 1);
-      MPN_INCR_U (apx + s, n+1-s, cy2);
+      mpn_addsub_n (ash, asmh, hp, gp, n + 1);
     }
   else
-    apx[n] = 2*cy + mpn_addlsh1_n (apx, a3, apx, n);
+    {
+      mpn_addsub_n (ash, asmh, gp, hp, n + 1);
+    }
 #else
-  cy = mpn_lshift (apx, a0, n, 1);
-  cy += mpn_add_n (apx, apx, a1, n);
-  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
-  cy += mpn_add_n (apx, apx, a2, n);
-  cy = 2*cy + mpn_lshift (apx, apx, n, 1);
-  apx[n] = cy + mpn_add (apx, apx, n, a3, s);
+  mpn_add_n (ash, gp, hp, n + 1);
+  if (mpn_cmp (gp, hp, n + 1) < 0)
+    {
+      mpn_sub_n (asmh, hp, gp, n + 1);
+    }
+  else
+    {
+      mpn_sub_n (asmh, gp, hp, n + 1);
+    }
 #endif
 
-  ASSERT (apx[n] < 15);
+  ASSERT (as1[n] <= 3);
+  ASSERT (asm1[n] <= 1);
+  ASSERT (as2[n] <= 14);
+  ASSERT (ash[n] <= 14);
+  ASSERT (asmh[n] <= 9);
+
+#define v0    pp				/* 2n */
+#define v1    (scratch + 6 * n + 6)		/* 2n+1 */
+#define vm1   scratch				/* 2n+1 */
+#define v2    (scratch + 2 * n + 2)		/* 2n+1 */
+#define vinf  (pp + 6 * n)			/* s+t */
+#define vh    (pp + 2 * n)			/* 2n+1 */
+#define vmh   (scratch + 4 * n + 4)
+#define scratch_out  (scratch + 8 * n + 8)
+
+  /* vm1, 2n+1 limbs */
+  TOOM4_SQR_N_REC (vm1, asm1, n + 1, scratch_out);	/* vm1, 2n+1 limbs */
+
+  TOOM4_SQR_N_REC (v2 , as2 , n + 1, scratch_out);	/* v2,  2n+1 limbs */
+
+  TOOM4_SQR_N_REC (vinf, a3 , s,     scratch_out);	/* vinf, 2s limbs */
+
+  TOOM4_SQR_N_REC (v1 , as1 , n + 1, scratch_out);	/* v1,  2n+1 limbs */
 
-  TOOM4_SQR_REC (vh, apx, n + 1, tp);	/* vh,  2n+1 limbs */
+  TOOM4_SQR_N_REC (vh , ash , n + 1, scratch_out);
 
-  /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3.  */
-  mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp);
+  TOOM4_SQR_N_REC (vmh, asmh, n + 1, scratch_out);
 
-  TOOM4_SQR_REC (v1, apx, n + 1, tp);	/* v1,  2n+1 limbs */
-  TOOM4_SQR_REC (vm1, amx, n + 1, tp);	/* vm1,  2n+1 limbs */
+  TOOM4_SQR_N_REC (v0 , ap  , n    , scratch_out);	/* v0,  2n limbs */
 
-  TOOM4_SQR_REC (v0, a0, n, tp);
-  TOOM4_SQR_REC (vinf, a3, s, tp);	/* vinf, 2s limbs */
+  mpn_toom_interpolate_7pts (pp, n, 0, vmh, vm1, v1, v2, s + s, scratch_out);
 
-  mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) 0, vm2, vm1, v2, vh, 2*s, tp);
+  TMP_FREE;
 }
diff --git a/gmp/mpn/generic/toom52_mul.c b/gmp/mpn/generic/toom52_mul.c
deleted file mode 100644
index e15b5833aa..0000000000
--- a/gmp/mpn/generic/toom52_mul.c
+++ /dev/null
@@ -1,257 +0,0 @@
-/* mpn_toom52_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
-   times as large as bn.  Or more accurately, bn < an < 2 bn.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   The idea of applying toom to unbalanced multiplication is due to Marco
-   Bodrato and Alberto Zanoni.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Evaluate in: -2, -1, 0, +1, +2, +inf
-
-  <-s-><--n--><--n--><--n--><--n-->
-   ___ ______ ______ ______ ______
-  |a4_|___a3_|___a2_|___a1_|___a0_|
-			|b1|___b0_|
-			<t-><--n-->
-
-  v0  =  a0                  * b0      #   A(0)*B(0)
-  v1  = (a0+ a1+ a2+ a3+  a4)*(b0+ b1) #   A(1)*B(1)      ah  <= 4   bh <= 1
-  vm1 = (a0- a1+ a2- a3+  a4)*(b0- b1) #  A(-1)*B(-1)    |ah| <= 2   bh  = 0
-  v2  = (a0+2a1+4a2+8a3+16a4)*(b0+2b1) #   A(2)*B(2)      ah  <= 30  bh <= 2
-  vm2 = (a0-2a1+4a2-8a3+16a4)*(b0-2b1) #  A(-2)*B(-2)    |ah| <= 20 |bh|<= 1
-  vinf=                   a4 *     b1  # A(inf)*B(inf)
-
-  Some slight optimization in evaluation are taken from the paper:
-  "Towards Optimal Toom-Cook Multiplication for Univariate and
-  Multivariate Polynomials in Characteristic 2 and 0."
-*/
-
-void
-mpn_toom52_mul (mp_ptr pp,
-		mp_srcptr ap, mp_size_t an,
-		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
-{
-  mp_size_t n, s, t;
-  enum toom6_flags flags;
-
-#define a0  ap
-#define a1  (ap + n)
-#define a2  (ap + 2 * n)
-#define a3  (ap + 3 * n)
-#define a4  (ap + 4 * n)
-#define b0  bp
-#define b1  (bp + n)
-
-  n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
-
-  s = an - 4 * n;
-  t = bn - n;
-
-  ASSERT (0 < s && s <= n);
-  ASSERT (0 < t && t <= n);
-
-  /* Ensures that 5 values of n+1 limbs each fits in the product area.
-     Borderline cases are an = 32, bn = 8, n = 7, and an = 36, bn = 9,
-     n = 8. */
-  ASSERT (s+t >= 5);
-
-#define v0    pp				/* 2n */
-#define vm1   (scratch)				/* 2n+1 */
-#define v1    (pp + 2 * n)			/* 2n+1 */
-#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
-#define v2    (scratch + 4 * n + 2)		/* 2n+1 */
-#define vinf  (pp + 5 * n)			/* s+t */
-#define bs1    pp				/* n+1 */
-#define bsm1  (scratch + 2 * n + 2)		/* n   */
-#define asm1  (scratch + 3 * n + 3)		/* n+1 */
-#define asm2  (scratch + 4 * n + 4)		/* n+1 */
-#define bsm2  (pp + n + 1)			/* n+1 */
-#define bs2   (pp + 2 * n + 2)			/* n+1 */
-#define as2   (pp + 3 * n + 3)			/* n+1 */
-#define as1   (pp + 4 * n + 4)			/* n+1 */
-
-  /* Scratch need is 6 * n + 3 + 1. We need one extra limb, because
-     products will overwrite 2n+2 limbs. */
-
-#define a0a2  scratch
-#define a1a3  asm1
-
-  /* Compute as2 and asm2.  */
-  flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3));
-
-  /* Compute bs1 and bsm1.  */
-  if (t == n)
-    {
-#if HAVE_NATIVE_mpn_add_n_sub_n
-      mp_limb_t cy;
-
-      if (mpn_cmp (b0, b1, n) < 0)
-	{
-	  cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
-	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
-	}
-      else
-	{
-	  cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
-	}
-      bs1[n] = cy >> 1;
-#else
-      bs1[n] = mpn_add_n (bs1, b0, b1, n);
-      if (mpn_cmp (b0, b1, n) < 0)
-	{
-	  mpn_sub_n (bsm1, b1, b0, n);
-	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
-	}
-      else
-	{
-	  mpn_sub_n (bsm1, b0, b1, n);
-	}
-#endif
-    }
-  else
-    {
-      bs1[n] = mpn_add (bs1, b0, n, b1, t);
-      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
-	{
-	  mpn_sub_n (bsm1, b1, b0, t);
-	  MPN_ZERO (bsm1 + t, n - t);
-	  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
-	}
-      else
-	{
-	  mpn_sub (bsm1, b0, n, b1, t);
-	}
-    }
-
-  /* Compute bs2 and bsm2, recycling bs1 and bsm1. bs2=bs1+b1; bsm2=bsm1-b1  */
-  mpn_add (bs2, bs1, n+1, b1, t);
-  if (flags & toom6_vm1_neg )
-    {
-      bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
-      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
-    }
-  else
-    {
-      bsm2[n] = 0;
-      if (t == n)
-	{
-	  if (mpn_cmp (bsm1, b1, n) < 0)
-	    {
-	      mpn_sub_n (bsm2, b1, bsm1, n);
-	      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
-	    }
-	  else
-	    {
-	      mpn_sub_n (bsm2, bsm1, b1, n);
-	    }
-	}
-      else
-	{
-	  if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
-	    {
-	      mpn_sub_n (bsm2, b1, bsm1, t);
-	      MPN_ZERO (bsm2 + t, n - t);
-	      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
-	    }
-	  else
-	    {
-	      mpn_sub (bsm2, bsm1, n, b1, t);
-	    }
-	}
-    }
-
-  /* Compute as1 and asm1.  */
-  flags = (enum toom6_flags) (flags ^ toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2));
-
-  ASSERT (as1[n] <= 4);
-  ASSERT (bs1[n] <= 1);
-  ASSERT (asm1[n] <= 2);
-/*   ASSERT (bsm1[n] <= 1); */
-  ASSERT (as2[n] <=30);
-  ASSERT (bs2[n] <= 2);
-  ASSERT (asm2[n] <= 20);
-  ASSERT (bsm2[n] <= 1);
-
-  /* vm1, 2n+1 limbs */
-  mpn_mul (vm1, asm1, n+1, bsm1, n);  /* W4 */
-
-  /* vm2, 2n+1 limbs */
-  mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */
-
-  /* v2, 2n+1 limbs */
-  mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */
-
-  /* v1, 2n+1 limbs */
-  mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */
-
-  /* vinf, s+t limbs */   /* W0 */
-  if (s > t)  mpn_mul (vinf, a4, s, b1, t);
-  else        mpn_mul (vinf, b1, t, a4, s);
-
-  /* v0, 2n limbs */
-  mpn_mul_n (v0, ap, bp, n);  /* W5 */
-
-  mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
-
-#undef v0
-#undef vm1
-#undef v1
-#undef vm2
-#undef v2
-#undef vinf
-#undef bs1
-#undef bs2
-#undef bsm1
-#undef bsm2
-#undef asm1
-#undef asm2
-#undef as1
-#undef as2
-#undef a0a2
-#undef b0b2
-#undef a1a3
-#undef a0
-#undef a1
-#undef a2
-#undef a3
-#undef b0
-#undef b1
-#undef b2
-
-}
diff --git a/gmp/mpn/generic/toom53_mul.c b/gmp/mpn/generic/toom53_mul.c
index 41274d48e0..4483d4dfb7 100644
--- a/gmp/mpn/generic/toom53_mul.c
+++ b/gmp/mpn/generic/toom53_mul.c
@@ -10,39 +10,35 @@
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2006-2008, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
+/*
+  Things to work on:
+
+  1. Trim allocation.  The allocations for as1, asm1, bs1, and bsm1 could be
+     avoided by instead reusing the pp area and the scratch allocation.
+*/
+
 #include "gmp.h"
 #include "gmp-impl.h"
 
-/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf
+/* Evaluate in: -1, -1/2, 0, +1/2, +1, +2, +inf
 
   <-s-><--n--><--n--><--n--><--n-->
    ___ ______ ______ ______ ______
@@ -54,8 +50,8 @@ see https://www.gnu.org/licenses/.  */
   v1  = (  a0+ a1+ a2+ a3+  a4)*( b0+ b1+ b2) #    A(1)*B(1)      ah  <= 4   bh <= 2
   vm1 = (  a0- a1+ a2- a3+  a4)*( b0- b1+ b2) #   A(-1)*B(-1)    |ah| <= 2   bh <= 1
   v2  = (  a0+2a1+4a2+8a3+16a4)*( b0+2b1+4b2) #    A(2)*B(2)      ah  <= 30  bh <= 6
-  vm2 = (  a0-2a1+4a2-8a3+16a4)*( b0-2b1+4b2) #    A(2)*B(2)     -9<=ah<=20 -1<=bh<=4
   vh  = (16a0+8a1+4a2+2a3+  a4)*(4b0+2b1+ b2) #  A(1/2)*B(1/2)    ah  <= 30  bh <= 6
+  vmh = (16a0-8a1+4a2-2a3+  a4)*(4b0-2b1+ b2) # A(-1/2)*B(-1/2)  -9<=ah<=20 -1<=bh<=4
   vinf=                     a4 *          b2  #  A(inf)*B(inf)
 */
 
@@ -66,11 +62,12 @@ mpn_toom53_mul (mp_ptr pp,
 		mp_ptr scratch)
 {
   mp_size_t n, s, t;
+  int vm1_neg, vmh_neg;
   mp_limb_t cy;
-  mp_ptr gp;
-  mp_ptr as1, asm1, as2, asm2, ash;
-  mp_ptr bs1, bsm1, bs2, bsm2, bsh;
-  enum toom7_flags flags;
+  mp_ptr gp, hp;
+  mp_ptr as1, asm1, as2, ash, asmh;
+  mp_ptr bs1, bsm1, bs2, bsh, bsmh;
+  enum toom4_flags flags;
   TMP_DECL;
 
 #define a0  ap
@@ -95,61 +92,124 @@ mpn_toom53_mul (mp_ptr pp,
   as1  = TMP_SALLOC_LIMBS (n + 1);
   asm1 = TMP_SALLOC_LIMBS (n + 1);
   as2  = TMP_SALLOC_LIMBS (n + 1);
-  asm2 = TMP_SALLOC_LIMBS (n + 1);
   ash  = TMP_SALLOC_LIMBS (n + 1);
+  asmh = TMP_SALLOC_LIMBS (n + 1);
 
   bs1  = TMP_SALLOC_LIMBS (n + 1);
   bsm1 = TMP_SALLOC_LIMBS (n + 1);
   bs2  = TMP_SALLOC_LIMBS (n + 1);
-  bsm2 = TMP_SALLOC_LIMBS (n + 1);
   bsh  = TMP_SALLOC_LIMBS (n + 1);
+  bsmh = TMP_SALLOC_LIMBS (n + 1);
 
   gp = pp;
+  hp = pp + n + 1;
 
   /* Compute as1 and asm1.  */
-  flags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, gp));
-
-  /* Compute as2 and asm2. */
-  flags = (enum toom7_flags) (flags | toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, gp));
+  gp[n]  = mpn_add_n (gp, a0, a2, n);
+  gp[n] += mpn_add   (gp, gp, n, a4, s);
+  hp[n]  = mpn_add_n (hp, a1, a3, n);
+#if HAVE_NATIVE_mpn_addsub_n
+  if (mpn_cmp (gp, hp, n + 1) < 0)
+    {
+      mpn_addsub_n (as1, asm1, hp, gp, n + 1);
+      vm1_neg = 1;
+    }
+  else
+    {
+      mpn_addsub_n (as1, asm1, gp, hp, n + 1);
+      vm1_neg = 0;
+    }
+#else
+  mpn_add_n (as1, gp, hp, n + 1);
+  if (mpn_cmp (gp, hp, n + 1) < 0)
+    {
+      mpn_sub_n (asm1, hp, gp, n + 1);
+      vm1_neg = 1;
+    }
+  else
+    {
+      mpn_sub_n (asm1, gp, hp, n + 1);
+      vm1_neg = 0;
+    }
+#endif
 
-  /* Compute ash = 16 a0 + 8 a1 + 4 a2 + 2 a3 + a4
-     = 2*(2*(2*(2*a0 + a1) + a2) + a3) + a4  */
+  /* Compute as2.  */
+#if !HAVE_NATIVE_mpn_addlsh_n
+  ash[n] = mpn_lshift (ash, a2, n, 2);			/*        4a2       */
+#endif
 #if HAVE_NATIVE_mpn_addlsh1_n
-  cy = mpn_addlsh1_n (ash, a1, a0, n);
-  cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
-  cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
-  if (s < n)
+  cy  = mpn_addlsh1_n (as2, a3, a4, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a2, as2, n);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
+  as2[n] = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+  cy  = mpn_lshift (as2, a4, s, 1);
+  cy += mpn_add_n (as2, a3, as2, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy);
+  cy = 4 * cy + mpn_lshift (as2, as2, n, 2);
+  cy += mpn_add_n (as2, a1, as2, n);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  as2[n] = cy + mpn_add_n (as2, a0, as2, n);
+  mpn_add_n (as2, ash, as2, n + 1);
+#endif
+
+  /* Compute ash and asmh.  */
+#if HAVE_NATIVE_mpn_addlsh_n
+  cy  = mpn_addlsh_n (gp, a2, a0, n, 2);		/* 4a0  +  a2       */
+  cy = 4 * cy + mpn_addlsh_n (gp, a4, gp, n, 2);	/* 16a0 + 4a2 +  a4 */ /* FIXME s */
+  gp[n] = cy;
+  cy  = mpn_addlsh_n (hp, a3, a1, n, 2);		/*  4a1 +  a3       */
+  cy = 2 * cy + mpn_lshift (hp, hp, n, 1);		/*  8a1 + 2a3       */
+  hp[n] = cy;
+#else
+  gp[n] = mpn_lshift (gp, a0, n, 4);			/* 16a0             */
+  mpn_add (gp, gp, n + 1, a4, s);			/* 16a0 +        a4 */
+  mpn_add_n (gp, ash, gp, n+1);				/* 16a0 + 4a2 +  a4 */
+  cy  = mpn_lshift (hp, a1, n, 3);			/*  8a1             */
+  cy += mpn_lshift (ash, a3, n, 1);			/*        2a3       */
+  cy += mpn_add_n (hp, ash, hp, n);			/*  8a1 + 2a3       */
+  hp[n] = cy;
+#endif
+#if HAVE_NATIVE_mpn_addsub_n
+  if (mpn_cmp (gp, hp, n + 1) < 0)
     {
-      mp_limb_t cy2;
-      cy2 = mpn_addlsh1_n (ash, a4, ash, s);
-      ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
-      MPN_INCR_U (ash + s, n+1-s, cy2);
+      mpn_addsub_n (ash, asmh, hp, gp, n + 1);
+      vmh_neg = 1;
     }
   else
-    ash[n] = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
+    {
+      mpn_addsub_n (ash, asmh, gp, hp, n + 1);
+      vmh_neg = 0;
+    }
 #else
-  cy = mpn_lshift (ash, a0, n, 1);
-  cy += mpn_add_n (ash, ash, a1, n);
-  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
-  cy += mpn_add_n (ash, ash, a2, n);
-  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
-  cy += mpn_add_n (ash, ash, a3, n);
-  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
-  ash[n] = cy + mpn_add (ash, ash, n, a4, s);
+  mpn_add_n (ash, gp, hp, n + 1);
+  if (mpn_cmp (gp, hp, n + 1) < 0)
+    {
+      mpn_sub_n (asmh, hp, gp, n + 1);
+      vmh_neg = 1;
+    }
+  else
+    {
+      mpn_sub_n (asmh, gp, hp, n + 1);
+      vmh_neg = 0;
+    }
 #endif
 
   /* Compute bs1 and bsm1.  */
   bs1[n] = mpn_add (bs1, b0, n, b2, t);		/* b0 + b2 */
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
   if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
     {
-      bs1[n] = mpn_add_n_sub_n (bs1, bsm1, b1, bs1, n) >> 1;
+      bs1[n] = mpn_addsub_n (bs1, bsm1, b1, bs1, n) >> 1;
       bsm1[n] = 0;
-      flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
+      vm1_neg ^= 1;
     }
   else
     {
-      cy = mpn_add_n_sub_n (bs1, bsm1, bs1, b1, n);
+      cy = mpn_addsub_n (bs1, bsm1, bs1, b1, n);
       bsm1[n] = bs1[n] - (cy & 1);
       bs1[n] += (cy >> 1);
     }
@@ -158,7 +218,7 @@ mpn_toom53_mul (mp_ptr pp,
     {
       mpn_sub_n (bsm1, b1, bs1, n);
       bsm1[n] = 0;
-      flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
+      vm1_neg ^= 1;
     }
   else
     {
@@ -167,64 +227,46 @@ mpn_toom53_mul (mp_ptr pp,
   bs1[n] += mpn_add_n (bs1, bs1, b1, n);  /* b0+b1+b2 */
 #endif
 
-  /* Compute bs2 and bsm2. */
-#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
-#if HAVE_NATIVE_mpn_addlsh2_n
-  cy = mpn_addlsh2_n (bs2, b0, b2, t);
-#else /* HAVE_NATIVE_mpn_addlsh_n */
-  cy = mpn_addlsh_n (bs2, b0, b2, t, 2);
-#endif
-  if (t < n)
-    cy = mpn_add_1 (bs2 + t, b0 + t, n - t, cy);
-  bs2[n] = cy;
+  /* Compute bs2 */
+  hp[n]   = mpn_lshift (hp, b1, n, 1);			/*       2b1       */
+
+#ifdef HAVE_NATIVE_mpn_addlsh1_n
+  cy = mpn_addlsh1_n (bs2, b1, b2, t);
+  if (t != n)
+    cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
+  bs2[n] = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
 #else
-  cy = mpn_lshift (gp, b2, t, 2);
-  bs2[n] = mpn_add (bs2, b0, n, gp, t);
-  MPN_INCR_U (bs2 + t, n+1-t, cy);
+  bs2[t] = mpn_lshift (bs2, b2, t, 2);
+  mpn_add (bs2, hp, n + 1, bs2, t + 1);
+  bs2[n] += mpn_add_n (bs2, bs2, b0, n);
 #endif
 
-  gp[n] = mpn_lshift (gp, b1, n, 1);
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  if (mpn_cmp (bs2, gp, n+1) < 0)
+  /* Compute bsh and bsmh.  */
+#if HAVE_NATIVE_mpn_addlsh_n
+  gp[n] = mpn_addlsh_n (gp, b2, b0, n, 2);		/* 4b0 +        b2 */
+#else
+  cy = mpn_lshift (gp, b0, n, 2);			/* 4b0             */
+  gp[n] = cy + mpn_add (gp, gp, n, b2, t);		/* 4b0 +        b2 */
+#endif
+#if HAVE_NATIVE_mpn_addsub_n
+  if (mpn_cmp (gp, hp, n + 1) < 0)
     {
-      ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, gp, bs2, n+1));
-      flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
+      mpn_addsub_n (bsh, bsmh, hp, gp, n + 1);
+      vmh_neg^= 1;
     }
   else
-    {
-      ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, bs2, gp, n+1));
-    }
+    mpn_addsub_n (bsh, bsmh, gp, hp, n + 1);
 #else
-  if (mpn_cmp (bs2, gp, n+1) < 0)
+  mpn_add_n (bsh, gp, hp, n + 1);			/* 4b0 + 2b1 +  b2 */
+  if (mpn_cmp (gp, hp, n + 1) < 0)
     {
-      ASSERT_NOCARRY (mpn_sub_n (bsm2, gp, bs2, n+1));
-      flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
+      mpn_sub_n (bsmh, hp, gp, n + 1);
+      vmh_neg ^= 1;
     }
   else
     {
-      ASSERT_NOCARRY (mpn_sub_n (bsm2, bs2, gp, n+1));
+      mpn_sub_n (bsmh, gp, hp, n + 1);
     }
-  mpn_add_n (bs2, bs2, gp, n+1);
-#endif
-
-  /* Compute bsh = 4 b0 + 2 b1 + b2 = 2*(2*b0 + b1)+b2.  */
-#if HAVE_NATIVE_mpn_addlsh1_n
-  cy = mpn_addlsh1_n (bsh, b1, b0, n);
-  if (t < n)
-    {
-      mp_limb_t cy2;
-      cy2 = mpn_addlsh1_n (bsh, b2, bsh, t);
-      bsh[n] = 2*cy + mpn_lshift (bsh + t, bsh + t, n - t, 1);
-      MPN_INCR_U (bsh + t, n+1-t, cy2);
-    }
-  else
-    bsh[n] = 2*cy + mpn_addlsh1_n (bsh, b2, bsh, n);
-#else
-  cy = mpn_lshift (bsh, b0, n, 1);
-  cy += mpn_add_n (bsh, bsh, b1, n);
-  cy = 2*cy + mpn_lshift (bsh, bsh, n, 1);
-  bsh[n] = cy + mpn_add (bsh, bsh, n, b2, t);
 #endif
 
   ASSERT (as1[n] <= 4);
@@ -233,26 +275,18 @@ mpn_toom53_mul (mp_ptr pp,
   ASSERT (bsm1[n] <= 1);
   ASSERT (as2[n] <= 30);
   ASSERT (bs2[n] <= 6);
-  ASSERT (asm2[n] <= 20);
-  ASSERT (bsm2[n] <= 4);
   ASSERT (ash[n] <= 30);
   ASSERT (bsh[n] <= 6);
+  ASSERT (asmh[n] <= 20);
+  ASSERT (bsmh[n] <= 4);
 
 #define v0    pp				/* 2n */
-#define v1    (pp + 2 * n)			/* 2n+1 */
+#define v1    (scratch + 6 * n + 6)		/* 2n+1 */
+#define vm1   scratch				/* 2n+1 */
+#define v2    (scratch + 2 * n + 2)		/* 2n+1 */
 #define vinf  (pp + 6 * n)			/* s+t */
-#define v2    scratch				/* 2n+1 */
-#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
-#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
-#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
-#define scratch_out (scratch + 8 * n + 4)		/* 2n+1 */
-  /* Total scratch need: 10*n+5 */
-
-  /* Must be in allocation order, as they overwrite one limb beyond
-   * 2n+1. */
-  mpn_mul_n (v2, as2, bs2, n + 1);		/* v2, 2n+1 limbs */
-  mpn_mul_n (vm2, asm2, bsm2, n + 1);		/* vm2, 2n+1 limbs */
-  mpn_mul_n (vh, ash, bsh, n + 1);		/* vh, 2n+1 limbs */
+#define vh    (pp + 2 * n)			/* 2n+1 */
+#define vmh   (scratch + 4 * n + 4)
 
   /* vm1, 2n+1 limbs */
 #ifdef SMALLER_RECURSION
@@ -279,6 +313,12 @@ mpn_toom53_mul (mp_ptr pp,
   mpn_mul_n (vm1, asm1, bsm1, n + ((asm1[n] | bsm1[n]) != 0));
 #endif /* SMALLER_RECURSION */
 
+  mpn_mul_n (v2, as2, bs2, n + 1);		/* v2, 2n+1 limbs */
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a4, s, b2, t);
+  else        mpn_mul (vinf, b2, t, a4, s);
+
   /* v1, 2n+1 limbs */
 #ifdef SMALLER_RECURSION
   mpn_mul_n (v1, as1, bs1, n);
@@ -318,14 +358,16 @@ mpn_toom53_mul (mp_ptr pp,
   mpn_mul_n (v1, as1, bs1, n + ((as1[n] | bs1[n]) != 0));
 #endif /* SMALLER_RECURSION */
 
-  mpn_mul_n (v0, a0, b0, n);			/* v0, 2n limbs */
+  mpn_mul_n (vh, ash, bsh, n + 1);
 
-  /* vinf, s+t limbs */
-  if (s > t)  mpn_mul (vinf, a4, s, b2, t);
-  else        mpn_mul (vinf, b2, t, a4, s);
+  mpn_mul_n (vmh, asmh, bsmh, n + 1);
+
+  mpn_mul_n (v0, ap, bp, n);			/* v0, 2n limbs */
+
+  flags =  vm1_neg ? toom4_w3_neg : 0;
+  flags |= vmh_neg ? toom4_w1_neg : 0;
 
-  mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t,
-			     scratch_out);
+  mpn_toom_interpolate_7pts (pp, n, flags, vmh, vm1, v1, v2, s + t, scratch + 8 * n + 8);
 
   TMP_FREE;
 }
diff --git a/gmp/mpn/generic/toom54_mul.c b/gmp/mpn/generic/toom54_mul.c
deleted file mode 100644
index 939bb53ab6..0000000000
--- a/gmp/mpn/generic/toom54_mul.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/* Implementation of the algorithm for Toom-Cook 4.5-way.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Toom-4.5, the splitting 5x4 unbalanced version.
-   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
-
-  <--s-><--n--><--n--><--n--><--n-->
-   ____ ______ ______ ______ ______
-  |_a4_|__a3__|__a2__|__a1__|__a0__|
-	  |b3_|__b2__|__b1__|__b0__|
-	  <-t-><--n--><--n--><--n-->
-
-*/
-#define TOOM_54_MUL_N_REC(p, a, b, n, ws)		\
-  do {	mpn_mul_n (p, a, b, n);				\
-  } while (0)
-
-#define TOOM_54_MUL_REC(p, a, na, b, nb, ws)		\
-  do {	mpn_mul (p, a, na, b, nb);			\
-  } while (0)
-
-void
-mpn_toom54_mul (mp_ptr pp,
-		mp_srcptr ap, mp_size_t an,
-		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
-{
-  mp_size_t n, s, t;
-  int sign;
-
-  /***************************** decomposition *******************************/
-#define a4  (ap + 4 * n)
-#define b3  (bp + 3 * n)
-
-  ASSERT (an >= bn);
-  n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
-
-  s = an - 4 * n;
-  t = bn - 3 * n;
-
-  ASSERT (0 < s && s <= n);
-  ASSERT (0 < t && t <= n);
-  /* Required by mpn_toom_interpolate_8pts. */
-  ASSERT ( s + t >= n );
-  ASSERT ( s + t > 4);
-  ASSERT ( n > 2);
-
-#define   r8    pp				/* 2n   */
-#define   r7    scratch				/* 3n+1 */
-#define   r5    (pp + 3*n)			/* 3n+1 */
-#define   v0    (pp + 3*n)			/* n+1 */
-#define   v1    (pp + 4*n+1)			/* n+1 */
-#define   v2    (pp + 5*n+2)			/* n+1 */
-#define   v3    (pp + 6*n+3)			/* n+1 */
-#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
-#define   r1    (pp + 7*n)			/* s+t <= 2*n */
-#define   ws    (scratch + 6 * n + 2)		/* ??? */
-
-  /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
-     need all of them, when DO_mpn_sublsh_n usea a scratch  */
-  /********************** evaluation and recursive calls *********************/
-  /* $\pm4$ */
-  sign = mpn_toom_eval_pm2exp (v2, v0, 4, ap, n, s, 2, pp)
-       ^ mpn_toom_eval_pm2exp (v3, v1, 3, bp, n, t, 2, pp);
-  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
-  TOOM_54_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
-  mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
-
-  /* $\pm1$ */
-  sign = mpn_toom_eval_pm1 (v2, v0, 4, ap, n, s,    pp)
-       ^ mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
-  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
-  TOOM_54_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
-  mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
-
-  /* $\pm2$ */
-  sign = mpn_toom_eval_pm2 (v2, v0, 4, ap, n, s, pp)
-       ^ mpn_toom_eval_dgr3_pm2 (v3, v1, bp, n, t, pp);
-  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
-  TOOM_54_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
-  mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
-
-  /* A(0)*B(0) */
-  TOOM_54_MUL_N_REC(pp, ap, bp, n, ws);
-
-  /* Infinity */
-  if (s > t) {
-    TOOM_54_MUL_REC(r1, a4, s, b3, t, ws);
-  } else {
-    TOOM_54_MUL_REC(r1, b3, t, a4, s, ws);
-  };
-
-  mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
-
-#undef a4
-#undef b3
-#undef r1
-#undef r3
-#undef r5
-#undef v0
-#undef v1
-#undef v2
-#undef v3
-#undef r7
-#undef r8
-#undef ws
-}
diff --git a/gmp/mpn/generic/toom62_mul.c b/gmp/mpn/generic/toom62_mul.c
index 3759e3cb3c..944b3feffd 100644
--- a/gmp/mpn/generic/toom62_mul.c
+++ b/gmp/mpn/generic/toom62_mul.c
@@ -10,42 +10,38 @@
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2006-2008, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
+/*
+  Things to work on:
+
+  1. Trim allocation.  The allocations for as1, asm1, bs1, and bsm1 could be
+     avoided by instead reusing the pp area and the scratch allocation.
+*/
+
 #include "gmp.h"
 #include "gmp-impl.h"
 
-/* Evaluate in:
-   0, +1, -1, +2, -2, 1/2, +inf
 
-  <-s-><--n--><--n--><--n--><--n--><--n-->
+/* Evaluate in: -1, -1/2, 0, +1/2, +1, +2, +inf
+
+  <-s-><--n--><--n--><--n--><--n--><--n-->
    ___ ______ ______ ______ ______ ______
   |a5_|___a4_|___a3_|___a2_|___a1_|___a0_|
 			     |_b1_|___b0_|
@@ -55,8 +51,8 @@ see https://www.gnu.org/licenses/.  */
   v1  = (  a0+  a1+ a2+ a3+  a4+  a5)*( b0+ b1) #    A(1)*B(1)      ah  <= 5   bh <= 1
   vm1 = (  a0-  a1+ a2- a3+  a4-  a5)*( b0- b1) #   A(-1)*B(-1)    |ah| <= 2   bh  = 0
   v2  = (  a0+ 2a1+4a2+8a3+16a4+32a5)*( b0+2b1) #    A(2)*B(2)      ah  <= 62  bh <= 2
-  vm2 = (  a0- 2a1+4a2-8a3+16a4-32a5)*( b0-2b1) #   A(-2)*B(-2)    -41<=ah<=20 -1<=bh<=0
   vh  = (32a0+16a1+8a2+4a3+ 2a4+  a5)*(2b0+ b1) #  A(1/2)*B(1/2)    ah  <= 62  bh <= 2
+  vmh = (32a0-16a1+8a2-4a3+ 2a4-  a5)*(2b0- b1) # A(-1/2)*B(-1/2)  -20<=ah<=41 0<=bh<=1
   vinf=                           a5 *      b1  #  A(inf)*B(inf)
 */
 
@@ -67,11 +63,12 @@ mpn_toom62_mul (mp_ptr pp,
 		mp_ptr scratch)
 {
   mp_size_t n, s, t;
+  int vm1_neg, vmh_neg, bsm_neg;
   mp_limb_t cy;
-  mp_ptr as1, asm1, as2, asm2, ash;
-  mp_ptr bs1, bsm1, bs2, bsm2, bsh;
-  mp_ptr gp;
-  enum toom7_flags aflags, bflags;
+  mp_ptr a0_a2, a1_a3;
+  mp_ptr as1, asm1, as2, ash, asmh;
+  mp_ptr bs1, bsm1, bs2, bsh, bsmh;
+  enum toom4_flags flags;
   TMP_DECL;
 
 #define a0  ap
@@ -83,7 +80,7 @@ mpn_toom62_mul (mp_ptr pp,
 #define b0  bp
 #define b1  (bp + n)
 
-  n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
+  n = 1 + (an >= 3 * bn ? (an - 1) / (unsigned long) 6 : (bn - 1) >> 1);
 
   s = an - 5 * n;
   t = bn - n;
@@ -96,66 +93,133 @@ mpn_toom62_mul (mp_ptr pp,
   as1 = TMP_SALLOC_LIMBS (n + 1);
   asm1 = TMP_SALLOC_LIMBS (n + 1);
   as2 = TMP_SALLOC_LIMBS (n + 1);
-  asm2 = TMP_SALLOC_LIMBS (n + 1);
   ash = TMP_SALLOC_LIMBS (n + 1);
+  asmh = TMP_SALLOC_LIMBS (n + 1);
 
   bs1 = TMP_SALLOC_LIMBS (n + 1);
   bsm1 = TMP_SALLOC_LIMBS (n);
   bs2 = TMP_SALLOC_LIMBS (n + 1);
-  bsm2 = TMP_SALLOC_LIMBS (n + 1);
   bsh = TMP_SALLOC_LIMBS (n + 1);
+  bsmh = TMP_SALLOC_LIMBS (n + 1);
 
-  gp = pp;
+  a0_a2 = pp;
+  a1_a3 = pp + n + 1;
 
   /* Compute as1 and asm1.  */
-  aflags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 5, ap, n, s, gp));
-
-  /* Compute as2 and asm2. */
-  aflags = (enum toom7_flags) (aflags | toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 5, ap, n, s, gp));
-
-  /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
-     = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5  */
+  a0_a2[n]  = mpn_add_n (a0_a2, a0, a2, n);
+  a0_a2[n] += mpn_add_n (a0_a2, a0_a2, a4, n);
+  a1_a3[n]  = mpn_add_n (a1_a3, a1, a3, n);
+  a1_a3[n] += mpn_add (a1_a3, a1_a3, n, a5, s);
+#if HAVE_NATIVE_mpn_addsub_n
+  if (mpn_cmp (a0_a2, a1_a3, n + 1) < 0)
+    {
+      mpn_addsub_n (as1, asm1, a1_a3, a0_a2, n + 1);
+      vm1_neg = 1;
+    }
+  else
+    {
+      mpn_addsub_n (as1, asm1, a0_a2, a1_a3, n + 1);
+      vm1_neg = 0;
+    }
+#else
+  mpn_add_n (as1, a0_a2, a1_a3, n + 1);
+  if (mpn_cmp (a0_a2, a1_a3, n + 1) < 0)
+    {
+      mpn_sub_n (asm1, a1_a3, a0_a2, n + 1);
+      vm1_neg = 1;
+    }
+  else
+    {
+      mpn_sub_n (asm1, a0_a2, a1_a3, n + 1);
+      vm1_neg = 0;
+    }
+#endif
 
+  /* Compute as2.  */
 #if HAVE_NATIVE_mpn_addlsh1_n
-  cy = mpn_addlsh1_n (ash, a1, a0, n);
-  cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
-  cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
-  cy = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
-  if (s < n)
+  cy  = mpn_addlsh1_n (as2, a4, a5, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a4 + s, n - s, cy);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a3, as2, n);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a2, as2, n);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
+  cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+  cy  = mpn_lshift (as2, a5, s, 1);
+  cy += mpn_add_n (as2, a4, as2, s);
+  if (s != n)
+    cy = mpn_add_1 (as2 + s, a4 + s, n - s, cy);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a3, as2, n);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a2, as2, n);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a1, as2, n);
+  cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+  cy += mpn_add_n (as2, a0, as2, n);
+#endif
+  as2[n] = cy;
+
+  /* Compute ash and asmh.  */
+#if HAVE_NATIVE_mpn_addlsh_n
+  cy  = mpn_addlsh_n (a0_a2, a2, a0, n, 2);		/* 4a0  +  a2       */
+  cy = 4 * cy + mpn_addlsh_n (a0_a2, a4, a0_a2, n, 2);	/* 16a0 + 4a2 +  a4 */
+  cy = 2 * cy + mpn_lshift (a0_a2, a0_a2, n, 1);	/* 32a0 + 8a2 + 2a4 */
+  a0_a2[n] = cy;
+  cy  = mpn_addlsh_n (a1_a3, a3, a1, n, 2);		/* 4a1  +  a3       */
+  cy = 4 * cy + mpn_addlsh_n (a1_a3, a5, a1_a3, n, 2);	/* 16a1 + 4a3 + a5  */
+  a1_a3[n] = cy;
+#else
+  cy  = mpn_lshift (a0_a2, a0, n, 2);			/* 4a0              */
+  cy += mpn_add_n (a0_a2, a2, a0_a2, n);		/* 4a0  +  a2       */
+  cy = 4 * cy + mpn_lshift (a0_a2, a0_a2, n, 2);	/* 16a0 + 4a2       */
+  cy += mpn_add_n (a0_a2, a4, a0_a2, n);		/* 16a0 + 4a2 +  a4 */
+  cy = 2 * cy + mpn_lshift (a0_a2, a0_a2, n, 1);	/* 32a0 + 8a2 + 2a4 */
+  a0_a2[n] = cy;
+  cy  = mpn_lshift (a1_a3, a1, n, 2);			/* 4a1              */
+  cy += mpn_add_n (a1_a3, a3, a1_a3, n);		/* 4a1  +  a3       */
+  cy = 4 * cy + mpn_lshift (a1_a3, a1_a3, n, 2);	/* 16a1 + 4a3       */
+  cy += mpn_add (a1_a3, a1_a3, n, a5, s);		/* 16a1 + 4a3 + a5  */
+  a1_a3[n] = cy;
+#endif
+#if HAVE_NATIVE_mpn_addsub_n
+  if (mpn_cmp (a0_a2, a1_a3, n + 1) < 0)
     {
-      mp_limb_t cy2;
-      cy2 = mpn_addlsh1_n (ash, a5, ash, s);
-      ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
-      MPN_INCR_U (ash + s, n+1-s, cy2);
+      mpn_addsub_n (ash, asmh, a1_a3, a0_a2, n + 1);
+      vmh_neg = 1;
     }
   else
-    ash[n] = 2*cy + mpn_addlsh1_n (ash, a5, ash, n);
+    {
+      mpn_addsub_n (ash, asmh, a0_a2, a1_a3, n + 1);
+      vmh_neg = 0;
+    }
 #else
-  cy = mpn_lshift (ash, a0, n, 1);
-  cy += mpn_add_n (ash, ash, a1, n);
-  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
-  cy += mpn_add_n (ash, ash, a2, n);
-  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
-  cy += mpn_add_n (ash, ash, a3, n);
-  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
-  cy += mpn_add_n (ash, ash, a4, n);
-  cy = 2*cy + mpn_lshift (ash, ash, n, 1);
-  ash[n] = cy + mpn_add (ash, ash, n, a5, s);
+  mpn_add_n (ash, a0_a2, a1_a3, n + 1);
+  if (mpn_cmp (a0_a2, a1_a3, n + 1) < 0)
+    {
+      mpn_sub_n (asmh, a1_a3, a0_a2, n + 1);
+      vmh_neg = 1;
+    }
+  else
+    {
+      mpn_sub_n (asmh, a0_a2, a1_a3, n + 1);
+      vmh_neg = 0;
+    }
 #endif
 
   /* Compute bs1 and bsm1.  */
   if (t == n)
     {
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
       if (mpn_cmp (b0, b1, n) < 0)
 	{
-	  cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
-	  bflags = toom7_w3_neg;
+	  cy = mpn_addsub_n (bs1, bsm1, b1, b0, n);
+	  bsm_neg = 1;
 	}
       else
 	{
-	  cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
-	  bflags = (enum toom7_flags) 0;
+	  cy = mpn_addsub_n (bs1, bsm1, b0, b1, n);
+	  bsm_neg = 0;
 	}
       bs1[n] = cy >> 1;
 #else
@@ -163,12 +227,12 @@ mpn_toom62_mul (mp_ptr pp,
       if (mpn_cmp (b0, b1, n) < 0)
 	{
 	  mpn_sub_n (bsm1, b1, b0, n);
-	  bflags = toom7_w3_neg;
+	  bsm_neg = 1;
 	}
       else
 	{
 	  mpn_sub_n (bsm1, b0, b1, n);
-	  bflags = (enum toom7_flags) 0;
+	  bsm_neg = 0;
 	}
 #endif
     }
@@ -179,83 +243,56 @@ mpn_toom62_mul (mp_ptr pp,
 	{
 	  mpn_sub_n (bsm1, b1, b0, t);
 	  MPN_ZERO (bsm1 + t, n - t);
-	  bflags = toom7_w3_neg;
+	  bsm_neg = 1;
 	}
       else
 	{
 	  mpn_sub (bsm1, b0, n, b1, t);
-	  bflags = (enum toom7_flags) 0;
+	  bsm_neg = 0;
 	}
     }
 
-  /* Compute bs2 and bsm2. Recycling bs1 and bsm1; bs2=bs1+b1, bsm2 =
-     bsm1 - b1 */
+  vm1_neg ^= bsm_neg;
+
+  /* Compute bs2, recycling bs1. bs2=bs1+b1  */
   mpn_add (bs2, bs1, n + 1, b1, t);
-  if (bflags & toom7_w3_neg)
-    {
-      bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
-      bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
-    }
-  else
+
+  /* Compute bsh and bsmh, recycling bs1 and bsm1. bsh=bs1+b0; bsmh=bsm1+b0  */
+  if (bsm_neg == 1)
     {
-      /* FIXME: Simplify this logic? */
-      if (t < n)
+      bsmh[n] = 0;
+      if (mpn_cmp (bsm1, b0, n) < 0)
 	{
-	  if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
-	    {
-	      ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, t));
-	      MPN_ZERO (bsm2 + t, n + 1 - t);
-	      bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
-	    }
-	  else
-	    {
-	      ASSERT_NOCARRY (mpn_sub (bsm2, bsm1, n, b1, t));
-	      bsm2[n] = 0;
-	    }
+	  bsm_neg = 0;
+	  mpn_sub_n (bsmh, b0, bsm1, n);
 	}
       else
-	{
-	  if (mpn_cmp (bsm1, b1, n) < 0)
-	    {
-	      ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, n));
-	      bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
-	    }
-	  else
-	    {
-	      ASSERT_NOCARRY (mpn_sub_n (bsm2, bsm1, b1, n));
-	    }
-	  bsm2[n] = 0;
-	}
+	mpn_sub_n (bsmh, bsm1, b0, n);
     }
+  else
+    bsmh[n] = mpn_add_n (bsmh, bsm1, b0, n);
+  mpn_add (bsh, bs1, n + 1, b0, n);
+  vmh_neg ^= bsm_neg;
 
-  /* Compute bsh, recycling bs1. bsh=bs1+b0;  */
-  bsh[n] = bs1[n] + mpn_add_n (bsh, bs1, b0, n);
 
   ASSERT (as1[n] <= 5);
   ASSERT (bs1[n] <= 1);
   ASSERT (asm1[n] <= 2);
+/*ASSERT (bsm1[n] == 0);*/
   ASSERT (as2[n] <= 62);
   ASSERT (bs2[n] <= 2);
-  ASSERT (asm2[n] <= 41);
-  ASSERT (bsm2[n] <= 1);
   ASSERT (ash[n] <= 62);
   ASSERT (bsh[n] <= 2);
+  ASSERT (asmh[n] <= 41);
+  ASSERT (bsmh[n] <= 1);
 
 #define v0    pp				/* 2n */
-#define v1    (pp + 2 * n)			/* 2n+1 */
+#define v1    (scratch + 6 * n + 6)		/* 2n+1 */
 #define vinf  (pp + 6 * n)			/* s+t */
-#define v2    scratch				/* 2n+1 */
-#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
-#define vh    (scratch + 4 * n + 2)		/* 2n+1 */
-#define vm1   (scratch + 6 * n + 3)		/* 2n+1 */
-#define scratch_out (scratch + 8 * n + 4)		/* 2n+1 */
-  /* Total scratch need: 10*n+5 */
-
-  /* Must be in allocation order, as they overwrite one limb beyond
-   * 2n+1. */
-  mpn_mul_n (v2, as2, bs2, n + 1);		/* v2, 2n+1 limbs */
-  mpn_mul_n (vm2, asm2, bsm2, n + 1);		/* vm2, 2n+1 limbs */
-  mpn_mul_n (vh, ash, bsh, n + 1);		/* vh, 2n+1 limbs */
+#define vm1   scratch				/* 2n+1 */
+#define v2    (scratch + 2 * n + 2)		/* 2n+1 */
+#define vh    (pp + 2 * n)			/* 2n+1 */
+#define vmh   (scratch + 4 * n + 4)
 
   /* vm1, 2n+1 limbs */
   mpn_mul_n (vm1, asm1, bsm1, n);
@@ -274,6 +311,12 @@ mpn_toom62_mul (mp_ptr pp,
     }
   vm1[2 * n] = cy;
 
+  mpn_mul_n (v2, as2, bs2, n + 1);		/* v2, 2n+1 limbs */
+
+  /* vinf, s+t limbs */
+  if (s > t)  mpn_mul (vinf, a5, s, b1, t);
+  else        mpn_mul (vinf, b1, t, a5, s);
+
   /* v1, 2n+1 limbs */
   mpn_mul_n (v1, as1, bs1, n);
   if (as1[n] == 1)
@@ -298,14 +341,16 @@ mpn_toom62_mul (mp_ptr pp,
     cy += mpn_add_n (v1 + n, v1 + n, as1, n);
   v1[2 * n] = cy;
 
-  mpn_mul_n (v0, a0, b0, n);			/* v0, 2n limbs */
+  mpn_mul_n (vh, ash, bsh, n + 1);
 
-  /* vinf, s+t limbs */
-  if (s > t)  mpn_mul (vinf, a5, s, b1, t);
-  else        mpn_mul (vinf, b1, t, a5, s);
+  mpn_mul_n (vmh, asmh, bsmh, n + 1);
+
+  mpn_mul_n (v0, ap, bp, n);			/* v0, 2n limbs */
+
+  flags =  vm1_neg ? toom4_w3_neg : 0;
+  flags |= vmh_neg ? toom4_w1_neg : 0;
 
-  mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) (aflags ^ bflags),
-			     vm2, vm1, v2, vh, s + t, scratch_out);
+  mpn_toom_interpolate_7pts (pp, n, flags, vmh, vm1, v1, v2, s + t, scratch + 8 * n + 8);
 
   TMP_FREE;
 }
diff --git a/gmp/mpn/generic/toom63_mul.c b/gmp/mpn/generic/toom63_mul.c
deleted file mode 100644
index 57c5d3e3dd..0000000000
--- a/gmp/mpn/generic/toom63_mul.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/* Implementation of the algorithm for Toom-Cook 4.5-way.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Stores |{ap,n}-{bp,n}| in {rp,n}, returns the sign. */
-static int
-abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
-  mp_limb_t  x, y;
-  while (--n >= 0)
-    {
-      x = ap[n];
-      y = bp[n];
-      if (x != y)
-	{
-	  n++;
-	  if (x > y)
-	    {
-	      mpn_sub_n (rp, ap, bp, n);
-	      return 0;
-	    }
-	  else
-	    {
-	      mpn_sub_n (rp, bp, ap, n);
-	      return ~0;
-	    }
-	}
-      rp[n] = 0;
-    }
-  return 0;
-}
-
-static int
-abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
-  int result;
-  result = abs_sub_n (rm, rp, rs, n);
-  ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n));
-  return result;
-}
-
-
-/* Toom-4.5, the splitting 6x3 unbalanced version.
-   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
-
-  <--s-><--n--><--n--><--n--><--n--><--n-->
-   ____ ______ ______ ______ ______ ______
-  |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__|
-			|b2_|__b1__|__b0__|
-			<-t-><--n--><--n-->
-
-*/
-#define TOOM_63_MUL_N_REC(p, a, b, n, ws)		\
-  do {	mpn_mul_n (p, a, b, n);				\
-  } while (0)
-
-#define TOOM_63_MUL_REC(p, a, na, b, nb, ws)		\
-  do {	mpn_mul (p, a, na, b, nb);			\
-  } while (0)
-
-void
-mpn_toom63_mul (mp_ptr pp,
-		mp_srcptr ap, mp_size_t an,
-		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
-{
-  mp_size_t n, s, t;
-  mp_limb_t cy;
-  int sign;
-
-  /***************************** decomposition *******************************/
-#define a5  (ap + 5 * n)
-#define b0  (bp + 0 * n)
-#define b1  (bp + 1 * n)
-#define b2  (bp + 2 * n)
-
-  ASSERT (an >= bn);
-  n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
-
-  s = an - 5 * n;
-  t = bn - 2 * n;
-
-  ASSERT (0 < s && s <= n);
-  ASSERT (0 < t && t <= n);
-  /* WARNING! it assumes s+t>=n */
-  ASSERT ( s + t >= n );
-  ASSERT ( s + t > 4);
-  /* WARNING! it assumes n>1 */
-  ASSERT ( n > 2);
-
-#define   r8    pp				/* 2n   */
-#define   r7    scratch				/* 3n+1 */
-#define   r5    (pp + 3*n)			/* 3n+1 */
-#define   v0    (pp + 3*n)			/* n+1 */
-#define   v1    (pp + 4*n+1)			/* n+1 */
-#define   v2    (pp + 5*n+2)			/* n+1 */
-#define   v3    (pp + 6*n+3)			/* n+1 */
-#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
-#define   r1    (pp + 7*n)			/* s+t <= 2*n */
-#define   ws    (scratch + 6 * n + 2)		/* ??? */
-
-  /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
-     need all of them, when DO_mpn_sublsh_n usea a scratch  */
-/*   if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */
-
-  /********************** evaluation and recursive calls *********************/
-  /* $\pm4$ */
-  sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
-  pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */
-  /* FIXME: use addlsh */
-  v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */
-  if ( n == t )
-    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */
-  else
-    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */
-  sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
-  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
-  TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
-  mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
-
-  /* $\pm1$ */
-  sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
-  /* Compute bs1 and bsm1. Code taken from toom33 */
-  cy = mpn_add (ws, b0, n, b2, t);
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
-    {
-      cy = mpn_add_n_sub_n (v3, v1, b1, ws, n);
-      v3[n] = cy >> 1;
-      v1[n] = 0;
-      sign = ~sign;
-    }
-  else
-    {
-      mp_limb_t cy2;
-      cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n);
-      v3[n] = cy + (cy2 >> 1);
-      v1[n] = cy - (cy2 & 1);
-    }
-#else
-  v3[n] = cy + mpn_add_n (v3, ws, b1, n);
-  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
-    {
-      mpn_sub_n (v1, b1, ws, n);
-      v1[n] = 0;
-      sign = ~sign;
-    }
-  else
-    {
-      cy -= mpn_sub_n (v1, ws, b1, n);
-      v1[n] = cy;
-    }
-#endif
-  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
-  TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
-  mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
-
-  /* $\pm2$ */
-  sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
-  pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */
-  /* FIXME: use addlsh or addlsh2 */
-  v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */
-  if ( n == t )
-    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */
-  else
-    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */
-  sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
-  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
-  TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
-  mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
-
-  /* A(0)*B(0) */
-  TOOM_63_MUL_N_REC(pp, ap, bp, n, ws);
-
-  /* Infinity */
-  if (s > t) {
-    TOOM_63_MUL_REC(r1, a5, s, b2, t, ws);
-  } else {
-    TOOM_63_MUL_REC(r1, b2, t, a5, s, ws);
-  };
-
-  mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
-
-#undef a5
-#undef b0
-#undef b1
-#undef b2
-#undef r1
-#undef r3
-#undef r5
-#undef v0
-#undef v1
-#undef v2
-#undef v3
-#undef r7
-#undef r8
-#undef ws
-}
diff --git a/gmp/mpn/generic/toom6_sqr.c b/gmp/mpn/generic/toom6_sqr.c
deleted file mode 100644
index e5ab7dcd1d..0000000000
--- a/gmp/mpn/generic/toom6_sqr.c
+++ /dev/null
@@ -1,182 +0,0 @@
-/* Implementation of the squaring algorithm with Toom-Cook 6.5-way.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-#if GMP_NUMB_BITS < 21
-#error Not implemented.
-#endif
-
-
-#if TUNE_PROGRAM_BUILD
-#define MAYBE_sqr_basecase 1
-#define MAYBE_sqr_above_basecase   1
-#define MAYBE_sqr_toom2   1
-#define MAYBE_sqr_above_toom2   1
-#define MAYBE_sqr_toom3   1
-#define MAYBE_sqr_above_toom3   1
-#define MAYBE_sqr_above_toom4   1
-#else
-#ifdef  SQR_TOOM8_THRESHOLD
-#define SQR_TOOM6_MAX ((SQR_TOOM8_THRESHOLD+6*2-1+5)/6)
-#else
-#define SQR_TOOM6_MAX					\
-  ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (6*2-1+5)) ?	\
-   ((SQR_FFT_THRESHOLD+6*2-1+5)/6)			\
-   : MP_SIZE_T_MAX )
-#endif
-#define MAYBE_sqr_basecase					\
-  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD)
-#define MAYBE_sqr_above_basecase				\
-  (SQR_TOOM6_MAX >=  SQR_TOOM2_THRESHOLD)
-#define MAYBE_sqr_toom2						\
-  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD)
-#define MAYBE_sqr_above_toom2					\
-  (SQR_TOOM6_MAX >= SQR_TOOM3_THRESHOLD)
-#define MAYBE_sqr_toom3						\
-  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD)
-#define MAYBE_sqr_above_toom3					\
-  (SQR_TOOM6_MAX >= SQR_TOOM4_THRESHOLD)
-#define MAYBE_sqr_above_toom4					\
-  (SQR_TOOM6_MAX >= SQR_TOOM6_THRESHOLD)
-#endif
-
-#define TOOM6_SQR_REC(p, a, n, ws)					\
-  do {									\
-    if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
-	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)))			\
-      mpn_sqr_basecase (p, a, n);					\
-    else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
-	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)))		\
-      mpn_toom2_sqr (p, a, n, ws);					\
-    else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
-	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)))		\
-      mpn_toom3_sqr (p, a, n, ws);					\
-    else if (! MAYBE_sqr_above_toom4					\
-	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))		\
-      mpn_toom4_sqr (p, a, n, ws);					\
-    else								\
-      mpn_toom6_sqr (p, a, n, ws);					\
-  } while (0)
-
-void
-mpn_toom6_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
-{
-  mp_size_t n, s;
-
-  /***************************** decomposition *******************************/
-
-  ASSERT( an >= 18 );
-
-  n = 1 + (an - 1) / (size_t) 6;
-
-  s = an - 5 * n;
-
-  ASSERT (0 < s && s <= n);
-
-#define   r4    (pp + 3 * n)			/* 3n+1 */
-#define   r2    (pp + 7 * n)			/* 3n+1 */
-#define   r0    (pp +11 * n)			/* s+t <= 2*n */
-#define   r5    (scratch)			/* 3n+1 */
-#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
-#define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
-#define   v0    (pp + 7 * n)			/* n+1 */
-#define   v2    (pp + 9 * n+2)			/* n+1 */
-#define   wse   (scratch + 9 * n + 3)		/* 3n+1 */
-
-  /* Alloc also 3n+1 limbs for ws... toom_interpolate_12pts may
-     need all of them, when DO_mpn_sublsh_n usea a scratch  */
-/*   if (scratch== NULL) */
-/*     scratch = TMP_SALLOC_LIMBS (12 * n + 6); */
-
-  /********************** evaluation and recursive calls *********************/
-  /* $\pm1/2$ */
-  mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 1, pp);
-  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
-  TOOM6_SQR_REC(r5, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
-  mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 1, 0);
-
-  /* $\pm1$ */
-  mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
-  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */
-  TOOM6_SQR_REC(r3, v2, n + 1, wse); /* A(1)*B(1) */
-  mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 0, 0);
-
-  /* $\pm4$ */
-  mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
-  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */
-  TOOM6_SQR_REC(r1, v2, n + 1, wse); /* A(+4)*B(+4) */
-  mpn_toom_couple_handling (r1, 2 * n + 1, pp, 0, n, 2, 4);
-
-  /* $\pm1/4$ */
-  mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 2, pp);
-  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
-  TOOM6_SQR_REC(r4, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
-  mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 2, 0);
-
-  /* $\pm2$ */
-  mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
-  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */
-  TOOM6_SQR_REC(r2, v2, n + 1, wse); /* A(+2)*B(+2) */
-  mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 1, 2);
-
-#undef v0
-#undef v2
-
-  /* A(0)*B(0) */
-  TOOM6_SQR_REC(pp, ap, n, wse);
-
-  mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, 2 * s, 0, wse);
-
-#undef r0
-#undef r1
-#undef r2
-#undef r3
-#undef r4
-#undef r5
-
-}
-#undef TOOM6_SQR_REC
-#undef MAYBE_sqr_basecase
-#undef MAYBE_sqr_above_basecase
-#undef MAYBE_sqr_toom2
-#undef MAYBE_sqr_above_toom2
-#undef MAYBE_sqr_toom3
-#undef MAYBE_sqr_above_toom3
-#undef MAYBE_sqr_above_toom4
diff --git a/gmp/mpn/generic/toom6h_mul.c b/gmp/mpn/generic/toom6h_mul.c
deleted file mode 100644
index 420895be8f..0000000000
--- a/gmp/mpn/generic/toom6h_mul.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-#if GMP_NUMB_BITS < 21
-#error Not implemented.
-#endif
-
-#if TUNE_PROGRAM_BUILD
-#define MAYBE_mul_basecase 1
-#define MAYBE_mul_toom22   1
-#define MAYBE_mul_toom33   1
-#define MAYBE_mul_toom6h   1
-#else
-#define MAYBE_mul_basecase						\
-  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
-#define MAYBE_mul_toom22						\
-  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
-#define MAYBE_mul_toom33						\
-  (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
-#define MAYBE_mul_toom6h						\
-  (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
-#endif
-
-#define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
-  do {									\
-    if (MAYBE_mul_basecase						\
-	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
-      mpn_mul_basecase (p, a, n, b, n);					\
-      if (f)								\
-	mpn_mul_basecase (p2, a2, n, b2, n);				\
-    } else if (MAYBE_mul_toom22						\
-	       && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
-      mpn_toom22_mul (p, a, n, b, n, ws);				\
-      if (f)								\
-	mpn_toom22_mul (p2, a2, n, b2, n, ws);				\
-    } else if (MAYBE_mul_toom33						\
-	       && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
-      mpn_toom33_mul (p, a, n, b, n, ws);				\
-      if (f)								\
-	mpn_toom33_mul (p2, a2, n, b2, n, ws);				\
-    } else if (! MAYBE_mul_toom6h					\
-	       || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
-      mpn_toom44_mul (p, a, n, b, n, ws);				\
-      if (f)								\
-	mpn_toom44_mul (p2, a2, n, b2, n, ws);				\
-    } else {								\
-      mpn_toom6h_mul (p, a, n, b, n, ws);				\
-      if (f)								\
-	mpn_toom6h_mul (p2, a2, n, b2, n, ws);				\
-    }									\
-  } while (0)
-
-#define TOOM6H_MUL_REC(p, a, na, b, nb, ws)		\
-  do { mpn_mul (p, a, na, b, nb);			\
-  } while (0)
-
-/* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
-   With: an >= bn >= 46, an*6 <  bn * 17.
-   It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
-
-   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
-*/
-/* Estimate on needed scratch:
-   S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
-   since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
- */
-
-void
-mpn_toom6h_mul   (mp_ptr pp,
-		  mp_srcptr ap, mp_size_t an,
-		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
-{
-  mp_size_t n, s, t;
-  int p, q, half;
-  int sign;
-
-  /***************************** decomposition *******************************/
-
-  ASSERT (an >= bn);
-  /* Can not handle too much unbalancement */
-  ASSERT (bn >= 42);
-  /* Can not handle too much unbalancement */
-  ASSERT ((an*3 <  bn * 8) || (bn >= 46 && an * 6 <  bn * 17));
-
-  /* Limit num/den is a rational number between
-     (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1))             */
-#define LIMIT_numerator (18)
-#define LIMIT_denominat (17)
-
-  if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */
-    {
-      n = 1 + (an - 1) / (size_t) 6;
-      p = q = 5;
-      half = 0;
-
-      s = an - 5 * n;
-      t = bn - 5 * n;
-    }
-  else {
-    if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
-      { p = 7; q = 6; }
-    else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
-      { p = 7; q = 5; }
-    else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn)  /* is 4*... < 8*... */
-      { p = 8; q = 5; }
-    else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn)  /* is 4*... < 8*... */
-      { p = 8; q = 4; }
-    else
-      { p = 9; q = 4; }
-
-    half = (p ^ q) & 1;
-    n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
-    p--; q--;
-
-    s = an - p * n;
-    t = bn - q * n;
-
-    /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
-    if (half) { /* Recover from badly chosen splitting */
-      if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
-      else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
-    }
-  }
-#undef LIMIT_numerator
-#undef LIMIT_denominat
-
-  ASSERT (0 < s && s <= n);
-  ASSERT (0 < t && t <= n);
-  ASSERT (half || s + t > 3);
-  ASSERT (n > 2);
-
-#define   r4    (pp + 3 * n)			/* 3n+1 */
-#define   r2    (pp + 7 * n)			/* 3n+1 */
-#define   r0    (pp +11 * n)			/* s+t <= 2*n */
-#define   r5    (scratch)			/* 3n+1 */
-#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
-#define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
-#define   v0    (pp + 7 * n)			/* n+1 */
-#define   v1    (pp + 8 * n+1)			/* n+1 */
-#define   v2    (pp + 9 * n+2)			/* n+1 */
-#define   v3    (scratch + 9 * n + 3)		/* n+1 */
-#define   wsi   (scratch + 9 * n + 3)		/* 3n+1 */
-#define   wse   (scratch +10 * n + 4)		/* 2n+1 */
-
-  /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
-     need all of them  */
-/*   if (scratch == NULL) */
-/*     scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
-  ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
-  ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
-
-  /********************** evaluation and recursive calls *********************/
-  /* $\pm1/2$ */
-  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
-	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
-  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
-  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
-  mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
-
-  /* $\pm1$ */
-  sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
-  if (UNLIKELY (q == 3))
-    sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
-  else
-    sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
-  /* A(-1)*B(-1) */ /* A(1)*B(1) */
-  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
-  mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
-
-  /* $\pm4$ */
-  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
-	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
-  /* A(-4)*B(-4) */
-  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
-  mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
-
-  /* $\pm1/4$ */
-  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
-	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
-  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
-  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
-  mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
-
-  /* $\pm2$ */
-  sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
-	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
-  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
-  TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
-  mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
-
-#undef v0
-#undef v1
-#undef v2
-#undef v3
-#undef wse
-
-  /* A(0)*B(0) */
-  TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
-
-  /* Infinity */
-  if (UNLIKELY (half != 0)) {
-    if (s > t) {
-      TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
-    } else {
-      TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
-    };
-  };
-
-  mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
-
-#undef r0
-#undef r1
-#undef r2
-#undef r3
-#undef r4
-#undef r5
-#undef wsi
-}
-
-#undef TOOM6H_MUL_N_REC
-#undef TOOM6H_MUL_REC
-#undef MAYBE_mul_basecase
-#undef MAYBE_mul_toom22
-#undef MAYBE_mul_toom33
-#undef MAYBE_mul_toom6h
diff --git a/gmp/mpn/generic/toom8_sqr.c b/gmp/mpn/generic/toom8_sqr.c
deleted file mode 100644
index 0c93678815..0000000000
--- a/gmp/mpn/generic/toom8_sqr.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/* Implementation of the squaring algorithm with Toom-Cook 8.5-way.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#if GMP_NUMB_BITS < 29
-#error Not implemented.
-#endif
-
-#if GMP_NUMB_BITS < 43
-#define BIT_CORRECTION 1
-#define CORRECTION_BITS GMP_NUMB_BITS
-#else
-#define BIT_CORRECTION 0
-#define CORRECTION_BITS 0
-#endif
-
-#ifndef SQR_TOOM8_THRESHOLD
-#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
-#endif
-
-#ifndef SQR_TOOM6_THRESHOLD
-#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
-#endif
-
-#if TUNE_PROGRAM_BUILD
-#define MAYBE_sqr_basecase 1
-#define MAYBE_sqr_above_basecase   1
-#define MAYBE_sqr_toom2   1
-#define MAYBE_sqr_above_toom2   1
-#define MAYBE_sqr_toom3   1
-#define MAYBE_sqr_above_toom3   1
-#define MAYBE_sqr_toom4   1
-#define MAYBE_sqr_above_toom4   1
-#define MAYBE_sqr_above_toom6   1
-#else
-#define SQR_TOOM8_MAX					\
-  ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ?	\
-   ((SQR_FFT_THRESHOLD+8*2-1+7)/8)			\
-   : MP_SIZE_T_MAX )
-#define MAYBE_sqr_basecase					\
-  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD)
-#define MAYBE_sqr_above_basecase				\
-  (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD)
-#define MAYBE_sqr_toom2						\
-  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD)
-#define MAYBE_sqr_above_toom2					\
-  (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD)
-#define MAYBE_sqr_toom3						\
-  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD)
-#define MAYBE_sqr_above_toom3					\
-  (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD)
-#define MAYBE_sqr_toom4						\
-  (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD)
-#define MAYBE_sqr_above_toom4					\
-  (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD)
-#define MAYBE_sqr_above_toom6					\
-  (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
-#endif
-
-#define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws)				\
-  do {									\
-    if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
-	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) {			\
-      mpn_sqr_basecase (p, a, n);					\
-      if (f) mpn_sqr_basecase (p2, a2, n);				\
-    } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
-	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) {		\
-      mpn_toom2_sqr (p, a, n, ws);					\
-      if (f) mpn_toom2_sqr (p2, a2, n, ws);				\
-    } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
-	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) {		\
-      mpn_toom3_sqr (p, a, n, ws);					\
-      if (f) mpn_toom3_sqr (p2, a2, n, ws);				\
-    } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4		\
-	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) {		\
-      mpn_toom4_sqr (p, a, n, ws);					\
-      if (f) mpn_toom4_sqr (p2, a2, n, ws);				\
-    } else if (! MAYBE_sqr_above_toom6					\
-	     || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) {		\
-      mpn_toom6_sqr (p, a, n, ws);					\
-      if (f) mpn_toom6_sqr (p2, a2, n, ws);				\
-    } else {								\
-      mpn_toom8_sqr (p, a, n, ws);					\
-      if (f) mpn_toom8_sqr (p2, a2, n, ws);				\
-    }									\
-  } while (0)
-
-void
-mpn_toom8_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
-{
-  mp_size_t n, s;
-
-  /***************************** decomposition *******************************/
-
-  ASSERT ( an >= 40 );
-
-  n = 1 + ((an - 1)>>3);
-
-  s = an - 7 * n;
-
-  ASSERT (0 < s && s <= n);
-  ASSERT ( s + s > 3 );
-
-#define   r6    (pp + 3 * n)			/* 3n+1 */
-#define   r4    (pp + 7 * n)			/* 3n+1 */
-#define   r2    (pp +11 * n)			/* 3n+1 */
-#define   r0    (pp +15 * n)			/* s+t <= 2*n */
-#define   r7    (scratch)			/* 3n+1 */
-#define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
-#define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
-#define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
-#define   v0    (pp +11 * n)			/* n+1 */
-#define   v2    (pp +13 * n+2)			/* n+1 */
-#define   wse   (scratch +12 * n + 4)		/* 3n+1 */
-
-  /* Alloc also 3n+1 limbs for ws... toom_interpolate_16pts may
-     need all of them, when DO_mpn_sublsh_n usea a scratch  */
-/*   if (scratch == NULL) */
-/*     scratch = TMP_SALLOC_LIMBS (30 * n + 6); */
-
-  /********************** evaluation and recursive calls *********************/
-  /* $\pm1/8$ */
-  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
-  /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
-  TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse);
-  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
-
-  /* $\pm1/4$ */
-  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
-  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
-  TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse);
-  mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
-
-  /* $\pm2$ */
-  mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
-  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
-  TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse);
-  mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
-
-  /* $\pm8$ */
-  mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
-  /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
-  TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse);
-  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
-
-  /* $\pm1/2$ */
-  mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
-  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
-  TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse);
-  mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
-
-  /* $\pm1$ */
-  mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s,    pp);
-  /* A(-1)*B(-1) */ /* A(1)*B(1) */
-  TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse);
-  mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
-
-  /* $\pm4$ */
-  mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
-  /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
-  TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse);
-  mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
-
-#undef v0
-#undef v2
-
-  /* A(0)*B(0) */
-  TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse);
-
-  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
-
-#undef r0
-#undef r1
-#undef r2
-#undef r3
-#undef r4
-#undef r5
-#undef r6
-#undef wse
-
-}
-
-#undef TOOM8_SQR_REC
-#undef MAYBE_sqr_basecase
-#undef MAYBE_sqr_above_basecase
-#undef MAYBE_sqr_toom2
-#undef MAYBE_sqr_above_toom2
-#undef MAYBE_sqr_toom3
-#undef MAYBE_sqr_above_toom3
-#undef MAYBE_sqr_above_toom4
diff --git a/gmp/mpn/generic/toom8h_mul.c b/gmp/mpn/generic/toom8h_mul.c
deleted file mode 100644
index 8f593903f5..0000000000
--- a/gmp/mpn/generic/toom8h_mul.c
+++ /dev/null
@@ -1,306 +0,0 @@
-/* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-#if GMP_NUMB_BITS < 29
-#error Not implemented.
-#endif
-
-#if GMP_NUMB_BITS < 43
-#define BIT_CORRECTION 1
-#define CORRECTION_BITS GMP_NUMB_BITS
-#else
-#define BIT_CORRECTION 0
-#define CORRECTION_BITS 0
-#endif
-
-
-#if TUNE_PROGRAM_BUILD
-#define MAYBE_mul_basecase 1
-#define MAYBE_mul_toom22   1
-#define MAYBE_mul_toom33   1
-#define MAYBE_mul_toom44   1
-#define MAYBE_mul_toom8h   1
-#else
-#define MAYBE_mul_basecase						\
-  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
-#define MAYBE_mul_toom22						\
-  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
-#define MAYBE_mul_toom33						\
-  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
-#define MAYBE_mul_toom44						\
-  (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
-#define MAYBE_mul_toom8h						\
-  (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
-#endif
-
-#define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws)			\
-  do {									\
-    if (MAYBE_mul_basecase						\
-	&& BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) {			\
-      mpn_mul_basecase (p, a, n, b, n);					\
-      if (f) mpn_mul_basecase (p2, a2, n, b2, n);			\
-    } else if (MAYBE_mul_toom22						\
-	     && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) {		\
-      mpn_toom22_mul (p, a, n, b, n, ws);				\
-      if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws);			\
-    } else if (MAYBE_mul_toom33						\
-	     && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) {		\
-      mpn_toom33_mul (p, a, n, b, n, ws);				\
-      if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws);			\
-    } else if (MAYBE_mul_toom44						\
-	     && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) {		\
-      mpn_toom44_mul (p, a, n, b, n, ws);				\
-      if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws);			\
-    } else if (! MAYBE_mul_toom8h					\
-	     || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) {		\
-      mpn_toom6h_mul (p, a, n, b, n, ws);				\
-      if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws);			\
-    } else {								\
-      mpn_toom8h_mul (p, a, n, b, n, ws);				\
-      if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws);			\
-    }									\
-  } while (0)
-
-#define TOOM8H_MUL_REC(p, a, na, b, nb, ws)		\
-  do { mpn_mul (p, a, na, b, nb); } while (0)
-
-/* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
-   With: an >= bn >= 86, an*5 <  bn * 11.
-   It _may_ work with bn<=?? and bn*?? < an*? < bn*??
-
-   Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
-*/
-/* Estimate on needed scratch:
-   S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
-   since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
- */
-
-void
-mpn_toom8h_mul   (mp_ptr pp,
-		  mp_srcptr ap, mp_size_t an,
-		  mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
-{
-  mp_size_t n, s, t;
-  int p, q, half;
-  int sign;
-
-  /***************************** decomposition *******************************/
-
-  ASSERT (an >= bn);
-  /* Can not handle too small operands */
-  ASSERT (bn >= 86);
-  /* Can not handle too much unbalancement */
-  ASSERT (an <= bn*4);
-  ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
-  ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
-  ASSERT (GMP_NUMB_BITS >  9*3 || an*2 <= bn* 3);
-
-  /* Limit num/den is a rational number between
-     (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1))             */
-#define LIMIT_numerator (21)
-#define LIMIT_denominat (20)
-
-  if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
-    {
-      half = 0;
-      n = 1 + ((an - 1)>>3);
-      p = q = 7;
-      s = an - 7 * n;
-      t = bn - 7 * n;
-    }
-  else
-    {
-      if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
-	{ p = 9; q = 8; }
-      else if (GMP_NUMB_BITS <= 9*3 ||
-	       an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
-	{ p = 9; q = 7; }
-      else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
-	{ p =10; q = 7; }
-      else if (GMP_NUMB_BITS <= 10*3 ||
-	       an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
-	{ p =10; q = 6; }
-      else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
-	{ p =11; q = 6; }
-      else if (GMP_NUMB_BITS <= 11*3 ||
-	       an * 4 < 9 * bn)
-	{ p =11; q = 5; }
-      else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn)  /* is 4*... <12*... */
-	{ p =12; q = 5; }
-      else if (GMP_NUMB_BITS <= 12*3 ||
-	       an * 9 < 28 * bn )  /* is 4*... <12*... */
-	{ p =12; q = 4; }
-      else
-	{ p =13; q = 4; }
-
-      half = (p+q)&1;
-      n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
-      p--; q--;
-
-      s = an - p * n;
-      t = bn - q * n;
-
-      if(half) { /* Recover from badly chosen splitting */
-	if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
-	else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
-      }
-    }
-#undef LIMIT_numerator
-#undef LIMIT_denominat
-
-  ASSERT (0 < s && s <= n);
-  ASSERT (0 < t && t <= n);
-  ASSERT (half || s + t > 3);
-  ASSERT (n > 2);
-
-#define   r6    (pp + 3 * n)			/* 3n+1 */
-#define   r4    (pp + 7 * n)			/* 3n+1 */
-#define   r2    (pp +11 * n)			/* 3n+1 */
-#define   r0    (pp +15 * n)			/* s+t <= 2*n */
-#define   r7    (scratch)			/* 3n+1 */
-#define   r5    (scratch + 3 * n + 1)		/* 3n+1 */
-#define   r3    (scratch + 6 * n + 2)		/* 3n+1 */
-#define   r1    (scratch + 9 * n + 3)		/* 3n+1 */
-#define   v0    (pp +11 * n)			/* n+1 */
-#define   v1    (pp +12 * n+1)			/* n+1 */
-#define   v2    (pp +13 * n+2)			/* n+1 */
-#define   v3    (scratch +12 * n + 4)		/* n+1 */
-#define   wsi   (scratch +12 * n + 4)		/* 3n+1 */
-#define   wse   (scratch +13 * n + 5)		/* 2n+1 */
-
-  /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
-     need all of them  */
-/*   if (scratch == NULL) */
-/*     scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
-  ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
-  ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
-
-  /********************** evaluation and recursive calls *********************/
-
-  /* $\pm1/8$ */
-  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
-	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
-  /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
-  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
-  mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
-
-  /* $\pm1/4$ */
-  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
-	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
-  /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
-  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
-  mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
-
-  /* $\pm2$ */
-  sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
-	 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
-  /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
-  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
-  mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
-
-  /* $\pm8$ */
-  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
-	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
-  /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
-  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
-  mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
-
-  /* $\pm1/2$ */
-  sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
-	 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
-  /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
-  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
-  mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
-
-  /* $\pm1$ */
-  sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s,    pp);
-  if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
-    sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
-  else
-    sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t,    pp);
-  /* A(-1)*B(-1) */ /* A(1)*B(1) */
-  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
-  mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
-
-  /* $\pm4$ */
-  sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
-	 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
-  /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
-  TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
-  mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
-
-#undef v0
-#undef v1
-#undef v2
-#undef v3
-#undef wse
-
-  /* A(0)*B(0) */
-  TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
-
-  /* Infinity */
-  if (UNLIKELY (half != 0)) {
-    if (s > t) {
-      TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
-    } else {
-      TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
-    };
-  };
-
-  mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
-
-#undef r0
-#undef r1
-#undef r2
-#undef r3
-#undef r4
-#undef r5
-#undef r6
-#undef wsi
-}
-
-#undef TOOM8H_MUL_N_REC
-#undef TOOM8H_MUL_REC
-#undef MAYBE_mul_basecase
-#undef MAYBE_mul_toom22
-#undef MAYBE_mul_toom33
-#undef MAYBE_mul_toom44
-#undef MAYBE_mul_toom8h
diff --git a/gmp/mpn/generic/toom_couple_handling.c b/gmp/mpn/generic/toom_couple_handling.c
deleted file mode 100644
index 9e62bcba1c..0000000000
--- a/gmp/mpn/generic/toom_couple_handling.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/* Helper function for high degree Toom-Cook algorithms.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Gets {pp,n} and (sign?-1:1)*{np,n}. Computes at once:
-     {pp,n} <- ({pp,n}+{np,n})/2^{ps+1}
-     {pn,n} <- ({pp,n}-{np,n})/2^{ns+1}
-   Finally recompose them obtaining:
-     {pp,n+off} <- {pp,n}+{np,n}*2^{off*GMP_NUMB_BITS}
-*/
-void
-mpn_toom_couple_handling (mp_ptr pp, mp_size_t n, mp_ptr np,
-			  int nsign, mp_size_t off, int ps, int ns)
-{
-  if (nsign) {
-#ifdef HAVE_NATIVE_mpn_rsh1sub_n
-    mpn_rsh1sub_n (np, pp, np, n);
-#else
-    mpn_sub_n (np, pp, np, n);
-    mpn_rshift (np, np, n, 1);
-#endif
-  } else {
-#ifdef HAVE_NATIVE_mpn_rsh1add_n
-    mpn_rsh1add_n (np, pp, np, n);
-#else
-    mpn_add_n (np, pp, np, n);
-    mpn_rshift (np, np, n, 1);
-#endif
-  }
-
-#ifdef HAVE_NATIVE_mpn_rsh1sub_n
-  if (ps == 1)
-    mpn_rsh1sub_n (pp, pp, np, n);
-  else
-#endif
-  {
-    mpn_sub_n (pp, pp, np, n);
-    if (ps > 0)
-      mpn_rshift (pp, pp, n, ps);
-  }
-  if (ns > 0)
-    mpn_rshift (np, np, n, ns);
-  pp[n] = mpn_add_n (pp+off, pp+off, np, n-off);
-  ASSERT_NOCARRY (mpn_add_1(pp+n, np+n-off, off, pp[n]) );
-}
diff --git a/gmp/mpn/generic/toom_eval_dgr3_pm1.c b/gmp/mpn/generic/toom_eval_dgr3_pm1.c
deleted file mode 100644
index 50411bd3ca..0000000000
--- a/gmp/mpn/generic/toom_eval_dgr3_pm1.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/* mpn_toom_eval_dgr3_pm1 -- Evaluate a degree 3 polynomial in +1 and -1
-
-   Contributed to the GNU project by Niels Möller
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-int
-mpn_toom_eval_dgr3_pm1 (mp_ptr xp1, mp_ptr xm1,
-			mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
-{
-  int neg;
-
-  ASSERT (x3n > 0);
-  ASSERT (x3n <= n);
-
-  xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
-  tp[n] = mpn_add (tp, xp + n, n, xp + 3*n, x3n);
-
-  neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  if (neg)
-    mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
-  else
-    mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
-#else
-  if (neg)
-    mpn_sub_n (xm1, tp, xp1, n + 1);
-  else
-    mpn_sub_n (xm1, xp1, tp, n + 1);
-
-  mpn_add_n (xp1, xp1, tp, n + 1);
-#endif
-
-  ASSERT (xp1[n] <= 3);
-  ASSERT (xm1[n] <= 1);
-
-  return neg;
-}
diff --git a/gmp/mpn/generic/toom_eval_dgr3_pm2.c b/gmp/mpn/generic/toom_eval_dgr3_pm2.c
deleted file mode 100644
index 3ba6d15f3d..0000000000
--- a/gmp/mpn/generic/toom_eval_dgr3_pm2.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/* mpn_toom_eval_dgr3_pm2 -- Evaluate a degree 3 polynomial in +2 and -2
-
-   Contributed to the GNU project by Niels Möller
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Needs n+1 limbs of temporary storage. */
-int
-mpn_toom_eval_dgr3_pm2 (mp_ptr xp2, mp_ptr xm2,
-			mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
-{
-  mp_limb_t cy;
-  int neg;
-
-  ASSERT (x3n > 0);
-  ASSERT (x3n <= n);
-
-  /* (x0 + 4 * x2) +/- (2 x1 + 8 x_3) */
-#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
-#if HAVE_NATIVE_mpn_addlsh2_n
-  xp2[n] = mpn_addlsh2_n (xp2, xp, xp + 2*n, n);
-
-  cy = mpn_addlsh2_n (tp, xp + n, xp + 3*n, x3n);
-#else /* HAVE_NATIVE_mpn_addlsh_n */
-  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2);
-
-  cy = mpn_addlsh_n (tp, xp + n, xp + 3*n, x3n, 2);
-#endif
-  if (x3n < n)
-    cy = mpn_add_1 (tp + x3n, xp + n + x3n, n - x3n, cy);
-  tp[n] = cy;
-#else
-  cy = mpn_lshift (tp, xp + 2*n, n, 2);
-  xp2[n] = cy + mpn_add_n (xp2, tp, xp, n);
-
-  tp[x3n] = mpn_lshift (tp, xp + 3*n, x3n, 2);
-  if (x3n < n)
-    tp[n] = mpn_add (tp, xp + n, n, tp, x3n + 1);
-  else
-    tp[n] += mpn_add_n (tp, xp + n, tp, n);
-#endif
-  mpn_lshift (tp, tp, n+1, 1);
-
-  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  if (neg)
-    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
-  else
-    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
-#else
-  if (neg)
-    mpn_sub_n (xm2, tp, xp2, n + 1);
-  else
-    mpn_sub_n (xm2, xp2, tp, n + 1);
-
-  mpn_add_n (xp2, xp2, tp, n + 1);
-#endif
-
-  ASSERT (xp2[n] < 15);
-  ASSERT (xm2[n] < 10);
-
-  return neg;
-}
diff --git a/gmp/mpn/generic/toom_eval_pm1.c b/gmp/mpn/generic/toom_eval_pm1.c
deleted file mode 100644
index 2334b0aff4..0000000000
--- a/gmp/mpn/generic/toom_eval_pm1.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/* mpn_toom_eval_pm1 -- Evaluate a polynomial in +1 and -1
-
-   Contributed to the GNU project by Niels Möller
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Evaluates a polynomial of degree k > 3, in the points +1 and -1. */
-int
-mpn_toom_eval_pm1 (mp_ptr xp1, mp_ptr xm1, unsigned k,
-		   mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
-{
-  unsigned i;
-  int neg;
-
-  ASSERT (k >= 4);
-
-  ASSERT (hn > 0);
-  ASSERT (hn <= n);
-
-  /* The degree k is also the number of full-size coefficients, so
-   * that last coefficient, of size hn, starts at xp + k*n. */
-
-  xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
-  for (i = 4; i < k; i += 2)
-    ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+i*n, n));
-
-  tp[n] = mpn_add_n (tp, xp + n, xp + 3*n, n);
-  for (i = 5; i < k; i += 2)
-    ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+i*n, n));
-
-  if (k & 1)
-    ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+k*n, hn));
-  else
-    ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+k*n, hn));
-
-  neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  if (neg)
-    mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
-  else
-    mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
-#else
-  if (neg)
-    mpn_sub_n (xm1, tp, xp1, n + 1);
-  else
-    mpn_sub_n (xm1, xp1, tp, n + 1);
-
-  mpn_add_n (xp1, xp1, tp, n + 1);
-#endif
-
-  ASSERT (xp1[n] <= k);
-  ASSERT (xm1[n] <= k/2 + 1);
-
-  return neg;
-}
diff --git a/gmp/mpn/generic/toom_eval_pm2.c b/gmp/mpn/generic/toom_eval_pm2.c
deleted file mode 100644
index 67afcc638e..0000000000
--- a/gmp/mpn/generic/toom_eval_pm2.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/* mpn_toom_eval_pm2 -- Evaluate a polynomial in +2 and -2
-
-   Contributed to the GNU project by Niels Möller and Marco Bodrato
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* DO_addlsh2(d,a,b,n,cy) computes cy,{d,n} <- {a,n} + 4*(cy,{b,n}), it
-   can be used as DO_addlsh2(d,a,d,n,d[n]), for accumulation on {d,n+1}. */
-#if HAVE_NATIVE_mpn_addlsh2_n
-#define DO_addlsh2(d, a, b, n, cy)	\
-do {					\
-  (cy) <<= 2;				\
-  (cy) += mpn_addlsh2_n(d, a, b, n);	\
-} while (0)
-#else
-#if HAVE_NATIVE_mpn_addlsh_n
-#define DO_addlsh2(d, a, b, n, cy)	\
-do {					\
-  (cy) <<= 2;				\
-  (cy) += mpn_addlsh_n(d, a, b, n, 2);	\
-} while (0)
-#else
-/* The following is not a general substitute for addlsh2.
-   It is correct if d == b, but it is not if d == a.  */
-#define DO_addlsh2(d, a, b, n, cy)	\
-do {					\
-  (cy) <<= 2;				\
-  (cy) += mpn_lshift(d, b, n, 2);	\
-  (cy) += mpn_add_n(d, d, a, n);	\
-} while (0)
-#endif
-#endif
-
-/* Evaluates a polynomial of degree 2 < k < GMP_NUMB_BITS, in the
-   points +2 and -2. */
-int
-mpn_toom_eval_pm2 (mp_ptr xp2, mp_ptr xm2, unsigned k,
-		   mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
-{
-  int i;
-  int neg;
-  mp_limb_t cy;
-
-  ASSERT (k >= 3);
-  ASSERT (k < GMP_NUMB_BITS);
-
-  ASSERT (hn > 0);
-  ASSERT (hn <= n);
-
-  /* The degree k is also the number of full-size coefficients, so
-   * that last coefficient, of size hn, starts at xp + k*n. */
-
-  cy = 0;
-  DO_addlsh2 (xp2, xp + (k-2) * n, xp + k * n, hn, cy);
-  if (hn != n)
-    cy = mpn_add_1 (xp2 + hn, xp + (k-2) * n + hn, n - hn, cy);
-  for (i = k - 4; i >= 0; i -= 2)
-    DO_addlsh2 (xp2, xp + i * n, xp2, n, cy);
-  xp2[n] = cy;
-
-  k--;
-
-  cy = 0;
-  DO_addlsh2 (tp, xp + (k-2) * n, xp + k * n, n, cy);
-  for (i = k - 4; i >= 0; i -= 2)
-    DO_addlsh2 (tp, xp + i * n, tp, n, cy);
-  tp[n] = cy;
-
-  if (k & 1)
-    ASSERT_NOCARRY(mpn_lshift (tp , tp , n + 1, 1));
-  else
-    ASSERT_NOCARRY(mpn_lshift (xp2, xp2, n + 1, 1));
-
-  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  if (neg)
-    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
-  else
-    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
-#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
-  if (neg)
-    mpn_sub_n (xm2, tp, xp2, n + 1);
-  else
-    mpn_sub_n (xm2, xp2, tp, n + 1);
-
-  mpn_add_n (xp2, xp2, tp, n + 1);
-#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
-
-  ASSERT (xp2[n] < (1<<(k+2))-1);
-  ASSERT (xm2[n] < ((1<<(k+3))-1 - (1^k&1))/3);
-
-  neg ^= ((k & 1) - 1);
-
-  return neg;
-}
-
-#undef DO_addlsh2
diff --git a/gmp/mpn/generic/toom_eval_pm2exp.c b/gmp/mpn/generic/toom_eval_pm2exp.c
deleted file mode 100644
index b178fcac24..0000000000
--- a/gmp/mpn/generic/toom_eval_pm2exp.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/* mpn_toom_eval_pm2exp -- Evaluate a polynomial in +2^k and -2^k
-
-   Contributed to the GNU project by Niels Möller
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Evaluates a polynomial of degree k > 2, in the points +2^shift and -2^shift. */
-int
-mpn_toom_eval_pm2exp (mp_ptr xp2, mp_ptr xm2, unsigned k,
-		      mp_srcptr xp, mp_size_t n, mp_size_t hn, unsigned shift,
-		      mp_ptr tp)
-{
-  unsigned i;
-  int neg;
-#if HAVE_NATIVE_mpn_addlsh_n
-  mp_limb_t cy;
-#endif
-
-  ASSERT (k >= 3);
-  ASSERT (shift*k < GMP_NUMB_BITS);
-
-  ASSERT (hn > 0);
-  ASSERT (hn <= n);
-
-  /* The degree k is also the number of full-size coefficients, so
-   * that last coefficient, of size hn, starts at xp + k*n. */
-
-#if HAVE_NATIVE_mpn_addlsh_n
-  xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2*shift);
-  for (i = 4; i < k; i += 2)
-    xp2[n] += mpn_addlsh_n (xp2, xp2, xp + i*n, n, i*shift);
-
-  tp[n] = mpn_lshift (tp, xp+n, n, shift);
-  for (i = 3; i < k; i+= 2)
-    tp[n] += mpn_addlsh_n (tp, tp, xp+i*n, n, i*shift);
-
-  if (k & 1)
-    {
-      cy = mpn_addlsh_n (tp, tp, xp+k*n, hn, k*shift);
-      MPN_INCR_U (tp + hn, n+1 - hn, cy);
-    }
-  else
-    {
-      cy = mpn_addlsh_n (xp2, xp2, xp+k*n, hn, k*shift);
-      MPN_INCR_U (xp2 + hn, n+1 - hn, cy);
-    }
-
-#else /* !HAVE_NATIVE_mpn_addlsh_n */
-  xp2[n] = mpn_lshift (tp, xp+2*n, n, 2*shift);
-  xp2[n] += mpn_add_n (xp2, xp, tp, n);
-  for (i = 4; i < k; i += 2)
-    {
-      xp2[n] += mpn_lshift (tp, xp + i*n, n, i*shift);
-      xp2[n] += mpn_add_n (xp2, xp2, tp, n);
-    }
-
-  tp[n] = mpn_lshift (tp, xp+n, n, shift);
-  for (i = 3; i < k; i+= 2)
-    {
-      tp[n] += mpn_lshift (xm2, xp + i*n, n, i*shift);
-      tp[n] += mpn_add_n (tp, tp, xm2, n);
-    }
-
-  xm2[hn] = mpn_lshift (xm2, xp + k*n, hn, k*shift);
-  if (k & 1)
-    mpn_add (tp, tp, n+1, xm2, hn+1);
-  else
-    mpn_add (xp2, xp2, n+1, xm2, hn+1);
-#endif /* !HAVE_NATIVE_mpn_addlsh_n */
-
-  neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  if (neg)
-    mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
-  else
-    mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
-#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
-  if (neg)
-    mpn_sub_n (xm2, tp, xp2, n + 1);
-  else
-    mpn_sub_n (xm2, xp2, tp, n + 1);
-
-  mpn_add_n (xp2, xp2, tp, n + 1);
-#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
-
-  /* FIXME: the following asserts are useless if (k+1)*shift >= GMP_LIMB_BITS */
-  ASSERT ((k+1)*shift >= GMP_LIMB_BITS ||
-	  xp2[n] < ((CNST_LIMB(1)<<((k+1)*shift))-1)/((CNST_LIMB(1)<<shift)-1));
-  ASSERT ((k+2)*shift >= GMP_LIMB_BITS ||
-	  xm2[n] < ((CNST_LIMB(1)<<((k+2)*shift))-((k&1)?(CNST_LIMB(1)<<shift):1))/((CNST_LIMB(1)<<(2*shift))-1));
-
-  return neg;
-}
diff --git a/gmp/mpn/generic/toom_eval_pm2rexp.c b/gmp/mpn/generic/toom_eval_pm2rexp.c
deleted file mode 100644
index 3cac46bd90..0000000000
--- a/gmp/mpn/generic/toom_eval_pm2rexp.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/* mpn_toom_eval_pm2rexp -- Evaluate a polynomial in +2^-k and -2^-k
-
-   Contributed to the GNU project by Marco Bodrato
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#if HAVE_NATIVE_mpn_addlsh_n
-#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
-#else
-static mp_limb_t
-DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
-{
-#if USE_MUL_1 && 0
-  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
-#else
-  mp_limb_t __cy;
-  __cy = mpn_lshift(ws,src,n,s);
-  return    __cy + mpn_add_n(dst,dst,ws,n);
-#endif
-}
-#endif
-
-/* Evaluates a polynomial of degree k >= 3. */
-int
-mpn_toom_eval_pm2rexp (mp_ptr rp, mp_ptr rm,
-		      unsigned int q, mp_srcptr ap, mp_size_t n, mp_size_t t,
-		      unsigned int s, mp_ptr ws)
-{
-  unsigned int i;
-  int neg;
-  /* {ap,q*n+t} -> {rp,n+1} {rm,n+1} , with {ws, n+1}*/
-  ASSERT (n >= t);
-  ASSERT (s != 0); /* or _eval_pm1 should be used */
-  ASSERT (q > 1);
-  ASSERT (s*q < GMP_NUMB_BITS);
-  rp[n] = mpn_lshift(rp, ap, n, s*q);
-  ws[n] = mpn_lshift(ws, ap+n, n, s*(q-1));
-  if( (q & 1) != 0) {
-    ASSERT_NOCARRY(mpn_add(ws,ws,n+1,ap+n*q,t));
-    rp[n] += DO_mpn_addlsh_n(rp, ap+n*(q-1), n, s, rm);
-  } else {
-    ASSERT_NOCARRY(mpn_add(rp,rp,n+1,ap+n*q,t));
-  }
-  for(i=2; i<q-1; i++)
-  {
-    rp[n] += DO_mpn_addlsh_n(rp, ap+n*i, n, s*(q-i), rm);
-    i++;
-    ws[n] += DO_mpn_addlsh_n(ws, ap+n*i, n, s*(q-i), rm);
-  };
-
-  neg = (mpn_cmp (rp, ws, n + 1) < 0) ? ~0 : 0;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  if (neg)
-    mpn_add_n_sub_n (rp, rm, ws, rp, n + 1);
-  else
-    mpn_add_n_sub_n (rp, rm, rp, ws, n + 1);
-#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
-  if (neg)
-    mpn_sub_n (rm, ws, rp, n + 1);
-  else
-    mpn_sub_n (rm, rp, ws, n + 1);
-
-  ASSERT_NOCARRY (mpn_add_n (rp, rp, ws, n + 1));
-#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
-
-  return neg;
-}
diff --git a/gmp/mpn/generic/toom_interpolate_12pts.c b/gmp/mpn/generic/toom_interpolate_12pts.c
deleted file mode 100644
index 180b0329a3..0000000000
--- a/gmp/mpn/generic/toom_interpolate_12pts.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/* Interpolation for the algorithm Toom-Cook 6.5-way.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-#if HAVE_NATIVE_mpn_sublsh_n
-#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
-#else
-static mp_limb_t
-DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
-{
-#if USE_MUL_1 && 0
-  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
-#else
-  mp_limb_t __cy;
-  __cy = mpn_lshift(ws,src,n,s);
-  return    __cy + mpn_sub_n(dst,dst,ws,n);
-#endif
-}
-#endif
-
-#if HAVE_NATIVE_mpn_addlsh_n
-#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
-#else
-static mp_limb_t
-DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
-{
-#if USE_MUL_1 && 0
-  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
-#else
-  mp_limb_t __cy;
-  __cy = mpn_lshift(ws,src,n,s);
-  return    __cy + mpn_add_n(dst,dst,ws,n);
-#endif
-}
-#endif
-
-#if HAVE_NATIVE_mpn_subrsh
-#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
-#else
-/* FIXME: This is not a correct definition, it assumes no carry */
-#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
-do {									\
-  mp_limb_t __cy;							\
-  MPN_DECR_U (dst, nd, src[0] >> s);					\
-  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
-  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
-} while (0)
-#endif
-
-
-#if GMP_NUMB_BITS < 21
-#error Not implemented: Both sublsh_n(,,,20) should be corrected.
-#endif
-
-#if GMP_NUMB_BITS < 16
-#error Not implemented: divexact_by42525 needs splitting.
-#endif
-
-#if GMP_NUMB_BITS < 12
-#error Not implemented: Hard to adapt...
-#endif
-
-/* FIXME: tuneup should decide the best variant */
-#ifndef AORSMUL_FASTER_AORS_AORSLSH
-#define AORSMUL_FASTER_AORS_AORSLSH 1
-#endif
-#ifndef AORSMUL_FASTER_AORS_2AORSLSH
-#define AORSMUL_FASTER_AORS_2AORSLSH 1
-#endif
-#ifndef AORSMUL_FASTER_2AORSLSH
-#define AORSMUL_FASTER_2AORSLSH 1
-#endif
-#ifndef AORSMUL_FASTER_3AORSLSH
-#define AORSMUL_FASTER_3AORSLSH 1
-#endif
-
-#define BINVERT_9 \
-  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
-
-#define BINVERT_255 \
-  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))
-
-  /* FIXME: find some more general expressions for 2835^-1, 42525^-1 */
-#if GMP_LIMB_BITS == 32
-#define BINVERT_2835  (GMP_NUMB_MASK &		CNST_LIMB(0x53E3771B))
-#define BINVERT_42525 (GMP_NUMB_MASK &		CNST_LIMB(0x9F314C35))
-#else
-#if GMP_LIMB_BITS == 64
-#define BINVERT_2835  (GMP_NUMB_MASK &	CNST_LIMB(0x938CC70553E3771B))
-#define BINVERT_42525 (GMP_NUMB_MASK &	CNST_LIMB(0xE7B40D449F314C35))
-#endif
-#endif
-
-#ifndef mpn_divexact_by255
-#if GMP_NUMB_BITS % 8 == 0
-#define mpn_divexact_by255(dst,src,size) \
-  (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
-#else
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
-#else
-#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
-#endif
-#endif
-#endif
-
-#ifndef mpn_divexact_by9x4
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by9x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,2)
-#else
-#define mpn_divexact_by9x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<2)
-#endif
-#endif
-
-#ifndef mpn_divexact_by42525
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
-#define mpn_divexact_by42525(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,0)
-#else
-#define mpn_divexact_by42525(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525))
-#endif
-#endif
-
-#ifndef mpn_divexact_by2835x4
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
-#define mpn_divexact_by2835x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,2)
-#else
-#define mpn_divexact_by2835x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<2)
-#endif
-#endif
-
-/* Interpolation for Toom-6.5 (or Toom-6), using the evaluation
-   points: infinity(6.5 only), +-4, +-2, +-1, +-1/4, +-1/2, 0. More precisely,
-   we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
-   degree 11 (or 10), given the 12 (rsp. 11) values:
-
-     r0 = limit at infinity of f(x) / x^7,
-     r1 = f(4),f(-4),
-     r2 = f(2),f(-2),
-     r3 = f(1),f(-1),
-     r4 = f(1/4),f(-1/4),
-     r5 = f(1/2),f(-1/2),
-     r6 = f(0).
-
-   All couples of the form f(n),f(-n) must be already mixed with
-   toom_couple_handling(f(n),...,f(-n),...)
-
-   The result is stored in {pp, spt + 7*n (or 6*n)}.
-   At entry, r6 is stored at {pp, 2n},
-   r4 is stored at {pp + 3n, 3n + 1}.
-   r2 is stored at {pp + 7n, 3n + 1}.
-   r0 is stored at {pp +11n, spt}.
-
-   The other values are 3n+1 limbs each (with most significant limbs small).
-
-   Negative intermediate results are stored two-complemented.
-   Inputs are destroyed.
-*/
-
-void
-mpn_toom_interpolate_12pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5,
-			mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
-{
-  mp_limb_t cy;
-  mp_size_t n3;
-  mp_size_t n3p1;
-  n3 = 3 * n;
-  n3p1 = n3 + 1;
-
-#define   r4    (pp + n3)			/* 3n+1 */
-#define   r2    (pp + 7 * n)			/* 3n+1 */
-#define   r0    (pp +11 * n)			/* s+t <= 2*n */
-
-  /******************************* interpolation *****************************/
-  if (half != 0) {
-    cy = mpn_sub_n (r3, r3, r0, spt);
-    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
-
-    cy = DO_mpn_sublsh_n (r2, r0, spt, 10, wsi);
-    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
-    DO_mpn_subrsh(r5, n3p1, r0, spt, 2, wsi);
-
-    cy = DO_mpn_sublsh_n (r1, r0, spt, 20, wsi);
-    MPN_DECR_U (r1 + spt, n3p1 - spt, cy);
-    DO_mpn_subrsh(r4, n3p1, r0, spt, 4, wsi);
-  };
-
-  r4[n3] -= DO_mpn_sublsh_n (r4 + n, pp, 2 * n, 20, wsi);
-  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  mpn_add_n_sub_n (r1, r4, r4, r1, n3p1);
-#else
-  ASSERT_NOCARRY(mpn_add_n (wsi, r1, r4, n3p1));
-  mpn_sub_n (r4, r4, r1, n3p1); /* can be negative */
-  MP_PTR_SWAP(r1, wsi);
-#endif
-
-  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 10, wsi);
-  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
-#else
-  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
-  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
-  MP_PTR_SWAP(r5, wsi);
-#endif
-
-  r3[n3] -= mpn_sub_n (r3+n, r3+n, pp, 2 * n);
-
-#if AORSMUL_FASTER_AORS_AORSLSH
-  mpn_submul_1 (r4, r5, n3p1, 257); /* can be negative */
-#else
-  mpn_sub_n (r4, r4, r5, n3p1); /* can be negative */
-  DO_mpn_sublsh_n (r4, r5, n3p1, 8, wsi); /* can be negative */
-#endif
-  /* A division by 2835x4 follows. Warning: the operand can be negative! */
-  mpn_divexact_by2835x4(r4, r4, n3p1);
-  if ((r4[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
-    r4[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
-
-#if AORSMUL_FASTER_2AORSLSH
-  mpn_addmul_1 (r5, r4, n3p1, 60); /* can be negative */
-#else
-  DO_mpn_sublsh_n (r5, r4, n3p1, 2, wsi); /* can be negative */
-  DO_mpn_addlsh_n (r5, r4, n3p1, 6, wsi); /* can give a carry */
-#endif
-  mpn_divexact_by255(r5, r5, n3p1);
-
-  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r3, n3p1, 5, wsi));
-
-#if AORSMUL_FASTER_3AORSLSH
-  ASSERT_NOCARRY(mpn_submul_1 (r1, r2, n3p1, 100));
-#else
-  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 6, wsi));
-  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 5, wsi));
-  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 2, wsi));
-#endif
-  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r3, n3p1, 9, wsi));
-  mpn_divexact_by42525(r1, r1, n3p1);
-
-#if AORSMUL_FASTER_AORS_2AORSLSH
-  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 225));
-#else
-  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r1, n3p1));
-  ASSERT_NOCARRY(DO_mpn_addlsh_n (r2, r1, n3p1, 5, wsi));
-  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r1, n3p1, 8, wsi));
-#endif
-  mpn_divexact_by9x4(r2, r2, n3p1);
-
-  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r2, n3p1));
-
-  mpn_sub_n (r4, r2, r4, n3p1);
-  ASSERT_NOCARRY(mpn_rshift(r4, r4, n3p1, 1));
-  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r4, n3p1));
-
-  mpn_add_n (r5, r5, r1, n3p1);
-  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
-
-  /* last interpolation steps... */
-  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
-  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r5, n3p1));
-  /* ... could be mixed with recomposition
-	||H-r5|M-r5|L-r5|   ||H-r1|M-r1|L-r1|
-  */
-
-  /***************************** recomposition *******************************/
-  /*
-    pp[] prior to operations:
-    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
-
-    summation scheme for remaining operations:
-    |__12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
-    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
-	||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|
-  */
-
-  cy = mpn_add_n (pp + n, pp + n, r5, n);
-  cy = mpn_add_1 (pp + 2 * n, r5 + n, n, cy);
-#if HAVE_NATIVE_mpn_add_nc
-  cy = r5[n3] + mpn_add_nc(pp + n3, pp + n3, r5 + 2 * n, n, cy);
-#else
-  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
-  cy = r5[n3] + mpn_add_n (pp + n3, pp + n3, r5 + 2 * n, n);
-#endif
-  MPN_INCR_U (pp + n3 + n, 2 * n + 1, cy);
-
-  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r3, n);
-  cy = mpn_add_1 (pp + 2 * n3, r3 + n, n, pp[2 * n3]);
-#if HAVE_NATIVE_mpn_add_nc
-  cy = r3[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r3 + 2 * n, n, cy);
-#else
-  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
-  cy = r3[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r3 + 2 * n, n);
-#endif
-  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
-
-  pp[10*n]+=mpn_add_n (pp + 9 * n, pp + 9 * n, r1, n);
-  if (half) {
-    cy = mpn_add_1 (pp + 10 * n, r1 + n, n, pp[10 * n]);
-#if HAVE_NATIVE_mpn_add_nc
-    if (LIKELY (spt > n)) {
-      cy = r1[n3] + mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, n, cy);
-      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
-    } else {
-      ASSERT_NOCARRY(mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt, cy));
-    }
-#else
-    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
-    if (LIKELY (spt > n)) {
-      cy = r1[n3] + mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, n);
-      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
-    } else {
-      ASSERT_NOCARRY(mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt));
-    }
-#endif
-  } else {
-    ASSERT_NOCARRY(mpn_add_1 (pp + 10 * n, r1 + n, spt, pp[10 * n]));
-  }
-
-#undef   r0
-#undef   r2
-#undef   r4
-}
diff --git a/gmp/mpn/generic/toom_interpolate_16pts.c b/gmp/mpn/generic/toom_interpolate_16pts.c
deleted file mode 100644
index 5afe6641f6..0000000000
--- a/gmp/mpn/generic/toom_interpolate_16pts.c
+++ /dev/null
@@ -1,527 +0,0 @@
-/* Interpolation for the algorithm Toom-Cook 8.5-way.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#if GMP_NUMB_BITS < 29
-#error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.
-#endif
-
-#if GMP_NUMB_BITS < 28
-#error Not implemented: divexact_by188513325 and _by182712915 will not work.
-#endif
-
-
-#if HAVE_NATIVE_mpn_sublsh_n
-#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
-#else
-static mp_limb_t
-DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
-{
-#if USE_MUL_1 && 0
-  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
-#else
-  mp_limb_t __cy;
-  __cy = mpn_lshift(ws,src,n,s);
-  return    __cy + mpn_sub_n(dst,dst,ws,n);
-#endif
-}
-#endif
-
-#if HAVE_NATIVE_mpn_addlsh_n
-#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
-#else
-static mp_limb_t
-DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
-{
-#if USE_MUL_1 && 0
-  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
-#else
-  mp_limb_t __cy;
-  __cy = mpn_lshift(ws,src,n,s);
-  return    __cy + mpn_add_n(dst,dst,ws,n);
-#endif
-}
-#endif
-
-#if HAVE_NATIVE_mpn_subrsh
-#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
-#else
-/* FIXME: This is not a correct definition, it assumes no carry */
-#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
-do {									\
-  mp_limb_t __cy;							\
-  MPN_DECR_U (dst, nd, src[0] >> s);					\
-  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
-  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
-} while (0)
-#endif
-
-
-/* FIXME: tuneup should decide the best variant */
-#ifndef AORSMUL_FASTER_AORS_AORSLSH
-#define AORSMUL_FASTER_AORS_AORSLSH 1
-#endif
-#ifndef AORSMUL_FASTER_AORS_2AORSLSH
-#define AORSMUL_FASTER_AORS_2AORSLSH 1
-#endif
-#ifndef AORSMUL_FASTER_2AORSLSH
-#define AORSMUL_FASTER_2AORSLSH 1
-#endif
-#ifndef AORSMUL_FASTER_3AORSLSH
-#define AORSMUL_FASTER_3AORSLSH 1
-#endif
-
-#if GMP_NUMB_BITS < 43
-#define BIT_CORRECTION 1
-#define CORRECTION_BITS GMP_NUMB_BITS
-#else
-#define BIT_CORRECTION 0
-#define CORRECTION_BITS 0
-#endif
-
-#define BINVERT_9 \
-  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
-
-#define BINVERT_255 \
-  (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))
-
-  /* FIXME: find some more general expressions for inverses */
-#if GMP_LIMB_BITS == 32
-#define BINVERT_2835  (GMP_NUMB_MASK &		CNST_LIMB(0x53E3771B))
-#define BINVERT_42525 (GMP_NUMB_MASK &		CNST_LIMB(0x9F314C35))
-#define BINVERT_182712915 (GMP_NUMB_MASK &	CNST_LIMB(0x550659DB))
-#define BINVERT_188513325 (GMP_NUMB_MASK &	CNST_LIMB(0xFBC333A5))
-#define BINVERT_255x182712915L (GMP_NUMB_MASK &	CNST_LIMB(0x6FC4CB25))
-#define BINVERT_255x188513325L (GMP_NUMB_MASK &	CNST_LIMB(0x6864275B))
-#if GMP_NAIL_BITS == 0
-#define BINVERT_255x182712915H CNST_LIMB(0x1B649A07)
-#define BINVERT_255x188513325H CNST_LIMB(0x06DB993A)
-#else /* GMP_NAIL_BITS != 0 */
-#define BINVERT_255x182712915H \
-  (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS)))
-#define BINVERT_255x188513325H \
-  (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS)))
-#endif
-#else
-#if GMP_LIMB_BITS == 64
-#define BINVERT_2835  (GMP_NUMB_MASK &	CNST_LIMB(0x938CC70553E3771B))
-#define BINVERT_42525 (GMP_NUMB_MASK &	CNST_LIMB(0xE7B40D449F314C35))
-#define BINVERT_255x182712915  (GMP_NUMB_MASK &	CNST_LIMB(0x1B649A076FC4CB25))
-#define BINVERT_255x188513325  (GMP_NUMB_MASK &	CNST_LIMB(0x06DB993A6864275B))
-#endif
-#endif
-
-#ifndef mpn_divexact_by255
-#if GMP_NUMB_BITS % 8 == 0
-#define mpn_divexact_by255(dst,src,size) \
-  (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
-#else
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
-#else
-#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
-#endif
-#endif
-#endif
-
-#ifndef mpn_divexact_by255x4
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2)
-#else
-#define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2)
-#endif
-#endif
-
-#ifndef mpn_divexact_by9x16
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4)
-#else
-#define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4)
-#endif
-#endif
-
-#ifndef mpn_divexact_by42525x16
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
-#define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4)
-#else
-#define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4)
-#endif
-#endif
-
-#ifndef mpn_divexact_by2835x64
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
-#define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6)
-#else
-#define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6)
-#endif
-#endif
-
-#ifndef  mpn_divexact_by255x182712915
-#if GMP_NUMB_BITS < 36
-#if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H)
-/* FIXME: use mpn_bdiv_q_2_pi2 */
-#endif
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915)
-#define mpn_divexact_by255x182712915(dst,src,size)				\
-  do {										\
-    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0);	\
-    mpn_divexact_by255(dst,dst,size);						\
-  } while(0)
-#else
-#define mpn_divexact_by255x182712915(dst,src,size)	\
-  do {							\
-    mpn_divexact_1(dst,src,size,CNST_LIMB(182712915));	\
-    mpn_divexact_by255(dst,dst,size);			\
-  } while(0)
-#endif
-#else /* GMP_NUMB_BITS > 35 */
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915)
-#define mpn_divexact_by255x182712915(dst,src,size) \
-  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0)
-#else
-#define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915))
-#endif
-#endif /* GMP_NUMB_BITS >?< 36 */
-#endif
-
-#ifndef  mpn_divexact_by255x188513325
-#if GMP_NUMB_BITS < 36
-#if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H)
-/* FIXME: use mpn_bdiv_q_1_pi2 */
-#endif
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325)
-#define mpn_divexact_by255x188513325(dst,src,size)			\
-  do {									\
-    mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0);	\
-    mpn_divexact_by255(dst,dst,size);					\
-  } while(0)
-#else
-#define mpn_divexact_by255x188513325(dst,src,size)	\
-  do {							\
-    mpn_divexact_1(dst,src,size,CNST_LIMB(188513325));	\
-    mpn_divexact_by255(dst,dst,size);			\
-  } while(0)
-#endif
-#else /* GMP_NUMB_BITS > 35 */
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325)
-#define mpn_divexact_by255x188513325(dst,src,size) \
-  mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0)
-#else
-#define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325))
-#endif
-#endif /* GMP_NUMB_BITS >?< 36 */
-#endif
-
-/* Interpolation for Toom-8.5 (or Toom-8), using the evaluation
-   points: infinity(8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2,
-   +-1/8, 0. More precisely, we want to compute
-   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or
-   14), given the 16 (rsp. 15) values:
-
-     r0 = limit at infinity of f(x) / x^7,
-     r1 = f(8),f(-8),
-     r2 = f(4),f(-4),
-     r3 = f(2),f(-2),
-     r4 = f(1),f(-1),
-     r5 = f(1/4),f(-1/4),
-     r6 = f(1/2),f(-1/2),
-     r7 = f(1/8),f(-1/8),
-     r8 = f(0).
-
-   All couples of the form f(n),f(-n) must be already mixed with
-   toom_couple_handling(f(n),...,f(-n),...)
-
-   The result is stored in {pp, spt + 7*n (or 8*n)}.
-   At entry, r8 is stored at {pp, 2n},
-   r6 is stored at {pp + 3n, 3n + 1}.
-   r4 is stored at {pp + 7n, 3n + 1}.
-   r2 is stored at {pp +11n, 3n + 1}.
-   r0 is stored at {pp +15n, spt}.
-
-   The other values are 3n+1 limbs each (with most significant limbs small).
-
-   Negative intermediate results are stored two-complemented.
-   Inputs are destroyed.
-*/
-
-void
-mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7,
-			mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
-{
-  mp_limb_t cy;
-  mp_size_t n3;
-  mp_size_t n3p1;
-  n3 = 3 * n;
-  n3p1 = n3 + 1;
-
-#define   r6    (pp + n3)			/* 3n+1 */
-#define   r4    (pp + 7 * n)			/* 3n+1 */
-#define   r2    (pp +11 * n)			/* 3n+1 */
-#define   r0    (pp +15 * n)			/* s+t <= 2*n */
-
-  ASSERT( spt <= 2 * n );
-  /******************************* interpolation *****************************/
-  if( half != 0) {
-    cy = mpn_sub_n (r4, r4, r0, spt);
-    MPN_DECR_U (r4 + spt, n3p1 - spt, cy);
-
-    cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi);
-    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
-    DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi);
-
-    cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi);
-    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
-    DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi);
-
-    cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi);
-#if BIT_CORRECTION
-    cy = mpn_sub_1 (r1 + spt + BIT_CORRECTION, r1 + spt + BIT_CORRECTION,
-		    n3p1 - spt - BIT_CORRECTION, cy);
-    ASSERT (BIT_CORRECTION > 0 || cy == 0);
-    /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */
-    cy = r7[n3p1];
-    r7[n3p1] = 0x80;
-#else
-    MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);
-#endif
-    DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi);
-#if BIT_CORRECTION
-    /* FIXME: assumes r7[n3p1] is writable. */
-    ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 );
-    r7[n3p1] = cy;
-#endif
-  };
-
-  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi);
-  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
-#else
-  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
-  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
-  MP_PTR_SWAP(r5, wsi);
-#endif
-
-  r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi);
-  DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  mpn_add_n_sub_n (r3, r6, r6, r3, n3p1);
-#else
-  ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1));
-  mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */
-  MP_PTR_SWAP(r3, wsi);
-#endif
-
-  cy = DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi);
-#if BIT_CORRECTION
-  MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6);
-  cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi);
-  cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy);
-  ASSERT ( BIT_CORRECTION > 0 || cy != 0 );
-#else
-  r7[n3] -= cy;
-  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi);
-#endif
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  mpn_add_n_sub_n (r1, r7, r7, r1, n3p1);
-#else
-  mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */
-  mpn_add_n (r1, r1, r7, n3p1);  /* if BIT_CORRECTION != 0, can give a carry. */
-  MP_PTR_SWAP(r7, wsi);
-#endif
-
-  r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n);
-
-#if AORSMUL_FASTER_2AORSLSH
-  mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */
-#else
-  DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */
-  DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */
-#endif
-
-  mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */
-#if AORSMUL_FASTER_3AORSLSH
-  mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */
-#else
-  DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */
-  DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */
-  DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */
-#endif
-  mpn_divexact_by255x188513325(r7, r7, n3p1);
-
-  mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
-  /* A division by 2835x64 follows. Warning: the operand can be negative! */
-  mpn_divexact_by2835x64(r5, r5, n3p1);
-  if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)
-    r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));
-
-#if AORSMUL_FASTER_AORS_AORSLSH
-  mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */
-#else
-  mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */
-  DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */
-#endif
-#if AORSMUL_FASTER_2AORSLSH
-  mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */
-#else
-  DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
-  DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
-#endif
-  /* A division by 255x4 follows. Warning: the operand can be negative! */
-  mpn_divexact_by255x4(r6, r6, n3p1);
-  if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
-    r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
-
-  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi));
-
-  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi));
-  ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400));
-
-  /* If GMP_NUMB_BITS < 42 next operations on r1 can give a carry!*/
-  DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi);
-  mpn_submul_1 (r1, r2, n3p1, 1428);
-  mpn_submul_1 (r1, r3, n3p1, 112896);
-  mpn_divexact_by255x182712915(r1, r1, n3p1);
-
-  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425));
-  mpn_divexact_by42525x16(r2, r2, n3p1);
-
-#if AORSMUL_FASTER_AORS_2AORSLSH
-  ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969));
-#else
-  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
-  ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi));
-  ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi));
-#endif
-  ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900));
-  mpn_divexact_by9x16(r3, r3, n3p1);
-
-  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1));
-  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1));
-  ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1));
-
-  mpn_add_n (r6, r2, r6, n3p1);
-  ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1));
-  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1));
-
-  mpn_sub_n (r5, r3, r5, n3p1);
-  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
-  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1));
-
-  mpn_add_n (r7, r1, r7, n3p1);
-  ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1));
-  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1));
-
-  /* last interpolation steps... */
-  /* ... could be mixed with recomposition
-	||H-r7|M-r7|L-r7|   ||H-r5|M-r5|L-r5|
-  */
-
-  /***************************** recomposition *******************************/
-  /*
-    pp[] prior to operations:
-    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
-
-    summation scheme for remaining operations:
-    |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
-    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
-	||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|   ||H r7|M r7|L r7|
-  */
-
-  cy = mpn_add_n (pp + n, pp + n, r7, n);
-  cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy);
-#if HAVE_NATIVE_mpn_add_nc
-  cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy);
-#else
-  MPN_INCR_U (r7 + 2 * n, n + 1, cy);
-  cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n);
-#endif
-  MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy);
-
-  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n);
-  cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]);
-#if HAVE_NATIVE_mpn_add_nc
-  cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy);
-#else
-  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
-  cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n);
-#endif
-  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
-
-  pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n);
-  cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]);
-#if HAVE_NATIVE_mpn_add_nc
-  cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy);
-#else
-  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
-  cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n);
-#endif
-  MPN_INCR_U (pp +12 * n, 2 * n + 1, cy);
-
-  pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n);
-  if ( half ) {
-    cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]);
-#if HAVE_NATIVE_mpn_add_nc
-    if(LIKELY(spt > n)) {
-      cy = r1[n3] + mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy);
-      MPN_INCR_U (pp + 16 * n, spt - n, cy);
-    } else {
-      ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy));
-    }
-#else
-    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
-    if(LIKELY(spt > n)) {
-      cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n);
-      MPN_INCR_U (pp + 16 * n, spt - n, cy);
-    } else {
-      ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt));
-    }
-#endif
-  } else {
-    ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n]));
-  }
-
-#undef   r0
-#undef   r2
-#undef   r4
-#undef   r6
-}
diff --git a/gmp/mpn/generic/toom_interpolate_5pts.c b/gmp/mpn/generic/toom_interpolate_5pts.c
index 9fa5f0b7a6..67260cc3d5 100644
--- a/gmp/mpn/generic/toom_interpolate_5pts.c
+++ b/gmp/mpn/generic/toom_interpolate_5pts.c
@@ -7,33 +7,23 @@
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
    GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
 
-Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -41,29 +31,28 @@ see https://www.gnu.org/licenses/.  */
 void
 mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
 			   mp_size_t k, mp_size_t twor, int sa,
-			   mp_limb_t vinf0)
+			   mp_limb_t vinf0, mp_ptr ws)
 {
   mp_limb_t cy, saved;
-  mp_size_t twok;
-  mp_size_t kk1;
-  mp_ptr c1, v1, c3, vinf;
-
-  twok = k + k;
-  kk1 = twok + 1;
+  mp_size_t twok = k + k;
+  mp_size_t kk1 = twok + 1;
+  mp_ptr c1, v1, c3, vinf, c5;
+  mp_limb_t cout; /* final carry, should be zero at the end */
 
   c1 = c  + k;
   v1 = c1 + k;
   c3 = v1 + k;
   vinf = c3 + k;
+  c5 = vinf + k;
 
 #define v0 (c)
   /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
      thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
   */
-  if (sa)
-    ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
+  if (sa <= 0)
+    mpn_add_n (v2, v2, vm1, kk1);
   else
-    ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
+    mpn_sub_n (v2, v2, vm1, kk1);
 
   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
        v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */
@@ -74,18 +63,17 @@ mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
        v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */
 
-  /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
-     tm1 >= 0                                         (0  1 0  1 0)
+  /* (2) vm1 <- tm1 := (v1 - sa*vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
+     tm1 >= 0                                            (0  1 0  1 0)
      No carry comes out from {v1, kk1} +/- {vm1, kk1},
-     and the division by two is exact.
-     If (sa!=0) the sign of vm1 is negative */
-  if (sa)
+     and the division by two is exact */
+  if (sa <= 0)
     {
 #ifdef HAVE_NATIVE_mpn_rsh1add_n
       mpn_rsh1add_n (vm1, v1, vm1, kk1);
 #else
-      ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
-      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
+      mpn_add_n (vm1, v1, vm1, kk1);
+      mpn_rshift (vm1, vm1, kk1, 1);
 #endif
     }
   else
@@ -93,8 +81,8 @@ mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
 #ifdef HAVE_NATIVE_mpn_rsh1sub_n
       mpn_rsh1sub_n (vm1, v1, vm1, kk1);
 #else
-      ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
-      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
+      mpn_sub_n (vm1, v1, vm1, kk1);
+      mpn_rshift (vm1, vm1, kk1, 1);
 #endif
     }
 
@@ -115,8 +103,8 @@ mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
 #ifdef HAVE_NATIVE_mpn_rsh1sub_n
   mpn_rsh1sub_n (v2, v2, v1, kk1);
 #else
-  ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
-  ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
+  mpn_sub_n (v2, v2, v1, kk1);
+  mpn_rshift (v2, v2, kk1, 1);
 #endif
 
   /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
@@ -125,75 +113,58 @@ mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
   /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
      result is v1 >= 0
   */
-  ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
+  mpn_sub_n (v1, v1, vm1, kk1);
 
-  /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
-  cy = mpn_add_n (c1, c1, vm1, kk1);
-  MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
-  /* Memory allocated for vm1 is now free, it can be recycled ...*/
+  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0   v1-v0-tm1      hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
 
   /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
      result is v2 >= 0 */
   saved = vinf[0];       /* Remember v1's highest byte (will be overwritten). */
   vinf[0] = vinf0;       /* Set the right value for vinf0                     */
-#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
-  cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
+#ifdef HAVE_NATIVE_mpn_sublsh1_n
+  cy = mpn_sublsh1_n (v2, v2, vinf, twor);
 #else
-  /* Overwrite unused vm1 */
-  cy = mpn_lshift (vm1, vinf, twor, 1);
-  cy += mpn_sub_n (v2, v2, vm1, twor);
+  cy = mpn_lshift (ws, vinf, twor, 1);
+  cy += mpn_sub_n (v2, v2, ws, twor);
 #endif
   MPN_DECR_U (v2 + twor, kk1 - twor, cy);
 
-  /* Current matrix is
-     [1 0 0 0 0; vinf
-      0 1 0 0 0; v2
-      1 0 1 0 0; v1
-      0 1 0 1 0; vm1
-      0 0 0 0 1] v0
-     Some values already are in-place (we added vm1 in the correct position)
-     | vinf|  v1 |  v0 |
-	      | vm1 |
-     One still is in a separated area
-	| +v2 |
-     We have to compute v1-=vinf; vm1 -= v2,
-	   |-vinf|
-	      | -v2 |
-     Carefully reordering operations we can avoid to compute twice the sum
-     of the high half of v2 plus the low half of vinf.
-  */
-
-  /* Add the high half of t2 in {vinf} */
-  if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */
-    cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
-    MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
-  } else { /* triggered only by very unbalanced cases like
-	      (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
-    ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
-  }
   /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
      result is >= 0 */
-  /* Side effect: we also subtracted (high half) vm1 -= v2 */
   cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
-  vinf0 = vinf[0];                     /* Save again the right value for vinf0 */
   vinf[0] = saved;
   MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last bytes.       */
+  __GMPN_ADD_1 (cout, vinf, vinf, twor, vinf0); /* Add vinf0, propagate carry. */
 
-  /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
-     Operate only on the low half.
+  /* (8) vm1 <- vm1-t2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
+     vm1 >= 0
   */
-  cy = mpn_sub_n (c1, c1, v2, k);
-  MPN_DECR_U (v1, kk1, cy);
+  mpn_sub_n (vm1, vm1, v2, kk1);            /* No overlapping here.        */
 
   /********************* Beginning the final phase **********************/
 
-  /* Most of the recomposition was done */
+  /* {c,2k} {c+2k,2k  } {c+4k ,2r } {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       t1      hi(t1)+vinf   tm1    (v2-vm1-3t1)/6    EMPTY */
+
+  /* (9) add t2 in {c+3k, ...} */
+  cy = mpn_add_n (c3, c3, v2, kk1);
+  __GMPN_ADD_1 (cout, c5 + 1, c5 + 1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
+
+  /* {c,2k} {c+2k,2k  } {c+4k ,2r } {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+       v0       t1      hi(t1)+vinf   tm1    (v2-vm1-3t1)/6    EMPTY */
+  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
+     v0       t1         vinf      tm1  t2
+		    +t2 */
+
+  /* add vm1 in {c+k, ...} */
+  cy = mpn_add_n (c1, c1, vm1, kk1);
+  __GMPN_ADD_1 (cout, c3 + 1, c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
 
-  /* add t2 in {c+3k, ...}, but only the low half */
-  cy = mpn_add_n (c3, c3, v2, k);
-  vinf[0] += cy;
-  ASSERT(vinf[0] >= cy); /* No carry */
-  MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
+  /* c   c+k  c+2k  c+3k  c+4k      t   t+2k+1  t+4k+2
+     v0       t1         vinf      tm1  t2
+	  +tm1      +t2    */
 
 #undef v0
+#undef t2
 }
diff --git a/gmp/mpn/generic/toom_interpolate_6pts.c b/gmp/mpn/generic/toom_interpolate_6pts.c
deleted file mode 100644
index bdb2e95b89..0000000000
--- a/gmp/mpn/generic/toom_interpolate_6pts.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
-#ifndef mpn_divexact_by3
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && MODLIMB_INVERSE_3
-#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,MODLIMB_INVERSE_3,0)
-#else
-#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
-#endif
-#endif
-
-/* Interpolation for Toom-3.5, using the evaluation points: infinity,
-   1, -1, 2, -2. More precisely, we want to compute
-   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the
-   six values
-
-     w5 = f(0),
-     w4 = f(-1),
-     w3 = f(1)
-     w2 = f(-2),
-     w1 = f(2),
-     w0 = limit at infinity of f(x) / x^5,
-
-   The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at
-   {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at
-   {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most
-   significant limbs small). f(-1) and f(-2) may be negative, signs
-   determined by the flag bits. All intermediate results are positive.
-   Inputs are destroyed.
-
-   Interpolation sequence was taken from the paper: "Integer and
-   Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".
-   Some slight variations were introduced: adaptation to "gmp
-   instruction set", and a final saving of an operation by interlacing
-   interpolation and recomposition phases.
-*/
-
-void
-mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
-			   mp_ptr w4, mp_ptr w2, mp_ptr w1,
-			   mp_size_t w0n)
-{
-  mp_limb_t cy;
-  /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */
-  mp_limb_t cy4, cy6, embankment;
-
-  ASSERT( n > 0 );
-  ASSERT( 2*n >= w0n && w0n > 0 );
-
-#define w5  pp					/* 2n   */
-#define w3  (pp + 2 * n)			/* 2n+1 */
-#define w0  (pp + 5 * n)			/* w0n  */
-
-  /* Interpolate with sequence:
-     W2 =(W1 - W2)>>2
-     W1 =(W1 - W5)>>1
-     W1 =(W1 - W2)>>1
-     W4 =(W3 - W4)>>1
-     W2 =(W2 - W4)/3
-     W3 = W3 - W4 - W5
-     W1 =(W1 - W3)/3
-     // Last steps are mixed with recomposition...
-     W2 = W2 - W0<<2
-     W4 = W4 - W2
-     W3 = W3 - W1
-     W2 = W2 - W0
-  */
-
-  /* W2 =(W1 - W2)>>2 */
-  if (flags & toom6_vm2_neg)
-    mpn_add_n (w2, w1, w2, 2 * n + 1);
-  else
-    mpn_sub_n (w2, w1, w2, 2 * n + 1);
-  mpn_rshift (w2, w2, 2 * n + 1, 2);
-
-  /* W1 =(W1 - W5)>>1 */
-  w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n);
-  mpn_rshift (w1, w1, 2 * n + 1, 1);
-
-  /* W1 =(W1 - W2)>>1 */
-#if HAVE_NATIVE_mpn_rsh1sub_n
-  mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);
-#else
-  mpn_sub_n (w1, w1, w2, 2 * n + 1);
-  mpn_rshift (w1, w1, 2 * n + 1, 1);
-#endif
-
-  /* W4 =(W3 - W4)>>1 */
-  if (flags & toom6_vm1_neg)
-    {
-#if HAVE_NATIVE_mpn_rsh1add_n
-      mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);
-#else
-      mpn_add_n (w4, w3, w4, 2 * n + 1);
-      mpn_rshift (w4, w4, 2 * n + 1, 1);
-#endif
-    }
-  else
-    {
-#if HAVE_NATIVE_mpn_rsh1sub_n
-      mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);
-#else
-      mpn_sub_n (w4, w3, w4, 2 * n + 1);
-      mpn_rshift (w4, w4, 2 * n + 1, 1);
-#endif
-    }
-
-  /* W2 =(W2 - W4)/3 */
-  mpn_sub_n (w2, w2, w4, 2 * n + 1);
-  mpn_divexact_by3 (w2, w2, 2 * n + 1);
-
-  /* W3 = W3 - W4 - W5 */
-  mpn_sub_n (w3, w3, w4, 2 * n + 1);
-  w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);
-
-  /* W1 =(W1 - W3)/3 */
-  mpn_sub_n (w1, w1, w3, 2 * n + 1);
-  mpn_divexact_by3 (w1, w1, 2 * n + 1);
-
-  /*
-    [1 0 0 0 0 0;
-     0 1 0 0 0 0;
-     1 0 1 0 0 0;
-     0 1 0 1 0 0;
-     1 0 1 0 1 0;
-     0 0 0 0 0 1]
-
-    pp[] prior to operations:
-     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
-
-    summation scheme for remaining operations:
-     |______________5|n_____4|n_____3|n_____2|n______|n______|pp
-     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
-				    || H w4  | L w4  |
-		    || H w2  | L w2  |
-	    || H w1  | L w1  |
-			    ||-H w1  |-L w1  |
-		     |-H w0  |-L w0 ||-H w2  |-L w2  |
-  */
-  cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);
-  MPN_INCR_U (pp + 3 * n + 1, n, cy);
-
-  /* W2 -= W0<<2 */
-#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1
-#if HAVE_NATIVE_mpn_sublsh2_n_ip1
-  cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
-#else
-  cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
-#endif
-#else
-  /* {W4,2*n+1} is now free and can be overwritten. */
-  cy = mpn_lshift(w4, w0, w0n, 2);
-  cy+= mpn_sub_n(w2, w2, w4, w0n);
-#endif
-  MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);
-
-  /* W4L = W4L - W2L */
-  cy = mpn_sub_n (pp + n, pp + n, w2, n);
-  MPN_DECR_U (w3, 2 * n + 1, cy);
-
-  /* W3H = W3H + W2L */
-  cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);
-  /* W1L + W2H */
-  cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);
-  MPN_INCR_U (w1 + n, n + 1, cy);
-
-  /* W0 = W0 + W1H */
-  if (LIKELY (w0n > n))
-    cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);
-  else
-    cy6 = mpn_add_n (w0, w0, w1 + n, w0n);
-
-  /*
-    summation scheme for the next operation:
-     |...____5|n_____4|n_____3|n_____2|n______|n______|pp
-     |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__|
-		     ...-w0___|-w1_w2 |
-  */
-  /* if(LIKELY(w0n>n)) the two operands below DO overlap! */
-  cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);
-
-  /* embankment is a "dirty trick" to avoid carry/borrow propagation
-     beyond allocated memory */
-  embankment = w0[w0n - 1] - 1;
-  w0[w0n - 1] = 1;
-  if (LIKELY (w0n > n)) {
-    if (cy4 > cy6)
-      MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
-    else
-      MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
-    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);
-    MPN_INCR_U (w0 + n, w0n - n, cy6);
-  } else {
-    MPN_INCR_U (pp + 4 * n, w0n + n, cy4);
-    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);
-  }
-  w0[w0n - 1] += embankment;
-
-#undef w5
-#undef w3
-#undef w0
-
-}
diff --git a/gmp/mpn/generic/toom_interpolate_7pts.c b/gmp/mpn/generic/toom_interpolate_7pts.c
index 2a67dba82f..872da26309 100644
--- a/gmp/mpn/generic/toom_interpolate_7pts.c
+++ b/gmp/mpn/generic/toom_interpolate_7pts.c
@@ -1,7 +1,6 @@
 /* mpn_toom_interpolate_7pts -- Interpolate for toom44, 53, 62.
 
-   Contributed to the GNU project by Niels Möller.
-   Improvements by Marco Bodrato.
+   Contributed to the GNU project by Niels Möller.
 
    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
@@ -12,216 +11,149 @@ Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 
-#define BINVERT_3 MODLIMB_INVERSE_3
-
-#define BINVERT_9 \
-  ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
-
-#define BINVERT_15 \
-  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
-
-/* For the various mpn_divexact_byN here, fall back to using either
-   mpn_pi1_bdiv_q_1 or mpn_divexact_1.  The former has less overhead and is
-   many faster if it is native.  For now, since mpn_divexact_1 is native on
-   several platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use
-   mpn_pi1_bdiv_q_1 unconditionally.  FIXME.  */
+/* Arithmetic right shift, requiring that the shifted out bits are zero. */
+static inline void
+divexact_2exp (mp_ptr rp, mp_srcptr sp, mp_size_t n, unsigned shift)
+{
+  mp_limb_t sign;
+  sign = LIMB_HIGHBIT_TO_MASK (sp[n-1] << GMP_NAIL_BITS) << (GMP_NUMB_BITS - shift);
+  ASSERT_NOCARRY (mpn_rshift (rp, sp, n, shift));
+  rp[n-1] |= sign & GMP_NUMB_MASK;
+}
 
 /* For odd divisors, mpn_divexact_1 works fine with two's complement. */
 #ifndef mpn_divexact_by3
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
-#else
 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
 #endif
-#endif
-
 #ifndef mpn_divexact_by9
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by9(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,9,BINVERT_9,0)
-#else
 #define mpn_divexact_by9(dst,src,size) mpn_divexact_1(dst,src,size,9)
 #endif
-#endif
-
 #ifndef mpn_divexact_by15
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by15(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,15,BINVERT_15,0)
-#else
 #define mpn_divexact_by15(dst,src,size) mpn_divexact_1(dst,src,size,15)
 #endif
-#endif
 
-/* Interpolation for toom4, using the evaluation points 0, infinity,
-   1, -1, 2, -2, 1/2. More precisely, we want to compute
+/* Interpolation for toom4, using the evaluation points 0, infinity, 2,
+   1, -1, 1/2, -1/2. More precisely, we want to compute
    f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 6, given the
    seven values
 
      w0 = f(0),
-     w1 = f(-2),
-     w2 = f(1),
+     w1 = 64 f(-1/2),
+     w2 = 64 f(1/2),
      w3 = f(-1),
-     w4 = f(2)
-     w5 = 64 * f(1/2)
+     w4 = f(1)
+     w5 = f(2)
      w6 = limit at infinity of f(x) / x^6,
 
    The result is 6*n + w6n limbs. At entry, w0 is stored at {rp, 2n },
    w2 is stored at { rp + 2n, 2n+1 }, and w6 is stored at { rp + 6n,
    w6n }. The other values are 2n + 1 limbs each (with most
    significant limbs small). f(-1) and f(-1/2) may be negative, signs
-   determined by the flag bits. Inputs are destroyed.
+   determined by the flag bits. All intermediate results are
+   represented in two's complement. Inputs are destroyed.
 
    Needs (2*n + 1) limbs of temporary storage.
 */
 
 void
-mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
+mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom4_flags flags,
 			   mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,
 			   mp_size_t w6n, mp_ptr tp)
 {
-  mp_size_t m;
+  mp_size_t m = 2*n + 1;
+  mp_ptr w2 = rp + 2*n;
+  mp_ptr w6 = rp + 6*n;
   mp_limb_t cy;
 
-  m = 2*n + 1;
-#define w0 rp
-#define w2 (rp + 2*n)
-#define w6 (rp + 6*n)
-
   ASSERT (w6n > 0);
   ASSERT (w6n <= 2*n);
 
-  /* Using formulas similar to Marco Bodrato's
+  /* Using Marco Bodrato's formulas
 
-     W5 = W5 + W4
-     W1 =(W4 - W1)/2
-     W4 = W4 - W0
-     W4 =(W4 - W1)/4 - W6*16
-     W3 =(W2 - W3)/2
-     W2 = W2 - W3
+     W5 = W5 + W2
+     W3 =(W3 + W4)/2
+     W1 = W1 + W2
+     W2 = W2 - W6 - W0*64
+     W2 =(W2*2 - W1)/8
+     W4 = W4 - W3
 
-     W5 = W5 - W2*65      May be negative.
-     W2 = W2 - W6 - W0
-     W5 =(W5 + W2*45)/2   Now >= 0 again.
-     W4 =(W4 - W2)/3
-     W2 = W2 - W4
+     W5 = W5 - W4*65
+     W4 = W4 - W6 - W0
+     W5 = W5 + W4*45
+     W2 =(W2 - W4)/3
+     W4 = W4 - W2
 
-     W1 = W5 - W1         May be negative.
-     W5 =(W5 - W3*8)/9
+     W1 = W1 - W5
+     W5 =(W5 - W3*16)/ 18
      W3 = W3 - W5
-     W1 =(W1/15 + W5)/2   Now >= 0 again.
+     W1 =(W1/30 + W5)/ 2
      W5 = W5 - W1
 
-     where W0 = f(0), W1 = f(-2), W2 = f(1), W3 = f(-1),
-	   W4 = f(2), W5 = f(1/2), W6 = f(oo),
-
-     Note that most intermediate results are positive; the ones that
-     may be negative are represented in two's complement. We must
-     never shift right a value that may be negative, since that would
-     invalidate the sign bit. On the other hand, divexact by odd
-     numbers work fine with two's complement.
+     where W0 = f(0), W1 = 64 f(-1/2), W2 = 64 f(1/2), W3 = f(-1),
+	   W4 = f(1), W5 = f(2), W6 = f(oo),
   */
 
-  mpn_add_n (w5, w5, w4, m);
-  if (flags & toom7_w1_neg)
-    {
-#ifdef HAVE_NATIVE_mpn_rsh1add_n
-      mpn_rsh1add_n (w1, w1, w4, m);
-#else
-      mpn_add_n (w1, w1, w4, m);  ASSERT (!(w1[0] & 1));
-      mpn_rshift (w1, w1, m, 1);
-#endif
-    }
+  mpn_add_n (w5, w5, w2, m);
+  if (flags & toom4_w3_neg)
+    mpn_add_n (w3, w3, w4, m);
   else
-    {
-#ifdef HAVE_NATIVE_mpn_rsh1sub_n
-      mpn_rsh1sub_n (w1, w4, w1, m);
-#else
-      mpn_sub_n (w1, w4, w1, m);  ASSERT (!(w1[0] & 1));
-      mpn_rshift (w1, w1, m, 1);
-#endif
-    }
-  mpn_sub (w4, w4, m, w0, 2*n);
-  mpn_sub_n (w4, w4, w1, m);  ASSERT (!(w4[0] & 3));
-  mpn_rshift (w4, w4, m, 2); /* w4>=0 */
-
-  tp[w6n] = mpn_lshift (tp, w6, w6n, 4);
-  mpn_sub (w4, w4, m, tp, w6n+1);
-
-  if (flags & toom7_w3_neg)
-    {
-#ifdef HAVE_NATIVE_mpn_rsh1add_n
-      mpn_rsh1add_n (w3, w3, w2, m);
-#else
-      mpn_add_n (w3, w3, w2, m);  ASSERT (!(w3[0] & 1));
-      mpn_rshift (w3, w3, m, 1);
-#endif
-    }
+    mpn_sub_n (w3, w4, w3, m);
+  divexact_2exp (w3, w3, m, 1);
+  if (flags & toom4_w1_neg)
+    mpn_add_n (w1, w1, w2, m);
   else
-    {
-#ifdef HAVE_NATIVE_mpn_rsh1sub_n
-      mpn_rsh1sub_n (w3, w2, w3, m);
-#else
-      mpn_sub_n (w3, w2, w3, m);  ASSERT (!(w3[0] & 1));
-      mpn_rshift (w3, w3, m, 1);
-#endif
-    }
-
-  mpn_sub_n (w2, w2, w3, m);
-
-  mpn_submul_1 (w5, w2, m, 65);
+    mpn_sub_n (w1, w2, w1, m);
   mpn_sub (w2, w2, m, w6, w6n);
-  mpn_sub (w2, w2, m, w0, 2*n);
-
-  mpn_addmul_1 (w5, w2, m, 45);  ASSERT (!(w5[0] & 1));
-  mpn_rshift (w5, w5, m, 1);
-  mpn_sub_n (w4, w4, w2, m);
-
-  mpn_divexact_by3 (w4, w4, m);
+  tp[2*n] = mpn_lshift (tp, rp, 2*n, 6);
+  mpn_sub_n (w2, w2, tp, m);
+  mpn_lshift (w2, w2, m, 1);
+  mpn_sub_n (w2, w2, w1, m);
+  divexact_2exp (w2, w2, m, 3);
+  mpn_sub_n (w4, w4, w3, m);
+
+  mpn_submul_1 (w5, w4, m, 65);
+  mpn_sub (w4, w4, m, w6, w6n);
+  mpn_sub (w4, w4, m, rp, 2*n);
+  mpn_addmul_1 (w5, w4, m, 45);
   mpn_sub_n (w2, w2, w4, m);
+  /* Rely on divexact working with two's complement */
+  mpn_divexact_by3 (w2, w2, m);
+  mpn_sub_n (w4, w4, w2, m);
 
-  mpn_sub_n (w1, w5, w1, m);
-  mpn_lshift (tp, w3, m, 3);
+  mpn_sub_n (w1, w1, w5, m);
+  mpn_lshift (tp, w3, m, 4);
   mpn_sub_n (w5, w5, tp, m);
+  divexact_2exp (w5, w5, m, 1);
   mpn_divexact_by9 (w5, w5, m);
   mpn_sub_n (w3, w3, w5, m);
-
+  divexact_2exp (w1, w1, m, 1);
   mpn_divexact_by15 (w1, w1, m);
-  mpn_add_n (w1, w1, w5, m);  ASSERT (!(w1[0] & 1));
-  mpn_rshift (w1, w1, m, 1); /* w1>=0 now */
+  mpn_add_n (w1, w1, w5, m);
+  divexact_2exp (w1, w1, m, 1);
   mpn_sub_n (w5, w5, w1, m);
 
-  /* These bounds are valid for the 4x4 polynomial product of toom44,
-   * and they are conservative for toom53 and toom62. */
-  ASSERT (w1[2*n] < 2);
-  ASSERT (w2[2*n] < 3);
-  ASSERT (w3[2*n] < 4);
-  ASSERT (w4[2*n] < 3);
-  ASSERT (w5[2*n] < 2);
+  /* Two's complement coefficients must be non-negative at the end of
+     this procedure. */
+  ASSERT ( !(w1[2*n] & GMP_LIMB_HIGHBIT));
+  ASSERT ( !(w2[2*n] & GMP_LIMB_HIGHBIT));
+  ASSERT ( !(w3[2*n] & GMP_LIMB_HIGHBIT));
+  ASSERT ( !(w4[2*n] & GMP_LIMB_HIGHBIT));
+  ASSERT ( !(w5[2*n] & GMP_LIMB_HIGHBIT));
 
   /* Addition chain. Note carries and the 2n'th limbs that need to be
    * added in.
@@ -242,8 +174,8 @@ mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
    *        c7   c6   c5   c4   c3                 Carries to propagate
    */
 
-  cy = mpn_add_n (rp + n, rp + n, w1, m);
-  MPN_INCR_U (w2 + n + 1, n , cy);
+  cy = mpn_add_n (rp + n, rp + n, w1, 2*n);
+  MPN_INCR_U (w2 + n, n + 1, w1[2*n] + cy);
   cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n);
   MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);
   cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);
@@ -251,7 +183,10 @@ mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
   cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);
   MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);
   if (w6n > n + 1)
-    ASSERT_NOCARRY (mpn_add (rp + 6*n, rp + 6*n, w6n, w5 + n, n + 1));
+    {
+      mp_limb_t c7 = mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, n + 1);
+      MPN_INCR_U (rp + 7*n + 1, w6n - n - 1, c7);
+    }
   else
     {
       ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n));
diff --git a/gmp/mpn/generic/toom_interpolate_8pts.c b/gmp/mpn/generic/toom_interpolate_8pts.c
deleted file mode 100644
index 9e8808334e..0000000000
--- a/gmp/mpn/generic/toom_interpolate_8pts.c
+++ /dev/null
@@ -1,212 +0,0 @@
-/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
-
-   Contributed to the GNU project by Marco Bodrato.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#define BINVERT_3 MODLIMB_INVERSE_3
-
-#define BINVERT_15 \
-  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
-
-#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
-
-#ifndef mpn_divexact_by3
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
-#else
-#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
-#endif
-#endif
-
-#ifndef mpn_divexact_by45
-#if GMP_NUMB_BITS % 12 == 0
-#define mpn_divexact_by45(dst,src,size) \
-  (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
-#else
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
-#else
-#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
-#endif
-#endif
-#endif
-
-#if HAVE_NATIVE_mpn_sublsh2_n_ip1
-#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
-#else
-#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
-#endif
-
-#if HAVE_NATIVE_mpn_sublsh_n
-#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
-#else
-static mp_limb_t
-DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
-{
-#if USE_MUL_1 && 0
-  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
-#else
-  mp_limb_t __cy;
-  __cy = mpn_lshift (ws,src,n,s);
-  return __cy + mpn_sub_n (dst,dst,ws,n);
-#endif
-}
-#endif
-
-
-#if HAVE_NATIVE_mpn_subrsh
-#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
-#else
-/* This is not a correct definition, it assumes no carry */
-#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
-do {									\
-  mp_limb_t __cy;							\
-  MPN_DECR_U (dst, nd, src[0] >> s);					\
-  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
-  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
-} while (0)
-#endif
-
-/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
-   points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
-   we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
-   degree 7 (or 6), given the 8 (rsp. 7) values:
-
-     r1 = limit at infinity of f(x) / x^7,
-     r2 = f(4),
-     r3 = f(-4),
-     r4 = f(2),
-     r5 = f(-2),
-     r6 = f(1),
-     r7 = f(-1),
-     r8 = f(0).
-
-   All couples of the form f(n),f(-n) must be already mixed with
-   toom_couple_handling(f(n),...,f(-n),...)
-
-   The result is stored in {pp, spt + 7*n (or 6*n)}.
-   At entry, r8 is stored at {pp, 2n},
-   r5 is stored at {pp + 3n, 3n + 1}.
-
-   The other values are 2n+... limbs each (with most significant limbs small).
-
-   All intermediate results are positive.
-   Inputs are destroyed.
-*/
-
-void
-mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
-			   mp_ptr r3, mp_ptr r7,
-			   mp_size_t spt, mp_ptr ws)
-{
-  mp_limb_signed_t cy;
-  mp_ptr r5, r1;
-  r5 = (pp + 3 * n);			/* 3n+1 */
-  r1 = (pp + 7 * n);			/* spt */
-
-  /******************************* interpolation *****************************/
-
-  DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
-  cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
-  MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
-
-  DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
-  cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
-  MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
-
-  r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
-  cy = mpn_sub_n (r7, r7, r1, spt);
-  MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
-
-  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
-  ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
-
-  ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
-
-  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
-
-  mpn_divexact_by45 (r3, r3, 3 * n + 1);
-
-  ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
-
-  ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
-
-  /* last interpolation steps... */
-  /* ... are mixed with recomposition */
-
-  /***************************** recomposition *******************************/
-  /*
-    pp[] prior to operations:
-     |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
-
-    summation scheme for remaining operations:
-     |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
-     |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
-	  ||_H r3|_M r3|_L*r3|
-				  ||_H_r7|_M_r7|_L_r7|
-		      ||-H r3|-M r3|-L*r3|
-				  ||-H*r5|-M_r5|-L_r5|
-  */
-
-  cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
-  cy-= mpn_sub_n (pp + n, pp + n, r5, n);
-  if (0 > cy)
-    MPN_DECR_U (r7 + n, 2*n + 1, 1);
-  else
-    MPN_INCR_U (r7 + n, 2*n + 1, cy);
-
-  cy = mpn_sub_n (pp + 2*n, r7 + n, r5 + n, n); /* Mr7-Mr5 */
-  MPN_DECR_U (r7 + 2*n, n + 1, cy);
-
-  cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
-  r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
-  cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
-  if (UNLIKELY(0 > cy))
-    MPN_DECR_U (r5 + n + 1, 2*n, 1);
-  else
-    MPN_INCR_U (r5 + n + 1, 2*n, cy);
-
-  ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
-
-  cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
-  MPN_INCR_U (r3 + 2*n, n + 1, cy);
-  cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
-  if (LIKELY(spt != n))
-    MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
-  else
-    ASSERT (r3[3*n] | cy == 0);
-}
diff --git a/gmp/mpn/generic/trialdiv.c b/gmp/mpn/generic/trialdiv.c
deleted file mode 100644
index cad159c3a0..0000000000
--- a/gmp/mpn/generic/trialdiv.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/* mpn_trialdiv -- find small factors of an mpn number using trial division.
-
-   Contributed to the GNU project by Torbjorn Granlund.
-
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
-   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-/*
-   This function finds the first (smallest) factor represented in
-   trialdivtab.h.  It does not stop the factoring effort just because it has
-   reached some sensible limit, such as the square root of the input number.
-
-   The caller can limit the factoring effort by passing NPRIMES.  The function
-   will then divide until that limit, or perhaps a few primes more.  A position
-   which only mpn_trialdiv can make sense of is returned in the WHERE
-   parameter.  It can be used for restarting the factoring effort; the first
-   call should pass 0 here.
-
-   Input:        1. A non-negative number T = {tp,tn}
-                 2. NPRIMES as described above,
-                 3. *WHERE as described above.
-   Output:       1. *WHERE updated as described above.
-                 2. Return value is non-zero if we found a factor, else zero
-                    To get the actual prime factor, compute the mod B inverse
-                    of the return value.
-*/
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-struct gmp_primes_dtab {
-  mp_limb_t binv;
-  mp_limb_t lim;
-};
-
-struct gmp_primes_ptab {
-  mp_limb_t ppp;	/* primes, multiplied together */
-  mp_limb_t cps[7];	/* ppp values pre-computed for mpn_mod_1s_4p */
-  unsigned int idx:24;	/* index of  first primes in dtab */
-  unsigned int np :8;	/* number of primes related to this entry */
-};
-
-
-static const struct gmp_primes_dtab gmp_primes_dtab[] =
-{
-#define WANT_dtab
-#define P(p,inv,lim) {inv,lim}
-#include "trialdivtab.h"
-#undef WANT_dtab
-#undef P
-  {0,0}
-};
-
-static const struct gmp_primes_ptab gmp_primes_ptab[] =
-{
-#define WANT_ptab
-#include "trialdivtab.h"
-#undef WANT_ptab
-};
-
-#define PTAB_LINES (sizeof (gmp_primes_ptab) / sizeof (gmp_primes_ptab[0]))
-
-/* FIXME: We could optimize out one of the outer loop conditions if we
-   had a final ptab entry with a huge nd field.  */
-mp_limb_t
-mpn_trialdiv (mp_srcptr tp, mp_size_t tn, mp_size_t nprimes, int *where)
-{
-  mp_limb_t ppp;
-  const mp_limb_t *cps;
-  const struct gmp_primes_dtab *dp;
-  long i, j, idx, np;
-  mp_limb_t r, q;
-
-  ASSERT (tn >= 1);
-
-  for (i = *where; i < PTAB_LINES; i++)
-    {
-      ppp = gmp_primes_ptab[i].ppp;
-      cps = gmp_primes_ptab[i].cps;
-
-      r = mpn_mod_1s_4p (tp, tn, ppp << cps[1], cps);
-
-      idx = gmp_primes_ptab[i].idx;
-      np = gmp_primes_ptab[i].np;
-
-      /* Check divisibility by individual primes.  */
-      dp = &gmp_primes_dtab[idx] + np;
-      for (j = -np; j < 0; j++)
-	{
-	  q = r * dp[j].binv;
-	  if (q <= dp[j].lim)
-	    {
-	      *where = i;
-	      return dp[j].binv;
-	    }
-	}
-
-      nprimes -= np;
-      if (nprimes <= 0)
-	return 0;
-    }
-  return 0;
-}
diff --git a/gmp/mpn/generic/udiv_w_sdiv.c b/gmp/mpn/generic/udiv_w_sdiv.c
index 7136429f0f..f876aa5734 100644
--- a/gmp/mpn/generic/udiv_w_sdiv.c
+++ b/gmp/mpn/generic/udiv_w_sdiv.c
@@ -9,40 +9,30 @@
    GNU MP RELEASE.
 
 
-Copyright 1992, 1994, 1996, 2000, 2011, 2012 Free Software Foundation, Inc.
+Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
 
 mp_limb_t
-mpn_udiv_w_sdiv (mp_limb_t *rp, mp_limb_t a1, mp_limb_t a0, mp_limb_t d)
+mpn_udiv_w_sdiv (rp, a1, a0, d)
+     mp_limb_t *rp, a1, a0, d;
 {
   mp_limb_t q, r;
   mp_limb_t c0, c1, b1;
@@ -52,7 +42,7 @@ mpn_udiv_w_sdiv (mp_limb_t *rp, mp_limb_t a1, mp_limb_t a0, mp_limb_t d)
 
   if ((mp_limb_signed_t) d >= 0)
     {
-      if (a1 < d - a1 - (a0 >> (GMP_LIMB_BITS - 1)))
+      if (a1 < d - a1 - (a0 >> (BITS_PER_MP_LIMB - 1)))
 	{
 	  /* dividend, divisor, and quotient are nonnegative */
 	  sdiv_qrnnd (q, r, a1, a0, d);
@@ -60,18 +50,18 @@ mpn_udiv_w_sdiv (mp_limb_t *rp, mp_limb_t a1, mp_limb_t a0, mp_limb_t d)
       else
 	{
 	  /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */
-	  sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (GMP_LIMB_BITS - 1));
+	  sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (BITS_PER_MP_LIMB - 1));
 	  /* Divide (c1*2^32 + c0) by d */
 	  sdiv_qrnnd (q, r, c1, c0, d);
 	  /* Add 2^31 to quotient */
-	  q += (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
+	  q += (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
 	}
     }
   else
     {
       b1 = d >> 1;			/* d/2, between 2^30 and 2^31 - 1 */
       c1 = a1 >> 1;			/* A/2 */
-      c0 = (a1 << (GMP_LIMB_BITS - 1)) + (a0 >> 1);
+      c0 = (a1 << (BITS_PER_MP_LIMB - 1)) + (a0 >> 1);
 
       if (a1 < b1)			/* A < 2^32*b1, so A/2 < 2^31*b1 */
 	{
@@ -126,12 +116,12 @@ mpn_udiv_w_sdiv (mp_limb_t *rp, mp_limb_t a1, mp_limb_t a0, mp_limb_t d)
 	{				/* Hence a1 = d - 1 = 2*b1 - 1 */
 	  if (a0 >= -d)
 	    {
-	      q = -CNST_LIMB(1);
+	      q = -1;
 	      r = a0 + d;
 	    }
 	  else
 	    {
-	      q = -CNST_LIMB(2);
+	      q = -2;
 	      r = a0 + 2*d;
 	    }
 	}
diff --git a/gmp/mpn/generic/zero.c b/gmp/mpn/generic/zero.c
deleted file mode 100644
index e6e7fd3101..0000000000
--- a/gmp/mpn/generic/zero.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* mpn_zero
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_zero (mp_ptr rp, mp_size_t n)
-{
-  mp_size_t i;
-
-  rp += n;
-  for (i = -n; i != 0; i++)
-    rp[i] = 0;
-}
diff --git a/gmp/mpn/i960/README b/gmp/mpn/i960/README
new file mode 100644
index 0000000000..d68a0a83eb
--- /dev/null
+++ b/gmp/mpn/i960/README
@@ -0,0 +1,9 @@
+This directory contains mpn functions for Intel i960 processors.
+
+RELEVANT OPTIMIZATION ISSUES
+
+The code in this directory is not well optimized.
+
+STATUS
+
+The code in this directory has not been tested.
diff --git a/gmp/mpn/i960/add_n.s b/gmp/mpn/i960/add_n.s
new file mode 100644
index 0000000000..24abc6b0c9
--- /dev/null
+++ b/gmp/mpn/i960/add_n.s
@@ -0,0 +1,41 @@
+# I960 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+	.align 4
+	.globl ___gmpn_add_n
+___gmpn_add_n:
+	mov	0,g6		# g6 = carry saved across iterations, starts at 0
+	cmpo	1,0		# clear cy (operands differ, so the C bit ends up 0)
+# Per-limb loop: g3 counts down to 0 while g1, g2 and g0 step 4 bytes per limb.
+Loop:	subo	1,g3,g3		# update loop counter (g3 = g3 - 1)
+	ld	(g1),g5		# load from s1_ptr
+	addo	4,g1,g1		# s1_ptr++
+	ld	(g2),g4		# load from s2_ptr
+	addo	4,g2,g2		# s2_ptr++
+	cmpo	g6,1		# restore cy from g6, relies on cy being 0
+	addc	g4,g5,g4	# main add: g4 = s1 limb + s2 limb + cy
+	subc	0,0,g6		# save cy in g6
+	st	g4,(g0)		# store result to res_ptr
+	addo	4,g0,g0		# res_ptr++
+	cmpobne	0,g3,Loop	# loop until g3 == 0; taken branch clears C bit
+# NOTE(review): confirm subc 0,0 leaves 1 (not 0/-1) in g6 for a set carry.
+	mov	g6,g0		# saved carry becomes the value left in g0
+	ret
diff --git a/gmp/mpn/i960/addmul_1.s b/gmp/mpn/i960/addmul_1.s
new file mode 100644
index 0000000000..984f540f7b
--- /dev/null
+++ b/gmp/mpn/i960/addmul_1.s
@@ -0,0 +1,46 @@
+# I960 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+	.align	4
+	.globl	___gmpn_addmul_1
+___gmpn_addmul_1:
+	subo	g2,0,g2		# g2 = 0 - g2: negated limb count, used as index
+	shlo	2,g2,g4		# g4 = g2 << 2 (negative byte offset)
+	subo	g4,g1,g1	# g1 = g1 - g4: just past the end of the source limbs
+	subo	g4,g0,g13	# g13 = g0 - g4: just past the end of the result limbs
+	mov	0,g0		# g0 = carry limb, initially 0
+
+	cmpo	1,0		# clear C bit on AC.cc
+
+Loop:	ld	(g1)[g2*4],g5	# g5 = source limb at index g2
+	emul	g3,g5,g6	# g6 = low word of g3 * g5 (g7 is used below as the high word)
+	ld	(g13)[g2*4],g5	# g5 = result limb to accumulate into
+
+	addc	g0,g6,g6	# add carry-in limb; relies on that C bit is clear
+	addc	0,g7,g7		# fold the carry out of the low word into g7
+	addc	g5,g6,g6	# add result limb; relies on that C bit is clear
+	st	g6,(g13)[g2*4]	# write the accumulated limb back
+	addc	0,g7,g0		# next carry limb = g7 + C
+
+	addo	g2,1,g2		# step the index toward 0
+	cmpobne	0,g2,Loop	# when branch is taken, clears C bit
+
+	ret			# g0 still holds the last carry limb
diff --git a/gmp/mpn/i960/mul_1.s b/gmp/mpn/i960/mul_1.s
new file mode 100644
index 0000000000..7912aa1fb7
--- /dev/null
+++ b/gmp/mpn/i960/mul_1.s
@@ -0,0 +1,43 @@
+# I960 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+	.align	4
+	.globl	___gmpn_mul_1
+___gmpn_mul_1:
+	subo	g2,0,g2		# g2 = 0 - g2: negated limb count, used as index
+	shlo	2,g2,g4		# g4 = g2 << 2 (negative byte offset)
+	subo	g4,g1,g1	# g1 = g1 - g4: just past the end of the source limbs
+	subo	g4,g0,g13	# g13 = g0 - g4: just past the end of the result limbs
+	mov	0,g0		# g0 = carry limb, initially 0
+
+	cmpo	1,0		# clear C bit on AC.cc
+
+Loop:	ld	(g1)[g2*4],g5	# g5 = source limb at index g2
+	emul	g3,g5,g6	# g6 = low word of g3 * g5 (g7 is used below as the high word)
+
+	addc	g0,g6,g6	# add carry-in limb; relies on that C bit is clear
+	st	g6,(g13)[g2*4]	# store the low word as the result limb
+	addc	0,g7,g0		# next carry limb = g7 + C
+
+	addo	g2,1,g2		# step the index toward 0
+	cmpobne	0,g2,Loop	# when branch is taken, clears C bit
+
+	ret			# g0 still holds the last carry limb
diff --git a/gmp/mpn/i960/sub_n.s b/gmp/mpn/i960/sub_n.s
new file mode 100644
index 0000000000..87adcbf6a2
--- /dev/null
+++ b/gmp/mpn/i960/sub_n.s
@@ -0,0 +1,41 @@
+# I960 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+	.align 4
+	.globl ___gmpn_sub_n
+___gmpn_sub_n:
+	mov	1,g6		# g6 = carry saved across iterations, starts at 1
+	cmpo	1,0		# clear cy (operands differ, so the C bit ends up 0)
+# Per-limb loop: g3 counts down to 0 while g1, g2 and g0 step 4 bytes per limb.
+Loop:	subo	1,g3,g3		# update loop counter (g3 = g3 - 1)
+	ld	(g1),g5		# load from s1_ptr
+	addo	4,g1,g1		# s1_ptr++
+	ld	(g2),g4		# load from s2_ptr
+	addo	4,g2,g2		# s2_ptr++
+	cmpo	g6,1		# restore cy from g6, relies on cy being 0
+	subc	g4,g5,g4	# main subtract: s1 limb minus s2 limb, using cy
+	subc	0,0,g6		# save cy in g6
+	st	g4,(g0)		# store result to res_ptr
+	addo	4,g0,g0		# res_ptr++
+	cmpobne	0,g3,Loop	# loop until g3 == 0; when branch is taken, cy will be 0
+# NOTE(review): confirm subc 0,0 yields 0/1 in g6, and that cy (not borrow) is the intended result.
+	mov	g6,g0		# saved carry becomes the value left in g0
+	ret
diff --git a/gmp/mpn/ia64/README b/gmp/mpn/ia64/README
index 45c2d6337f..9252271ab7 100644
--- a/gmp/mpn/ia64/README
+++ b/gmp/mpn/ia64/README
@@ -1,30 +1,19 @@
-Copyright 2000-2005 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
@@ -166,7 +155,7 @@ ldfp8 with all alignment headache that implies.
 ================================================================
 mpn_addmul_N
 
-For best speed, we need to give up using mpn_addmul_2 as the main multiply
+For best speed, we need to give up using mpn_addmul_1 as the main multiply
 building block, and instead take multiple v limbs per loop.  For the Itanium
 1, we need to take about 8 limbs at a time for full speed.  For the Itanium
 2, something like mpn_addmul_4 should be enough.
@@ -248,7 +237,7 @@ loops, since that will allow us to do better load-use scheduling without too
 much unrolling.
 
 Depending on size or operand alignment, we get 1 c/l or 0.5 c/l on Itanium
-2, according to tune/speed.  Cache bank conflicts?
+2, according to tests/devel/try.  Cache bank conflicts?
 
 
 
@@ -279,3 +268,10 @@ Optimization, Intel document 251110-003, May 2004.
 All the above documents can be found online at
 
     http://developer.intel.com/design/itanium/manuals.htm
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/gmp/mpn/ia64/add_n_sub_n.asm b/gmp/mpn/ia64/add_n_sub_n.asm
deleted file mode 100644
index 34a506568f..0000000000
--- a/gmp/mpn/ia64/add_n_sub_n.asm
+++ /dev/null
@@ -1,309 +0,0 @@
-dnl  IA-64 mpn_add_n_sub_n -- mpn parallel addition and subtraction.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C           cycles/limb
-C Itanium:      ?
-C Itanium 2:    2.25
-
-C INPUT PARAMETERS
-define(`sp', `r32')
-define(`dp', `r33')
-define(`up', `r34')
-define(`vp', `r35')
-define(`n',  `r36')
-
-C Some useful aliases for registers we use
-define(`u0',`r16') define(`u1',`r17') define(`u2',`r18') define(`u3',`r19')
-define(`v0',`r20') define(`v1',`r21') define(`v2',`r22') define(`v3',`r23')
-define(`s0',`r24') define(`s1',`r25') define(`s2',`r26') define(`s3',`r27')
-define(`d0',`r28') define(`d1',`r29') define(`d2',`r30') define(`d3',`r31')
-define(`up0',`up')
-define(`up1',`r14')
-define(`vp0',`vp')
-define(`vp1',`r15')
-
-define(`cmpltu',  `cmp.ltu')
-define(`cmpeqor', `cmp.eq.or')
-
-ASM_START()
-PROLOGUE(mpn_add_n_sub_n)
-	.prologue
-	.save	ar.lc, r2
-	.body
-ifdef(`HAVE_ABI_32',`
-	addp4	sp = 0, sp		C				M I
-	addp4	dp = 0, dp		C				M I
-	nop.i	0
-	addp4	up = 0, up		C				M I
-	addp4	vp = 0, vp		C				M I
-	zxt4	n = n			C				I
-	;;
-')
-
-	and	r9 = 3, n		C				M I
-	mov.i	r2 = ar.lc		C				I0
-	add	up1 = 8, up0		C				M I
-	add	vp1 = 8, vp0		C				M I
-	add	r8 = -2, n		C				M I
-	add	r10 = 256, up		C				M I
-	;;
-	shr.u	r8 = r8, 2		C				I0
-	cmp.eq	p10, p0 = 0, r9		C				M I
-	cmp.eq	p11, p0 = 2, r9		C				M I
-	cmp.eq	p12, p0 = 3, r9		C				M I
-	add	r11 = 256, vp		C				M I
-	;;
-	mov.i	ar.lc = r8		C				I0
-  (p10)	br	L(b0)			C				B
-  (p11)	br	L(b2)			C				B
-  (p12)	br	L(b3)			C				B
-
-L(b1):	ld8	u3 = [up0], 8		C				M01
-	add	up1 = 8, up1		C				M I
-	cmpltu	p14, p15 = 4, n		C				M I
-	ld8	v3 = [vp0], 8		C				M01
-	add	vp1 = 8, vp1		C				M I
-	;;
-	add	s3 = u3, v3		C				M I
-	sub	d3 = u3, v3		C				M I
-	mov	r8 = 0			C				M I
-	;;
-	cmpltu	p9, p0 = s3, v3		C  carry from add3		M I
-	cmpltu	p13, p0 = u3, v3	C borrow from sub3		M I
-  (p15)	br	L(cj1)			C				B
-	st8	[sp] = s3, 8		C				M23
-	st8	[dp] = d3, 8		C				M23
-	br	L(c0)			C				B
-
-L(b0):	cmp.ne	p9, p0 = r0, r0		C				M I
-	cmp.ne	p13, p0 = r0, r0	C				M I
-L(c0):	ld8	u0 = [up0], 16		C				M01
-	ld8	u1 = [up1], 16		C				M01
-	;;
-	ld8	v0 = [vp0], 16		C				M01
-	ld8	v1 = [vp1], 16		C				M01
-	;;
-	ld8	u2 = [up0], 16		C				M01
-	ld8	u3 = [up1], 16		C				M01
-	;;
-	ld8	v2 = [vp0], 16		C				M01
-	ld8	v3 = [vp1], 16		C				M01
-	;;
-	add	s0 = u0, v0		C				M I
-	add	s1 = u1, v1		C				M I
-	sub	d0 = u0, v0		C				M I
-	sub	d1 = u1, v1		C				M I
-	;;
-	cmpltu	p6, p0 = s0, v0		C  carry from add0		M I
-	cmpltu	p7, p0 = s1, v1		C  carry from add1		M I
-	cmpltu	p10, p0 = u0, v0	C borrow from sub0		M I
-	cmpltu	p11, p0 = u1, v1	C borrow from sub1		M I
-	;;
-	nop	0			C
-	br.cloop.dptk	L(top)		C				B
-	br	L(end)			C				B
-
-L(b3):	ld8	u1 = [up0], 8		C				M01
-	add	up1 = 8, up1		C				M I
-	ld8	v1 = [vp0], 8		C				M01
-	;;
-	add	vp1 = 8, vp1		C				M I
-	add	s1 = u1, v1		C				M I
-	sub	d1 = u1, v1		C				M I
-	;;
-	cmpltu	p7, p0 = s1, v1		C  carry from add1		M I
-	cmpltu	p11, p0 = u1, v1	C borrow from sub1		M I
-	;;
-	st8	[sp] = s1, 8		C				M23
-	st8	[dp] = d1, 8		C				M23
-	br	L(c2)			C				B
-
-	ALIGN(32)
-L(b2):	cmp.ne	p7, p0 = r0, r0		C				M I
-	cmp.ne	p11, p0 = r0, r0	C				M I
-	nop	0
-L(c2):	ld8	u2 = [up0], 16		C				M01
-	ld8	u3 = [up1], 16		C				M01
-	cmpltu	p14, p0 = 4, n		C				M I
-	;;
-	ld8	v2 = [vp0], 16		C				M01
-	ld8	v3 = [vp1], 16		C				M01
-  (p14)	br	L(gt4)			C				B
-	;;
-	add	s2 = u2, v2		C				M I
-	add	s3 = u3, v3		C				M I
-	sub	d2 = u2, v2		C				M I
-	sub	d3 = u3, v3		C				M I
-	;;
-	cmpltu	p8, p0 = s2, v2		C  carry from add0		M I
-	cmpltu	p9, p0 = s3, v3		C  carry from add3		M I
-	cmpltu	p12, p0 = u2, v2	C borrow from sub2		M I
-	cmpltu	p13, p0 = u3, v3	C borrow from sub3		M I
-	br	L(cj2)			C				B
-	;;
-L(gt4):	ld8	u0 = [up0], 16		C				M01
-	ld8	u1 = [up1], 16		C				M01
-	;;
-	ld8	v0 = [vp0], 16		C				M01
-	ld8	v1 = [vp1], 16		C				M01
-	;;
-	add	s2 = u2, v2		C				M I
-	add	s3 = u3, v3		C				M I
-	sub	d2 = u2, v2		C				M I
-	sub	d3 = u3, v3		C				M I
-	;;
-	cmpltu	p8, p0 = s2, v2		C  carry from add0		M I
-	cmpltu	p9, p0 = s3, v3		C  carry from add1		M I
-	cmpltu	p12, p0 = u2, v2	C borrow from sub0		M I
-	cmpltu	p13, p0 = u3, v3	C borrow from sub1		M I
-	br.cloop.dptk	L(mid)		C				B
-
-	ALIGN(32)
-L(top):
-	ld8	u0 = [up0], 16		C				M01
-	ld8	u1 = [up1], 16		C				M01
-   (p9)	cmpeqor	p6, p0 = -1, s0		C				M I
-   (p9)	add	s0 = 1, s0		C				M I
-  (p13)	cmpeqor	p10, p0 = 0, d0		C				M I
-  (p13)	add	d0 = -1, d0		C				M I
-	;;
-	ld8	v0 = [vp0], 16		C				M01
-	ld8	v1 = [vp1], 16		C				M01
-   (p6)	cmpeqor	p7, p0 = -1, s1		C				M I
-   (p6)	add	s1 = 1, s1		C				M I
-  (p10)	cmpeqor	p11, p0 = 0, d1		C				M I
-  (p10)	add	d1 = -1, d1		C				M I
-	;;
-	st8	[sp] = s0, 8		C				M23
-	st8	[dp] = d0, 8		C				M23
-	add	s2 = u2, v2		C				M I
-	add	s3 = u3, v3		C				M I
-	sub	d2 = u2, v2		C				M I
-	sub	d3 = u3, v3		C				M I
-	;;
-	st8	[sp] = s1, 8		C				M23
-	st8	[dp] = d1, 8		C				M23
-	cmpltu	p8, p0 = s2, v2		C  carry from add2		M I
-	cmpltu	p9, p0 = s3, v3		C  carry from add3		M I
-	cmpltu	p12, p0 = u2, v2	C borrow from sub2		M I
-	cmpltu	p13, p0 = u3, v3	C borrow from sub3		M I
-	;;
-L(mid):
-	ld8	u2 = [up0], 16		C				M01
-	ld8	u3 = [up1], 16		C				M01
-   (p7)	cmpeqor	p8, p0 = -1, s2		C				M I
-   (p7)	add	s2 = 1, s2		C				M I
-  (p11)	cmpeqor	p12, p0 = 0, d2		C				M I
-  (p11)	add	d2 = -1, d2		C				M I
-	;;
-	ld8	v2 = [vp0], 16		C				M01
-	ld8	v3 = [vp1], 16		C				M01
-   (p8)	cmpeqor	p9, p0 = -1, s3		C				M I
-   (p8)	add	s3 = 1, s3		C				M I
-  (p12)	cmpeqor	p13, p0 = 0, d3		C				M I
-  (p12)	add	d3 = -1, d3		C				M I
-	;;
-	st8	[sp] = s2, 8		C				M23
-	st8	[dp] = d2, 8		C				M23
-	add	s0 = u0, v0		C				M I
-	add	s1 = u1, v1		C				M I
-	sub	d0 = u0, v0		C				M I
-	sub	d1 = u1, v1		C				M I
-	;;
-	st8	[sp] = s3, 8		C				M23
-	st8	[dp] = d3, 8		C				M23
-	cmpltu	p6, p0 = s0, v0		C  carry from add0		M I
-	cmpltu	p7, p0 = s1, v1		C  carry from add1		M I
-	cmpltu	p10, p0 = u0, v0	C borrow from sub0		M I
-	cmpltu	p11, p0 = u1, v1	C borrow from sub1		M I
-	;;
-	lfetch	[r10], 32		C				M?
-	lfetch	[r11], 32		C				M?
-	br.cloop.dptk	L(top)		C				B
-	;;
-
-L(end):
-	nop	0
-	nop	0
-   (p9)	cmpeqor	p6, p0 = -1, s0		C				M I
-   (p9)	add	s0 = 1, s0		C				M I
-  (p13)	cmpeqor	p10, p0 = 0, d0		C				M I
-  (p13)	add	d0 = -1, d0		C				M I
-	;;
-	nop	0
-	nop	0
-   (p6)	cmpeqor	p7, p0 = -1, s1		C				M I
-   (p6)	add	s1 = 1, s1		C				M I
-  (p10)	cmpeqor	p11, p0 = 0, d1		C				M I
-  (p10)	add	d1 = -1, d1		C				M I
-	;;
-	st8	[sp] = s0, 8		C				M23
-	st8	[dp] = d0, 8		C				M23
-	add	s2 = u2, v2		C				M I
-	add	s3 = u3, v3		C				M I
-	sub	d2 = u2, v2		C				M I
-	sub	d3 = u3, v3		C				M I
-	;;
-	st8	[sp] = s1, 8		C				M23
-	st8	[dp] = d1, 8		C				M23
-	cmpltu	p8, p0 = s2, v2		C  carry from add2		M I
-	cmpltu	p9, p0 = s3, v3		C  carry from add3		M I
-	cmpltu	p12, p0 = u2, v2	C borrow from sub2		M I
-	cmpltu	p13, p0 = u3, v3	C borrow from sub3		M I
-	;;
-L(cj2):
-   (p7)	cmpeqor	p8, p0 = -1, s2		C				M I
-   (p7)	add	s2 = 1, s2		C				M I
-  (p11)	cmpeqor	p12, p0 = 0, d2		C				M I
-  (p11)	add	d2 = -1, d2		C				M I
-	mov	r8 = 0			C				M I
-	nop	0
-	;;
-	st8	[sp] = s2, 8		C				M23
-	st8	[dp] = d2, 8		C				M23
-   (p8)	cmpeqor	p9, p0 = -1, s3		C				M I
-   (p8)	add	s3 = 1, s3		C				M I
-  (p12)	cmpeqor	p13, p0 = 0, d3		C				M I
-  (p12)	add	d3 = -1, d3		C				M I
-	;;
-L(cj1):
-   (p9)	mov	r8 = 2			C				M I
-	;;
-	mov.i	ar.lc = r2		C				I0
-  (p13)	add	r8 = 1, r8		C				M I
-	st8	[sp] = s3		C				M23
-	st8	[dp] = d3		C				M23
-	br.ret.sptk.many b0		C				B
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/ia64/addmul_1.asm b/gmp/mpn/ia64/addmul_1.asm
index ffa3297763..6cd9d2b755 100644
--- a/gmp/mpn/ia64/addmul_1.asm
+++ b/gmp/mpn/ia64/addmul_1.asm
@@ -1,35 +1,23 @@
 dnl  IA-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
 dnl  result to a second limb vector.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2000-2005, 2007 Free Software Foundation, Inc.
+dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2007 Free Software
+dnl  Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/ia64/addmul_2.asm b/gmp/mpn/ia64/addmul_2.asm
index f5bc46b75d..2c258022ae 100644
--- a/gmp/mpn/ia64/addmul_2.asm
+++ b/gmp/mpn/ia64/addmul_2.asm
@@ -1,35 +1,22 @@
 dnl  IA-64 mpn_addmul_2 -- Multiply a n-limb number with a 2-limb number and
 dnl  add the result to a (n+1)-limb number.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2004, 2005, 2011 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -37,11 +24,16 @@ C         cycles/limb
 C Itanium:    3.65
 C Itanium 2:  1.625
 
+C Note that this is very similar to mul_2.asm.  If you change this file,
+C please change that file too.
+
 C TODO
 C  * Clean up variable names, and try to decrease the number of distinct
 C    registers used.
-C  * Clean up feed-in code to not require zeroing several registers.
-C  * Make sure we don't depend on uninitialised predicate registers.
+C  * Cleanup feed-in code to not require zeroing several registers.
+C  * Make sure we don't depend on uninitialized predicate registers.
+C  * We currently cross-jump very aggressively, at the expense of a few cycles
+C    per operation.  Consider changing that.
 C  * Could perhaps save a few cycles by using 1 c/l carry propagation in
 C    wind-down code.
 C  * Ultimately rewrite.  The problem with this code is that it first uses a
@@ -102,607 +94,564 @@ define(`ry',`f50')
 define(`uy',`f51')
 
 ASM_START()
-PROLOGUE(mpn_addmul_2s)
-	.prologue
-	.save	ar.lc, r2
-	.body
-
-ifdef(`HAVE_ABI_32',`
-.mmi;		addp4	rp = 0, rp		C			M I
-		addp4	up = 0, up		C			M I
-		addp4	vp = 0, vp		C			M I
-.mmi;		nop	1
-		nop	1
-		zxt4	n = n			C			I
-	;;')
-
-.mmi;		ldf8	ux = [up], 8		C			M
-		ldf8	v0 = [vp], 8		C			M
-		mov	r2 = ar.lc		C			I0
-.mmi;		ldf8	rx = [rp], 8		C			M
-		and	r14 = 3, n		C			M I
-		add	n = -2, n		C			M I
-	;;
-.mmi;		ldf8	uy = [up], 8		C			M
-		ldf8	v1 = [vp]		C			M
-		shr.u	n = n, 2		C			I0
-.mmi;		ldf8	ry = [rp], -8		C			M
-		cmp.eq	p14, p0 = 1, r14	C			M I
-		cmp.eq	p11, p0 = 2, r14	C			M I
-	;;
-.mmi;		add	srp = 16, rp		C			M I
-		cmp.eq	p15, p0 = 3, r14	C			M I
-		mov	ar.lc = n		C			I0
-.bbb;	(p14)	br.dptk	L(x01)			C			B
-	(p11)	br.dptk	L(x10)			C			B
-	(p15)	br.dptk	L(x11)			C			B
-	;;
-
-L(x00):		cmp.ne	p6, p0 = r0, r0		C suppress initial xma pair
-		mov	fp2a_3 = f0
-		br	L(b00)
-L(x01):		cmp.ne	p14, p0 = r0, r0	C suppress initial xma pair
-		mov	fp2a_2 = f0
-		br	L(b01)
-L(x10):		cmp.ne	p11, p0 = r0, r0	C suppress initial xma pair
-		mov	fp2a_1 = f0
-		br	L(b10)
-L(x11):		cmp.ne	p15, p0 = r0, r0	C suppress initial xma pair
-		mov	fp2a_0 = f0
-		br	L(b11)
-
-EPILOGUE()
-
 PROLOGUE(mpn_addmul_2)
 	.prologue
 	.save	ar.lc, r2
 	.body
 
-ifdef(`HAVE_ABI_32',`
-.mmi;		addp4	rp = 0, rp		C			M I
-		addp4	up = 0, up		C			M I
-		addp4	vp = 0, vp		C			M I
-.mmi;		nop	1
-		nop	1
-		zxt4	n = n			C			I
+ifdef(`HAVE_ABI_32',
+`	addp4		rp = 0, rp		C			M I
+	addp4		up = 0, up		C			M I
+	addp4		vp = 0, vp		C			M I
+	zxt4		n = n			C			I
 	;;')
 
-.mmi;		ldf8	ux = [up], 8		C			M
-		ldf8	v0 = [vp], 8		C			M
-		mov	r2 = ar.lc		C			I0
-.mmi;		ldf8	rx = [rp], 8		C			M
-		and	r14 = 3, n		C			M I
-		add	n = -2, n		C			M I
-	;;
-.mmi;		ldf8	uy = [up], 8		C			M
-		ldf8	v1 = [vp]		C			M
-		shr.u	n = n, 2		C			I0
-.mmi;		ldf8	ry = [rp], -8		C			M
-		cmp.eq	p14, p0 = 1, r14	C			M I
-		cmp.eq	p11, p0 = 2, r14	C			M I
-	;;
-.mmi;		add	srp = 16, rp		C			M I
-		cmp.eq	p15, p6 = 3, r14	C			M I
-		mov	ar.lc = n		C			I0
-.bbb;	(p14)	br.dptk	L(b01)			C			B
-	(p11)	br.dptk	L(b10)			C			B
-	(p15)	br.dptk	L(b11)			C			B
-	;;
+{.mmi		C 00
+	ldf8		ux = [up], 8		C			M
+	ldf8		v0 = [vp], 8		C			M
+	mov.i		r2 = ar.lc		C			I0
+}{.mmi
+	ldf8		rx = [rp], 8		C			M
+	and		r14 = 3, n		C			M I
+	add		n = -2, n		C			M I
+	;;
+}{.mmi		C 01
+	ldf8		uy = [up], 8		C			M
+	ldf8		v1 = [vp]		C			M
+	shr.u		n = n, 2		C			I0
+}{.mmi
+	ldf8		ry = [rp], -8		C			M
+	cmp.eq		p10, p0 = 1, r14	C			M I
+	cmp.eq		p11, p0 = 2, r14	C			M I
+	;;
+}{.mmi		C 02
+	add		srp = 16, rp		C			M I
+	cmp.eq		p12, p0 = 3, r14	C			M I
+	mov.i		ar.lc = n		C			I0
+}{.bbb
+  (p10) br.dptk		.Lb01			C			B
+  (p11) br.dptk		.Lb10			C			B
+  (p12) br.dptk		.Lb11			C			B
+	;;
+}
 
 	ALIGN(32)
-L(b00):
-.mmi;		ldf8	r_1 = [srp], 8
-		ldf8	u_1 = [up], 8
-		mov	acc1_2 = 0
-.mmi;		mov	pr1_2 = 0
-		mov	pr0_3 = 0
-		cmp.ne	p8, p9 = r0, r0
-	;;
-.mfi;		ldf8	r_2 = [srp], 8
-		xma.l	fp0b_3 = ux, v0, rx
-		cmp.ne	p12, p13 = r0, r0
-.mfb;		ldf8	u_2 = [up], 8
-		xma.hu	fp1b_3 = ux, v0, rx
-		br.cloop.dptk	L(gt4)
-
-		xma.l	fp0b_0 = uy, v0, ry
-		xma.hu	fp1a_0 = uy, v0, ry
-	;;
-		getfsig	acc0 = fp0b_3
-	(p6)	xma.hu	fp2a_3 = ux, v1, fp1b_3		C suppressed for addmul_2s
-	(p6)	xma.l	fp1b_3 = ux, v1, fp1b_3		C suppressed for addmul_2s
-	;;
-		xma.l	fp0b_1 = u_1, v0, r_1
-		xma.hu	fp1a_1 = u_1, v0, r_1
-	;;
-		getfsig	pr0_0 = fp0b_0
-		xma.l	fp1b_0 = uy, v1, fp1a_0
-		xma.hu	fp2a_0 = uy, v1, fp1a_0
-	;;
-		getfsig	pr1_3 = fp1b_3
-		getfsig	acc1_3 = fp2a_3
-		xma.l	fp0b_2 = u_2, v0, r_2
-		xma.hu	fp1a_2 = u_2, v0, r_2
-		br	L(cj4)
-
-L(gt4):		xma.l	fp0b_0 = uy, v0, ry
-		xma.hu	fp1a_0 = uy, v0, ry
-	;;
-		ldf8	r_3 = [srp], 8
-		getfsig	acc0 = fp0b_3
-	(p6)	xma.hu	fp2a_3 = ux, v1, fp1b_3		C suppressed for addmul_2s
-		ldf8	u_3 = [up], 8
-	(p6)	xma.l	fp1b_3 = ux, v1, fp1b_3		C suppressed for addmul_2s
-	;;
-		xma.l	fp0b_1 = u_1, v0, r_1
-		xma.hu	fp1a_1 = u_1, v0, r_1
-	;;
-		ldf8	r_0 = [srp], 8
-		getfsig	pr0_0 = fp0b_0
-		xma.l	fp1b_0 = uy, v1, fp1a_0
-		xma.hu	fp2a_0 = uy, v1, fp1a_0
-	;;
-		ldf8	u_0 = [up], 8
-		getfsig	pr1_3 = fp1b_3
-		xma.l	fp0b_2 = u_2, v0, r_2
-	;;
-		getfsig	acc1_3 = fp2a_3
-		xma.hu	fp1a_2 = u_2, v0, r_2
-		br	L(00)
+.Lb00:	ldf8		r_1 = [srp], 8
+	ldf8		u_1 = [up], 8
+	mov		acc1_2 = 0
+	mov		pr1_2 = 0
+	mov		pr0_3 = 0
+	cmp.ne		p8, p9 = r0, r0
+	;;
+	ldf8		r_2 = [srp], 8
+	xma.l		fp0b_3 = ux, v0, rx
+	cmp.ne		p12, p13 = r0, r0
+	ldf8		u_2 = [up], 8
+	xma.hu		fp1a_3 = ux, v0, rx
+	br.cloop.dptk	.grt4
+
+	xma.l		fp0b_0 = uy, v0, ry
+	xma.hu		fp1a_0 = uy, v0, ry
+	;;
+	getf.sig	acc0 = fp0b_3
+	xma.l		fp1b_3 = ux, v1, fp1a_3
+	xma.hu		fp2a_3 = ux, v1, fp1a_3
+	;;
+	xma.l		fp0b_1 = u_1, v0, r_1
+	xma.hu		fp1a_1 = u_1, v0, r_1
+	;;
+	getf.sig	pr0_0 = fp0b_0
+	xma.l		fp1b_0 = uy, v1, fp1a_0
+	xma.hu		fp2a_0 = uy, v1, fp1a_0
+	;;
+	getf.sig	pr1_3 = fp1b_3
+	getf.sig	acc1_3 = fp2a_3
+	xma.l		fp0b_2 = u_2, v0, r_2
+	xma.hu		fp1a_2 = u_2, v0, r_2
+	br		.Lcj4
+
+.grt4:	xma.l		fp0b_0 = uy, v0, ry
+	xma.hu		fp1a_0 = uy, v0, ry
+	;;
+	ldf8		r_3 = [srp], 8
+	getf.sig	acc0 = fp0b_3
+	xma.l		fp1b_3 = ux, v1, fp1a_3
+	ldf8		u_3 = [up], 8
+	xma.hu		fp2a_3 = ux, v1, fp1a_3
+	;;
+	xma.l		fp0b_1 = u_1, v0, r_1
+	xma.hu		fp1a_1 = u_1, v0, r_1
+	;;
+	ldf8		r_0 = [srp], 8
+	getf.sig	pr0_0 = fp0b_0
+	xma.l		fp1b_0 = uy, v1, fp1a_0
+	xma.hu		fp2a_0 = uy, v1, fp1a_0
+	;;
+	ldf8		u_0 = [up], 8
+	getf.sig	pr1_3 = fp1b_3
+	;;
+	getf.sig	acc1_3 = fp2a_3
+	xma.l		fp0b_2 = u_2, v0, r_2
+	xma.hu		fp1a_2 = u_2, v0, r_2
+	br		.LL00
 
 
 	ALIGN(32)
-L(b01):
-.mmi;		ldf8	r_0 = [srp], 8		C M
-		ldf8	u_0 = [up], 8		C M
-		mov	acc1_1 = 0		C M I
-.mmi;		mov	pr1_1 = 0		C M I
-		mov	pr0_2 = 0		C M I
-		cmp.ne	p6, p7 = r0, r0		C M I
-	;;
-.mfi;		ldf8	r_1 = [srp], 8		C M
-		xma.l	fp0b_2 = ux, v0, rx	C F
-		cmp.ne	p10, p11 = r0, r0	C M I
-.mfi;		ldf8	u_1 = [up], 8		C M
-		xma.hu	fp1b_2 = ux, v0, rx	C F
-		nop	1
-	;;
-		xma.l	fp0b_3 = uy, v0, ry	C F
-		xma.hu	fp1a_3 = uy, v0, ry	C F
-	;;
-.mmf;		getfsig	acc0 = fp0b_2		C M
-		ldf8	r_2 = [srp], 8		C M
-	(p14)	xma.hu	fp2a_2 = ux, v1,fp1b_2	C F	suppressed for addmul_2s
-.mfb;		ldf8	u_2 = [up], 8		C M
-	(p14)	xma.l	fp1b_2 = ux, v1,fp1b_2	C F	suppressed for addmul_2s
-		br.cloop.dptk	L(gt5)
-
-		xma.l	fp0b_0 = u_0, v0, r_0	C F
-		xma.hu	fp1a_0 = u_0, v0, r_0	C F
-	;;
-		getfsig	pr0_3 = fp0b_3		C M
-		xma.l	fp1b_3 = uy, v1,fp1a_3	C F
-		xma.hu	fp2a_3 = uy, v1,fp1a_3	C F
-	;;
-		getfsig	pr1_2 = fp1b_2		C M
-		getfsig	acc1_2 = fp2a_2		C M
-		xma.l	fp0b_1 = u_1, v0, r_1	C F
-		xma.hu	fp1a_1 = u_1, v0, r_1	C F
-		br	L(cj5)
-
-L(gt5):		xma.l	fp0b_0 = u_0, v0, r_0
-		xma.hu	fp1a_0 = u_0, v0, r_0
-	;;
-		getfsig	pr0_3 = fp0b_3
-		ldf8	r_3 = [srp], 8
-		xma.l	fp1b_3 = uy, v1, fp1a_3
-		xma.hu	fp2a_3 = uy, v1, fp1a_3
-	;;
-		ldf8	u_3 = [up], 8
-		getfsig	pr1_2 = fp1b_2
-		xma.l	fp0b_1 = u_1, v0, r_1
-	;;
-		getfsig	acc1_2 = fp2a_2
-		xma.hu	fp1a_1 = u_1, v0, r_1
-		br	L(01)
+.Lb01:	ldf8		r_0 = [srp], 8		C M
+	ldf8		u_0 = [up], 8		C M
+	mov		acc1_1 = 0		C M I
+	mov		pr1_1 = 0		C M I
+	mov		pr0_2 = 0		C M I
+	cmp.ne		p6, p7 = r0, r0		C M I
+	;;
+	ldf8		r_1 = [srp], 8		C M
+	xma.l		fp0b_2 = ux, v0, rx	C F
+	cmp.ne		p10, p11 = r0, r0	C M I
+	ldf8		u_1 = [up], 8		C M
+	xma.hu		fp1a_2 = ux, v0, rx	C F
+	;;
+	xma.l		fp0b_3 = uy, v0, ry	C F
+	xma.hu		fp1a_3 = uy, v0, ry	C F
+	;;
+	getf.sig	acc0 = fp0b_2		C M
+	ldf8		r_2 = [srp], 8		C M
+	xma.l		fp1b_2 = ux, v1,fp1a_2	C F
+	xma.hu		fp2a_2 = ux, v1,fp1a_2	C F
+	ldf8		u_2 = [up], 8		C M
+	br.cloop.dptk	.grt5
+
+	xma.l		fp0b_0 = u_0, v0, r_0	C F
+	xma.hu		fp1a_0 = u_0, v0, r_0	C F
+	;;
+	getf.sig	pr0_3 = fp0b_3		C M
+	xma.l		fp1b_3 = uy, v1,fp1a_3	C F
+	xma.hu		fp2a_3 = uy, v1,fp1a_3	C F
+	;;
+	getf.sig	pr1_2 = fp1b_2		C M
+	getf.sig	acc1_2 = fp2a_2		C M
+	xma.l		fp0b_1 = u_1, v0, r_1	C F
+	xma.hu		fp1a_1 = u_1, v0, r_1	C F
+	br		.Lcj5
+
+.grt5:	xma.l		fp0b_0 = u_0, v0, r_0
+	xma.hu		fp1a_0 = u_0, v0, r_0
+	;;
+	getf.sig	pr0_3 = fp0b_3
+	ldf8		r_3 = [srp], 8
+	xma.l		fp1b_3 = uy, v1, fp1a_3
+	xma.hu		fp2a_3 = uy, v1, fp1a_3
+	;;
+	ldf8		u_3 = [up], 8
+	getf.sig	pr1_2 = fp1b_2
+	;;
+	getf.sig	acc1_2 = fp2a_2
+	xma.l		fp0b_1 = u_1, v0, r_1
+	xma.hu		fp1a_1 = u_1, v0, r_1
+	br		.LL01
 
 
 	ALIGN(32)
-L(b10):		br.cloop.dptk	L(gt2)
-		xma.l	fp0b_1 = ux, v0, rx
-		xma.hu	fp1b_1 = ux, v0, rx
-	;;
-		xma.l	fp0b_2 = uy, v0, ry
-		xma.hu	fp1a_2 = uy, v0, ry
-	;;
-		stf8	[rp] = fp0b_1, 8
-	(p11)	xma.hu	fp2a_1 = ux, v1, fp1b_1		C suppressed for addmul_2s
-	(p11)	xma.l	fp1b_1 = ux, v1, fp1b_1		C suppressed for addmul_2s
-	;;
-		getfsig	acc0 = fp0b_2
-		xma.l	fp1b_2 = uy, v1, fp1a_2
-		xma.hu	fp2a_2 = uy, v1, fp1a_2
-	;;
-		getfsig	pr1_1 = fp1b_1
-		getfsig	acc1_1 = fp2a_1
-		mov	ar.lc = r2
-		getfsig	pr1_2 = fp1b_2
-		getfsig	r8 = fp2a_2
-	;;
-		add	s0 = pr1_1, acc0
-	;;
-		st8	[rp] = s0, 8
-		cmp.ltu	p8, p9 = s0, pr1_1
-		sub	r31 = -1, acc1_1
-	;;
-		.pred.rel "mutex", p8, p9
-	(p8)	add	acc0 = pr1_2, acc1_1, 1
-	(p9)	add	acc0 = pr1_2, acc1_1
-	(p8)	cmp.leu	p10, p0 = r31, pr1_2
-	(p9)	cmp.ltu	p10, p0 = r31, pr1_2
-	;;
-		st8	[rp] = acc0, 8
-	(p10)	add	r8 = 1, r8
-		br.ret.sptk.many b0
-
-
-L(gt2):
-.mmi;		ldf8	r_3 = [srp], 8
-		ldf8	u_3 = [up], 8
-		mov	acc1_0 = 0
-	;;
-.mfi;		ldf8	r_0 = [srp], 8
-		xma.l	fp0b_1 = ux, v0, rx
-		mov	pr1_0 = 0
-.mfi;		ldf8	u_0 = [up], 8
-		xma.hu	fp1b_1 = ux, v0, rx
-		mov	pr0_1 = 0
-	;;
-		xma.l	fp0b_2 = uy, v0, ry
-		xma.hu	fp1a_2 = uy, v0, ry
-	;;
-		getfsig	acc0 = fp0b_1
-		ldf8	r_1 = [srp], 8
-	(p11)	xma.hu	fp2a_1 = ux, v1, fp1b_1		C suppressed for addmul_2s
-	(p11)	xma.l	fp1b_1 = ux, v1, fp1b_1		C suppressed for addmul_2s
-	;;
-		ldf8	u_1 = [up], 8
-		xma.l	fp0b_3 = u_3, v0, r_3
-		xma.hu	fp1a_3 = u_3, v0, r_3
-	;;
-		getfsig	pr0_2 = fp0b_2
-		ldf8	r_2 = [srp], 8
-		xma.l	fp1b_2 = uy, v1, fp1a_2
-		xma.hu	fp2a_2 = uy, v1, fp1a_2
-	;;
-		ldf8	u_2 = [up], 8
-		getfsig	pr1_1 = fp1b_1
-	;;
-.mfi;		getfsig	acc1_1 = fp2a_1
-		xma.l	fp0b_0 = u_0, v0, r_0
-		cmp.ne	p8, p9 = r0, r0
-.mfb;		cmp.ne	p12, p13 = r0, r0
-		xma.hu	fp1a_0 = u_0, v0, r_0
-		br.cloop.sptk.clr	L(top)
-		br.many	L(end)
+.Lb10:		C 03
+	br.cloop.dptk	.grt2
+		C 04
+		C 05
+		C 06
+	xma.l		fp0b_1 = ux, v0, rx
+	xma.hu		fp1a_1 = ux, v0, rx
+	;;	C 07
+	xma.l		fp0b_2 = uy, v0, ry
+	xma.hu		fp1a_2 = uy, v0, ry
+	;;	C 08
+		C 09
+		C 10
+	stf8		[rp] = fp0b_1, 8
+	xma.l		fp1b_1 = ux, v1, fp1a_1
+	xma.hu		fp2a_1 = ux, v1, fp1a_1
+	;;	C 11
+	getf.sig	acc0 = fp0b_2
+	xma.l		fp1b_2 = uy, v1, fp1a_2
+	xma.hu		fp2a_2 = uy, v1, fp1a_2
+	;;	C 12
+		C 13
+		C 14
+	getf.sig	pr1_1 = fp1b_1
+		C 15
+	getf.sig	acc1_1 = fp2a_1
+		C 16
+	getf.sig	pr1_2 = fp1b_2
+		C 17
+	getf.sig	r8 = fp2a_2
+	;;	C 18
+		C 19
+	add		s0 = pr1_1, acc0
+	;;	C 20
+	st8		[rp] = s0, 8
+	cmp.ltu		p8, p9 = s0, pr1_1
+	sub		r31 = -1, acc1_1
+	;;	C 21
+	.pred.rel "mutex", p8, p9
+  (p8)	add		acc0 = pr1_2, acc1_1, 1
+  (p9)	add		acc0 = pr1_2, acc1_1
+  (p8)	cmp.leu		p10, p0 = r31, pr1_2
+  (p9)	cmp.ltu		p10, p0 = r31, pr1_2
+	;;	C 22
+	st8		[rp] = acc0, 8
+	mov.i		ar.lc = r2
+  (p10)	add		r8 = 1, r8
+	br.ret.sptk.many b0
+
+
+.grt2:	ldf8		r_3 = [srp], 8
+	ldf8		u_3 = [up], 8
+	mov		acc1_0 = 0
+	;;
+	ldf8		r_0 = [srp], 8
+	xma.l		fp0b_1 = ux, v0, rx
+	mov		pr1_0 = 0
+	ldf8		u_0 = [up], 8
+	xma.hu		fp1a_1 = ux, v0, rx
+	mov		pr0_1 = 0
+	;;
+	xma.l		fp0b_2 = uy, v0, ry
+	xma.hu		fp1a_2 = uy, v0, ry
+	;;
+	getf.sig	acc0 = fp0b_1
+	ldf8		r_1 = [srp], 8
+	xma.l		fp1b_1 = ux, v1, fp1a_1
+	xma.hu		fp2a_1 = ux, v1, fp1a_1
+	;;
+	ldf8		u_1 = [up], 8
+	xma.l		fp0b_3 = u_3, v0, r_3
+	xma.hu		fp1a_3 = u_3, v0, r_3
+	;;
+	getf.sig	pr0_2 = fp0b_2
+	ldf8		r_2 = [srp], 8
+	xma.l		fp1b_2 = uy, v1, fp1a_2
+	xma.hu		fp2a_2 = uy, v1, fp1a_2
+	;;
+	ldf8		u_2 = [up], 8
+	getf.sig	pr1_1 = fp1b_1
+	;;
+	getf.sig	acc1_1 = fp2a_1
+	xma.l		fp0b_0 = u_0, v0, r_0
+	cmp.ne		p8, p9 = r0, r0
+	cmp.ne		p12, p13 = r0, r0
+	xma.hu		fp1a_0 = u_0, v0, r_0
+	br		.LL10
 
 
 	ALIGN(32)
-L(b11):		ldf8	r_2 = [srp], 8
-		mov	pr1_3 = 0
-		mov	pr0_0 = 0
+.Lb11:	mov		acc1_3 = 0
+	mov		pr1_3 = 0
+	mov		pr0_0 = 0
+	cmp.ne		p6, p7 = r0, r0
 	;;
-		ldf8	u_2 = [up], 8
-		mov	acc1_3 = 0
-		br.cloop.dptk	L(gt3)
+	ldf8		r_2 = [srp], 8
+	ldf8		u_2 = [up], 8
+	br.cloop.dptk	.grt3
 	;;
-		cmp.ne	p6, p7 = r0, r0
-		xma.l	fp0b_0 = ux, v0, rx
-		xma.hu	fp1b_0 = ux, v0, rx
+	xma.l		fp0b_0 = ux, v0, rx
+	xma.hu		fp1a_0 = ux, v0, rx
 	;;
-		cmp.ne	p10, p11 = r0, r0
-		xma.l	fp0b_1 = uy, v0, ry
-		xma.hu	fp1a_1 = uy, v0, ry
+	cmp.ne		p10, p11 = r0, r0
+	xma.l		fp0b_1 = uy, v0, ry
+	xma.hu		fp1a_1 = uy, v0, ry
 	;;
-		getfsig	acc0 = fp0b_0
-	(p15)	xma.hu	fp2a_0 = ux, v1, fp1b_0		C suppressed for addmul_2s
-	(p15)	xma.l	fp1b_0 = ux, v1, fp1b_0		C suppressed for addmul_2s
+	getf.sig	acc0 = fp0b_0
+	xma.l		fp1b_0 = ux, v1, fp1a_0
+	xma.hu		fp2a_0 = ux, v1, fp1a_0
 	;;
-		xma.l	fp0b_2 = uy, v1, r_2
-		xma.hu	fp1a_2 = uy, v1, r_2
+	xma.l		fp0b_2 = u_2, v0, r_2
+	xma.hu		fp1a_2 = u_2, v0, r_2
 	;;
-		getfsig	pr0_1 = fp0b_1
-		xma.l	fp1b_1 = u_2, v0, fp1a_1
-		xma.hu	fp2a_1 = u_2, v0, fp1a_1
+	getf.sig	pr0_1 = fp0b_1
+	xma.l		fp1b_1 = uy, v1, fp1a_1
+	xma.hu		fp2a_1 = uy, v1, fp1a_1
 	;;
-		getfsig	pr1_0 = fp1b_0
-		getfsig	acc1_0 = fp2a_0
-		br	L(cj3)
+	getf.sig	pr1_0 = fp1b_0
+	getf.sig	acc1_0 = fp2a_0
+	br		.Lcj3
 
-L(gt3):		ldf8	r_3 = [srp], 8
-		xma.l	fp0b_0 = ux, v0, rx
-		cmp.ne	p10, p11 = r0, r0
-		ldf8	u_3 = [up], 8
-		xma.hu	fp1b_0 = ux, v0, rx
-		cmp.ne	p6, p7 = r0, r0
+.grt3:	ldf8		r_3 = [srp], 8
+	xma.l		fp0b_0 = ux, v0, rx
+	cmp.ne		p10, p11 = r0, r0
+	ldf8		u_3 = [up], 8
+	xma.hu		fp1a_0 = ux, v0, rx
 	;;
-		xma.l	fp0b_1 = uy, v0, ry
-		xma.hu	fp1a_1 = uy, v0, ry
+	xma.l		fp0b_1 = uy, v0, ry
+	xma.hu		fp1a_1 = uy, v0, ry
 	;;
-		getfsig	acc0 = fp0b_0
-		ldf8	r_0 = [srp], 8
-	(p15)	xma.hu	fp2a_0 = ux, v1, fp1b_0		C suppressed for addmul_2s
-		ldf8	u_0 = [up], 8
-	(p15)	xma.l	fp1b_0 = ux, v1, fp1b_0		C suppressed for addmul_2s
+	getf.sig	acc0 = fp0b_0
+	ldf8		r_0 = [srp], 8
+	xma.l		fp1b_0 = ux, v1, fp1a_0
+	ldf8		u_0 = [up], 8
+	xma.hu		fp2a_0 = ux, v1, fp1a_0
 	;;
-		xma.l	fp0b_2 = u_2, v0, r_2
-		xma.hu	fp1a_2 = u_2, v0, r_2
+	xma.l		fp0b_2 = u_2, v0, r_2
+	xma.hu		fp1a_2 = u_2, v0, r_2
 	;;
-		getfsig	pr0_1 = fp0b_1
-		ldf8	r_1 = [srp], 8
-		xma.l	fp1b_1 = uy, v1, fp1a_1
-		xma.hu	fp2a_1 = uy, v1, fp1a_1
+	getf.sig	pr0_1 = fp0b_1
+	ldf8		r_1 = [srp], 8
+	xma.l		fp1b_1 = uy, v1, fp1a_1
+	xma.hu		fp2a_1 = uy, v1, fp1a_1
 	;;
-		ldf8	u_1 = [up], 8
-		getfsig	pr1_0 = fp1b_0
+	ldf8		u_1 = [up], 8
+	getf.sig	pr1_0 = fp1b_0
 	;;
-		getfsig	acc1_0 = fp2a_0
-		xma.l	fp0b_3 = u_3, v0, r_3
-		xma.hu	fp1a_3 = u_3, v0, r_3
-		br	L(11)
+	getf.sig	acc1_0 = fp2a_0
+	xma.l		fp0b_3 = u_3, v0, r_3
+	xma.hu		fp1a_3 = u_3, v0, r_3
+	br		.LL11
 
 
 C *** MAIN LOOP START ***
 	ALIGN(32)
-L(top):						C 00
-		.pred.rel "mutex", p12, p13
-		getfsig	pr0_3 = fp0b_3
-		ldf8	r_3 = [srp], 8
-		xma.l	fp1b_3 = u_3, v1, fp1a_3
-	(p12)	add	s0 = pr1_0, acc0, 1
-	(p13)	add	s0 = pr1_0, acc0
-		xma.hu	fp2a_3 = u_3, v1, fp1a_3
+.Loop:						C 00
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr0_3 = fp0b_3
+	ldf8		r_3 = [srp], 8
+	xma.l		fp1b_3 = u_3, v1, fp1a_3
+  (p12)	add		s0 = pr1_0, acc0, 1
+  (p13)	add		s0 = pr1_0, acc0
+	xma.hu		fp2a_3 = u_3, v1, fp1a_3
 	;;					C 01
-		.pred.rel "mutex", p8, p9
-		.pred.rel "mutex", p12, p13
-		ldf8	u_3 = [up], 8
-		getfsig	pr1_2 = fp1b_2
-	(p8)	cmp.leu	p6, p7 = acc0, pr0_1
-	(p9)	cmp.ltu	p6, p7 = acc0, pr0_1
-	(p12)	cmp.leu	p10, p11 = s0, pr1_0
-	(p13)	cmp.ltu	p10, p11 = s0, pr1_0
+	.pred.rel "mutex", p8, p9
+	.pred.rel "mutex", p12, p13
+	ldf8		u_3 = [up], 8
+	getf.sig	pr1_2 = fp1b_2
+  (p8)	cmp.leu		p6, p7 = acc0, pr0_1
+  (p9)	cmp.ltu		p6, p7 = acc0, pr0_1
+  (p12)	cmp.leu		p10, p11 = s0, pr1_0
+  (p13)	cmp.ltu		p10, p11 = s0, pr1_0
 	;;					C 02
-		.pred.rel "mutex", p6, p7
-		getfsig	acc1_2 = fp2a_2
-		st8	[rp] = s0, 8
-		xma.l	fp0b_1 = u_1, v0, r_1
-	(p6)	add	acc0 = pr0_2, acc1_0, 1
-	(p7)	add	acc0 = pr0_2, acc1_0
-		xma.hu	fp1a_1 = u_1, v0, r_1
+	.pred.rel "mutex", p6, p7
+	getf.sig	acc1_2 = fp2a_2
+	st8		[rp] = s0, 8
+	xma.l		fp0b_1 = u_1, v0, r_1
+  (p6)	add		acc0 = pr0_2, acc1_0, 1
+  (p7)	add		acc0 = pr0_2, acc1_0
+	xma.hu		fp1a_1 = u_1, v0, r_1
 	;;					C 03
-L(01):
-		.pred.rel "mutex", p10, p11
-		getfsig	pr0_0 = fp0b_0
-		ldf8	r_0 = [srp], 8
-		xma.l	fp1b_0 = u_0, v1, fp1a_0
-	(p10)	add	s0 = pr1_1, acc0, 1
-	(p11)	add	s0 = pr1_1, acc0
-		xma.hu	fp2a_0 = u_0, v1, fp1a_0
+.LL01:
+	.pred.rel "mutex", p10, p11
+	getf.sig	pr0_0 = fp0b_0
+	ldf8		r_0 = [srp], 8
+	xma.l		fp1b_0 = u_0, v1, fp1a_0
+  (p10)	add		s0 = pr1_1, acc0, 1
+  (p11)	add		s0 = pr1_1, acc0
+	xma.hu		fp2a_0 = u_0, v1, fp1a_0
 	;;					C 04
-		.pred.rel "mutex", p6, p7
-		.pred.rel "mutex", p10, p11
-		ldf8	u_0 = [up], 8
-		getfsig	pr1_3 = fp1b_3
-	(p6)	cmp.leu	p8, p9 = acc0, pr0_2
-	(p7)	cmp.ltu	p8, p9 = acc0, pr0_2
-	(p10)	cmp.leu	p12, p13 = s0, pr1_1
-	(p11)	cmp.ltu	p12, p13 = s0, pr1_1
+	.pred.rel "mutex", p6, p7
+	.pred.rel "mutex", p10, p11
+	ldf8		u_0 = [up], 8
+	getf.sig	pr1_3 = fp1b_3
+  (p6)	cmp.leu		p8, p9 = acc0, pr0_2
+  (p7)	cmp.ltu		p8, p9 = acc0, pr0_2
+  (p10)	cmp.leu		p12, p13 = s0, pr1_1
+  (p11)	cmp.ltu		p12, p13 = s0, pr1_1
 	;;					C 05
-		.pred.rel "mutex", p8, p9
-		getfsig	acc1_3 = fp2a_3
-		st8	[rp] = s0, 8
-		xma.l	fp0b_2 = u_2, v0, r_2
-	(p8)	add	acc0 = pr0_3, acc1_1, 1
-	(p9)	add	acc0 = pr0_3, acc1_1
-		xma.hu	fp1a_2 = u_2, v0, r_2
+	.pred.rel "mutex", p8, p9
+	getf.sig	acc1_3 = fp2a_3
+	st8		[rp] = s0, 8
+	xma.l		fp0b_2 = u_2, v0, r_2
+  (p8)	add		acc0 = pr0_3, acc1_1, 1
+  (p9)	add		acc0 = pr0_3, acc1_1
+	xma.hu		fp1a_2 = u_2, v0, r_2
 	;;					C 06
-L(00):
-		.pred.rel "mutex", p12, p13
-		getfsig	pr0_1 = fp0b_1
-		ldf8	r_1 = [srp], 8
-		xma.l	fp1b_1 = u_1, v1, fp1a_1
-	(p12)	add	s0 = pr1_2, acc0, 1
-	(p13)	add	s0 = pr1_2, acc0
-		xma.hu	fp2a_1 = u_1, v1, fp1a_1
+.LL00:
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr0_1 = fp0b_1
+	ldf8		r_1 = [srp], 8
+	xma.l		fp1b_1 = u_1, v1, fp1a_1
+  (p12)	add		s0 = pr1_2, acc0, 1
+  (p13)	add		s0 = pr1_2, acc0
+	xma.hu		fp2a_1 = u_1, v1, fp1a_1
 	;;					C 07
-		.pred.rel "mutex", p8, p9
-		.pred.rel "mutex", p12, p13
-		ldf8	u_1 = [up], 8
-		getfsig	pr1_0 = fp1b_0
-	(p8)	cmp.leu	p6, p7 = acc0, pr0_3
-	(p9)	cmp.ltu	p6, p7 = acc0, pr0_3
-	(p12)	cmp.leu	p10, p11 = s0, pr1_2
-	(p13)	cmp.ltu	p10, p11 = s0, pr1_2
+	.pred.rel "mutex", p8, p9
+	.pred.rel "mutex", p12, p13
+	ldf8		u_1 = [up], 8
+	getf.sig	pr1_0 = fp1b_0
+  (p8)	cmp.leu		p6, p7 = acc0, pr0_3
+  (p9)	cmp.ltu		p6, p7 = acc0, pr0_3
+  (p12)	cmp.leu		p10, p11 = s0, pr1_2
+  (p13)	cmp.ltu		p10, p11 = s0, pr1_2
 	;;					C 08
-		.pred.rel "mutex", p6, p7
-		getfsig	acc1_0 = fp2a_0
-		st8	[rp] = s0, 8
-		xma.l	fp0b_3 = u_3, v0, r_3
-	(p6)	add	acc0 = pr0_0, acc1_2, 1
-	(p7)	add	acc0 = pr0_0, acc1_2
-		xma.hu	fp1a_3 = u_3, v0, r_3
+	.pred.rel "mutex", p6, p7
+	getf.sig	acc1_0 = fp2a_0
+	st8		[rp] = s0, 8
+	xma.l		fp0b_3 = u_3, v0, r_3
+  (p6)	add		acc0 = pr0_0, acc1_2, 1
+  (p7)	add		acc0 = pr0_0, acc1_2
+	xma.hu		fp1a_3 = u_3, v0, r_3
 	;;					C 09
-L(11):
-		.pred.rel "mutex", p10, p11
-		getfsig	pr0_2 = fp0b_2
-		ldf8	r_2 = [srp], 8
-		xma.l	fp1b_2 = u_2, v1, fp1a_2
-	(p10)	add	s0 = pr1_3, acc0, 1
-	(p11)	add	s0 = pr1_3, acc0
-		xma.hu	fp2a_2 = u_2, v1, fp1a_2
+.LL11:
+	.pred.rel "mutex", p10, p11
+	getf.sig	pr0_2 = fp0b_2
+	ldf8		r_2 = [srp], 8
+	xma.l		fp1b_2 = u_2, v1, fp1a_2
+  (p10)	add		s0 = pr1_3, acc0, 1
+  (p11)	add		s0 = pr1_3, acc0
+	xma.hu		fp2a_2 = u_2, v1, fp1a_2
 	;;					C 10
-		.pred.rel "mutex", p6, p7
-		.pred.rel "mutex", p10, p11
-		ldf8	u_2 = [up], 8
-		getfsig	pr1_1 = fp1b_1
-	(p6)	cmp.leu	p8, p9 = acc0, pr0_0
-	(p7)	cmp.ltu	p8, p9 = acc0, pr0_0
-	(p10)	cmp.leu	p12, p13 = s0, pr1_3
-	(p11)	cmp.ltu	p12, p13 = s0, pr1_3
+	.pred.rel "mutex", p6, p7
+	.pred.rel "mutex", p10, p11
+	ldf8		u_2 = [up], 8
+	getf.sig	pr1_1 = fp1b_1
+  (p6)	cmp.leu		p8, p9 = acc0, pr0_0
+  (p7)	cmp.ltu		p8, p9 = acc0, pr0_0
+  (p10)	cmp.leu		p12, p13 = s0, pr1_3
+  (p11)	cmp.ltu		p12, p13 = s0, pr1_3
 	;;					C 11
-		.pred.rel "mutex", p8, p9
-		getfsig	acc1_1 = fp2a_1
-		st8	[rp] = s0, 8
-		xma.l	fp0b_0 = u_0, v0, r_0
-	(p8)	add	acc0 = pr0_1, acc1_3, 1
-	(p9)	add	acc0 = pr0_1, acc1_3
-		xma.hu	fp1a_0 = u_0, v0, r_0
-L(10):		br.cloop.sptk.clr	L(top)			C 12
+	.pred.rel "mutex", p8, p9
+	getf.sig	acc1_1 = fp2a_1
+	st8		[rp] = s0, 8
+	xma.l		fp0b_0 = u_0, v0, r_0
+  (p8)	add		acc0 = pr0_1, acc1_3, 1
+  (p9)	add		acc0 = pr0_1, acc1_3
+	xma.hu		fp1a_0 = u_0, v0, r_0
+.LL10:	br.cloop.dptk	.Loop			C 12
 	;;
 C *** MAIN LOOP END ***
-L(end):
-		.pred.rel "mutex", p12, p13
-.mfi;		getfsig	pr0_3 = fp0b_3
-		xma.l	fp1b_3 = u_3, v1, fp1a_3
-	(p12)	add	s0 = pr1_0, acc0, 1
-.mfi;	(p13)	add	s0 = pr1_0, acc0
-		xma.hu	fp2a_3 = u_3, v1, fp1a_3
-		nop	1
-	;;
-		.pred.rel "mutex", p8, p9
-		.pred.rel "mutex", p12, p13
-.mmi;		getfsig	pr1_2 = fp1b_2
-		st8	[rp] = s0, 8
-	(p8)	cmp.leu	p6, p7 = acc0, pr0_1
-.mmi;	(p9)	cmp.ltu	p6, p7 = acc0, pr0_1
-	(p12)	cmp.leu	p10, p11 = s0, pr1_0
-	(p13)	cmp.ltu	p10, p11 = s0, pr1_0
-	;;
-		.pred.rel "mutex", p6, p7
-.mfi;		getfsig	acc1_2 = fp2a_2
-		xma.l	fp0b_1 = u_1, v0, r_1
-		nop	1
-.mmf;	(p6)	add	acc0 = pr0_2, acc1_0, 1
-	(p7)	add	acc0 = pr0_2, acc1_0
-		xma.hu	fp1a_1 = u_1, v0, r_1
-	;;
-L(cj5):
-		.pred.rel "mutex", p10, p11
-.mfi;		getfsig	pr0_0 = fp0b_0
-		xma.l	fp1b_0 = u_0, v1, fp1a_0
-	(p10)	add	s0 = pr1_1, acc0, 1
-.mfi;	(p11)	add	s0 = pr1_1, acc0
-		xma.hu	fp2a_0 = u_0, v1, fp1a_0
-		nop	1
-	;;
-		.pred.rel "mutex", p6, p7
-		.pred.rel "mutex", p10, p11
-.mmi;		getfsig	pr1_3 = fp1b_3
-		st8	[rp] = s0, 8
-	(p6)	cmp.leu	p8, p9 = acc0, pr0_2
-.mmi;	(p7)	cmp.ltu	p8, p9 = acc0, pr0_2
-	(p10)	cmp.leu	p12, p13 = s0, pr1_1
-	(p11)	cmp.ltu	p12, p13 = s0, pr1_1
-	;;
-		.pred.rel "mutex", p8, p9
-.mfi;		getfsig	acc1_3 = fp2a_3
-		xma.l	fp0b_2 = u_2, v0, r_2
-		nop	1
-.mmf;	(p8)	add	acc0 = pr0_3, acc1_1, 1
-	(p9)	add	acc0 = pr0_3, acc1_1
-		xma.hu	fp1a_2 = u_2, v0, r_2
-	;;
-L(cj4):
-		.pred.rel "mutex", p12, p13
-.mfi;		getfsig	pr0_1 = fp0b_1
-		xma.l	fp1b_1 = u_1, v1, fp1a_1
-	(p12)	add	s0 = pr1_2, acc0, 1
-.mfi;	(p13)	add	s0 = pr1_2, acc0
-		xma.hu	fp2a_1 = u_1, v1, fp1a_1
-		nop	1
-	;;
-		.pred.rel "mutex", p8, p9
-		.pred.rel "mutex", p12, p13
-.mmi;		getfsig	pr1_0 = fp1b_0
-		st8	[rp] = s0, 8
-	(p8)	cmp.leu	p6, p7 = acc0, pr0_3
-.mmi;	(p9)	cmp.ltu	p6, p7 = acc0, pr0_3
-	(p12)	cmp.leu	p10, p11 = s0, pr1_2
-	(p13)	cmp.ltu	p10, p11 = s0, pr1_2
-	;;
-		.pred.rel "mutex", p6, p7
-.mmi;		getfsig	acc1_0 = fp2a_0
-	(p6)	add	acc0 = pr0_0, acc1_2, 1
-	(p7)	add	acc0 = pr0_0, acc1_2
-	;;
-L(cj3):
-		.pred.rel "mutex", p10, p11
-.mfi;		getfsig	pr0_2 = fp0b_2
-		xma.l	fp1b_2 = u_2, v1, fp1a_2
-	(p10)	add	s0 = pr1_3, acc0, 1
-.mfi;	(p11)	add	s0 = pr1_3, acc0
-		xma.hu	fp2a_2 = u_2, v1, fp1a_2
-		nop	1
-	;;
-		.pred.rel "mutex", p6, p7
-		.pred.rel "mutex", p10, p11
-.mmi;		getfsig	pr1_1 = fp1b_1
-		st8	[rp] = s0, 8
-	(p6)	cmp.leu	p8, p9 = acc0, pr0_0
-.mmi;	(p7)	cmp.ltu	p8, p9 = acc0, pr0_0
-	(p10)	cmp.leu	p12, p13 = s0, pr1_3
-	(p11)	cmp.ltu	p12, p13 = s0, pr1_3
-	;;
-		.pred.rel "mutex", p8, p9
-.mmi;		getfsig	acc1_1 = fp2a_1
-	(p8)	add	acc0 = pr0_1, acc1_3, 1
-	(p9)	add	acc0 = pr0_1, acc1_3
-	;;
-		.pred.rel "mutex", p12, p13
-.mmi;	(p12)	add	s0 = pr1_0, acc0, 1
-	(p13)	add	s0 = pr1_0, acc0
-		nop	1
-	;;
-		.pred.rel "mutex", p8, p9
-		.pred.rel "mutex", p12, p13
-.mmi;		getfsig	pr1_2 = fp1b_2
-		st8	[rp] = s0, 8
-	(p8)	cmp.leu	p6, p7 = acc0, pr0_1
-.mmi;	(p9)	cmp.ltu	p6, p7 = acc0, pr0_1
-	(p12)	cmp.leu	p10, p11 = s0, pr1_0
-	(p13)	cmp.ltu	p10, p11 = s0, pr1_0
-	;;
-		.pred.rel "mutex", p6, p7
-.mmi;		getfsig	r8 = fp2a_2
-	(p6)	add	acc0 = pr0_2, acc1_0, 1
-	(p7)	add	acc0 = pr0_2, acc1_0
-	;;
-		.pred.rel "mutex", p10, p11
-.mmi;	(p10)	add	s0 = pr1_1, acc0, 1
-	(p11)	add	s0 = pr1_1, acc0
-	(p6)	cmp.leu	p8, p9 = acc0, pr0_2
-	;;
-		.pred.rel "mutex", p10, p11
-.mmi;	(p7)	cmp.ltu	p8, p9 = acc0, pr0_2
-	(p10)	cmp.leu	p12, p13 = s0, pr1_1
-	(p11)	cmp.ltu	p12, p13 = s0, pr1_1
-	;;
-		.pred.rel "mutex", p8, p9
-.mmi;		st8	[rp] = s0, 8
-	(p8)	add	acc0 = pr1_2, acc1_1, 1
-	(p9)	add	acc0 = pr1_2, acc1_1
-	;;
-		.pred.rel "mutex", p8, p9
-.mmi;	(p8)	cmp.leu	p10, p11 = acc0, pr1_2
-	(p9)	cmp.ltu	p10, p11 = acc0, pr1_2
-	(p12)	add	acc0 = 1, acc0
-	;;
-.mmi;		st8	[rp] = acc0, 8
-	(p12)	cmpeqor	p10, p0 = 0, acc0
-		nop	1
-	;;
-.mib;	(p10)	add	r8 = 1, r8
-		mov	ar.lc = r2
-		br.ret.sptk.many b0
+
+.Lcj6:
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr0_3 = fp0b_3
+	xma.l		fp1b_3 = u_3, v1, fp1a_3
+  (p12)	add		s0 = pr1_0, acc0, 1
+  (p13)	add		s0 = pr1_0, acc0
+	xma.hu		fp2a_3 = u_3, v1, fp1a_3
+	;;
+	.pred.rel "mutex", p8, p9
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr1_2 = fp1b_2
+  (p8)	cmp.leu		p6, p7 = acc0, pr0_1
+  (p9)	cmp.ltu		p6, p7 = acc0, pr0_1
+  (p12)	cmp.leu		p10, p11 = s0, pr1_0
+  (p13)	cmp.ltu		p10, p11 = s0, pr1_0
+	;;
+	.pred.rel "mutex", p6, p7
+	getf.sig	acc1_2 = fp2a_2
+	st8		[rp] = s0, 8
+	xma.l		fp0b_1 = u_1, v0, r_1
+  (p6)	add		acc0 = pr0_2, acc1_0, 1
+  (p7)	add		acc0 = pr0_2, acc1_0
+	xma.hu		fp1a_1 = u_1, v0, r_1
+	;;
+.Lcj5:
+	.pred.rel "mutex", p10, p11
+	getf.sig	pr0_0 = fp0b_0
+	xma.l		fp1b_0 = u_0, v1, fp1a_0
+  (p10)	add		s0 = pr1_1, acc0, 1
+  (p11)	add		s0 = pr1_1, acc0
+	xma.hu		fp2a_0 = u_0, v1, fp1a_0
+	;;
+	.pred.rel "mutex", p6, p7
+	.pred.rel "mutex", p10, p11
+	getf.sig	pr1_3 = fp1b_3
+  (p6)	cmp.leu		p8, p9 = acc0, pr0_2
+  (p7)	cmp.ltu		p8, p9 = acc0, pr0_2
+  (p10)	cmp.leu		p12, p13 = s0, pr1_1
+  (p11)	cmp.ltu		p12, p13 = s0, pr1_1
+	;;
+	.pred.rel "mutex", p8, p9
+	getf.sig	acc1_3 = fp2a_3
+	st8		[rp] = s0, 8
+	xma.l		fp0b_2 = u_2, v0, r_2
+  (p8)	add		acc0 = pr0_3, acc1_1, 1
+  (p9)	add		acc0 = pr0_3, acc1_1
+	xma.hu		fp1a_2 = u_2, v0, r_2
+	;;
+.Lcj4:
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr0_1 = fp0b_1
+	xma.l		fp1b_1 = u_1, v1, fp1a_1
+  (p12)	add		s0 = pr1_2, acc0, 1
+  (p13)	add		s0 = pr1_2, acc0
+	xma.hu		fp2a_1 = u_1, v1, fp1a_1
+	;;
+	.pred.rel "mutex", p8, p9
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr1_0 = fp1b_0
+  (p8)	cmp.leu		p6, p7 = acc0, pr0_3
+  (p9)	cmp.ltu		p6, p7 = acc0, pr0_3
+  (p12)	cmp.leu		p10, p11 = s0, pr1_2
+  (p13)	cmp.ltu		p10, p11 = s0, pr1_2
+	;;
+	.pred.rel "mutex", p6, p7
+	getf.sig	acc1_0 = fp2a_0
+	st8		[rp] = s0, 8
+  (p6)	add		acc0 = pr0_0, acc1_2, 1
+  (p7)	add		acc0 = pr0_0, acc1_2
+	;;
+.Lcj3:
+	.pred.rel "mutex", p10, p11
+	getf.sig	pr0_2 = fp0b_2
+	xma.l		fp1b_2 = u_2, v1, fp1a_2
+  (p10)	add		s0 = pr1_3, acc0, 1
+  (p11)	add		s0 = pr1_3, acc0
+	xma.hu		fp2a_2 = u_2, v1, fp1a_2
+	;;
+	.pred.rel "mutex", p6, p7
+	.pred.rel "mutex", p10, p11
+	getf.sig	pr1_1 = fp1b_1
+  (p6)	cmp.leu		p8, p9 = acc0, pr0_0
+  (p7)	cmp.ltu		p8, p9 = acc0, pr0_0
+  (p10)	cmp.leu		p12, p13 = s0, pr1_3
+  (p11)	cmp.ltu		p12, p13 = s0, pr1_3
+	;;
+	.pred.rel "mutex", p8, p9
+	getf.sig	acc1_1 = fp2a_1
+	st8		[rp] = s0, 8
+  (p8)	add		acc0 = pr0_1, acc1_3, 1
+  (p9)	add		acc0 = pr0_1, acc1_3
+	;;
+.Lcj2:
+	.pred.rel "mutex", p12, p13
+  (p12)	add		s0 = pr1_0, acc0, 1
+  (p13)	add		s0 = pr1_0, acc0
+	;;
+	.pred.rel "mutex", p8, p9
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr1_2 = fp1b_2
+  (p8)	cmp.leu		p6, p7 = acc0, pr0_1
+  (p9)	cmp.ltu		p6, p7 = acc0, pr0_1
+  (p12)	cmp.leu		p10, p11 = s0, pr1_0
+  (p13)	cmp.ltu		p10, p11 = s0, pr1_0
+	;;
+	.pred.rel "mutex", p6, p7
+	getf.sig	acc1_2 = fp2a_2
+	st8		[rp] = s0, 8
+  (p6)	add		acc0 = pr0_2, acc1_0, 1
+  (p7)	add		acc0 = pr0_2, acc1_0
+	;;
+	.pred.rel "mutex", p10, p11
+  (p10)	add		s0 = pr1_1, acc0, 1
+  (p11)	add		s0 = pr1_1, acc0
+	;;
+	.pred.rel "mutex", p6, p7
+	.pred.rel "mutex", p10, p11
+  (p6)	cmp.leu		p8, p9 = acc0, pr0_2
+  (p7)	cmp.ltu		p8, p9 = acc0, pr0_2
+  (p10)	cmp.leu		p12, p13 = s0, pr1_1
+  (p11)	cmp.ltu		p12, p13 = s0, pr1_1
+	;;
+	.pred.rel "mutex", p8, p9
+	st8		[rp] = s0, 8
+  (p8)	add		acc0 = pr1_2, acc1_1, 1
+  (p9)	add		acc0 = pr1_2, acc1_1
+	;;
+	.pred.rel "mutex", p8, p9
+  (p8)	cmp.leu		p10, p11 = acc0, pr1_2
+  (p9)	cmp.ltu		p10, p11 = acc0, pr1_2
+  (p12)	add		acc0 = 1, acc0
+	;;
+	st8		[rp] = acc0, 8
+  (p12)	cmp.eq.or	p10, p0 = 0, acc0
+	mov		r8 = acc1_2
+	;;
+	.pred.rel "mutex", p10, p11
+  (p10)	add		r8 = 1, r8
+	mov.i		ar.lc = r2
+	br.ret.sptk.many b0
 EPILOGUE()
 ASM_END()
diff --git a/gmp/mpn/ia64/aors_n.asm b/gmp/mpn/ia64/aors_n.asm
index 81be606190..fd3aaac460 100644
--- a/gmp/mpn/ia64/aors_n.asm
+++ b/gmp/mpn/ia64/aors_n.asm
@@ -1,34 +1,21 @@
 dnl  IA-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2003-2005, 2010, 2011 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -39,818 +26,586 @@ C Itanium 2:    1.25
 C TODO
 C  * Consider using special code for small n, using something like
 C    "switch (8 * (n >= 8) + (n mod 8))" to enter it and feed-in code.
-C  * The non-nc code was trimmed cycle for cycle to its current state.  It is
-C    probably hard to save more that an odd cycle there.  The nc code is much
-C    cruder (since tune/speed doesn't have any applicable direct measurements).
-C  * Without the nc entry points, this becomes around 1800 bytes of object
-C    code; the nc code adds over 1000 bytes.  We should perhaps sacrifice a
-C    few cycles for the non-nc code and let it fall into the nc code.
 
 C INPUT PARAMETERS
-define(`rp', `r32')
-define(`up', `r33')
-define(`vp', `r34')
-define(`n',  `r35')
-define(`cy', `r36')
+define(`rp',`r32')
+define(`up',`r33')
+define(`vp',`r34')
+define(`n',`r35')
 
 ifdef(`OPERATION_add_n',`
   define(ADDSUB,	add)
-  define(CND,		ltu)
+  define(PRED,		ltu)
   define(INCR,		1)
   define(LIM,		-1)
-  define(LIM2,		0)
-  define(func,    mpn_add_n)
-  define(func_nc, mpn_add_nc)
+  define(func, mpn_add_n)
 ')
 ifdef(`OPERATION_sub_n',`
   define(ADDSUB,	sub)
-  define(CND,		gtu)
+  define(PRED,		gtu)
   define(INCR,		-1)
   define(LIM,		0)
-  define(LIM2,		-1)
-  define(func,    mpn_sub_n)
-  define(func_nc, mpn_sub_nc)
+  define(func, mpn_sub_n)
 ')
 
-define(cmpeqor, `cmp.eq.or')
-define(PFDIST, 500)
-
 C Some useful aliases for registers we use
 define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
+define(`u4',`r18') define(`u5',`r19') define(`u6',`r20') define(`u7',`r21')
 define(`v0',`r24') define(`v1',`r25') define(`v2',`r26') define(`v3',`r27')
-define(`w0',`r28') define(`w1',`r29') define(`w2',`r30') define(`w3',`r31')
+define(`v4',`r28') define(`v5',`r29') define(`v6',`r30') define(`v7',`r31')
+define(`w0',`r22') define(`w1',`r9') define(`w2',`r8') define(`w3',`r23')
+define(`w4',`r22') define(`w5',`r9') define(`w6',`r8') define(`w7',`r23')
 define(`rpx',`r3')
-define(`upadv',`r20') define(`vpadv',`r21')
 
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
 
 ASM_START()
-PROLOGUE(func_nc)
+PROLOGUE(func)
 	.prologue
 	.save	ar.lc, r2
 	.body
 ifdef(`HAVE_ABI_32',`
-	addp4	rp = 0, rp		C			M I
-	addp4	up = 0, up		C			M I
-	nop.i	0
-	addp4	vp = 0, vp		C			M I
-	nop.m	0
-	zxt4	n = n			C			I
+	addp4		rp = 0, rp		C			M I
+	addp4		up = 0, up		C			M I
+	addp4		vp = 0, vp		C			M I
+	zxt4		n = n			C			I
 	;;
 ')
-
- {.mmi;	ld8	r11 = [vp], 8		C			M01
-	ld8	r10 = [up], 8		C			M01
-	mov	r2 = ar.lc		C			I0
-}{.mmi;	and	r14 = 7, n		C			M I
-	cmp.lt	p15, p14 = 8, n		C			M I
-	add	n = -6, n		C			M I
+{.mmi		C 00
+	ld8		r11 = [vp], 8		C			M01
+	ld8		r10 = [up], 8		C			M01
+	mov.i		r2 = ar.lc		C			I0
+}
+{.mmi
+	and		r14 = 7, n		C			M I
+	cmp.lt		p15, p14 = 8, n		C			M I
+	add		n = -8, n		C			M I
 	;;
 }
-.mmi;	add	upadv = PFDIST, up	C Merging these lines into the feed-in
-	add	vpadv = PFDIST, vp	C code could save a cycle per call at
-	mov	r23 = cy		C the expense of code size.
-	;;
-{.mmi;	cmp.eq	p6, p0 = 1, r14		C			M I
-	cmp.eq	p7, p0 = 2, r14		C			M I
-	cmp.eq	p8, p0 = 3, r14		C			M I
-}{.bbb
-   (p6)	br.dptk	.Lc001			C			B
-   (p7)	br.dptk	.Lc010			C			B
-   (p8)	br.dptk	.Lc011			C			B
-	;;
-}{.mmi;	cmp.eq	p9, p0 = 4, r14		C			M I
-	cmp.eq	p10, p0 = 5, r14	C			M I
-	cmp.eq	p11, p0 = 6, r14	C			M I
-}{.bbb
-   (p9)	br.dptk	.Lc100			C			B
-  (p10)	br.dptk	.Lc101			C			B
-  (p11)	br.dptk	.Lc110			C			B
-	;;
-}{.mmi;	ld8	r19 = [vp], 8		C			M01
-	ld8	r18 = [up], 8		C			M01
-	cmp.ne	p13, p0 = 0, cy		C copy cy to p13	M I
-}{.mmb;	cmp.eq	p12, p0 = 7, r14	C			M I
-	nop	0
-  (p12)	br.dptk	.Lc111			C			B
+{.mmi		C 01
+	cmp.eq		p6, p0 = 1, r14		C			M I
+	cmp.eq		p7, p0 = 2, r14		C			M I
+	cmp.eq		p8, p0 = 3, r14		C			M I
+}
+{.bbb
+   (p6)	br.dptk		.Lb001			C			B
+   (p7)	br.dptk		.Lb010			C			B
+   (p8)	br.dptk		.Lb011			C			B
 	;;
 }
+{.mmi		C 02
+	cmp.eq		p9, p0 = 4, r14		C			M I
+	cmp.eq		p10, p0 = 5, r14	C			M I
+	cmp.eq		p11, p0 = 6, r14	C			M I
+}
+{.bbb
+   (p9)	br.dptk		.Lb100			C			B
+  (p10)	br.dptk		.Lb101			C			B
+  (p11)	br.dptk		.Lb110			C			B
+	;;
+}		C 03
+{.mmb
+	cmp.eq		p12, p0 = 7, r14	C			M I
+	add		n = -1, n		C loop count		M I
+  (p12)	br.dptk		.Lb111			C			B
+}
 
-.Lc000:
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-	;;
-.mmi;	add	vpadv = PFDIST, vp	C			M I
-	ld8	v0 = [vp], 8		C			M01
-	mov	ar.lc = n		C			I0
-.mmi;	ld8	u0 = [up], 8		C			M01
-	ADDSUB	w1 = r10, r11		C			M I
-	nop	0
-	;;
-.mmi;	add	upadv = PFDIST, up	C			M I
-	ld8	v1 = [vp], 8		C			M01
-	cmp.CND	p7, p0 = w1, r10	C			M I
-.mmi;	ld8	u1 = [up], 8		C			M01
-	ADDSUB	w2 = r18, r19		C			M I
-	add	rpx = 8, rp		C			M I
-	;;
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	cmp.CND	p8, p0 = w2, r18	C			M I
-  (p13)	cmpeqor	p7, p0 = LIM, w1	C			M I
-.mmi;	ld8	u2 = [up], 8		C			M01
-  (p13)	add	w1 = INCR, w1		C			M I
-	ADDSUB	w3 = u3, v3		C			M I
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	cmp.CND	p9, p0 = w3, u3		C			M I
-   (p7)	cmpeqor	p8, p0 = LIM, w2	C			M I
-.mmb;	ld8	u3 = [up], 8		C			M01
-   (p7)	add	w2 = INCR, w2		C			M I
-	br	L(m0)
-
-
-.Lc001:
-.mmi;
-  (p15)	ld8	v1 = [vp], 8		C			M01
-  (p15)	ld8	u1 = [up], 8		C			M01
-	ADDSUB	w0 = r10, r11		C			M I
-.mmb;	nop	0
-	nop	0
-  (p15)	br	1f
-	;;
-.mmi;	cmp.ne	p9, p0 = 0, r23		C			M I
-	mov	r8 = 0
-	cmp.CND	p6, p0 = w0, r10	C			M I
-	;;
-.mmb;
-   (p9)	cmpeqor	p6, p0 = LIM, w0	C			M I
-   (p9)	add	w0 = INCR, w0		C			M I
-	br	L(cj1)			C			B
-1:
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	mov	ar.lc = n		C			I0
-.mmi;	nop	0
-	cmp.ne	p9, p0 = 0, r23		C			M I
-	nop	0
-	;;
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	cmp.CND	p6, p0 = w0, r10	C			M I
-	add	rpx = 16, rp		C			M I
-.mmb;	ld8	u0 = [up], 8		C			M01
-	ADDSUB	w1 = u1, v1		C			M I
-	br	L(c1)			C			B
-
-
-.Lc010:
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	mov	r8 = 0			C			M I
-.mmb;	ADDSUB	w3 = r10, r11		C			M I
-	cmp.ne	p8, p0 = 0, r23		C			M I
-  (p15)	br	1f			C			B
-	;;
-.mmi;	cmp.CND	p9, p0 = w3, r10	C			M I
-	ADDSUB	w0 = u0, v0		C			M I
-   (p8)	add	w3 = INCR, w3		C			M I
-	;;
-.mmb;	cmp.CND	p6, p0 = w0, u0		C			M I
-   (p8)	cmpeqor	p9, p0 = LIM2, w3	C			M I
-	br	L(cj2)			C			B
-1:
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-	;;
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	mov	ar.lc = n		C			I0
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	cmp.CND	p9, p0 = w3, r10	C			M I
-	;;
-.mmi;
-   (p8)	cmpeqor	p9, p0 = LIM, w3	C			M I
-   (p8)	add	w3 = INCR, w3		C			M I
-	ADDSUB	w0 = u0, v0		C			M I
-.mmb;	add	rpx = 24, rp		C			M I
-	nop	0
-	br	L(m23)			C			B
-
-
-.Lc011:
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-.mmi;	ADDSUB	w2 = r10, r11		C			M I
-	cmp.ne	p7, p0 = 0, r23		C			M I
-	nop	0
-	;;
-.mmb;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-  (p15)	br	1f			C			B
-.mmi;	cmp.CND	p8, p0 = w2, r10	C			M I
-	ADDSUB	w3 = u3, v3		C			M I
-	nop	0
-	;;
-.mmb;
-   (p7)	cmpeqor	p8, p0 = LIM, w2	C			M I
-   (p7)	add	w2 = INCR, w2		C			M I
-	br	L(cj3)			C			B
-1:
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	ADDSUB	w3 = u3, v3		C			M I
-	;;
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	cmp.CND	p8, p0 = w2, r10	C			M I
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	cmp.CND	p9, p0 = w3, u3		C			M I
-	mov	ar.lc = n		C			I0
-.mmi;	ld8	u3 = [up], 8		C			M01
-   (p7)	cmpeqor	p8, p0 = LIM, w2	C			M I
-   (p7)	add	w2 = INCR, w2		C			M I
-	;;
-.mmi;	add	rpx = 32, rp		C			M I
-	st8	[rp] = w2, 8		C			M23
-   (p8)	cmpeqor	p9, p0 = LIM, w3	C			M I
-.mmb;
-   (p8)	add	w3 = INCR, w3		C			M I
-	ADDSUB	w0 = u0, v0		C			M I
-	br	L(m23)
-
-
-.Lc100:
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-.mmi;	ADDSUB	w1 = r10, r11		C			M I
-	nop	0
-	nop	0
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	add	rpx = 8, rp		C			M I
-.mmi;	cmp.ne	p6, p0 = 0, r23		C			M I
-	cmp.CND	p7, p0 = w1, r10	C			M I
-	nop	0
-	;;
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	ADDSUB	w2 = u2, v2		C			M I
-.mmb;
-   (p6)	cmpeqor	p7, p0 = LIM, w1	C			M I
-   (p6)	add	w1 = INCR, w1		C			M I
-  (p14)	br	L(cj4)
-	;;
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	mov	ar.lc = n		C			I0
-	;;
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	cmp.CND	p8, p0 = w2, u2		C			M I
-	nop	0
-.mmi;	ld8	u2 = [up], 8		C			M01
-	nop	0
-	ADDSUB	w3 = u3, v3		C			M I
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	cmp.CND	p9, p0 = w3, u3		C			M I
-   (p7)	cmpeqor	p8, p0 = LIM, w2	C			M I
-.mmb;	ld8	u3 = [up], 8		C			M01
-   (p7)	add	w2 = INCR, w2		C			M I
-	br	L(m4)
-
-
-.Lc101:
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-	;;
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	mov	ar.lc = n		C			I0
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	ADDSUB	w0 = r10, r11		C			M I
-.mmi;	cmp.ne	p9, p0 = 0, r23		C			M I
-	add	rpx = 16, rp		C			M I
-	nop	0
-	;;
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	cmp.CND	p6, p0 = w0, r10	C			M I
-.mbb;	ADDSUB	w1 = u1, v1		C			M I
-  (p15)	br	L(c5)			C			B
-	br	L(end)			C			B
-
-
-.Lc110:
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-	;;
-.mmi;	add	upadv = PFDIST, up	C			M I
-	add	vpadv = PFDIST, vp	C			M I
-	mov	ar.lc = n		C			I0
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	ADDSUB	w3 = r10, r11		C			M I
-	;;
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	ADDSUB	w0 = u0, v0		C			M I
-.mmi;	cmp.CND	p9, p0 = w3, r10	C			M I
-	cmp.ne	p8, p0 = 0, r23		C			M I
-	add	rpx = 24, rp		C			M I
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	nop	0
-.mmb;
-   (p8)	cmpeqor	p9, p0 = LIM, w3	C			M I
-   (p8)	add	w3 = INCR, w3		C			M I
-	br	L(m67)			C			B
-
-
-.Lc111:
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-	;;
-.mmi;	add	upadv = PFDIST, up	C			M I
-	ld8	v1 = [vp], 8		C			M01
-	mov	ar.lc = n		C			I0
-.mmi;	ld8	u1 = [up], 8		C			M01
-	ADDSUB	w2 = r10, r11		C			M I
-	nop	0
-	;;
-.mmi;	add	vpadv = PFDIST, vp	C			M I
-	ld8	v2 = [vp], 8		C			M01
-	cmp.CND	p8, p0 = w2, r10	C			M I
-.mmi;	ld8	u2 = [up], 8		C			M01
-	ADDSUB	w3 = r18, r19		C			M I
-	nop	0
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	cmp.CND	p9, p0 = w3, r18	C			M I
-  (p13)	cmpeqor	p8, p0 = LIM, w2	C			M I
-.mmi;	ld8	u3 = [up], 8		C			M01
-  (p13)	add	w2 = INCR, w2		C			M I
-	nop	0
-	;;
-.mmi;	add	rpx = 32, rp		C			M I
-	st8	[rp] = w2, 8		C			M23
-   (p8)	cmpeqor	p9, p0 = LIM, w3	C			M I
-.mmb;
-   (p8)	add	w3 = INCR, w3		C			M I
-	ADDSUB	w0 = u0, v0		C			M I
-	br	L(m67)
 
-EPILOGUE()
-
-PROLOGUE(func)
-	.prologue
-	.save	ar.lc, r2
-	.body
-ifdef(`HAVE_ABI_32',`
-	addp4	rp = 0, rp		C			M I
-	addp4	up = 0, up		C			M I
-	nop.i	0
-	addp4	vp = 0, vp		C			M I
-	nop.m	0
-	zxt4	n = n			C			I
+.Lb000:	ld8		v2 = [vp], 8		C			M01
+	ld8		u2 = [up], 8		C			M01
+	add		rpx = 8, rp		C			M I
+	;;
+	ld8		v3 = [vp], 8		C			M01
+	ld8		u3 = [up], 8		C			M01
+	ADDSUB		w1 = r10, r11		C			M I
 	;;
-')
-
- {.mmi;	ld8	r11 = [vp], 8		C			M01
-	ld8	r10 = [up], 8		C			M01
-	mov	r2 = ar.lc		C			I0
-}{.mmi;	and	r14 = 7, n		C			M I
-	cmp.lt	p15, p14 = 8, n		C			M I
-	add	n = -6, n		C			M I
-	;;
-}{.mmi;	cmp.eq	p6, p0 = 1, r14		C			M I
-	cmp.eq	p7, p0 = 2, r14		C			M I
-	cmp.eq	p8, p0 = 3, r14		C			M I
-}{.bbb
-   (p6)	br.dptk	.Lb001			C			B
-   (p7)	br.dptk	.Lb010			C			B
-   (p8)	br.dptk	.Lb011			C			B
-	;;
-}{.mmi;	cmp.eq	p9, p0 = 4, r14		C			M I
-	cmp.eq	p10, p0 = 5, r14	C			M I
-	cmp.eq	p11, p0 = 6, r14	C			M I
-}{.bbb
-   (p9)	br.dptk	.Lb100			C			B
-  (p10)	br.dptk	.Lb101			C			B
-  (p11)	br.dptk	.Lb110			C			B
-	;;
-}{.mmi;	ld8	r19 = [vp], 8		C			M01
-	ld8	r18 = [up], 8		C			M01
-	cmp.ne	p13, p0 = r0, r0	C clear "CF"		M I
-}{.mmb;	cmp.eq	p12, p0 = 7, r14	C			M I
-	mov	r23 = 0			C			M I
-  (p12)	br.dptk	.Lb111			C			B
+	ld8		v4 = [vp], 8		C			M01
+	ld8		u4 = [up], 8		C			M01
+	cmp.PRED	p7, p0 = w1, r10	C			M I
 	;;
-}
-
-.Lb000:
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-	;;
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	ADDSUB	w1 = r10, r11		C			M I
-	;;
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	cmp.CND	p7, p0 = w1, r10	C			M I
-	mov	ar.lc = n		C			I0
-.mmi;	ld8	u1 = [up], 8		C			M01
-	ADDSUB	w2 = r18, r19		C			M I
-	add	rpx = 8, rp		C			M I
-	;;
-.mmi;	add	upadv = PFDIST, up
-	add	vpadv = PFDIST, vp
-	cmp.CND	p8, p0 = w2, r18	C			M I
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	ADDSUB	w3 = u3, v3		C			M I
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	cmp.CND	p9, p0 = w3, u3		C			M I
-   (p7)	cmpeqor	p8, p0 = LIM, w2	C			M I
-.mmb;	ld8	u3 = [up], 8		C			M01
-   (p7)	add	w2 = INCR, w2		C			M I
-	br	L(m0)			C			B
-
-
-	ALIGN(32)
-.Lb001:
-.mmi;	ADDSUB	w0 = r10, r11		C			M I
-  (p15)	ld8	v1 = [vp], 8		C			M01
-	mov	r8 = 0			C			M I
-	;;
-.mmb;	cmp.CND	p6, p0 = w0, r10	C			M I
-  (p15)	ld8	u1 = [up], 8		C			M01
-  (p14)	br	L(cj1)			C			B
-	;;
-.mmi;	add	upadv = PFDIST, up
-	add	vpadv = PFDIST, vp
-	shr.u	n = n, 3		C			I0
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	cmp.CND	p6, p0 = w0, r10	C			M I
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	mov	ar.lc = n		C			I0
-	;;
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	ADDSUB	w1 = u1, v1		C			M I
-	;;
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	cmp.CND	p7, p0 = w1, u1		C			M I
-	ADDSUB	w2 = u2, v2		C			M I
-.mmb;	ld8	u1 = [up], 8		C			M01
-	add	rpx = 16, rp		C			M I
-	br	L(m1)			C			B
-
-
-	ALIGN(32)
-.Lb010:
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-.mmb;	ADDSUB	w3 = r10, r11		C			M I
-	nop	0
-  (p15)	br	L(gt2)			C			B
-	;;
-.mmi;	cmp.CND	p9, p0 = w3, r10	C			M I
-	ADDSUB	w0 = u0, v0		C			M I
-	mov	r8 = 0			C			M I
-	;;
-.mmb;	nop	0
-	cmp.CND	p6, p0 = w0, u0		C			M I
-	br	L(cj2)			C			B
-L(gt2):
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	nop	0
-	;;
-.mmi;	add	upadv = PFDIST, up
-	add	vpadv = PFDIST, vp
-	mov	ar.lc = n		C			I0
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	nop	0
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	cmp.CND	p9, p0 = w3, r10	C			M I
-	ADDSUB	w0 = u0, v0		C			M I
-.mmb;	ld8	u3 = [up], 8		C			M01
-	add	rpx = 24, rp		C			M I
-	br	L(m23)			C			B
-
-
-	ALIGN(32)
-.Lb011:
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	ADDSUB	w2 = r10, r11		C			M I
-	;;
-.mmb;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-  (p15)	br	1f			C			B
-.mmb;	cmp.CND	p8, p0 = w2, r10	C			M I
-	ADDSUB	w3 = u3, v3		C			M I
-	br	L(cj3)			C			B
-1:
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-	;;
-.mmi;	add	upadv = PFDIST, up
-	add	vpadv = PFDIST, vp
-	ADDSUB	w3 = u3, v3		C			M I
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	cmp.CND	p8, p0 = w2, r10	C			M I
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	cmp.CND	p9, p0 = w3, u3		C			M I
-	mov	ar.lc = n		C			I0
-.mmi;	ld8	u3 = [up], 8		C			M01
-	nop	0
-	nop	0
-	;;
-.mmi;	add	rpx = 32, rp		C			M I
-	st8	[rp] = w2, 8		C			M23
-   (p8)	cmpeqor	p9, p0 = LIM, w3	C			M I
-.mmb;
-   (p8)	add	w3 = INCR, w3		C			M I
-	ADDSUB	w0 = u0, v0		C			M I
-	br	L(m23)			C			B
-
-
-	ALIGN(32)
-.Lb100:
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	ADDSUB	w1 = r10, r11		C			M I
-	;;
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	cmp.CND	p7, p0 = w1, r10	C			M I
-.mmb;	nop	0
-	ADDSUB	w2 = u2, v2		C			M I
-  (p14)	br	L(cj4)			C			B
-	;;
-L(gt4):
-.mmi;	add	upadv = PFDIST, up
-	add	vpadv = PFDIST, vp
-	mov	ar.lc = n		C			I0
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	nop	0
-	;;
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	cmp.CND	p8, p0 = w2, u2		C			M I
-	nop	0
-.mmi;	ld8	u2 = [up], 8		C			M01
-	ADDSUB	w3 = u3, v3		C			M I
-	add	rpx = 8, rp		C			M I
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	cmp.CND	p9, p0 = w3, u3		C			M I
-   (p7)	cmpeqor	p8, p0 = LIM, w2	C			M I
-.mmb;	ld8	u3 = [up], 8		C			M01
-   (p7)	add	w2 = INCR, w2		C			M I
-	br	L(m4)			C			B
-
-
-	ALIGN(32)
-.Lb101:
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-	;;
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	ADDSUB	w0 = r10, r11		C			M I
-	;;
-.mmi;	add	upadv = PFDIST, up
-	add	vpadv = PFDIST, vp
-	add	rpx = 16, rp		C			M I
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	nop	0
-	;;
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	cmp.CND	p6, p0 = w0, r10	C			M I
-	nop	0
-.mmb;	ld8	u0 = [up], 8		C			M01
-	ADDSUB	w1 = u1, v1		C			M I
-  (p14)	br	L(cj5)			C			B
-	;;
-L(gt5):
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	cmp.CND	p7, p0 = w1, u1		C			M I
-	mov	ar.lc = n		C			I0
-.mmb;	ld8	u1 = [up], 8		C			M01
-	ADDSUB	w2 = u2, v2		C			M I
-	br	L(m5)			C			B
-
-
-	ALIGN(32)
-.Lb110:
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-	;;
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	ADDSUB	w3 = r10, r11		C			M I
-	;;
-.mmi;	add	upadv = PFDIST, up
-	add	vpadv = PFDIST, vp
-	mov	ar.lc = n		C			I0
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	nop	0
-	;;
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	cmp.CND	p9, p0 = w3, r10	C			M I
-	ADDSUB	w0 = u0, v0		C			M I
-.mmb;	ld8	u3 = [up], 8		C			M01
-	add	rpx = 24, rp		C			M I
-	br	L(m67)			C			B
-
+	ld8		v5 = [vp], 8		C			M01
+	ld8		u5 = [up], 8		C			M01
+	ADDSUB		w2 = u2, v2		C			M I
+	;;
+	ld8		v6 = [vp], 8		C			M01
+	ld8		u6 = [up], 8		C			M01
+	cmp.PRED	p8, p0 = w2, u2		C			M I
+	;;
+	ld8		v7 = [vp], 8		C			M01
+	ld8		u7 = [up], 8		C			M01
+	ADDSUB		w3 = u3, v3		C			M I
+	;;
+	ld8		v0 = [vp], 8		C			M01
+	ld8		u0 = [up], 8		C			M01
+	cmp.PRED	p9, p0 = w3, u3		C			M I
+   (p7)	cmp.eq.or	p8, p0 = LIM, w2	C			M I
+   (p7)	add		w2 = INCR, w2		C			M I
+  (p14)	br.cond.dptk	.Lcj8			C			B
+	;;
+
+.grt8:	ld8		v1 = [vp], 8		C			M01
+	ld8		u1 = [up], 8		C			M01
+	shr.u		n = n, 3		C			I0
+	;;
+	add		r11 = 512, vp
+	ld8		v2 = [vp], 8		C			M01
+	add		r10 = 512, up
+	ld8		u2 = [up], 8		C			M01
+	nop.i		0
+	nop.b		0
+	;;
+	ld8		v3 = [vp], 8		C			M01
+	ld8		u3 = [up], 8		C			M01
+	mov.i		ar.lc = n		C			I0
+	br		.LL000			C			B
 
-	ALIGN(32)
-.Lb111:
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	shr.u	n = n, 3		C			I0
-	;;
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	ADDSUB	w2 = r10, r11		C			M I
-	;;
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	cmp.CND	p8, p0 = w2, r10	C			M I
-	mov	ar.lc = n		C			I0
-.mmi;	ld8	u2 = [up], 8		C			M01
-	ADDSUB	w3 = r18, r19		C			M I
-	nop	0
-	;;
-.mmi;	add	upadv = PFDIST, up
-	add	vpadv = PFDIST, vp
-	nop	0
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	cmp.CND	p9, p0 = w3, r18	C			M I
-	;;
-.mmi;	add	rpx = 32, rp		C			M I
-	st8	[rp] = w2, 8		C			M23
-   (p8)	cmpeqor	p9, p0 = LIM, w3	C			M I
-.mmb;
-   (p8)	add	w3 = INCR, w3		C			M I
-	ADDSUB	w0 = u0, v0		C			M I
-	br	L(m67)			C			B
+.Lb001:	add		rpx = 16, rp		C			M I
+	ADDSUB		w0 = r10, r11		C			M I
+  (p15)	br.cond.dpnt	.grt1			C			B
+	;;
+	cmp.PRED	p6, p0 = w0, r10	C			M I
+	mov		r8 = 0			C			M I
+	br		.Lcj1			C			B
 
+.grt1:	ld8		v1 = [vp], 8		C			M01
+	ld8		u1 = [up], 8		C			M01
+	shr.u		n = n, 3		C			I0
+	;;
+	ld8		v2 = [vp], 8		C			M01
+	ld8		u2 = [up], 8		C			M01
+	cmp.ne		p9, p0 = r0, r0		C clear p9 (no carry-in); p9 is read at .Loop
+	;;
+	ld8		v3 = [vp], 8		C			M01
+	ld8		u3 = [up], 8		C			M01
+	mov.i		ar.lc = n		C			I0
+	;;
+	ld8		v4 = [vp], 8		C			M01
+	ld8		u4 = [up], 8		C			M01
+	cmp.PRED	p6, p0 = w0, r10	C			M I
+	;;
+	ld8		v5 = [vp], 8		C			M01
+	ld8		u5 = [up], 8		C			M01
+	ADDSUB		w1 = u1, v1		C			M I
+	;;
+	ld8		v6 = [vp], 8		C			M01
+	ld8		u6 = [up], 8		C			M01
+	cmp.PRED	p7, p0 = w1, u1		C			M I
+	;;
+	ld8		v7 = [vp], 8		C			M01
+	ld8		u7 = [up], 8		C			M01
+	ADDSUB		w2 = u2, v2		C			M I
+	;;
+	add		r11 = 512, vp
+	ld8		v0 = [vp], 8		C			M01
+	add		r10 = 512, up
+	ld8		u0 = [up], 8		C			M01
+	br.cloop.dptk	.Loop			C			B
+	br		.Lcj9			C			B
+
+.Lb010:	ld8		v0 = [vp], 8		C			M01
+	ld8		u0 = [up], 8		C			M01
+	add		rpx = 24, rp		C			M I
+	ADDSUB		w7 = r10, r11		C			M I
+  (p15)	br.cond.dpnt	.grt2			C			B
+	;;
+	cmp.PRED	p9, p0 = w7, r10	C			M I
+	ADDSUB		w0 = u0, v0		C			M I
+	br		.Lcj2			C			B
+
+.grt2:	ld8		v1 = [vp], 8		C			M01
+	ld8		u1 = [up], 8		C			M01
+	shr.u		n = n, 3		C			I0
+	;;
+	ld8		v2 = [vp], 8		C			M01
+	ld8		u2 = [up], 8		C			M01
+	;;
+	ld8		v3 = [vp], 8		C			M01
+	ld8		u3 = [up], 8		C			M01
+	mov.i		ar.lc = n		C			I0
+	;;
+	ld8		v4 = [vp], 8		C			M01
+	ld8		u4 = [up], 8		C			M01
+	;;
+	ld8		v5 = [vp], 8		C			M01
+	ld8		u5 = [up], 8		C			M01
+	cmp.PRED	p9, p0 = w7, r10	C			M I
+	;;
+	ld8		v6 = [vp], 8		C			M01
+	ld8		u6 = [up], 8		C			M01
+	ADDSUB		w0 = u0, v0		C			M I
+	;;
+	add		r11 = 512, vp
+	ld8		v7 = [vp], 8		C			M01
+	add		r10 = 512, up
+	ld8		u7 = [up], 8		C			M01
+	br		.LL01x			C			B
+
+.Lb011:	ld8		v7 = [vp], 8		C			M01
+	ld8		u7 = [up], 8		C			M01
+	ADDSUB		w6 = r10, r11		C			M I
+	;;
+	ld8		v0 = [vp], 8		C			M01
+	ld8		u0 = [up], 8		C			M01
+  (p15)	br.cond.dpnt	.grt3			C			B
+	;;
+	cmp.PRED	p8, p0 = w6, r10	C			M I
+	ADDSUB		w7 = u7, v7		C			M I
+	;;
+	st8		[rp] = w6, 8		C			M23
+	cmp.PRED	p9, p0 = w7, u7		C			M I
+	br		.Lcj3			C			B
+
+.grt3:	ld8		v1 = [vp], 8		C			M01
+	ld8		u1 = [up], 8		C			M01
+	add		rpx = 32, rp		C			M I
+	;;
+	ld8		v2 = [vp], 8		C			M01
+	ld8		u2 = [up], 8		C			M01
+	shr.u		n = n, 3		C			I0
+	;;
+	ld8		v3 = [vp], 8		C			M01
+	ld8		u3 = [up], 8		C			M01
+	cmp.PRED	p8, p0 = w6, r10	C			M I
+	;;
+	ld8		v4 = [vp], 8		C			M01
+	ld8		u4 = [up], 8		C			M01
+	mov.i		ar.lc = n		C			I0
+	ADDSUB		w7 = u7, v7		C			M I
+	nop.i		0
+	nop.b		0
+	;;
+	ld8		v5 = [vp], 8		C			M01
+	ld8		u5 = [up], 8		C			M01
+	cmp.PRED	p9, p0 = w7, u7		C			M I
+	;;
+	add		r11 = 512, vp
+	ld8		v6 = [vp], 8		C			M01
+	add		r10 = 512, up
+	ld8		u6 = [up], 8		C			M01
+   (p8)	cmp.eq.or	p9, p0 = LIM, w7	C			M I
+	;;
+	ld8		v7 = [vp], 8		C			M01
+	ld8		u7 = [up], 8		C			M01
+   (p8)	add		w7 = INCR, w7		C			M I
+	st8		[rp] = w6, 8		C			M23
+	ADDSUB		w0 = u0, v0		C			M I
+	br		.LL01x			C			B
+
+.Lb100:	ld8		v6 = [vp], 8		C			M01
+	ld8		u6 = [up], 8		C			M01
+	add		rpx = 8, rp		C			M I
+	;;
+	ld8		v7 = [vp], 8		C			M01
+	ld8		u7 = [up], 8		C			M01
+	ADDSUB		w5 = r10, r11		C			M I
+	;;
+	ld8		v0 = [vp], 8		C			M01
+	ld8		u0 = [up], 8		C			M01
+  (p15)	br.cond.dpnt	.grt4			C			B
+	;;
+	cmp.PRED	p7, p0 = w5, r10	C			M I
+	ADDSUB		w6 = u6, v6		C			M I
+	;;
+	cmp.PRED	p8, p0 = w6, u6		C			M I
+	ADDSUB		w7 = u7, v7		C			M I
+	br		.Lcj4			C			B
+
+.grt4:	ld8		v1 = [vp], 8		C			M01
+	ld8		u1 = [up], 8		C			M01
+	shr.u		n = n, 3		C			I0
+	cmp.PRED	p7, p0 = w5, r10	C			M I
+	;;
+	ld8		v2 = [vp], 8		C			M01
+	ld8		u2 = [up], 8		C			M01
+	ADDSUB		w6 = u6, v6		C			M I
+	;;
+	ld8		v3 = [vp], 8		C			M01
+	ld8		u3 = [up], 8		C			M01
+	cmp.PRED	p8, p0 = w6, u6		C			M I
+	;;
+	ld8		v4 = [vp], 8		C			M01
+	ld8		u4 = [up], 8		C			M01
+	mov.i		ar.lc = n		C			I0
+	;;
+	ld8		v5 = [vp], 8		C			M01
+	ld8		u5 = [up], 8		C			M01
+	ADDSUB		w7 = u7, v7		C			M I
+	;;
+	add		r11 = 512, vp
+	ld8		v6 = [vp], 8		C			M01
+	add		r10 = 512, up
+	ld8		u6 = [up], 8		C			M01
+	cmp.PRED	p9, p0 = w7, u7		C			M I
+	;;
+	ld8		v7 = [vp], 8		C			M01
+	ld8		u7 = [up], 8		C			M01
+   (p7)	cmp.eq.or	p8, p0 = LIM, w6	C			M I
+   (p7)	add		w6 = INCR, w6		C			M I
+	br		.LL100			C			B
+
+.Lb101:	ld8		v5 = [vp], 8		C			M01
+	ld8		u5 = [up], 8		C			M01
+	add		rpx = 16, rp		C			M I
+	;;
+	ld8		v6 = [vp], 8		C			M01
+	ld8		u6 = [up], 8		C			M01
+	ADDSUB		w4 = r10, r11		C			M I
+	;;
+	ld8		v7 = [vp], 8		C			M01
+	ld8		u7 = [up], 8		C			M01
+	cmp.PRED	p6, p0 = w4, r10	C			M I
+	;;
+	ld8		v0 = [vp], 8		C			M01
+	ld8		u0 = [up], 8		C			M01
+	ADDSUB		w5 = u5, v5		C			M I
+	shr.u		n = n, 3		C			I0
+  (p15)	br.cond.dpnt	.grt5			C			B
+	;;
+	cmp.PRED	p7, p0 = w5, u5		C			M I
+	ADDSUB		w6 = u6, v6		C			M I
+	br		.Lcj5			C			B
+
+.grt5:	ld8		v1 = [vp], 8		C			M01
+	ld8		u1 = [up], 8		C			M01
+	;;
+	ld8		v2 = [vp], 8		C			M01
+	ld8		u2 = [up], 8		C			M01
+	mov.i		ar.lc = n		C			I0
+	;;
+	ld8		v3 = [vp], 8		C			M01
+	ld8		u3 = [up], 8		C			M01
+	cmp.PRED	p7, p0 = w5, u5		C			M I
+	;;
+	ld8		v4 = [vp], 8		C			M01
+	ld8		u4 = [up], 8		C			M01
+	ADDSUB		w6 = u6, v6		C			M I
+	;;
+	add		r11 = 512, vp
+	ld8		v5 = [vp], 8		C			M01
+	add		r10 = 512, up
+	ld8		u5 = [up], 8		C			M01
+	br		.LL101			C			B
+
+.Lb110:	ld8		v4 = [vp], 8		C			M01
+	ld8		u4 = [up], 8		C			M01
+	add		rpx = 24, rp		C			M I
+	;;
+	ld8		v5 = [vp], 8		C			M01
+	ld8		u5 = [up], 8		C			M01
+	ADDSUB		w3 = r10, r11		C			M I
+	;;
+	ld8		v6 = [vp], 8		C			M01
+	ld8		u6 = [up], 8		C			M01
+	shr.u		n = n, 3		C			I0
+	;;
+	ld8		v7 = [vp], 8		C			M01
+	ld8		u7 = [up], 8		C			M01
+	cmp.PRED	p9, p0 = w3, r10	C			M I
+	;;
+	ld8		v0 = [vp], 8		C			M01
+	ld8		u0 = [up], 8		C			M01
+	ADDSUB		w4 = u4, v4		C			M I
+  (p14)	br.cond.dptk	.Lcj67			C			B
+	;;
+
+.grt6:	ld8		v1 = [vp], 8		C			M01
+	ld8		u1 = [up], 8		C			M01
+	mov.i		ar.lc = n		C			I0
+	cmp.PRED	p9, p0 = w3, r10	C			M I
+	nop.i		0
+	nop.b		0
+	;;
+	ld8		v2 = [vp], 8		C			M01
+	ld8		u2 = [up], 8		C			M01
+	ADDSUB		w4 = u4, v4		C			M I
+	;;
+	add		r11 = 512, vp
+	ld8		v3 = [vp], 8		C			M01
+	add		r10 = 512, up
+	ld8		u3 = [up], 8		C			M01
+	br		.LL11x			C			B
+
+.Lb111:	ld8		v3 = [vp], 8		C			M01
+	ld8		u3 = [up], 8		C			M01
+	add		rpx = 32, rp		C			M I
+	;;
+	ld8		v4 = [vp], 8		C			M01
+	ld8		u4 = [up], 8		C			M01
+	ADDSUB		w2 = r10, r11		C			M I
+	;;
+	ld8		v5 = [vp], 8		C			M01
+	ld8		u5 = [up], 8		C			M01
+	cmp.PRED	p8, p0 = w2, r10	C			M I
+	;;
+	ld8		v6 = [vp], 8		C			M01
+	ld8		u6 = [up], 8		C			M01
+	ADDSUB		w3 = u3, v3		C			M I
+	;;
+	ld8		v7 = [vp], 8		C			M01
+	ld8		u7 = [up], 8		C			M01
+	cmp.PRED	p9, p0 = w3, u3		C			M I
+	;;
+	ld8		v0 = [vp], 8		C			M01
+	ld8		u0 = [up], 8		C			M01
+  (p15)	br.cond.dpnt	.grt7			C			B
+	;;
+	st8		[rp] = w2, 8		C			M23
+   (p8)	cmp.eq.or	p9, p0 = LIM, w3	C			M I
+   (p8)	add		w3 = INCR, w3		C			M I
+	ADDSUB		w4 = u4, v4		C			M I
+	br		.Lcj67			C			B
+
+.grt7:	ld8		v1 = [vp], 8		C			M01
+	ld8		u1 = [up], 8		C			M01
+	shr.u		n = n, 3		C			I0
+   (p8)	cmp.eq.or	p9, p0 = LIM, w3	C			M I
+	nop.i		0
+	nop.b		0
+	;;
+	add		r11 = 512, vp
+	ld8		v2 = [vp], 8		C			M01
+	add		r10 = 512, up
+	ld8		u2 = [up], 8		C			M01
+   (p8)	add		w3 = INCR, w3		C			M I
+	nop.b		0
+	;;
+	ld8		v3 = [vp], 8		C			M01
+	ld8		u3 = [up], 8		C			M01
+	mov.i		ar.lc = n		C			I0
+	st8		[rp] = w2, 8		C			M23
+	ADDSUB		w4 = u4, v4		C			M I
+	br		.LL11x			C			B
 
 C *** MAIN LOOP START ***
 	ALIGN(32)
-L(top):
-L(c5):	ld8	v1 = [vp], 8		C			M01
-	cmp.CND	p7, p0 = w1, u1		C			M I
-   (p9)	cmpeqor	p6, p0 = LIM, w0	C			M I
-	ld8	u1 = [up], 8		C			M01
-   (p9)	add	w0 = INCR, w0		C			M I
-	ADDSUB	w2 = u2, v2		C			M I
-	;;
-L(m5):	ld8	v2 = [vp], 8		C			M01
-	cmp.CND	p8, p0 = w2, u2		C			M I
-   (p6)	cmpeqor	p7, p0 = LIM, w1	C			M I
-	ld8	u2 = [up], 8		C			M01
-   (p6)	add	w1 = INCR, w1		C			M I
-	ADDSUB	w3 = u3, v3		C			M I
-	;;
-	st8	[rp] = w0, 8		C			M23
-	ld8	v3 = [vp], 8		C			M01
-	cmp.CND	p9, p0 = w3, u3		C			M I
-   (p7)	cmpeqor	p8, p0 = LIM, w2	C			M I
-	ld8	u3 = [up], 8		C			M01
-   (p7)	add	w2 = INCR, w2		C			M I
-	;;
-L(m4):	st8	[rp] = w1, 16		C			M23
-	st8	[rpx] = w2, 32		C			M23
-   (p8)	cmpeqor	p9, p0 = LIM, w3	C			M I
-	lfetch	[upadv], 64
-   (p8)	add	w3 = INCR, w3		C			M I
-	ADDSUB	w0 = u0, v0		C			M I
-	;;
-L(m23):	st8	[rp] = w3, 8		C			M23
-	ld8	v0 = [vp], 8		C			M01
-	cmp.CND	p6, p0 = w0, u0		C			M I
-	ld8	u0 = [up], 8		C			M01
-	ADDSUB	w1 = u1, v1		C			M I
-	nop.b	0
-	;;
-L(c1):	ld8	v1 = [vp], 8		C			M01
-	cmp.CND	p7, p0 = w1, u1		C			M I
-   (p9)	cmpeqor	p6, p0 = LIM, w0	C			M I
-	ld8	u1 = [up], 8		C			M01
-   (p9)	add	w0 = INCR, w0		C			M I
-	ADDSUB	w2 = u2, v2		C			M I
-	;;
-L(m1):	ld8	v2 = [vp], 8		C			M01
-	cmp.CND	p8, p0 = w2, u2		C			M I
-   (p6)	cmpeqor	p7, p0 = LIM, w1	C			M I
-	ld8	u2 = [up], 8		C			M01
-   (p6)	add	w1 = INCR, w1		C			M I
-	ADDSUB	w3 = u3, v3		C			M I
-	;;
-	st8	[rp] = w0, 8		C			M23
-	ld8	v3 = [vp], 8		C			M01
-	cmp.CND	p9, p0 = w3, u3		C			M I
-   (p7)	cmpeqor	p8, p0 = LIM, w2	C			M I
-	ld8	u3 = [up], 8		C			M01
-   (p7)	add	w2 = INCR, w2		C			M I
-	;;
-L(m0):	st8	[rp] = w1, 16		C			M23
-	st8	[rpx] = w2, 32		C			M23
-   (p8)	cmpeqor	p9, p0 = LIM, w3	C			M I
-	lfetch	[vpadv], 64
-   (p8)	add	w3 = INCR, w3		C			M I
-	ADDSUB	w0 = u0, v0		C			M I
-	;;
-L(m67):	st8	[rp] = w3, 8		C			M23
-	ld8	v0 = [vp], 8		C			M01
-	cmp.CND	p6, p0 = w0, u0		C			M I
-	ld8	u0 = [up], 8		C			M01
-	ADDSUB	w1 = u1, v1		C			M I
-	br.cloop.dptk	L(top)		C			B
+.Loop:	ld8		v1 = [vp], 8		C			M01
+	cmp.PRED	p7, p0 = w1, u1		C			M I
+   (p9)	cmp.eq.or	p6, p0 = LIM, w0	C			M I
+	ld8		u1 = [up], 8		C			M01
+   (p9)	add		w0 = INCR, w0		C			M I
+	ADDSUB		w2 = u2, v2		C			M I
+	;;
+	ld8		v2 = [vp], 8		C			M01
+	cmp.PRED	p8, p0 = w2, u2		C			M I
+   (p6)	cmp.eq.or	p7, p0 = LIM, w1	C			M I
+	ld8		u2 = [up], 8		C			M01
+   (p6)	add		w1 = INCR, w1		C			M I
+	ADDSUB		w3 = u3, v3		C			M I
+	;;
+	st8		[rp] = w0, 8		C			M23
+	ld8		v3 = [vp], 8		C			M01
+	cmp.PRED	p9, p0 = w3, u3		C			M I
+   (p7)	cmp.eq.or	p8, p0 = LIM, w2	C			M I
+	ld8		u3 = [up], 8		C			M01
+   (p7)	add		w2 = INCR, w2		C			M I
+	;;
+.LL000:	st8		[rp] = w1, 16		C			M23
+	st8		[rpx] = w2, 32		C			M23
+   (p8)	cmp.eq.or	p9, p0 = LIM, w3	C			M I
+	lfetch		[r10], 64
+   (p8)	add		w3 = INCR, w3		C			M I
+	ADDSUB		w4 = u4, v4		C			M I
+	;;
+.LL11x:	st8		[rp] = w3, 8		C			M23
+	ld8		v4 = [vp], 8		C			M01
+	cmp.PRED	p6, p0 = w4, u4		C			M I
+	ld8		u4 = [up], 8		C			M01
+	ADDSUB		w5 = u5, v5		C			M I
+	;;
+	ld8		v5 = [vp], 8		C			M01
+	cmp.PRED	p7, p0 = w5, u5		C			M I
+   (p9)	cmp.eq.or	p6, p0 = LIM, w4	C			M I
+	ld8		u5 = [up], 8		C			M01
+   (p9)	add		w4 = INCR, w4		C			M I
+	ADDSUB		w6 = u6, v6		C			M I
+	;;
+.LL101:	ld8		v6 = [vp], 8		C			M01
+	cmp.PRED	p8, p0 = w6, u6		C			M I
+   (p6)	cmp.eq.or	p7, p0 = LIM, w5	C			M I
+	ld8		u6 = [up], 8		C			M01
+   (p6)	add		w5 = INCR, w5		C			M I
+	ADDSUB		w7 = u7, v7		C			M I
+	;;
+	st8		[rp] = w4, 8		C			M23
+	ld8		v7 = [vp], 8		C			M01
+	cmp.PRED	p9, p0 = w7, u7		C			M I
+   (p7)	cmp.eq.or	p8, p0 = LIM, w6	C			M I
+	ld8		u7 = [up], 8		C			M01
+   (p7)	add		w6 = INCR, w6		C			M I
+	;;
+.LL100:	st8		[rp] = w5, 16		C			M23
+	st8		[rpx] = w6, 32		C			M23
+   (p8)	cmp.eq.or	p9, p0 = LIM, w7	C			M I
+	lfetch		[r11], 64
+   (p8)	add		w7 = INCR, w7		C			M I
+	ADDSUB		w0 = u0, v0		C			M I
+	;;
+.LL01x:	st8		[rp] = w7, 8		C			M23
+	ld8		v0 = [vp], 8		C			M01
+	cmp.PRED	p6, p0 = w0, u0		C			M I
+	ld8		u0 = [up], 8		C			M01
+	ADDSUB		w1 = u1, v1		C			M I
+	br.cloop.dptk	.Loop			C			B
 	;;
 C *** MAIN LOOP END ***
 
-L(end):
-.mmi;
-   (p9)	cmpeqor	p6, p0 = LIM, w0	C			M I
-   (p9)	add	w0 = INCR, w0		C			M I
-	mov	ar.lc = r2		C			I0
-L(cj5):
-.mmi;	cmp.CND	p7, p0 = w1, u1		C			M I
-	ADDSUB	w2 = u2, v2		C			M I
-	nop	0
-	;;
-.mmi;	st8	[rp] = w0, 8		C			M23
-   (p6)	cmpeqor	p7, p0 = LIM, w1	C			M I
-   (p6)	add	w1 = INCR, w1		C			M I
-L(cj4):
-.mmi;	cmp.CND	p8, p0 = w2, u2		C			M I
-	ADDSUB	w3 = u3, v3		C			M I
-	nop	0
-	;;
-.mmi;	st8	[rp] = w1, 8		C			M23
-   (p7)	cmpeqor	p8, p0 = LIM, w2	C			M I
-   (p7)	add	w2 = INCR, w2		C			M I
-L(cj3):
-.mmi;	cmp.CND	p9, p0 = w3, u3		C			M I
-	ADDSUB	w0 = u0, v0		C			M I
-	nop	0
-	;;
-.mmi;	st8	[rp] = w2, 8		C			M23
-   (p8)	cmpeqor	p9, p0 = LIM, w3	C			M I
-   (p8)	add	w3 = INCR, w3		C			M I
-.mmi;	cmp.CND	p6, p0 = w0, u0		C			M I
-	nop	0
-	mov	r8 = 0			C			M I
-	;;
-L(cj2):
-.mmi;	st8	[rp] = w3, 8		C			M23
-   (p9)	cmpeqor	p6, p0 = LIM, w0	C			M I
-   (p9)	add	w0 = INCR, w0		C			M I
-	;;
-L(cj1):
-.mmb;	st8	[rp] = w0, 8		C			M23
-   (p6)	mov	r8 = 1			C			M I
-	br.ret.sptk.many b0		C			B
+	cmp.PRED	p7, p0 = w1, u1		C			M I
+   (p9)	cmp.eq.or	p6, p0 = LIM, w0	C			M I
+   (p9)	add		w0 = INCR, w0		C			M I
+	ADDSUB		w2 = u2, v2		C			M I
+	;;
+.Lcj9:	cmp.PRED	p8, p0 = w2, u2		C			M I
+   (p6)	cmp.eq.or	p7, p0 = LIM, w1	C			M I
+	st8		[rp] = w0, 8		C			M23
+   (p6)	add		w1 = INCR, w1		C			M I
+	ADDSUB		w3 = u3, v3		C			M I
+	;;
+	cmp.PRED	p9, p0 = w3, u3		C			M I
+   (p7)	cmp.eq.or	p8, p0 = LIM, w2	C			M I
+   (p7)	add		w2 = INCR, w2		C			M I
+	;;
+.Lcj8:	st8		[rp] = w1, 16		C			M23
+	st8		[rpx] = w2, 32		C			M23
+   (p8)	cmp.eq.or	p9, p0 = LIM, w3	C			M I
+   (p8)	add		w3 = INCR, w3		C			M I
+	ADDSUB		w4 = u4, v4		C			M I
+	;;
+.Lcj67:	st8		[rp] = w3, 8		C			M23
+	cmp.PRED	p6, p0 = w4, u4		C			M I
+	ADDSUB		w5 = u5, v5		C			M I
+	;;
+	cmp.PRED	p7, p0 = w5, u5		C			M I
+   (p9)	cmp.eq.or	p6, p0 = LIM, w4	C			M I
+   (p9)	add		w4 = INCR, w4		C			M I
+	ADDSUB		w6 = u6, v6		C			M I
+	;;
+.Lcj5:	cmp.PRED	p8, p0 = w6, u6		C			M I
+   (p6)	cmp.eq.or	p7, p0 = LIM, w5	C			M I
+	st8		[rp] = w4, 8		C			M23
+   (p6)	add		w5 = INCR, w5		C			M I
+	ADDSUB		w7 = u7, v7		C			M I
+	;;
+.Lcj4:	cmp.PRED	p9, p0 = w7, u7		C			M I
+   (p7)	cmp.eq.or	p8, p0 = LIM, w6	C			M I
+   (p7)	add		w6 = INCR, w6		C			M I
+	;;
+	st8		[rp] = w5, 16		C			M23
+	st8		[rpx] = w6, 32		C			M23
+.Lcj3:
+   (p8)	cmp.eq.or	p9, p0 = LIM, w7	C			M I
+   (p8)	add		w7 = INCR, w7		C			M I
+	ADDSUB		w0 = u0, v0		C			M I
+	;;
+.Lcj2:	st8		[rp] = w7, 8		C			M23
+	cmp.PRED	p6, p0 = w0, u0		C			M I
+	;;
+   (p9)	cmp.eq.or	p6, p0 = LIM, w0	C			M I
+   (p9)	add		w0 = INCR, w0		C			M I
+	mov		r8 = 0			C			M I
+	;;
+.Lcj1:	st8		[rp] = w0, 8		C			M23
+	mov.i		ar.lc = r2		C			I0
+   (p6)	mov		r8 = 1			C			M I
+	br.ret.sptk.many b0			C			B
 EPILOGUE()
 ASM_END()
diff --git a/gmp/mpn/ia64/aorslsh1_n.asm b/gmp/mpn/ia64/aorslsh1_n.asm
new file mode 100644
index 0000000000..5348149c87
--- /dev/null
+++ b/gmp/mpn/ia64/aorslsh1_n.asm
@@ -0,0 +1,323 @@
+dnl  IA-64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
+
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C           cycles/limb
+C Itanium:      3.0
+C Itanium 2:    1.5
+
+C TODO
+C  * Use shladd in feed-in code (for mpn_addlsh1_n).
+
+C INPUT PARAMETERS
+define(`rp',`r32')
+define(`up',`r33')
+define(`vp',`r34')
+define(`n',`r35')
+
+ifdef(`OPERATION_addlsh1_n',`
+  define(ADDSUB,       add)
+  define(PRED,	       ltu)
+  define(INCR,	       1)
+  define(LIM,	       -1)
+  define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+  define(ADDSUB,       sub)
+  define(PRED,	       gtu)
+  define(INCR,	       -1)
+  define(LIM,	       0)
+  define(func, mpn_sublsh1_n)
+')
+
+C Some useful aliases for registers we use
+define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
+define(`v0',`r18') define(`v1',`r19') define(`v2',`r20') define(`v3',`r21')
+define(`w0',`r22') define(`w1',`r23') define(`w2',`r24') define(`w3',`r25')
+define(`s0',`r26') define(`s1',`r27') define(`s2',`r28') define(`s3',`r29')
+define(`x0',`r30') define(`x1',`r31') define(`x2',`r30') define(`x3',`r31')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+PROLOGUE(func)
+	.prologue
+	.save	ar.lc, r2			C ar.lc is copied to r2 below and restored at .Lcj1
+	.body
+ifdef(`HAVE_ABI_32',`
+	addp4		rp = 0, rp		C			M I
+	addp4		up = 0, up		C			M I
+	addp4		vp = 0, vp		C			M I
+	zxt4		n = n			C			I
+	;;
+')
+ {.mmi;	ld8		r11 = [vp], 8		C r11 = first vp limb	M01
+	ld8		r10 = [up], 8		C r10 = first up limb	M01
+	mov.i		r2 = ar.lc		C keep old ar.lc	I0
+}{.mmi;	and		r14 = 3, n		C r14 = n mod 4		M I
+	cmp.lt		p15, p0 = 4, n		C p15 = 4 < n		M I
+	add		n = -4, n		C			M I
+	;;
+}{.mmi;	cmp.eq		p6, p0 = 1, r14		C p6: n mod 4 = 1	M I
+	cmp.eq		p7, p0 = 2, r14		C p7: n mod 4 = 2	M I
+	cmp.eq		p8, p0 = 3, r14		C p8: n mod 4 = 3	M I
+}{.bbb
+  (p6)	br.dptk		.Lb01			C			B
+  (p7)	br.dptk		.Lb10			C			B
+  (p8)	br.dptk		.Lb11			C			B
+}
+
+.Lb00:	ld8		v0 = [vp], 8		C n mod 4 = 0 falls in here	M01
+	ld8		u0 = [up], 8		C			M01
+	shr.u		n = n, 2		C			I0
+	;;
+	ld8		v1 = [vp], 8		C			M01
+	ld8		u1 = [up], 8		C			M01
+	add		x3 = r11, r11		C x3 = r11 << 1		M I
+	;;
+	ld8		v2 = [vp], 8		C			M01
+	ld8		u2 = [up], 8		C			M01
+	ADDSUB		w3 = r10, x3		C			M I
+  (p15)	br.dpnt		.grt4			C			B
+	;;
+	shrp		x0 = v0, r11, 63	C x0 = v0:r11 >> 63	I0
+	cmp.PRED	p8, p0 = w3, r10	C p8 = carry/borrow out of w3	M I
+	;;
+	shrp		x1 = v1, v0, 63		C			I0
+	ADDSUB		w0 = u0, x0		C			M I
+	;;
+	cmp.PRED	p6, p0 = w0, u0		C			M I
+	ADDSUB		w1 = u1, x1		C			M I
+	br		.Lcj4			C			B
+
+.grt4:	ld8		v3 = [vp], 8		C			M01
+	shrp		x0 = v0, r11, 63	C			I0
+	cmp.PRED	p8, p0 = w3, r10	C			M I
+	add		n = -1, n		C			M I
+	;;
+	ld8		u3 = [up], 8		C			M01
+	mov.i		ar.lc = n		C			I0
+	shrp		x1 = v1, v0, 63		C			I0
+	ld8		v0 = [vp], 8		C			M01
+	ADDSUB		w0 = u0, x0		C			M I
+	;;
+	cmp.PRED	p6, p0 = w0, u0		C			M I
+	ld8		u0 = [up], 8		C			M01
+	ADDSUB		w1 = u1, x1		C			M I
+	br		.LL00			C			B
+
+.Lb01:	add		x2 = r11, r11		C n mod 4 = 1; x2 = r11 << 1	M I
+	shr.u		n = n, 2		C			I0
+  (p15)	br.dpnt		.grt1			C			B
+	;;
+	ADDSUB		w2 = r10, x2		C			M I
+	shr.u		r8 = r11, 63		C retval		I0
+	;;
+	cmp.PRED	p6, p0 = w2, r10	C			M I
+	;;
+	st8		[rp] = w2, 8		C			M23
+   (p6)	add		r8 = 1, r8		C add carry/borrow to retval	M I
+	br.ret.sptk.many b0			C ar.lc was not written on this path	B
+
+.grt1:	ld8		v3 = [vp], 8		C			M01
+	ld8		u3 = [up], 8		C			M01
+	mov.i		ar.lc = n		C FIXME swap with next	I0
+	;;
+	ld8		v0 = [vp], 8		C			M01
+	ld8		u0 = [up], 8		C			M01
+	ADDSUB		w2 = r10, x2		C			M I
+	;;
+	ld8		v1 = [vp], 8		C			M01
+	ld8		u1 = [up], 8		C			M01
+	shrp		x3 = v3, r11, 63	C			I0
+	;;
+	ld8		v2 = [vp], 8		C			M01
+	ld8		u2 = [up], 8		C			M01
+	cmp.PRED	p6, p0 = w2, r10	C			M I
+	ADDSUB		w3 = u3, x3		C			M I
+	br.cloop.dptk	.grt5			C			B
+	;;
+	shrp		x0 = v0, v3, 63		C			I0
+	cmp.PRED	p8, p0 = w3, u3		C			M I
+	br		.Lcj5			C			B
+
+.grt5:	shrp		x0 = v0, v3, 63		C			I0
+	ld8		v3 = [vp], 8		C			M01
+	cmp.PRED	p8, p0 = w3, u3		C			M I
+	br		.LL01			C			B
+
+.Lb10:	ld8		v2 = [vp], 8		C n mod 4 = 2		M01
+	ld8		u2 = [up], 8		C			M01
+	shr.u		n = n, 2		C			I0
+	add		x1 = r11, r11		C x1 = r11 << 1		M I
+  (p15)	br.dpnt		.grt2			C			B
+	;;
+	ADDSUB		w1 = r10, x1		C			M I
+	shrp		x2 = v2, r11, 63	C			I0
+	;;
+	cmp.PRED	p8, p0 = w1, r10	C			M I
+	ADDSUB		w2 = u2, x2		C			M I
+	shr.u		r8 = v2, 63		C retval		I0
+	;;
+	cmp.PRED	p6, p0 = w2, u2		C			M I
+	br		.Lcj2			C			B
+
+.grt2:	ld8		v3 = [vp], 8		C			M01
+	ld8		u3 = [up], 8		C			M01
+	mov.i		ar.lc = n		C			I0
+	;;
+	ld8		v0 = [vp], 8		C			M01
+	ld8		u0 = [up], 8		C			M01
+	ADDSUB		w1 = r10, x1		C			M I
+	;;
+	ld8		v1 = [vp], 8		C			M01
+	shrp		x2 = v2, r11, 63	C			I0
+	cmp.PRED	p8, p0 = w1, r10	C			M I
+	;;
+	ld8		u1 = [up], 8		C			M01
+	shrp		x3 = v3, v2, 63		C			I0
+	ld8		v2 = [vp], 8		C			M01
+	ADDSUB		w2 = u2, x2		C			M I
+	;;
+	cmp.PRED	p6, p0 = w2, u2		C			M I
+	ld8		u2 = [up], 8		C			M01
+	ADDSUB		w3 = u3, x3		C			M I
+	br.cloop.dpnt	.Loop			C			B
+	br		.Lskip			C			B
+
+.Lb11:	ld8		v1 = [vp], 8		C n mod 4 = 3		M01
+	ld8		u1 = [up], 8		C			M01
+	shr.u		n = n, 2		C			I0
+	add		x0 = r11, r11		C x0 = r11 << 1		M I
+	;;
+	ld8		v2 = [vp], 8		C			M01
+	ld8		u2 = [up], 8		C			M01
+  (p15)	br.dpnt		.grt3			C			B
+	;;
+
+	shrp		x1 = v1, r11, 63	C			I0
+	ADDSUB		w0 = r10, x0		C			M I
+	;;
+	cmp.PRED	p6, p0 = w0, r10	C			M I
+	ADDSUB		w1 = u1, x1		C			M I
+	;;
+	shrp		x2 = v2, v1, 63		C			I0
+	cmp.PRED	p8, p0 = w1, u1		C			M I
+	br		.Lcj3			C			B
+
+.grt3:	ld8		v3 = [vp], 8		C			M01
+	ld8		u3 = [up], 8		C			M01
+	mov.i		ar.lc = n		C			I0
+	shrp		x1 = v1, r11, 63	C			I0
+	ADDSUB		w0 = r10, x0		C			M I
+	;;
+	ld8		v0 = [vp], 8		C			M01
+	cmp.PRED	p6, p0 = w0, r10	C			M I
+	ld8		u0 = [up], 8		C			M01
+	ADDSUB		w1 = u1, x1		C			M I
+	;;
+	shrp		x2 = v2, v1, 63		C			I0
+	ld8		v1 = [vp], 8		C			M01
+	cmp.PRED	p8, p0 = w1, u1		C			M I
+	br		.LL11			C			B
+
+
+C *** MAIN LOOP START ***
+	ALIGN(32)
+.Loop:	st8		[rp] = w1, 8		C			M23
+	shrp		x0 = v0, v3, 63		C			I0
+   (p8)	cmp.eq.or	p6, p0 = LIM, w2	C if p8 then p6 |= w2 == LIM	M I
+   (p8)	add		w2 = INCR, w2		C if p8 apply INCR to w2	M I
+	ld8		v3 = [vp], 8		C			M01
+	cmp.PRED	p8, p0 = w3, u3		C			M I
+	;;
+.LL01:	ld8		u3 = [up], 8		C			M01
+	shrp		x1 = v1, v0, 63		C			I0
+   (p6)	cmp.eq.or	p8, p0 = LIM, w3	C			M I
+   (p6)	add		w3 = INCR, w3		C			M I
+	ld8		v0 = [vp], 8		C			M01
+	ADDSUB		w0 = u0, x0		C			M I
+	;;
+	st8		[rp] = w2, 8		C			M23
+	cmp.PRED	p6, p0 = w0, u0		C			M I
+	ld8		u0 = [up], 8		C			M01
+	ADDSUB		w1 = u1, x1		C			M I
+	;;
+.LL00:	st8		[rp] = w3, 8		C			M23
+	shrp		x2 = v2, v1, 63		C			I0
+   (p8)	cmp.eq.or	p6, p0 = LIM, w0	C			M I
+   (p8)	add		w0 = INCR, w0		C			M I
+	ld8		v1 = [vp], 8		C			M01
+	cmp.PRED	p8, p0 = w1, u1		C			M I
+	;;
+.LL11:	ld8		u1 = [up], 8		C			M01
+	shrp		x3 = v3, v2, 63		C			I0
+   (p6)	cmp.eq.or	p8, p0 = LIM, w1	C			M I
+   (p6)	add		w1 = INCR, w1		C			M I
+	ld8		v2 = [vp], 8		C			M01
+	ADDSUB		w2 = u2, x2		C			M I
+	;;
+	st8		[rp] = w0, 8		C			M23
+	cmp.PRED	p6, p0 = w2, u2		C			M I
+	ld8		u2 = [up], 8		C			M01
+	ADDSUB		w3 = u3, x3		C			M I
+	br.cloop.dptk	.Loop			C			B
+	;;
+C *** MAIN LOOP END ***
+
+.Lskip:	st8		[rp] = w1, 8		C			M23
+	shrp		x0 = v0, v3, 63		C			I0
+   (p8)	cmp.eq.or	p6, p0 = LIM, w2	C			M I
+   (p8)	add		w2 = INCR, w2		C			M I
+	cmp.PRED	p8, p0 = w3, u3		C			M I
+	;;
+.Lcj5:	shrp		x1 = v1, v0, 63		C			I0
+   (p6)	cmp.eq.or	p8, p0 = LIM, w3	C			M I
+   (p6)	add		w3 = INCR, w3		C			M I
+	ADDSUB		w0 = u0, x0		C			M I
+	;;
+	st8		[rp] = w2, 8		C			M23
+	cmp.PRED	p6, p0 = w0, u0		C			M I
+	ADDSUB		w1 = u1, x1		C			M I
+	;;
+.Lcj4:	st8		[rp] = w3, 8		C			M23
+	shrp		x2 = v2, v1, 63		C			I0
+   (p8)	cmp.eq.or	p6, p0 = LIM, w0	C			M I
+   (p8)	add		w0 = INCR, w0		C			M I
+	cmp.PRED	p8, p0 = w1, u1		C			M I
+	;;
+.Lcj3:	shr.u		r8 = v2, 63		C r8 = bit 63 of v2	I0
+   (p6)	cmp.eq.or	p8, p0 = LIM, w1	C			M I
+   (p6)	add		w1 = INCR, w1		C			M I
+	ADDSUB		w2 = u2, x2		C			M I
+	;;
+	st8		[rp] = w0, 8		C			M23
+	cmp.PRED	p6, p0 = w2, u2		C			M I
+	;;
+.Lcj2:	st8		[rp] = w1, 8		C			M23
+   (p8)	cmp.eq.or	p6, p0 = LIM, w2	C			M I
+   (p8)	add		w2 = INCR, w2		C			M I
+	;;
+.Lcj1:	st8		[rp] = w2, 8		C			M23
+	mov.i		ar.lc = r2		C restore ar.lc from r2	I0
+   (p6)	add		r8 = 1, r8		C add carry/borrow to retval	M I
+	br.ret.sptk.many b0			C			B
+EPILOGUE()
+ASM_END()
diff --git a/gmp/mpn/ia64/aorsorrlsh1_n.asm b/gmp/mpn/ia64/aorsorrlsh1_n.asm
deleted file mode 100644
index 9b58b9e11f..0000000000
--- a/gmp/mpn/ia64/aorsorrlsh1_n.asm
+++ /dev/null
@@ -1,48 +0,0 @@
-dnl  IA-64 mpn_addlsh1_n, mpn_sublsh1_n, mpn_rsblsh1_n
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C           cycles/limb
-C Itanium:      3.0
-C Itanium 2:    1.5
-
-
-define(LSH,		1)
-
-ifdef(`OPERATION_addlsh1_n',`define(`DO_add')')
-ifdef(`OPERATION_sublsh1_n',`define(`DO_sub')')
-ifdef(`OPERATION_rsblsh1_n',`define(`DO_rsb')')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n mpn_rsblsh1_n)
-
-include_mpn(`ia64/aorsorrlshC_n.asm')
diff --git a/gmp/mpn/ia64/aorsorrlsh2_n.asm b/gmp/mpn/ia64/aorsorrlsh2_n.asm
deleted file mode 100644
index 39b384a91b..0000000000
--- a/gmp/mpn/ia64/aorsorrlsh2_n.asm
+++ /dev/null
@@ -1,48 +0,0 @@
-dnl  IA-64 mpn_addlsh2_n, mpn_sublsh2_n, mpn_rsblsh2_n
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C           cycles/limb
-C Itanium:      3.0
-C Itanium 2:    1.5
-
-
-define(LSH,		2)
-
-ifdef(`OPERATION_addlsh2_n',`define(`DO_add')')
-ifdef(`OPERATION_sublsh2_n',`define(`DO_sub')')
-ifdef(`OPERATION_rsblsh2_n',`define(`DO_rsb')')
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n mpn_rsblsh2_n)
-
-include_mpn(`ia64/aorsorrlshC_n.asm')
diff --git a/gmp/mpn/ia64/aorsorrlshC_n.asm b/gmp/mpn/ia64/aorsorrlshC_n.asm
deleted file mode 100644
index d327838402..0000000000
--- a/gmp/mpn/ia64/aorsorrlshC_n.asm
+++ /dev/null
@@ -1,397 +0,0 @@
-dnl  IA-64 mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-C           cycles/limb
-C Itanium:      ?
-C Itanium 2:    1.5
-
-C TODO
-C  * Use shladd in feed-in code (for mpn_addlshC_n).
-C  * Rewrite loop to schedule loads closer to use, since we do prefetch.
-
-C INPUT PARAMETERS
-define(`rp', `r32')
-define(`up', `r33')
-define(`vp', `r34')
-define(`n',  `r35')
-
-ifdef(`DO_add', `
-  define(`ADDSUB',     `add	$1 = $2, $3')
-  define(`CMP',        `cmp.ltu	$1,p0 = $2, $3')
-  define(`INCR',       1)
-  define(`LIM',        -1)
-  define(`func',        mpn_addlsh`'LSH`'_n)')
-ifdef(`DO_sub', `
-  define(`ADDSUB',     `sub	$1 = $2, $3')
-  define(`CMP',        `cmp.gtu	$1,p0 = $2, $3')
-  define(`INCR',       -1)
-  define(`LIM',        0)
-  define(`func',        mpn_sublsh`'LSH`'_n)')
-ifdef(`DO_rsb', `
-  define(`ADDSUB',     `sub	$1 = $3, $2')
-  define(`CMP',        `cmp.gtu	$1,p0 = $2, $4')
-  define(`INCR',       -1)
-  define(`LIM',        0)
-  define(`func',        mpn_rsblsh`'LSH`'_n)')
-
-define(cmpeqor, `cmp.eq.or')
-define(PFDIST, 500)
-
-define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
-define(`v0',`r18') define(`v1',`r19') define(`v2',`r20') define(`v3',`r21')
-define(`w0',`r22') define(`w1',`r23') define(`w2',`r24') define(`w3',`r25')
-define(`s0',`r26') define(`s1',`r27') define(`s2',`r28') define(`s3',`r29')
-define(`x0',`r30') define(`x1',`r31') define(`x2',`r3')  define(`x3',`r9')
-
-C r3 r8 r9 r10 r11
-
-ASM_START()
-PROLOGUE(func)
-	.prologue
-	.save	ar.lc, r2
-	.body
-ifdef(`HAVE_ABI_32',`
-	addp4	rp = 0, rp		C			M I
-	addp4	up = 0, up		C			M I
-	nop.i	0
-	addp4	vp = 0, vp		C			M I
-	nop.m	0
-	zxt4	n = n			C			I
-	;;
-')
- {.mmi;	ld8	r11 = [vp], 8		C			M01
-	ld8	r10 = [up], 8		C			M01
-	mov.i	r2 = ar.lc		C			I0
-}{.mmi;	and	r14 = 3, n		C			M I
-	cmp.lt	p15, p0 = 4, n		C			M I
-	add	n = -5, n		C			M I
-	;;
-}{.mmi;	cmp.eq	p6, p0 = 1, r14		C			M I
-	cmp.eq	p7, p0 = 2, r14		C			M I
-	cmp.eq	p8, p0 = 3, r14		C			M I
-}{.bbb
-  (p6)	br.dptk	.Lb01			C			B
-  (p7)	br.dptk	.Lb10			C			B
-  (p8)	br.dptk	.Lb11			C			B
-}
-
-.Lb00:	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	shr.u	n = n, 2		C			I0
-	;;
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	shl	x3 = r11, LSH		C			I0
-	;;
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	shrp	x0 = v0, r11, 64-LSH	C			I0
-.mmb;	ADDSUB(	w3, r10, x3)		C			M I
-	nop	0
-  (p15)	br.dpnt	.grt4			C			B
-	;;
-.mii;	CMP(	p7, w3, r10, x3)	C			M II0
-	shrp	x1 = v1, v0, 64-LSH	C			I0
-	ADDSUB(	w0, u0, x0)		C			M I
-	;;
-.mii;	CMP(	p8, w0, u0, x0)		C			M I
-	shrp	x2 = v2, v1, 64-LSH	C			I0
-	ADDSUB(	w1, u1, x1)		C			M I
-.mmb;	nop	0
-	nop	0
-	br	.Lcj4			C			B
-
-ALIGN(32)
-.grt4:	ld8	v3 = [vp], 8		C			M01
-	shrp	x0 = v0, r11, 64-LSH	C			I0
-	CMP(	p8, w3, r10, x3)	C			M I
-	;;
-.mmi;	ld8	u3 = [up], 8		C			M01
-	add	r11 = PFDIST, vp
-	shrp	x1 = v1, v0, 64-LSH	C			I0
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	ADDSUB(	w0, u0, x0)		C			M I
-	nop	0
-	;;
-.mmi;	CMP(	p6, w0, u0, x0)		C			M I
-	add	r10 = PFDIST, up
-	mov.i	ar.lc = n		C			I0
-.mmb;	ADDSUB(	w1, u1, x1)		C			M I
-	ld8	u0 = [up], 8		C			M01
-	br	.LL00			C			B
-
-
-	ALIGN(32)
-.Lb01:
-ifdef(`DO_add',
-`	shladd	w2 = r11, LSH, r10	C			M I
-	shr.u	r8 = r11, 64-LSH	C retval		I0
-  (p15)	br.dpnt	.grt1			C			B
-	;;
-',`
-	shl	x2 = r11, LSH		C			I0
-  (p15)	br.dpnt	.grt1			C			B
-	;;
-	ADDSUB(	w2, r10, x2)		C			M I
-	shr.u	r8 = r11, 64-LSH	C retval		I0
-	;;
-')
-	CMP(	p6, w2, r10, x2)	C			M I
-	br		.Lcj1
-
-.grt1:	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	shr.u	n = n, 2		C			I0
-	;;
-	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	mov.i	ar.lc = n		C FIXME swap with next	I0
-ifdef(`DO_add',
-`',`
-	ADDSUB(	w2, r10, x2)
-')
-	;;
-.mmi;	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	shrp	x3 = v3, r11, 64-LSH	C			I0
-	;;
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	shrp	x0 = v0, v3, 64-LSH	C			I0
-.mmb;	CMP(	p6, w2, r10, x2)	C			M I
-	ADDSUB(	w3, u3, x3)		C			M I
-	br.cloop.dptk	.grt5		C			B
-	;;
-.mmi;	CMP(	p7, w3, u3, x3)		C			M I
-	ADDSUB(	w0, u0, x0)		C			M I
-	shrp	x1 = v1, v0, 64-LSH	C			I0
-.mmb;	nop	0
-	nop	0
-	br	.Lcj5			C			B
-.grt5:
-.mmi;	add	r10 = PFDIST, up
-	add	r11 = PFDIST, vp
-	shrp	x0 = v0, v3, 64-LSH	C			I0
-.mmb;	ld8	v3 = [vp], 8		C			M01
-	CMP(	p8, w3, u3, x3)		C			M I
-	br	.LL01			C			B
-
-	ALIGN(32)
-.Lb10:	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	shl	x1 = r11, LSH		C			I0
-.mmb;	nop	0
-	nop	0
-  (p15)	br.dpnt	.grt2			C			B
-	;;
-.mmi;	ADDSUB(	w1, r10, x1)		C			M I
-	nop	0
-	shrp	x2 = v2, r11, 64-LSH	C			I0
-	;;
-.mmi;	CMP(	p9, w1, r10, x1)	C			M I
-	ADDSUB(	w2, u2, x2)		C			M I
-	shr.u	r8 = v2, 64-LSH		C retval		I0
-	;;
-.mmb;	CMP(	p6, w2, u2, x2)		C			M I
-	nop	0
-	br	.Lcj2			C			B
-
-.grt2:	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	shr.u	n = n, 2		C			I0
-	;;
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	ld8	u0 = [up], 8		C			M01
-	mov.i	ar.lc = n		C			I0
-.mmi;	ADDSUB(	w1, r10, x1)		C			M I
-	nop	0
-	nop	0
-	;;
-.mii;	ld8	v1 = [vp], 8		C			M01
-	shrp	x2 = v2, r11, 64-LSH	C			I0
-	CMP(	p8, w1, r10, x1)	C			M I
-	;;
-.mmi;	add	r10 = PFDIST, up
-	ld8	u1 = [up], 8		C			M01
-	shrp	x3 = v3, v2, 64-LSH	C			I0
-.mmi;	add	r11 = PFDIST, vp
-	ld8	v2 = [vp], 8		C			M01
-	ADDSUB(	w2, u2, x2)		C			M I
-	;;
-.mmi;	CMP(	p6, w2, u2, x2)		C			M I
-	ld8	u2 = [up], 8		C			M01
-	shrp	x0 = v0, v3, 64-LSH	C			I0
-.mbb;	ADDSUB(	w3, u3, x3)		C			M I
-	br.cloop.dpnt	L(top)		C			B
-	br	L(end)			C			B
-
-.Lb11:	ld8	v1 = [vp], 8		C			M01
-	ld8	u1 = [up], 8		C			M01
-	shl	x0 = r11, LSH		C			I0
-	;;
-.mmi;	ld8	v2 = [vp], 8		C			M01
-	ld8	u2 = [up], 8		C			M01
-	shr.u	n = n, 2		C			I0
-.mmb;	nop	0
-	nop	0
-  (p15)	br.dpnt	.grt3			C			B
-	;;
-.mii;	nop	0
-	shrp	x1 = v1, r11, 64-LSH	C			I0
-	ADDSUB(	w0, r10, x0)		C			M I
-	;;
-.mii;	CMP(	p8, w0, r10, x0)	C			M I
-	shrp	x2 = v2, v1, 64-LSH	C			I0
-	ADDSUB(	w1, u1, x1)		C			M I
-	;;
-.mmb;	CMP(	p9, w1, u1, x1)		C			M I
-	ADDSUB(	w2, u2, x2)		C			M I
-	br	.Lcj3			C			B
-.grt3:
-.mmi;	ld8	v3 = [vp], 8		C			M01
-	ld8	u3 = [up], 8		C			M01
-	shrp	x1 = v1, r11, 64-LSH	C			I0
-.mmi;	ADDSUB(	w0, r10, x0)		C			M I
-	nop	0
-	nop	0
-	;;
-.mmi;	ld8	v0 = [vp], 8		C			M01
-	CMP(	p6, w0, r10, x0)	C			M I
-	mov.i	ar.lc = n		C			I0
-.mmi;	ld8	u0 = [up], 8		C			M01
-	ADDSUB(	w1, u1, x1)		C			M I
-	nop	0
-	;;
-.mmi;	add	r10 = PFDIST, up
-	add	r11 = PFDIST, vp
-	shrp	x2 = v2, v1, 64-LSH	C			I0
-.mmb;	ld8	v1 = [vp], 8		C			M01
-	CMP(	p8, w1, u1, x1)		C			M I
-	br	.LL11			C			B
-
-
-C *** MAIN LOOP START ***
-	ALIGN(32)
-L(top):	st8	[rp] = w1, 8		C			M23
-	lfetch	[r10], 32
-   (p8)	cmpeqor	p6, p0 = LIM, w2	C			M I
-   (p8)	add	w2 = INCR, w2		C			M I
-	ld8	v3 = [vp], 8		C			M01
-	CMP(	p8, w3, u3, x3)		C			M I
-	;;
-.LL01:	ld8	u3 = [up], 8		C			M01
-	shrp	x1 = v1, v0, 64-LSH	C			I0
-   (p6)	cmpeqor	p8, p0 = LIM, w3	C			M I
-   (p6)	add	w3 = INCR, w3		C			M I
-	ld8	v0 = [vp], 8		C			M01
-	ADDSUB(	w0, u0, x0)		C			M I
-	;;
-	st8	[rp] = w2, 8		C			M23
-	CMP(	p6, w0, u0, x0)		C			M I
-	nop.b	0
-	ld8	u0 = [up], 8		C			M01
-	lfetch	[r11], 32
-	ADDSUB(	w1, u1, x1)		C			M I
-	;;
-.LL00:	st8	[rp] = w3, 8		C			M23
-	shrp	x2 = v2, v1, 64-LSH	C			I0
-   (p8)	cmpeqor	p6, p0 = LIM, w0	C			M I
-   (p8)	add	w0 = INCR, w0		C			M I
-	ld8	v1 = [vp], 8		C			M01
-	CMP(	p8, w1, u1, x1)		C			M I
-	;;
-.LL11:	ld8	u1 = [up], 8		C			M01
-	shrp	x3 = v3, v2, 64-LSH	C			I0
-   (p6)	cmpeqor	p8, p0 = LIM, w1	C			M I
-   (p6)	add	w1 = INCR, w1		C			M I
-	ld8	v2 = [vp], 8		C			M01
-	ADDSUB(	w2, u2, x2)		C			M I
-	;;
-.mmi;	st8	[rp] = w0, 8		C			M23
-	CMP(	p6, w2, u2, x2)		C			M I
-	shrp	x0 = v0, v3, 64-LSH	C			I0
-	ld8	u2 = [up], 8		C			M01
-	ADDSUB(	w3, u3, x3)		C			M I
-	br.cloop.dptk	L(top)		C			B
-	;;
-C *** MAIN LOOP END ***
-
-L(end):
-.mmi;	st8	[rp] = w1, 8		C			M23
-   (p8)	cmpeqor	p6, p0 = LIM, w2	C			M I
-	shrp	x1 = v1, v0, 64-LSH	C			I0
-.mmi;
-   (p8)	add	w2 = INCR, w2		C			M I
-	CMP(	p7, w3, u3, x3)		C			M I
-	ADDSUB(	w0, u0, x0)		C			M I
-	;;
-.Lcj5:
-.mmi;	st8	[rp] = w2, 8		C			M23
-   (p6)	cmpeqor	p7, p0 = LIM, w3	C			M I
-	shrp	x2 = v2, v1, 64-LSH	C			I0
-.mmi;
-   (p6)	add	w3 = INCR, w3		C			M I
-	CMP(	p8, w0, u0, x0)		C			M I
-	ADDSUB(	w1, u1, x1)		C			M I
-	;;
-.Lcj4:
-.mmi;	st8	[rp] = w3, 8		C			M23
-   (p7)	cmpeqor	p8, p0 = LIM, w0	C			M I
-	mov.i	ar.lc = r2		C			I0
-.mmi;
-   (p7)	add	w0 = INCR, w0		C			M I
-	CMP(	p9, w1, u1, x1)		C			M I
-	ADDSUB(	w2, u2, x2)		C			M I
-	;;
-.Lcj3:
-.mmi;	st8	[rp] = w0, 8		C			M23
-   (p8)	cmpeqor	p9, p0 = LIM, w1	C			M I
-	shr.u	r8 = v2, 64-LSH		C			I0
-.mmi;
-   (p8)	add	w1 = INCR, w1		C			M I
-	CMP(	p6, w2, u2, x2)		C			M I
-	nop	0
-	;;
-.Lcj2:
-.mmi;	st8	[rp] = w1, 8		C			M23
-   (p9)	cmpeqor	p6, p0 = LIM, w2	C			M I
-   (p9)	add	w2 = INCR, w2		C			M I
-	;;
-.Lcj1:
-.mmb;	st8	[rp] = w2		C			M23
-ifdef(`DO_rsb',`
-   (p6)	add	r8 = -1, r8		C			M I
-',`
-   (p6)	add	r8 = 1, r8		C			M I
-')	br.ret.sptk.many b0		C			B
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/ia64/bdiv_dbm1c.asm b/gmp/mpn/ia64/bdiv_dbm1c.asm
index 47e4553cda..6ff4fdaaf9 100644
--- a/gmp/mpn/ia64/bdiv_dbm1c.asm
+++ b/gmp/mpn/ia64/bdiv_dbm1c.asm
@@ -1,34 +1,21 @@
 dnl  IA-64 mpn_bdiv_dbm1.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
 dnl  Copyright 2008, 2009 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/ia64/cnd_aors_n.asm b/gmp/mpn/ia64/cnd_aors_n.asm
deleted file mode 100644
index dc4a937403..0000000000
--- a/gmp/mpn/ia64/cnd_aors_n.asm
+++ /dev/null
@@ -1,259 +0,0 @@
-dnl  IA-64 mpn_cnd_add_n/mpn_cnd_sub_n.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C           cycles/limb
-C Itanium:      ?
-C Itanium 2:    1.5
-
-C INPUT PARAMETERS
-define(`cnd', `r32')
-define(`rp',  `r33')
-define(`up',  `r34')
-define(`vp',  `r35')
-define(`n',   `r36')
-
-ifdef(`OPERATION_cnd_add_n',`
-  define(ADDSUB,	add)
-  define(CND,		ltu)
-  define(INCR,		1)
-  define(LIM,		-1)
-  define(func,    mpn_cnd_add_n)
-')
-ifdef(`OPERATION_cnd_sub_n',`
-  define(ADDSUB,	sub)
-  define(CND,		gtu)
-  define(INCR,		-1)
-  define(LIM,		0)
-  define(func,    mpn_cnd_sub_n)
-')
-
-define(cmpeqor, `cmp.eq.or')
-define(PFDIST, 160)
-
-C Some useful aliases for registers we use
-define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
-define(`x0',`r20') define(`x1',`r21') define(`x2',`r22') define(`x3',`r23')
-define(`v0',`r24') define(`v1',`r25') define(`v2',`r26') define(`v3',`r27')
-define(`w0',`r28') define(`w1',`r29') define(`w2',`r30') define(`w3',`r31')
-define(`up1',`up') define(`up2',`r8') define(`upadv',`r1')
-define(`vp1',`vp') define(`vp2',`r9') define(`vpadv',`r11')
-define(`rp1',`rp') define(`rp2',`r10')
-
-MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
-
-ASM_START()
-PROLOGUE(func)
-	.prologue
-	.save	ar.lc, r2
-	.body
-ifdef(`HAVE_ABI_32',`
-	addp4	rp = 0, rp		C				M I
-	addp4	up = 0, up		C				M I
-	nop.i	0
-	addp4	vp = 0, vp		C				M I
-	nop.m	0
-	zxt4	n = n			C				I
-	;;
-')
-.mmi;	and	r3 = 3, n		C				M I
-	add	n = -1, n		C				M I
-	mov	r2 = ar.lc		C				I0
-.mmi;	cmp.ne	p6, p7 = 0, cnd		C				M I
-	add	vp2 = 8, vp		C				M I
-	add	up2 = 8, up		C				M I
-	;;
-.mmi;	add	upadv = PFDIST, up	C				M I
-	add	vpadv = PFDIST, vp	C				M I
-	shr.u	n = n, 2		C				I0
-	.pred.rel "mutex", p6, p7
-.mmi;	add	rp2 = 8, rp		C				M I
-   (p6)	mov	cnd = -1		C				M I
-   (p7)	mov	cnd = 0			C				M I
-	;;
-	cmp.eq	p9, p0 = 1, r3		C				M I
-	cmp.eq	p7, p0 = 2, r3		C				M I
-	cmp.eq	p8, p0 = 3, r3		C				M I
-   (p9)	br	L(b1)			C				B
-   (p7)	br	L(b2)			C				B
-   (p8)	br	L(b3)			C				B
-	;;
-L(b0):
-.mmi;	ld8	v2 = [vp1], 16		C				M01
-	ld8	v3 = [vp2], 16		C				M01
-	mov	ar.lc = n		C				I0
-	;;
-	ld8	u2 = [up1], 16		C				M01
-	ld8	u3 = [up2], 16		C				M01
-	and	x2 = v2, cnd		C				M I
-	and	x3 = v3, cnd		C				M I
-	;;
-	ADDSUB	w2 = u2, x2		C				M I
-	ADDSUB	w3 = u3, x3		C				M I
-	;;
-	ld8	v0 = [vp1], 16		C				M01
-	ld8	v1 = [vp2], 16		C				M01
-	cmp.CND	p8, p0 = w2, u2		C				M I
-	cmp.CND	p9, p0 = w3, u3		C				M I
-	br	L(lo0)
-
-L(b1):	ld8	v1 = [vp1], 8		C				M01
-	add	vp2 = 8, vp2		C				M I
-	add	rp2 = 8, rp2		C				M I
-	;;
-	ld8	u1 = [up1], 8		C				M01
-	add	up2 = 8, up2		C				M I
-	and	x1 = v1, cnd		C				M I
-	;;
-	ADDSUB	w1 = u1, x1		C				M I
-	cmp.ne	p10, p0 = 0, n
-	add	n = -1, n
-	;;
-	cmp.CND	p7, p0 = w1, u1		C				M I
-	st8	[rp1] = w1, 8		C				M23
-  (p10)	br	L(b0)
-	;;
-	mov	r8 = 0			C				M I
-	br	L(e1)
-
-L(b3):	ld8	v3 = [vp1], 8		C				M01
-	add	vp2 = 8, vp2		C				M I
-	add	rp2 = 8, rp2		C				M I
-	;;
-	ld8	u3 = [up1], 8		C				M01
-	add	up2 = 8, up2		C				M I
-	and	x3 = v3, cnd		C				M I
-	;;
-	ADDSUB	w3 = u3, x3		C				M I
-	;;
-	cmp.CND	p9, p0 = w3, u3		C				M I
-	st8	[rp1] = w3, 8		C				M23
-	C fall through
-
-L(b2):
-.mmi;	ld8	v0 = [vp1], 16		C				M01
-	ld8	v1 = [vp2], 16		C				M01
-	mov	ar.lc = n		C				I0
-	;;
-	ld8	u0 = [up1], 16		C				M01
-	ld8	u1 = [up2], 16		C				M01
-	and	x0 = v0, cnd		C				M I
-	and	x1 = v1, cnd		C				M I
-	;;
-	ADDSUB	w0 = u0, x0		C				M I
-	ADDSUB	w1 = u1, x1		C				M I
-	br.cloop.dptk	L(gt2)		C				B
-	;;
-	cmp.CND	p6, p0 = w0, u0		C				M I
-	br		L(e2)		C				B
-L(gt2):
-	ld8	v2 = [vp1], 16		C				M01
-	ld8	v3 = [vp2], 16		C				M01
-	cmp.CND	p6, p0 = w0, u0		C				M I
-	cmp.CND	p7, p0 = w1, u1		C				M I
-	br		L(lo2)		C				B
-
-
-C *** MAIN LOOP START ***
-C	ALIGN(32)
-L(top):
-.mmi;	ld8	v2 = [vp1], 16		C				M01
-	ld8	v3 = [vp2], 16		C				M01
-	cmp.CND	p6, p0 = w0, u0		C				M I
-.mmi;	st8	[rp1] = w2, 16		C				M23
-	st8	[rp2] = w3, 16		C				M23
-	cmp.CND	p7, p0 = w1, u1		C				M I
-	;;
-L(lo2):
-.mmi;	ld8	u2 = [up1], 16		C				M01
-	ld8	u3 = [up2], 16		C				M01
-   (p9)	cmpeqor	p6, p0 = LIM, w0	C				M I
-.mmi;	and	x2 = v2, cnd		C				M I
-	and	x3 = v3, cnd		C				M I
-   (p9)	add	w0 = INCR, w0		C				M I
-	;;
-.mmi;	ADDSUB	w2 = u2, x2		C				M I
-   (p6)	cmpeqor	p7, p0 = LIM, w1	C				M I
-   (p6)	add	w1 = INCR, w1		C				M I
-.mmi;	ADDSUB	w3 = u3, x3		C				M I
-	lfetch	[upadv], 32
-	nop	0
-	;;
-.mmi;	ld8	v0 = [vp1], 16		C				M01
-	ld8	v1 = [vp2], 16		C				M01
-	cmp.CND	p8, p0 = w2, u2		C				M I
-.mmi;	st8	[rp1] = w0, 16		C				M23
-	st8	[rp2] = w1, 16		C				M23
-	cmp.CND	p9, p0 = w3, u3		C				M I
-	;;
-L(lo0):
-.mmi;	ld8	u0 = [up1], 16		C				M01
-	ld8	u1 = [up2], 16		C				M01
-   (p7)	cmpeqor	p8, p0 = LIM, w2	C				M I
-.mmi;	and	x0 = v0, cnd		C				M I
-	and	x1 = v1, cnd		C				M I
-   (p7)	add	w2 = INCR, w2		C				M I
-	;;
-.mmi;	ADDSUB	w0 = u0, x0		C				M I
-   (p8)	cmpeqor	p9, p0 = LIM, w3	C				M I
-   (p8)	add	w3 = INCR, w3		C				M I
-.mmb;	ADDSUB	w1 = u1, x1		C				M I
-	lfetch	[vpadv], 32
-	br.cloop.dptk	L(top)		C				B
-	;;
-C *** MAIN LOOP END ***
-
-
-L(end):
-.mmi;	st8	[rp1] = w2, 16		C				M23
-	st8	[rp2] = w3, 16		C				M23
-	cmp.CND	p6, p0 = w0, u0		C				M I
-	;;
-L(e2):
-.mmi;	cmp.CND	p7, p0 = w1, u1		C				M I
-   (p9)	cmpeqor	p6, p0 = LIM, w0	C				M I
-   (p9)	add	w0 = INCR, w0		C				M I
-	;;
-.mmi;	mov	r8 = 0			C				M I
-   (p6)	cmpeqor	p7, p0 = LIM, w1	C				M I
-   (p6)	add	w1 = INCR, w1		C				M I
-	;;
-.mmi;	st8	[rp1] = w0, 16		C				M23
-	st8	[rp2] = w1, 16		C				M23
-	mov	ar.lc = r2		C				I0
-L(e1):
-.mmb;	nop	0
-   (p7)	mov	r8 = 1			C				M I
-	br.ret.sptk.many b0		C				B
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/ia64/copyd.asm b/gmp/mpn/ia64/copyd.asm
index b94a1af362..759629e4a7 100644
--- a/gmp/mpn/ia64/copyd.asm
+++ b/gmp/mpn/ia64/copyd.asm
@@ -1,34 +1,21 @@
 dnl  IA-64 mpn_copyd -- copy limb vector, decrementing.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
 dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/ia64/copyi.asm b/gmp/mpn/ia64/copyi.asm
index 49ed192021..11451dc08d 100644
--- a/gmp/mpn/ia64/copyi.asm
+++ b/gmp/mpn/ia64/copyi.asm
@@ -1,34 +1,21 @@
 dnl  IA-64 mpn_copyi -- copy limb vector, incrementing.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
 dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/ia64/dive_1.asm b/gmp/mpn/ia64/dive_1.asm
index 5e4a273530..9b9d085c0c 100644
--- a/gmp/mpn/ia64/dive_1.asm
+++ b/gmp/mpn/ia64/dive_1.asm
@@ -1,34 +1,21 @@
 dnl  IA-64 mpn_divexact_1 -- mpn by limb exact division.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund and Kevin Ryde.
-
-dnl  Copyright 2003-2005, 2010 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -49,7 +36,7 @@ C This code is a bit messy, and not as similar to mode1o.asm as desired.
 
 C The critical path during initialization is for computing the inverse of the
 C divisor.  Since odd divisors are probably common, we conditionally execute
-C the initial count_trailing_zeros code and the downshift.
+C the initial count_trailing_zeros code and the downshift.
 
 C Possible improvement: Merge more of the feed-in code into the inverse
 C computation.
@@ -190,28 +177,22 @@ ifdef(`HAVE_ABI_32',
 	ld8		r21 = [up], 8
 	br		.Lent
 
-.Ltop:	ld8		r21 = [up], 8
+.Loop:	ld8		r21 = [up], 8
 	xma.l		f12 = f9, f8, f10	C q = c * -inverse + si
-	nop.b		0
 	;;
 .Lent:	add		r16 = 160, up
 	shl		r22 = r21, lshift
-	nop.b		0
 	;;
 	stf8		[rp] = f12, 8
 	xma.hu		f9 = f12, f6, f9	C c = high(q * divisor + c)
-	nop.b		0
-	nop.m		0
 	xmpy.l		f10 = f11, f7		C si = ulimb * inverse
-	nop.b		0
 	;;
 	or		r31 = r22, r23
 	shr.u		r23 = r21, rshift
-	nop.b		0
 	;;
 	lfetch		[r16]
 	setf.sig	f11 = r31
-	br.cloop.sptk.few.clr .Ltop
+	br.cloop.sptk.few.clr .Loop
 
 
 	xma.l		f12 = f9, f8, f10	C q = c * -inverse + si
diff --git a/gmp/mpn/ia64/divrem_1.asm b/gmp/mpn/ia64/divrem_1.asm
index e8878209db..aa50ac902b 100644
--- a/gmp/mpn/ia64/divrem_1.asm
+++ b/gmp/mpn/ia64/divrem_1.asm
@@ -1,35 +1,22 @@
 dnl  IA-64 mpn_divrem_1 and mpn_preinv_divrem_1 -- Divide an mpn number by an
 dnl  unnormalized limb.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
 dnl  Copyright 2002, 2004, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/ia64/divrem_2.asm b/gmp/mpn/ia64/divrem_2.asm
index 9864311278..da3e9d64b7 100644
--- a/gmp/mpn/ia64/divrem_2.asm
+++ b/gmp/mpn/ia64/divrem_2.asm
@@ -1,45 +1,59 @@
-dnl  IA-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
+dnl  IA-64 mpn_divrem_2 -- Divide an n-limb number by a 2-limb number.
 
-dnl  Copyright 2010, 2013 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C               norm   frac
-C itanium 1
-C itanium 2     29     29
+C         cycles/limb
+C Itanium:    63
+C Itanium 2:  46
 
 
 C TODO
-C  * Inline and interleave limb inversion code with loop setup code.
-C  * We should use explicit bundling in much of the code, since it typically
-C    cuts some cycles with the GNU assembler.
+C  * Further optimize the loop.  We could probably do some more trickery with
+C    arithmetic in the FPU, or perhaps use a non-zero addend of xma in more
+C    places.
+C  * Software pipeline for perhaps 5 saved cycles, around the end and start of
+C    the loop.
+C  * Schedule code outside of loop better.
+C  * Update the comments.  They are now using the same name for the same
+C    logical quantity.
+C  * Handle conditional zeroing of r31 in loop more cleanly.
+C  * Inline mpn_invert_limb and schedule its insns across the entire init code.
+C  * Ultimately, use 2-limb, or perhaps 3-limb or 4-limb inverse.
+
+define(`qp',`r32')
+define(`qxn',`r33')
+define(`np',`r34')
+define(`nn',`r35')
+define(`dp',`r36')
 
+define(`fnh',`f11')
+define(`fminus1',`f10')
+define(`fd0',`f13')
+define(`fd1',`f14')
+define(`d0',`r39')
+define(`d1',`r36')
+define(`fnl',`f32')
+define(`fdinv',`f12')
+
+define(`R1',`r38') define(`R0',`r37')
+define(`P1',`r28') define(`P0',`r27')
 
 ASM_START()
 
@@ -47,234 +61,204 @@ C HP's assembler requires these declarations for importing mpn_invert_limb
 	.global	mpn_invert_limb
 	.type	mpn_invert_limb,@function
 
-C INPUT PARAMETERS
-C qp   = r32
-C fn   = r33
-C np   = r34
-C nn   = r35
-C dp   = r36
-
-define(`f0x1', `f15')
-
-ASM_START()
 PROLOGUE(mpn_divrem_2)
 	.prologue
-ifdef(`HAVE_ABI_32',
-`	addp4		r32 = 0, r32		C M I
-	addp4		r34 = 0, r34		C M I
-	zxt4		r35 = r35		C I
-	addp4		r36 = 0, r36		C M I
-	nop.m		0
-	zxt4		r33 = r33		C I
-	;;
-')
 	.save ar.pfs, r42
-	alloc	 r42 = ar.pfs, 5, 9, 1, 0
-	shladd	 r34 = r35, 3, r34
-	adds	 r14 = 8, r36
-	mov	 r43 = r1
-	;;
-	adds	 r15 = -8, r34
-	ld8	 r39 = [r14]
-	.save ar.lc, r45
-	mov	 r45 = ar.lc
-	adds	 r14 = -16, r34
-	mov	 r40 = r0
-	adds	 r34 = -24, r34
-	;;
-	ld8	 r38 = [r15]
+	.save ar.lc, r44
 	.save rp, r41
-	mov	 r41 = b0
-	.body
-	ld8	 r36 = [r36]
-	ld8	 r37 = [r14]
-	;;
-	cmp.gtu	 p6, p7 = r39, r38
-  (p6)	br.cond.dptk .L8
-	;;
-	cmp.leu	 p8, p9 = r36, r37
-	cmp.geu	 p6, p7 = r39, r38
-	;;
-  (p8)	cmp4.ne.and.orcm p6, p7 = 0, r0
-  (p7)	br.cond.dptk .L51
-.L8:
-	add	 r14 = r33, r35		// un + fn
-	mov	 r46 = r39		// argument to mpn_invert_limb
-	;;
-	adds	 r35 = -3, r14
-	;;
-	cmp.gt	 p12, p0 = r0, r35
-  (p12)	br.cond.dpnt L(end)
-	br.call.sptk.many b0 = mpn_invert_limb
-	;;
-	setf.sig f11 = r8		// di (non-final)
-	setf.sig f34 = r39		// d1
-	setf.sig f33 = r36		// d0
-	mov	 r1 = r43
-	;;
-	mov	 r17 = 1
-	setf.sig f9 = r38		// n2
-	xma.l	 f6 = f11, f34, f0	// t0 = LO(di * d1)
-	;;
-	setf.sig f10 = r37		// n1
-	setf.sig f15 = r17		// 1
-	xma.hu	 f8 = f11, f33, f0	// s0 = HI(di * d0)
-	;;
-	getf.sig r17 = f6
-	getf.sig r16 = f8
-	mov	 ar.lc = r35
-	;;
-	sub	 r18 = r0, r39		// -d1
-	add	 r14 = r17, r36
-	;;
-	setf.sig f14 = r18		// -d1
-	cmp.leu	 p8, p9 = r17, r14
-	add	 r16 = r14, r16
-	;;
-  (p9)	adds	 r19 = 0, r0
-  (p8)	adds	 r19 = -1, r0
-	cmp.gtu	 p6, p7 = r14, r16
-	;;
-  (p6)	adds	 r19 = 1, r19
-	;;
-ifelse(1,1,`
-	cmp.gt	 p7, p6 = r0, r19
-	;;
-  (p6)	adds	 r8 = -1, r8		// di--
-  (p6)	sub	 r14 = r16, r39		// t0 -= d1
-  (p6)	cmp.ltu	 p6, p7 = r16, r39	// cy for: t0 - d1
-	;;
-  (p6)	cmp.gt	 p9, p8 = 1, r19
-  (p7)	cmp.gt	 p9, p8 = 0, r19
-  (p6)	adds	 r19 = -1, r19		// t1 -= cy
-	mov	 r16 = r14
-	;;
-  (p8)	adds	 r8 = -1, r8		// di--
-  (p8)	sub	 r14 = r16, r39		// t0 -= d1
-  (p8)	cmp.ltu	 p8, p9 = r16, r39	// cy for: t0 - d1
-	;;
-  (p8)	cmp.gt	 p7, p6 = 1, r19
-  (p9)	cmp.gt	 p7, p6 = 0, r19
-  (p8)	adds	 r19 = -1, r19		// t1 -= cy
-	mov	 r16 = r14
-	;;
-  (p6)	adds	 r8 = -1, r8		// di--
-  (p6)	sub	 r14 = r16, r39		// t0 -= d1
-  (p6)	cmp.ltu	 p6, p7 = r16, r39	// cy for: t0 - d1
-	;;
-  (p6)	cmp.gt	 p9, p8 = 1, r19
-  (p7)	cmp.gt	 p9, p8 = 0, r19
-  (p6)	adds	 r19 = -1, r19		// t1 -= cy
-	mov	 r16 = r14
-	;;
-  (p8)	adds	 r8 = -1, r8		// di--
-  (p8)	sub	 r14 = r16, r39		// t0 -= d1
-  (p8)	cmp.ltu	 p8, p9 = r16, r39	// cy for: t0 - d1
-	;;
-  (p8)	adds	 r19 = -1, r19		// t1 -= cy
-	mov	 r16 = r14
-',`
-	cmp.gt	 p8, p9 = r0, r19
-  (p8)	br.cond.dpnt .L46
-.L52:
-	cmp.leu	 p6, p7 = r39, r16
-	sub	 r14 = r16, r39
-	adds	 r8 = -1, r8
-	;;
-  (p7)	adds	 r19 = -1, r19
-	mov	 r16 = r14
+ifdef(`HAVE_ABI_32',
+`	addp4		qp = 0, qp		C M I
+	addp4		np = 0, np		C M I
+	addp4		dp = 0, dp		C M I
+	zxt4		nn = nn			C I
+	zxt4		qxn = qxn		C I
 	;;
-  (p7)	cmp.gt	 p8, p9 = r0, r19
-  (p9)	br.cond.dptk .L52
-.L46:
 ')
-	setf.sig f32 = r8		// di
-	shladd	 r32 = r35, 3, r32
-	;;
 
-	ALIGN(16)
-L(top):	nop 0
-	nop 0
-	cmp.gt	 p8, p9 = r33, r35
-	;;
- (p8)	mov	 r37 = r0
- (p9)	ld8	 r37 = [r34], -8
-	xma.hu	 f8 = f9, f32, f10	//				0,29
-	xma.l	 f12 = f9, f32, f10	//				0
-	;;
-	getf.sig r20 = f12		// q0				4
-	xma.l	 f13 = f15, f8, f9	// q += n2			4
-	sub	 r8 = -1, r36		// bitnot d0
-	;;
-	getf.sig r18 = f13		//				8
-	xma.l	 f7 = f14, f13, f10	//				8
-	xma.l	 f6 = f33, f13, f33	// t0 = LO(d0*q+d0)		8
-	xma.hu	 f9 = f33, f13, f33	// t1 = HI(d0*q+d0)		9
-	;;
-	getf.sig r38 = f7		// n1				12
-	getf.sig r16 = f6		//				13
-	getf.sig r19 = f9		//				14
-	;;
-	sub	 r38 = r38, r39		// n1 -= d1			17
-	;;
-	cmp.ne	 p9, p0 = r0, r0	// clear p9
-	cmp.leu	 p10, p11 = r16, r37	// cy for: n0 - t0		18
-	;;
-	sub	 r37 = r37, r16		// n0 -= t0			19
-  (p11)	sub	 r38 = r38, r19, 1	// n1 -= t1 - cy		19
-  (p10)	sub	 r38 = r38, r19		// n1 -= t1			19
-	;;
-	cmp.gtu	 p6, p7 = r20, r38	// n1 >= q0			20
-	;;
-  (p7)	cmp.ltu	 p9, p0 = r8, r37	//				21
-  (p6)	add	 r18 = 1, r18		//
-  (p7)	add	 r37 = r37, r36		//				21
-  (p7)	add	 r38 = r38, r39		//				21
+	alloc		r42 = ar.pfs, 5,8,1,0	C M2
+	ld8		d0 = [dp], 8		C M0M1	d0
+	mov		r44 = ar.lc		C I0
+	shladd		np = nn, 3, np		C M I
+	;;
+	ld8		d1 = [dp]		C M0M1	d1
+	mov		r41 = b0		C I0
+	add		r15 = -8, np		C M I
+	add		np = -16, np		C M I
+	mov		r40 = r0		C M I
+	;;
+	ld8		R1 = [r15]		C M0M1	n1
+	ld8		R0 = [r34], -8		C M0M1	n0
+	;;
+	cmp.ltu		p6, p0 = d1, R1		C M I
+	cmp.eq		p8, p0 = d1, R1		C M I
+	;;
+  (p8)	cmp.leu		p6, p0 = d0, R0
+	cmp.ltu		p8, p9 = R0, d0
+  (p6)	br.cond.dpnt	.L_high_limb_1		C FIXME: inline!
+.L8:
+
+	mov		r45 = d1
+	br.call.sptk.many b0 = mpn_invert_limb	C FIXME: inline+schedule
 	;;
-	setf.sig f10 = r37		// n1				22
-  (p9)	add	 r38 = 1, r38		//				22
+	setf.sig	fd1 = d1		C d1
+	setf.sig	fd0 = d0		C d0
+	add		r14 = r33, r35		C nn + qxn
 	;;
-	setf.sig f9 = r38		// n2				23
-	cmp.gtu	 p6, p7 = r39, r38	//				23
-  (p7)	br.cond.spnt L(fix)
-L(bck):	st8	 [r32] = r18, -8
-	adds	 r35 = -1, r35
-	br.cloop.sptk.few L(top)
+	setf.sig	fdinv = r8		C dinv
+	mov		r9 = -1
+	add		r35 = -3, r14
 	;;
-
-L(end):	add	r14 = 8, r34
-	add	r15 = 16, r34
-	mov	 b0 = r41
+	setf.sig	fminus1 = r9
+	cmp.gt		p6, p0 = r0, r35
+	shladd		qp = r35, 3, qp
+	mov		ar.lc = r35
+	mov		r31 = 0			C n0
+  (p6)	br.cond.dpnt	.Ldone
 	;;
-	st8	[r14] = r37
-	st8	[r15] = r38
-	mov	 ar.pfs = r42
-	mov	 r8 = r40
-	mov	 ar.lc = r45
+	ALIGN(16)
+C *** MAIN LOOP START ***
+.Loop:		C 00
+	mov		r15 = R0		C nadj = n10
+	cmp.le		p14, p15 = 0, R0	C check high bit of R0
+	cmp.le		p8, p0 = r33, r35	C dividend limbs remaining?
+	;;	C 01
+	.pred.rel "mutex", p14, p15
+  (p8)	ld8		r31 = [r34], -8		C n0
+  (p15)	add		r15 = d1, R0		C nadj = n10 + d1
+  (p15)	add		r14 = 1, R1		C nh + (nl:63)
+  (p14)	mov		r14 = R1		C nh
+	cmp.eq		p6, p0 = d1, R1		C nh == d1
+  (p6)	br.cond.spnt	.L_R1_eq_d1
+	;;	C 02
+	setf.sig	f8 = r14		C n2 + (nl:63)
+	setf.sig	f15 = r15		C nadj
+	sub		r23 = -1, R1		C r23 = ~nh
+	;;	C 03
+	setf.sig	fnh = r23
+	setf.sig	fnl = R0
+	;;	C 08
+	xma.hu		f7 = fdinv, f8, f15	C xh = HI(dinv*(nh-nmask)+nadj)
+	;;	C 12
+	xma.l		f7 = f7, fminus1, fnh	C nh + xh
+	;;	C 16
+	getf.sig	r14 = f7
+	xma.hu		f9 = f7, fd1, fnl	C xh = HI(q1*d1+nl)
+	xma.l		f33 = f7, fd1, fnl	C xl = LO(q1*d1+nl)
+	;;	C 20
+	getf.sig	r16 = f9
+	sub		r24 = d1, R1
+		C 21
+	getf.sig	r17 = f33
+	;;	C 25
+	cmp.eq		p6, p7 = r16, r24
+	;;	C 26
+	.pred.rel "mutex", p6, p7
+  (p6)	xma.l		f8 = f7, fminus1, f0	C f8 = -f7
+  (p7)	xma.l		f8 = f7,fminus1,fminus1	C f8 = -f7-1
+	;;	C 27
+	.pred.rel "mutex", p6, p7
+  (p6)	sub		r18 = 0, r14		C q = -q1
+  (p7)	sub		r18 = -1, r14		C q = -q1-1
+  (p6)	add		r14 = 0, r17		C n1 = xl
+  (p7)	add		r14 = d1, r17		C n1 = xl + d1
+	;;	C 30
+	xma.hu		f9 = fd0, f8, f0	C d0*(-f7-1) = -d0*f7-d0
+	xma.l		f35 = fd0, f8, f0
+	;;	C 34
+	getf.sig	P1 = f9		C P1
+		C 35
+	getf.sig	P0 = f35		C P0
+	;;
+.L_adj:		C 40
+	cmp.ltu		p8, p0 = r31, P0	C p8 = cy from low limb
+	cmp.ltu		p6, p0 = r14, P1	C p6 = prel cy from high limb
+	sub		R0 = r31, P0
+	sub		R1 = r14, P1
+	;;	C 41
+  (p8)	cmp.eq.or	p6, p0 = 0, R1		C p6 = final cy from high limb
+  (p8)	add		R1 = -1, R1
+	cmp.ne		p10, p0 = r0, r0	C clear p10 FIXME: use unc below!
+	cmp.ne		p13, p0 = r0, r0	C clear p13 FIXME: use unc below!
+	;;	C 42
+  (p6)	add		R0 = R0, d0
+  (p6)	add		R1 = R1, d1
+  (p6)	add		r18 = -1, r18		C q--
+	;;	C 43
+  (p6)	cmp.ltu		p10, p0 = R0, d0
+  (p6)	cmp.ltu		p0, p13 = R1, d1
+	;;	C 44
+  (p10)	cmp.ne.and	p0, p13 = -1, R1	C p13 = !cy
+  (p10)	add		R1 = 1, R1
+  (p13)	br.cond.spnt	.L_two_too_big		C jump if not cy
+	;;	C 45
+	st8		[qp] = r18, -8
+	add		r35 = -1, r35
+	mov		r31 = 0			C n0, next iteration
+	br.cloop.sptk	.Loop
+C *** MAIN LOOP END ***
+	;;
+.Ldone:
+	mov		r8 = r40
+	mov		b0 = r41
+	add		r21 = 8, r34
+	add		r22 = 16, r34
+	;;
+	st8		[r21] = R0
+	st8		[r22] = R1
+	mov		ar.pfs = r42
+	mov		ar.lc = r44
 	br.ret.sptk.many b0
-	;;
-.L51:
+
+.L_high_limb_1:
 	.pred.rel "mutex", p8, p9
-	sub	 r37 = r37, r36
-  (p9)	sub	 r38 = r38, r39, 1
-  (p8)	sub	 r38 = r38, r39
-	adds	 r40 = 1, r0
-	br .L8
+	sub		R0 = R0, d0
+  (p8)	sub		R1 = R1, d1, 1
+  (p9)	sub		R1 = R1, d1
+	mov		r40 = 1
+	br.sptk		.L8
 	;;
 
-L(fix):	cmp.geu	 p6, p7 = r39, r38
-	cmp.leu	 p8, p9 = r36, r37
+.L_two_too_big:
+	add		R0 = R0, d0
+	add		R1 = R1, d1
+	;;
+	add		r18 = -1, r18		C q--
+	cmp.ltu		p10, p0 = R0, d0
+	;;
+  (p10)	add		R1 = 1, R1
+	st8		[qp] = r18, -8
+	add		r35 = -1, r35
+	mov		r31 = 0			C n0, next iteration
+	br.cloop.sptk	.Loop
+	br.sptk		.Ldone
+
+.L_R1_eq_d1:
+	add		r14 = R0, d1		C r = R0 + d1
+	mov		r18 = -1		C q = -1
 	;;
-  (p8)	cmp4.ne.and.orcm p6, p7 = 0, r0
-  (p6)	br.cond.dptk L(bck)
-	sub	 r37 = r37, r36
-  (p9)	sub	 r38 = r38, r39, 1
-  (p8)	sub	 r38 = r38, r39
-	adds	 r18 = 1, r18
+	cmp.leu		p6, p0 = R0, r14
+ (p6)	br.cond.spnt	.L20			C jump unless cy
 	;;
-	setf.sig f9 = r38		// n2
-	setf.sig f10 = r37		// n1
-	br	 L(bck)
-
+	sub		P1 = r14, d0
+	add		R0 = r31, d0
+	;;
+	cmp.ltu		p8, p9 = R0, r31
+	;;
+	.pred.rel "mutex", p8, p9
+	st8		[qp] = r18, -8
+  (p8)	add		R1 = r0, P1, 1		C R1 = n1 - P1 - cy
+  (p9)	add		R1 = r0, P1		C R1 = n1 - P1
+	add		r35 = -1, r35
+	mov		r31 = 0			C n0, next iteration
+	br.cloop.sptk	.Loop
+	br.sptk		.Ldone
+	;;
+.L20:	cmp.ne		p6, p7 = 0, d0
+	;;
+	.pred.rel "mutex", p6, p7
+  (p6)	add		P1 = -1, d0
+  (p7)	mov		P1 = d0
+	sub		P0 = r0, d0
+	br.sptk		.L_adj
 EPILOGUE()
 ASM_END()
diff --git a/gmp/mpn/ia64/gcd_1.asm b/gmp/mpn/ia64/gcd_1.asm
index 28e9a63ca3..d3840a6bd8 100644
--- a/gmp/mpn/ia64/gcd_1.asm
+++ b/gmp/mpn/ia64/gcd_1.asm
@@ -1,42 +1,28 @@
 dnl  Itanium-2 mpn_gcd_1 -- mpn by 1 gcd.
 
-dnl  Contributed to the GNU project by Kevin Ryde, innerloop by Torbjorn
-dnl  Granlund.
-
-dnl  Copyright 2002-2005, 2012, 2013 Free Software Foundation, Inc.
+dnl  Copyright 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C           cycles/bitpair (1x1 gcd)
-C Itanium:       ?
-C Itanium 2:     5.1
+C itanium2:      6.3
+C itanium:      14 (approx)
 
 
 C mpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y);
@@ -59,13 +45,29 @@ C The main loop consists of transforming x,y to abs(x-y),min(x,y), and then
 C stripping factors of 2 from abs(x-y).  Those factors of two are
 C determined from just y-x, without the abs(), since there's the same
 C number of trailing zeros on n or -n in twos complement.  That makes the
-C dependent chain 8 cycles deep.
+C dependent chain
+C
+C	cycles
+C	  1    sub     x-y and x-y-1
+C	  3    andcm   (x-y-1)&~(x-y)
+C	  2    popcnt  trailing zeros
+C	  3    shr.u   strip abs(x-y)
+C	 ---
+C	  9
 C
 C The selection of x-y versus y-x for abs(x-y), and the selection of the
-C minimum of x and y, is done in parallel with the critical path.
+C minimum of x and y, is done in parallel with the above.
 C
 C The algorithm takes about 0.68 iterations per bit (two N bit operands) on
-C average, hence the final 5.8 cycles/bitpair.
+C average, hence the final 6.3 cycles/bitpair.
+C
+C The loop is not as fast as one might hope, since there's extra latency
+C from andcm going across to the `multimedia' popcnt, and vice versa from
+C multimedia shr.u back to the integer sub.
+C
+C The loop branch is .sptk.clr since we usually expect a good number of
+C iterations, and the iterations are data dependent so it's unlikely past
+C results will predict anything much about the future.
 C
 C Not done:
 C
@@ -86,10 +88,13 @@ C only going down I0), perhaps it'd be possible to shift left instead,
 C using add.  That would mean keeping track of the lowest not-yet-zeroed
 C bit, using some sort of mask.
 C
-C TODO:
-C  * Once mod_1_N exists in assembly for Itanium, add conditional calls.
-C  * Call bmod_1 even for n=1 when up[0] >> v0 (like other gcd_1 impls).
-C  * Probably avoid popcnt also outside of loop, instead use ctz_table.
+C Itanium-1:
+C
+C This code is not designed for itanium-1 and in fact doesn't run well on
+C that chip.  The loop seems to be about 21 cycles, probably because we end
+C up with a 10 cycle replay for not forcibly scheduling the shr.u latency.
+C Lack of branch hints might introduce a couple of bubbles too.
+C
 
 ASM_START()
 	.explicit				C What does this mean?
@@ -98,19 +103,6 @@ C HP's assembler requires these declarations for importing mpn_modexact_1c_odd
 	.global	mpn_modexact_1c_odd
 	.type	mpn_modexact_1c_odd,@function
 
-C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
-
-deflit(MAXSHIFT, 7)
-deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
-
-	.section	".rodata"
-	ALIGN(m4_lshift(1,MAXSHIFT))	C align table to allow using dep
-ctz_table:
-	.byte	MAXSHIFT
-forloop(i,1,MASK,
-`	.byte	m4_count_trailing_zeros(i)
-')
-
 PROLOGUE(mpn_gcd_1)
 
 		C r32	xp
@@ -154,9 +146,13 @@ ifdef(`HAVE_ABI_32',
 
 		mov	out_carry = 0
 
+		C
+
 		popcnt	y_twos = y_twos		C I0  y twos
 		;;
 
+		C
+
 { .mmi;		add	x_orig_one = -1, x_orig	C M0  orig x-1
 		shr.u	out_divisor = y, y_twos	C I0  y without twos
 }{		shr.u	y = y, y_twos		C I1  y without twos
@@ -173,62 +169,63 @@ ifdef(`HAVE_ABI_32',
 		mov	b0 = save_rp		C I0
 }		;;
 
+		C
+
 		popcnt	x_orig = x_orig		C I0  orig x twos
+
 		popcnt	r9 = r9			C I0  x twos
 		;;
 
+		C
+
 {		cmp.lt	p7,p0 = x_orig, y_twos	C M0  orig x_twos < y_twos
 		shr.u	x = x, r9		C I0  x odd
 }		;;
 
 {	(p7)	mov	y_twos = x_orig		C M0  common twos
 		add	r10 = -1, y		C I0  y-1
-	(p6)	br.dpnt.few L(done_y)		C B0  x%y==0 then result y
+	(p6)	br.dpnt.few .Ldone_y		C B0  x%y==0 then result y
 }		;;
 
-		addl	r22 = @ltoffx(ctz_table#), r1
-		mov	r25 = m4_lshift(MASK, MAXSHIFT)
-		;;
-		ld8.mov r22 = [r22], ctz_table#
-		br	L(ent)
-
-		ALIGN(32)
-L(top):		.pred.rel "mutex", p6,p7
-.mmi;	(p7)	mov	y = x
-	(p6)	sub	x = x, y
-		dep	r21 = r19, r22, 0, MAXSHIFT	C concat(table,lowbits)
-.mmi;		and	r20 = MASK, r19
-	(p7)	mov	x = r19
-		nop	0
-		;;
-L(mid):
-.mmb;		ld1	r16 = [r21]
-		cmp.eq	p10,p0 = 0, r20
-	(p10)	br.spnt.few.clr	 L(shift_alot)
-		;;
-.mmi;		nop	0
-		nop	0
-		shr.u	x = x, r16
+		C
+
+
+		C No noticeable difference in speed for the loop aligned to
+		C 32 or just 16.
+.Ltop:
+		C r8	x
+		C r10  y-1
+		C r34	y
+		C r38	common twos, for use at end
+
+{  .mmi;	cmp.gtu	p8,p9 = x, y	C M0  x>y
+		cmp.ne	p10,p0 = x, y	C M1  x!=y
+		sub	r9 = y, x	C I0  d = y - x
+}{ .mmi;	sub	r10 = r10, x	C M2  d-1 = y - x - 1
+}		;;
+
+{ .mmi;	.pred.rel "mutex", p8, p9
+	(p8)	sub	x = x, y	C M0  x>y  use x=x-y, y unchanged
+	(p9)	mov	y = x		C M1  y>=x use y=x
+	(p9)	mov	x = r9		C I0  y>=x use x=y-x
+}{ .mmi;	andcm	r9 = r10, r9	C M2  (d-1)&~d
 		;;
-L(ent):
-.mmi;		sub	r19 = y, x
-		cmp.gtu	p6,p7 = x, y
-		cmp.ne	p8,p0 = x, y
-.mmb;		nop	0
-		nop	0
-	(p8)	br.sptk.few.clr L(top)
+
+		add	r10 = -1, y	C M0  new y-1
+		popcnt	r9 = r9		C I0  twos on x-y
+}		;;
+
+{		shr.u	x = x, r9	C I0   new x without twos
+	(p10)	br.sptk.few.clr .Ltop
+}		;;
+
 
 
 		C result is y
-L(done_y):
-		mov	ar.pfs = save_pfs	C I0
+.Ldone_y:
 		shl	r8 = y, y_twos		C I   common factors of 2
+		;;
+		mov	ar.pfs = save_pfs	C I0
 		br.ret.sptk.many b0
 
-L(shift_alot):
-		and	r20 = x, r25
-		shr.u	x = x, MAXSHIFT
-		;;
-		dep	r21 = x, r22, 0, MAXSHIFT
-		br	L(mid)
 EPILOGUE()
diff --git a/gmp/mpn/ia64/gmp-mparam.h b/gmp/mpn/ia64/gmp-mparam.h
index bdbd62d974..9391887e10 100644
--- a/gmp/mpn/ia64/gmp-mparam.h
+++ b/gmp/mpn/ia64/gmp-mparam.h
@@ -1,204 +1,72 @@
 /* gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 2000-2005, 2009-2011, 2014 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2009 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 900MHz Itanium2 (titanic.gmplib.org) */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.4 */
-
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        26
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD          MP_SIZE_T_MAX  /* never */
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD              12
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
-
-#define MUL_TOOM22_THRESHOLD                40
-#define MUL_TOOM33_THRESHOLD               129
-#define MUL_TOOM44_THRESHOLD               212
-#define MUL_TOOM6H_THRESHOLD               318
-#define MUL_TOOM8H_THRESHOLD               430
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     153
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     129
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     151
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     208
-
-#define SQR_BASECASE_THRESHOLD              11
-#define SQR_TOOM2_THRESHOLD                 82
-#define SQR_TOOM3_THRESHOLD                131
-#define SQR_TOOM4_THRESHOLD                494
-#define SQR_TOOM6_THRESHOLD                  0  /* always */
-#define SQR_TOOM8_THRESHOLD                  0  /* always */
-
-#define MULMID_TOOM42_THRESHOLD             98
-
-#define MULMOD_BNM1_THRESHOLD               23
-#define SQRMOD_BNM1_THRESHOLD               19
-
-#define MUL_FFT_MODF_THRESHOLD             500  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    500, 5}, {     27, 6}, {     14, 5}, {     29, 6}, \
-    {     31, 7}, {     16, 6}, {     35, 7}, {     18, 6}, \
-    {     37, 7}, {     19, 6}, {     39, 7}, {     33, 8}, \
-    {     17, 7}, {     37, 8}, {     19, 7}, {     39, 8}, \
-    {     21, 7}, {     43, 8}, {     37, 9}, {     19, 8}, \
-    {     43, 9}, {     23, 8}, {     49, 9}, {     27, 8}, \
-    {     57, 9}, {     31, 8}, {     63, 9}, {     43,10}, \
-    {     23, 9}, {     59,10}, {     31, 9}, {     71,10}, \
-    {     39, 9}, {     83,10}, {     47, 9}, {     99,10}, \
-    {     55,11}, {     31,10}, {     87,11}, {     47,10}, \
-    {    111,12}, {     31,11}, {     63,10}, {    143,11}, \
-    {     79,10}, {    167,11}, {     95,10}, {    199,11}, \
-    {    111,12}, {     63,11}, {    143,10}, {    287,11}, \
-    {    159,12}, {     95,11}, {    191,10}, {    399,11}, \
-    {    207,10}, {    415,13}, {     63,12}, {    127,11}, \
-    {    271,10}, {    543,11}, {    287,10}, {    575,12}, \
-    {    159,11}, {    335,10}, {    671,11}, {    367,12}, \
-    {    191,11}, {    399,10}, {    799,11}, {    431,12}, \
-    {    223,11}, {    447,13}, {    127,12}, {    255,11}, \
-    {    543,12}, {    287,11}, {    607,12}, {    319,11}, \
-    {    671,12}, {    351,11}, {    703,13}, {    191,12}, \
-    {    383,11}, {    799,12}, {    415,11}, {    863,12}, \
-    {    447,14}, {    127,13}, {    255,12}, {    607,13}, \
-    {    319,12}, {    735,13}, {    383,12}, {    863,13}, \
-    {    447,12}, {    927,11}, {   1855,12}, {    959,14}, \
-    {    255,13}, {    511,12}, {   1055,13}, {    575,12}, \
-    {   1215,13}, {    639,12}, {   1279,13}, {    703,14}, \
-    {    383,13}, {    767,12}, {   1535,13}, {    831,12}, \
-    {   1727,13}, {    895,12}, {   1791,13}, {    959,15}, \
-    {    255,14}, {    511,13}, {   1087,12}, {   2175,13}, \
-    {   1215,14}, {    639,13}, {   1343,12}, {   2687,13}, \
-    {   1471,14}, {    767,13}, {   1599,12}, {   3199,13}, \
-    {   1727,14}, {    895,13}, {   1855,15}, {    511,14}, \
-    {   1023,13}, {   2111,12}, {   4223,13}, {   2175,14}, \
-    {   1151,13}, {   2431,14}, {   1279,13}, {   2687,14}, \
-    {   1407,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 150
-#define MUL_FFT_THRESHOLD                 6272
-
-#define SQR_FFT_MODF_THRESHOLD             468  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    468, 5}, {     29, 6}, {     15, 5}, {     31, 6}, \
-    {     35, 7}, {     18, 6}, {     37, 7}, {     37, 8}, \
-    {     19, 7}, {     40, 8}, {     29, 9}, {     15, 8}, \
-    {     37, 9}, {     19, 8}, {     43, 9}, {     23, 8}, \
-    {     49, 9}, {     27, 8}, {     57, 9}, {     43,10}, \
-    {     23, 9}, {     55,10}, {     31, 9}, {     71,10}, \
-    {     39, 9}, {     83,10}, {     47, 9}, {     99,10}, \
-    {     55,11}, {     31,10}, {     87,11}, {     47,10}, \
-    {    111,12}, {     31,11}, {     63,10}, {    143,11}, \
-    {     79,10}, {    167,11}, {     95,10}, {    191,11}, \
-    {    111,12}, {     63,11}, {    127,10}, {    255,11}, \
-    {    143,10}, {    287, 9}, {    575,10}, {    303,11}, \
-    {    159,10}, {    319,12}, {     95,11}, {    191,10}, \
-    {    399,11}, {    207,10}, {    415,13}, {     63,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
-    {    543,11}, {    287,10}, {    575,11}, {    303,12}, \
-    {    159,11}, {    335,10}, {    671,11}, {    367,10}, \
-    {    735,12}, {    191,11}, {    399,10}, {    799,11}, \
-    {    431,10}, {    863,12}, {    223,11}, {    463,13}, \
-    {    127,12}, {    255,11}, {    543,12}, {    287,11}, \
-    {    607,12}, {    319,11}, {    671,12}, {    351,11}, \
-    {    735,13}, {    191,12}, {    383,11}, {    799,12}, \
-    {    415,11}, {    863,12}, {    447,11}, {    895,14}, \
-    {    127,13}, {    255,12}, {    543,11}, {   1087,12}, \
-    {    607,13}, {    319,12}, {    735,13}, {    383,12}, \
-    {    863,13}, {    447,12}, {    959,14}, {    255,13}, \
-    {    511,12}, {   1087,13}, {    575,12}, {   1215,13}, \
-    {    639,12}, {   1279,13}, {    703,12}, {   1407,14}, \
-    {    383,13}, {    767,12}, {   1535,13}, {    831,12}, \
-    {   1663,13}, {    895,12}, {   1791,13}, {    959,15}, \
-    {    255,14}, {    511,13}, {   1023,12}, {   2047,13}, \
-    {   1087,12}, {   2175,13}, {   1215,14}, {    639,13}, \
-    {   1343,12}, {   2687,13}, {   1471,14}, {    767,13}, \
-    {   1599,12}, {   3199,13}, {   1663,14}, {    895,13}, \
-    {   1919,15}, {    511,14}, {   1023,13}, {   2175,14}, \
-    {   1151,13}, {   2431,14}, {   1279,13}, {   2687,14}, \
-    {   1407,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 154
-#define SQR_FFT_THRESHOLD                 4032
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  62
-#define MULLO_MUL_N_THRESHOLD            11616
-
-#define DC_DIV_QR_THRESHOLD                 61
-#define DC_DIVAPPR_Q_THRESHOLD             222
-#define DC_BDIV_QR_THRESHOLD                95
-#define DC_BDIV_Q_THRESHOLD                264
-
-#define INV_MULMOD_BNM1_THRESHOLD           78
-#define INV_NEWTON_THRESHOLD               133
-#define INV_APPR_THRESHOLD                 141
-
-#define BINV_NEWTON_THRESHOLD              248
-#define REDC_1_TO_REDC_2_THRESHOLD           0  /* always */
-#define REDC_2_TO_REDC_N_THRESHOLD         148
-
-#define MU_DIV_QR_THRESHOLD               1187
-#define MU_DIVAPPR_Q_THRESHOLD            1142
-#define MUPI_DIV_QR_THRESHOLD                0  /* always */
-#define MU_BDIV_QR_THRESHOLD              1308
-#define MU_BDIV_Q_THRESHOLD               1470
-
-#define POWM_SEC_TABLE  1,28,251,1925
-
-#define MATRIX22_STRASSEN_THRESHOLD         23
-#define HGCD_THRESHOLD                     120
-#define HGCD_APPR_THRESHOLD                 77
-#define HGCD_REDUCE_THRESHOLD             3389
-#define GCD_DC_THRESHOLD                   393
-#define GCDEXT_DC_THRESHOLD                440
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        20
-#define SET_STR_DC_THRESHOLD              1216
-#define SET_STR_PRECOMPUTE_THRESHOLD      3170
-
-#define FAC_DSC_THRESHOLD                  746
-#define FAC_ODD_THRESHOLD                    0  /* always */
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+
+/* 1300MHz Itanium2 (babe.fsffrance.org) */
+
+/* Generated by tuneup.c, 2009-03-04, gcc 4.2 */
+
+#define MUL_KARATSUBA_THRESHOLD          44
+#define MUL_TOOM3_THRESHOLD             137
+#define MUL_TOOM44_THRESHOLD            230
+
+#define SQR_BASECASE_THRESHOLD           25
+#define SQR_KARATSUBA_THRESHOLD         119
+#define SQR_TOOM3_THRESHOLD             146
+#define SQR_TOOM4_THRESHOLD             284
+
+#define MULLOW_BASECASE_THRESHOLD        19
+#define MULLOW_DC_THRESHOLD             120
+#define MULLOW_MUL_N_THRESHOLD          357
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* preinv always */
+#define DIV_DC_THRESHOLD                 70
+#define POWM_THRESHOLD                  312
+
+#define MATRIX22_STRASSEN_THRESHOLD      29
+#define HGCD_THRESHOLD                  118
+#define GCD_DC_THRESHOLD                595
+#define GCDEXT_DC_THRESHOLD             584
+#define JACOBI_BASE_METHOD                1
+
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1_THRESHOLD                 8
+#define MOD_1_2_THRESHOLD                 9
+#define MOD_1_4_THRESHOLD                20
+#define USE_PREINV_DIVREM_1               1  /* native */
+#define USE_PREINV_MOD_1                  1  /* preinv always */
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             17
+#define GET_STR_PRECOMPUTE_THRESHOLD     25
+#define SET_STR_DC_THRESHOLD           1488
+#define SET_STR_PRECOMPUTE_THRESHOLD   3590
+
+#define MUL_FFT_TABLE  { 528, 1184, 1856, 3840, 11264, 28672, 114688, 327680, 0 }
+#define MUL_FFT_MODF_THRESHOLD          784
+#define MUL_FFT_THRESHOLD              6656
+
+#define SQR_FFT_TABLE  { 592, 1248, 2368, 3840, 11264, 28672, 81920, 327680, 0 }
+#define SQR_FFT_MODF_THRESHOLD          608
+#define SQR_FFT_THRESHOLD              4992
diff --git a/gmp/mpn/ia64/hamdist.asm b/gmp/mpn/ia64/hamdist.asm
index 477df4cd71..b150a429cb 100644
--- a/gmp/mpn/ia64/hamdist.asm
+++ b/gmp/mpn/ia64/hamdist.asm
@@ -1,39 +1,25 @@
 dnl  IA-64 mpn_hamdist -- mpn hamming distance.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2003-2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
 dnl
+dnl  This file is part of the GNU MP Library.
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C           cycles/limb
-C Itanium:       2
 C Itanium 2:     1
 
 C INPUT PARAMETERS
diff --git a/gmp/mpn/ia64/ia64-defs.m4 b/gmp/mpn/ia64/ia64-defs.m4
index f71d280b17..65ade991d0 100644
--- a/gmp/mpn/ia64/ia64-defs.m4
+++ b/gmp/mpn/ia64/ia64-defs.m4
@@ -2,32 +2,21 @@ divert(-1)
 
 
 dnl  Copyright 2000, 2002, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  ia64 assembler comments are C++ style "//" to the end of line.  gas
@@ -86,7 +75,7 @@ dnl
 dnl  Emit a ".align" directive.  "bytes" is eval()ed, so can be an
 dnl  expression.
 dnl
-dnl  This version overrides the definition in mpn/asm-defs.m4.  We suppress
+dnl  This version overrides the definition in mpn/asm-defs.m4.  We suppress
 dnl  any .align if the gas byte-swapped-nops bug was detected by configure
 dnl  GMP_ASM_IA64_ALIGN_OK.
 
@@ -99,7 +88,7 @@ m4_assert_defined(`IA64_ALIGN_OK')
 
 dnl  Usage: ASSERT([pr] [,code])
 dnl
-dnl  Require that the given predicate register is true after executing the
+dnl  Require that the given predicate register is true after executing the
 dnl  test code.  For example,
 dnl
 dnl         ASSERT(p6,
@@ -131,17 +120,5 @@ define(`ASSERT_label_counter',eval(ASSERT_label_counter+1))
 ')')
 define(`ASSERT_label_counter',1)
 
-define(`getfsig', `getf.sig')
-define(`setfsig', `setf.sig')
-define(`cmpeq',   `cmp.eq')
-define(`cmpne',   `cmp.ne')
-define(`cmpltu',  `cmp.ltu')
-define(`cmpleu',  `cmp.leu')
-define(`cmpgtu',  `cmp.gtu')
-define(`cmpgeu',  `cmp.geu')
-define(`cmple',   `cmp.le')
-define(`cmpgt',   `cmp.gt')
-define(`cmpeqor', `cmp.eq.or')
-define(`cmpequc', `cmp.eq.unc')
 
 divert
diff --git a/gmp/mpn/ia64/invert_limb.asm b/gmp/mpn/ia64/invert_limb.asm
index 5effdda815..982886e549 100644
--- a/gmp/mpn/ia64/invert_limb.asm
+++ b/gmp/mpn/ia64/invert_limb.asm
@@ -1,34 +1,21 @@
 dnl  IA-64 mpn_invert_limb -- Invert a normalized limb.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund and Kevin Ryde.
-
 dnl  Copyright 2000, 2002, 2004 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -36,7 +23,7 @@ C INPUT PARAMETERS
 C d = r32
 
 C           cycles
-C Itanium:    74
+C Itanium:    ?
 C Itanium 2:  50+6
 
 C It should be possible to avoid the xmpy.hu and the following tests by
diff --git a/gmp/mpn/ia64/logops_n.asm b/gmp/mpn/ia64/logops_n.asm
index e4a2f61cce..3ab9d2518b 100644
--- a/gmp/mpn/ia64/logops_n.asm
+++ b/gmp/mpn/ia64/logops_n.asm
@@ -1,35 +1,22 @@
 dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
 dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2003-2005 Free Software Foundation, Inc.
-
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl
 dnl  This file is part of the GNU MP Library.
 dnl
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
 dnl
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -91,8 +78,6 @@ ifdef(`HAVE_ABI_32',
 `	addp4	rp = 0, rp			C			M I
 	addp4	up = 0, up			C			M I
 	addp4	vp = 0, vp			C			M I
-	nop.m		0
-	nop.m		0
 	zxt4	n = n				C			I
 	;;
 ')
diff --git a/gmp/mpn/ia64/lorrshift.asm b/gmp/mpn/ia64/lorrshift.asm
index 694aaf0f40..59badebc6a 100644
--- a/gmp/mpn/ia64/lorrshift.asm
+++ b/gmp/mpn/ia64/lorrshift.asm
@@ -1,57 +1,40 @@
 dnl  IA-64 mpn_lshift/mpn_rshift.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2000-2005 Free Software Foundation, Inc.
+dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C           cycles/limb
-C Itanium:      2
-C Itanium 2:    1
+C Itanium:      2.0
+C Itanium 2:    1.0
 
 C This code is scheduled deeply since the plain shift instructions shr and shl
 C have a latency of 4 (on Itanium) or 3 (on Itanium 2).  Poor scheduling of
 C these instructions cause a 10 cycle replay trap on Itanium.
 
-C The ld8 scheduling should probably be decreased to make the function smaller.
-C Good lfetch  will make sure we never stall anyway.
-
-C We should actually issue the first ld8 at cycle 0, and the first BSH/FSH pair
-C at cycle 2.  Judicious use of predicates could allow us to issue more ld8's
-C in the prologue.
-
+C TODO
+C  * Optimize function entry and feed-in code.
 
 C INPUT PARAMETERS
-define(`rp', `r32')
-define(`up', `r33')
-define(`n',  `r34')
+define(`rp',`r32')
+define(`up',`r33')
+define(`n',`r34')
 define(`cnt',`r35')
 
 define(`tnc',`r9')
@@ -78,281 +61,284 @@ MULFUNC_PROLOGUE(mpn_lshift mpn_rshift)
 ASM_START()
 PROLOGUE(func)
 	.prologue
-	.save	ar.lc, r2
+	.save		ar.lc, r2
 	.body
 ifdef(`HAVE_ABI_32',
-`	addp4	rp = 0, rp		C			M I
-	addp4	up = 0, up		C		M I
-	sxt4	n = n			C		M I
-	nop.m		0
-	nop.m		0
-	zxt4	cnt = cnt		C		I
+`	addp4		rp = 0, rp		C			M I
+	addp4		up = 0, up		C			M I
+	sxt4		n = n			C			M I
+	zxt4		cnt = cnt		C			I
 	;;
 ')
 
- {.mmi;	cmp.lt	p14, p15 = 4, n		C		M I
-	and	r14 = 3, n		C		M I
-	mov.i	r2 = ar.lc		C		I0
-}{.mmi;	add	r15 = -1, n		C		M I
-	sub	tnc = 64, cnt		C		M I
-	add	r16 = -5, n
-	;;
-}{.mmi;	cmp.eq	p6, p0 = 1, r14		C		M I
-	cmp.eq	p7, p0 = 2, r14		C		M I
-	shr.u	n = r16, 2		C		I0
-}{.mmi;	cmp.eq	p8, p0 = 3, r14		C		M I
+ {.mmi;	cmp.lt		p14, p15 = 4, n		C			M I
+	and		r14 = 3, n		C			M I
+	mov.i		r2 = ar.lc		C			I0
+}{.mmi;	add		r15 = -1, n		C			M I
+	sub		tnc = 64, cnt		C			M I
+	add		r16 = -5, n
+	;;
+}{.mmi;	cmp.eq		p6, p0 = 1, r14		C			M I
+	cmp.eq		p7, p0 = 2, r14		C			M I
+	shr.u		n = r16, 2		C			I0
+}{.mmi;	cmp.eq		p8, p0 = 3, r14		C			M I
 ifdef(`OPERATION_lshift',
-`	shladd	up = r15, 3, up		C		M I
-	shladd	rp = r15, 3, rp')	C		M I
+`	shladd		up = r15, 3, up		C			M I
+	shladd		rp = r15, 3, rp')	C			M I
 	;;
-}{.mmi;	add	r11 = POFF, up		C		M I
-	ld8	r10 = [up], UPD		C		M01
-	mov.i	ar.lc = n		C		I0
+}{.mmi;	add		r11 = POFF, up		C			M I
+	ld8		r10 = [up], UPD		C			M01
+	mov.i		ar.lc = n		C			I0
 }{.bbb;
-   (p6)	br.dptk	.Lb01
-   (p7)	br.dptk	.Lb10
-   (p8)	br.dptk	.Lb11
-	;; }
-
-.Lb00:	ld8	r19 = [up], UPD
+   (p6)	br.dptk		.Lb01
+   (p7)	br.dptk		.Lb10
+   (p8)	br.dptk		.Lb11
 	;;
-	ld8	r16 = [up], UPD
+}
+
+.Lb00:	ld8		r19 = [up], UPD
 	;;
-	ld8	r17 = [up], UPD
-	BSH	r8 = r10, tnc		C function return value
+	ld8		r16 = [up], UPD
 	;;
-	FSH	r24 = r10, cnt
-	BSH	r25 = r19, tnc
+	ld8		r17 = [up], UPD
+	BSH		r8 = r10, tnc		C function return value
   (p14)	br.cond.dptk	.grt4
+
+	FSH		r24 = r10, cnt
+	BSH		r25 = r19, tnc
 	;;
-	FSH	r26 = r19, cnt
-	BSH	r27 = r16, tnc
+	FSH		r26 = r19, cnt
+	BSH		r27 = r16, tnc
 	;;
-	FSH	r20 = r16, cnt
-	BSH	r21 = r17, tnc
+	FSH		r20 = r16, cnt
+	BSH		r21 = r17, tnc
 	;;
-	or	r14 = r25, r24
-	FSH	r22 = r17, cnt
-	BSH	r23 = r10, tnc
-	br	.Lr4
+	or		r14 = r25, r24
+	FSH		r22 = r17, cnt
+	BSH		r23 = r10, tnc
+	br		.Lr4
 
-.grt4:	ld8	r18 = [up], UPD
-	FSH	r26 = r19, cnt
-	BSH	r27 = r16, tnc
+.grt4:	FSH		r24 = r10, cnt
+	BSH		r25 = r19, tnc
 	;;
-	ld8	r19 = [up], UPD
-	FSH	r20 = r16, cnt
-	BSH	r21 = r17, tnc
+	ld8		r18 = [up], UPD
+	FSH		r26 = r19, cnt
+	BSH		r27 = r16, tnc
 	;;
-	ld8	r16 = [up], UPD
-	FSH	r22 = r17, cnt
-	BSH	r23 = r18, tnc
+	ld8		r19 = [up], UPD
+	FSH		r20 = r16, cnt
+	BSH		r21 = r17, tnc
 	;;
-	or	r14 = r25, r24
-	ld8	r17 = [up], UPD
+	ld8		r16 = [up], UPD
+	FSH		r22 = r17, cnt
+	BSH		r23 = r18, tnc
+	;;
+	or		r14 = r25, r24
+	ld8		r17 = [up], UPD
 	br.cloop.dpnt	.Ltop
-	br	.Lbot
+	br		.Lbot
 
 .Lb01:
-  (p15)	BSH	r8 = r10, tnc		C function return value	I
-  (p15)	FSH	r22 = r10, cnt		C		I
-  (p15)	br.cond.dptk	.Lr1		C return	B
+  (p15)	BSH		r8 = r10, tnc		C function return value	I
+  (p15)	FSH		r22 = r10, cnt		C			I
+  (p15)	br.cond.dptk	.Lr1			C return		B
 
-.grt1:	ld8	r18 = [up], UPD
+.grt1:	ld8		r18 = [up], UPD
 	;;
-	ld8	r19 = [up], UPD
-	BSH	r8 = r10, tnc		C function return value
+	ld8		r19 = [up], UPD
+	BSH		r8 = r10, tnc		C function return value
 	;;
-	ld8	r16 = [up], UPD
-	FSH	r22 = r10, cnt
-	BSH	r23 = r18, tnc
+	ld8		r16 = [up], UPD
+	FSH		r22 = r10, cnt
+	BSH		r23 = r18, tnc
 	;;
-	ld8	r17 = [up], UPD
-	FSH	r24 = r18, cnt
-	BSH	r25 = r19, tnc
+	ld8		r17 = [up], UPD
 	br.cloop.dpnt	.grt5
 	;;
-	or	r15 = r23, r22
-	FSH	r26 = r19, cnt
-	BSH	r27 = r16, tnc
+
+	FSH		r24 = r18, cnt
+	BSH		r25 = r19, tnc
+	;;
+	or		r15 = r23, r22
+	FSH		r26 = r19, cnt
+	BSH		r27 = r16, tnc
 	;;
-	FSH	r20 = r16, cnt
-	BSH	r21 = r17, tnc
-	br	.Lr5
+	FSH		r20 = r16, cnt
+	BSH		r21 = r17, tnc
+	br		.Lr5
 
-.grt5:	ld8	r18 = [up], UPD
-	FSH	r26 = r19, cnt
-	BSH	r27 = r16, tnc
+.grt5:	FSH		r24 = r18, cnt
+	BSH		r25 = r19, tnc
 	;;
-	ld8	r19 = [up], UPD
-	FSH	r20 = r16, cnt
-	BSH	r21 = r17, tnc
+	ld8		r18 = [up], UPD
+	FSH		r26 = r19, cnt
+	BSH		r27 = r16, tnc
 	;;
-	or	r15 = r23, r22
-	ld8	r16 = [up], UPD
-	br	.LL01
+	ld8		r19 = [up], UPD
+	FSH		r20 = r16, cnt
+	BSH		r21 = r17, tnc
+	;;
+	or		r15 = r23, r22
+	ld8		r16 = [up], UPD
+	br		.LL01
 
 
-.Lb10:	ld8	r17 = [up], UPD
+.Lb10:	ld8		r17 = [up], UPD
   (p14)	br.cond.dptk	.grt2
 
-	BSH	r8 = r10, tnc		C function return value
+	BSH		r8 = r10, tnc		C function return value
 	;;
-	FSH	r20 = r10, cnt
-	BSH	r21 = r17, tnc
+	FSH		r20 = r10, cnt
+	BSH		r21 = r17, tnc
 	;;
-	or	r14 = r21, r20
-	FSH	r22 = r17, cnt
-	br	.Lr2			C return
+	or		r14 = r21, r20
+	FSH		r22 = r17, cnt
+	br		.Lr2			C return
 
-.grt2:	ld8	r18 = [up], UPD
-	BSH	r8 = r10, tnc		C function return value
+.grt2:	ld8		r18 = [up], UPD
+	BSH		r8 = r10, tnc		C function return value
 	;;
-	ld8	r19 = [up], UPD
-	FSH	r20 = r10, cnt
-	BSH	r21 = r17, tnc
+	ld8		r19 = [up], UPD
+	FSH		r20 = r10, cnt
+	BSH		r21 = r17, tnc
 	;;
-	ld8	r16 = [up], UPD
-	FSH	r22 = r17, cnt
-	BSH	r23 = r18, tnc
+	ld8		r16 = [up], UPD
+	FSH		r22 = r17, cnt
+	BSH		r23 = r18, tnc
 	;;
- {.mmi;	ld8	r17 = [up], UPD
-	or	r14 = r21, r20
-	FSH	r24 = r18, cnt
-}{.mib;	nop	0
-	BSH	r25 = r19, tnc
+	ld8		r17 = [up], UPD
 	br.cloop.dpnt	.grt6
-	;; }
+	;;
 
-	FSH	r26 = r19, cnt
-	BSH	r27 = r16, tnc
-	br	.Lr6
+	or		r14 = r21, r20
+	FSH		r24 = r18, cnt
+	BSH		r25 = r19, tnc
+	;;
+	FSH		r26 = r19, cnt
+	BSH		r27 = r16, tnc
+	br		.Lr6
 
-.grt6:	ld8	r18 = [up], UPD
-	FSH	r26 = r19, cnt
-	BSH	r27 = r16, tnc
+.grt6:	or		r14 = r21, r20
+	FSH		r24 = r18, cnt
+	BSH		r25 = r19, tnc
 	;;
-	ld8	r19 = [up], UPD
-	br	.LL10
+	ld8		r18 = [up], UPD
+	FSH		r26 = r19, cnt
+	BSH		r27 = r16, tnc
+	;;
+	ld8		r19 = [up], UPD
+	br		.LL10
 
 
-.Lb11:	ld8	r16 = [up], UPD
+.Lb11:	ld8		r16 = [up], UPD
 	;;
-	ld8	r17 = [up], UPD
-	BSH	r8 = r10, tnc		C function return value
+	ld8		r17 = [up], UPD
+	BSH		r8 = r10, tnc		C function return value
   (p14)	br.cond.dptk	.grt3
 	;;
 
-	FSH	r26 = r10, cnt
-	BSH	r27 = r16, tnc
+	FSH		r26 = r10, cnt
+	BSH		r27 = r16, tnc
 	;;
-	FSH	r20 = r16, cnt
-	BSH	r21 = r17, tnc
+	FSH		r20 = r16, cnt
+	BSH		r21 = r17, tnc
 	;;
-	or	r15 = r27, r26
-	FSH	r22 = r17, cnt
-	br	.Lr3			C return
+	or		r15 = r27, r26
+	FSH		r22 = r17, cnt
+	br		.Lr3			C return
 
-.grt3:	ld8	r18 = [up], UPD
-	FSH	r26 = r10, cnt
-	BSH	r27 = r16, tnc
+.grt3:	ld8		r18 = [up], UPD
+	FSH		r26 = r10, cnt
+	BSH		r27 = r16, tnc
 	;;
-	ld8	r19 = [up], UPD
-	FSH	r20 = r16, cnt
-	BSH	r21 = r17, tnc
+	ld8		r19 = [up], UPD
+	FSH		r20 = r16, cnt
+	BSH		r21 = r17, tnc
 	;;
-	ld8	r16 = [up], UPD
-	FSH	r22 = r17, cnt
-	BSH	r23 = r18, tnc
+	ld8		r16 = [up], UPD
+	FSH		r22 = r17, cnt
+	BSH		r23 = r18, tnc
 	;;
-	ld8	r17 = [up], UPD
+	ld8		r17 = [up], UPD
 	br.cloop.dpnt	.grt7
 
-	or	r15 = r27, r26
-	FSH	r24 = r18, cnt
-	BSH	r25 = r19, tnc
-	br	.Lr7
+	or		r15 = r27, r26
+	FSH		r24 = r18, cnt
+	BSH		r25 = r19, tnc
+	br		.Lr7
 
-.grt7:	or	r15 = r27, r26
-	FSH	r24 = r18, cnt
-	BSH	r25 = r19, tnc
-	ld8	r18 = [up], UPD
-	br	.LL11
+.grt7:	or		r15 = r27, r26
+	FSH		r24 = r18, cnt
+	BSH		r25 = r19, tnc
+	ld8		r18 = [up], UPD
+	br		.LL11
 
 C *** MAIN LOOP START ***
 	ALIGN(32)
 .Ltop:
- {.mmi;	st8	[rp] = r14, UPD		C M2
-	or	r15 = r27, r26		C M3
-	FSH	r24 = r18, cnt		C I0
-}{.mmi;	ld8	r18 = [up], UPD		C M1
-	lfetch	[r11], PUPD
-	BSH	r25 = r19, tnc		C I1
+ {.mmi;	st8		[rp] = r14, UPD		C M2
+	or		r15 = r27, r26		C M3
+	FSH		r24 = r18, cnt		C I0
+}{.mmi;	ld8		r18 = [up], UPD		C M1
+	lfetch		[r11], PUPD
+	BSH		r25 = r19, tnc		C I1
 	;; }
 .LL11:
- {.mmi;	st8	[rp] = r15, UPD
-	or	r14 = r21, r20
-	FSH	r26 = r19, cnt
-}{.mmi;	ld8	r19 = [up], UPD
-	nop.m	0
-	BSH	r27 = r16, tnc
+ {.mmi;	st8		[rp] = r15, UPD
+	or		r14 = r21, r20
+	FSH		r26 = r19, cnt
+}{.mmi;	ld8		r19 = [up], UPD
+	nop.m		0
+	BSH		r27 = r16, tnc
 	;; }
 .LL10:
- {.mmi;	st8	[rp] = r14, UPD
-	or	r15 = r23, r22
-	FSH	r20 = r16, cnt
-}{.mmi;	ld8	r16 = [up], UPD
-	nop.m	0
-	BSH	r21 = r17, tnc
+ {.mmi;	st8		[rp] = r14, UPD
+	or		r15 = r23, r22
+	FSH		r20 = r16, cnt
+}{.mmi;	ld8		r16 = [up], UPD
+	nop.m		0
+	BSH		r21 = r17, tnc
 	;; }
 .LL01:
- {.mmi;	st8	[rp] = r15, UPD
-	or	r14 = r25, r24
-	FSH	r22 = r17, cnt
-}{.mib;	ld8	r17 = [up], UPD
-	BSH	r23 = r18, tnc
+ {.mmi;	st8		[rp] = r15, UPD
+	or		r14 = r25, r24
+	FSH		r22 = r17, cnt
+}{.mib;	ld8		r17 = [up], UPD
+	BSH		r23 = r18, tnc
 	br.cloop.dptk	.Ltop
 	;; }
+
 C *** MAIN LOOP END ***
 
-.Lbot:
- {.mmi;	st8	[rp] = r14, UPD
-	or	r15 = r27, r26
-	FSH	r24 = r18, cnt
-}{.mib;	nop	0
-	BSH	r25 = r19, tnc
-	nop	0
-	;; }
-.Lr7:
- {.mmi;	st8	[rp] = r15, UPD
-	or	r14 = r21, r20
-	FSH	r26 = r19, cnt
-}{.mib;	nop	0
-	BSH	r27 = r16, tnc
-	nop	0
-	;; }
-.Lr6:
- {.mmi;	st8	[rp] = r14, UPD
-	or	r15 = r23, r22
-	FSH	r20 = r16, cnt
-}{.mib;	nop	0
-	BSH	r21 = r17, tnc
-	nop	0
-	;; }
-.Lr5:	st8	[rp] = r15, UPD
-	or	r14 = r25, r24
-	FSH	r22 = r17, cnt
+.Lbot:	or		r15 = r27, r26
+	FSH		r24 = r18, cnt
+	BSH		r25 = r19, tnc
+	st8		[rp] = r14, UPD
+	;;
+.Lr7:	or		r14 = r21, r20
+	FSH		r26 = r19, cnt
+	BSH		r27 = r16, tnc
+	st8		[rp] = r15, UPD
+	;;
+.Lr6:	or		r15 = r23, r22
+	FSH		r20 = r16, cnt
+	BSH		r21 = r17, tnc
+	st8		[rp] = r14, UPD
+	;;
+.Lr5:	st8		[rp] = r15, UPD
+	or		r14 = r25, r24
+	FSH		r22 = r17, cnt
 	;;
-.Lr4:	st8	[rp] = r14, UPD
-	or	r15 = r27, r26
+.Lr4:	or		r15 = r27, r26
+	st8		[rp] = r14, UPD
 	;;
-.Lr3:	st8	[rp] = r15, UPD
-	or	r14 = r21, r20
+.Lr3:	or		r14 = r21, r20
+	st8		[rp] = r15, UPD
 	;;
-.Lr2:	st8	[rp] = r14, UPD
+.Lr2:	st8		[rp] = r14, UPD
 	;;
-.Lr1:	st8	[rp] = r22, UPD		C		M23
-	mov	ar.lc = r2		C		I0
-	br.ret.sptk.many b0		C		B
+.Lr1:	st8		[rp] = r22, UPD		C			M23
+	mov		ar.lc = r2		C			I0
+	br.ret.sptk.many b0			C			B
 EPILOGUE(func)
 ASM_END()
diff --git a/gmp/mpn/ia64/lshiftc.asm b/gmp/mpn/ia64/lshiftc.asm
deleted file mode 100644
index c402486484..0000000000
--- a/gmp/mpn/ia64/lshiftc.asm
+++ /dev/null
@@ -1,463 +0,0 @@
-dnl  IA-64 mpn_lshiftc.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2000-2005, 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C           cycles/limb
-C Itanium:      ?
-C Itanium 2:    1.25
-
-C This code is scheduled deeply since the plain shift instructions shr and shl
-C have a latency of 4 (on Itanium) or 3 (on Itanium 2).  Poor scheduling of
-C these instructions cause a 10 cycle replay trap on Itanium.
-
-C The ld8 scheduling should probably be decreased to make the function smaller.
-C Good lfetch  will make sure we never stall anyway.
-
-C We should actually issue the first ld8 at cycle 0, and the first BSH/FSH pair
-C at cycle 2.  Judicious use of predicates could allow us to issue more ld8's
-C in the prologue.
-
-
-C INPUT PARAMETERS
-define(`rp', `r32')
-define(`up', `r33')
-define(`n',  `r34')
-define(`cnt',`r35')
-
-define(`tnc',`r9')
-
-define(`FSH',`shl')
-define(`BSH',`shr.u')
-define(`UPD',`-8')
-define(`POFF',`-512')
-define(`PUPD',`-32')
-define(`func',`mpn_lshiftc')
-
-ASM_START()
-PROLOGUE(mpn_lshiftc)
-	.prologue
-	.save	ar.lc, r2
-	.body
-ifdef(`HAVE_ABI_32',
-`	addp4	rp = 0, rp		C				M I
-	addp4	up = 0, up		C				M I
-	sxt4	n = n			C				M I
-	nop.m		0
-	nop.m		0
-	zxt4	cnt = cnt		C				I
-	;;
-')
-
- {.mmi;	nop	0			C				M I
-	and	r14 = 3, n		C				M I
-	mov.i	r2 = ar.lc		C				I0
-}{.mmi;	add	r15 = -1, n		C				M I
-	sub	tnc = 64, cnt		C				M I
-	nop	0
-	;;
-}{.mmi;	cmp.eq	p6, p0 = 1, r14		C				M I
-	cmp.eq	p7, p0 = 2, r14		C				M I
-	shr.u	n = r15, 2		C				I0
-}{.mmi;	cmp.eq	p8, p0 = 3, r14		C				M I
-	shladd	up = r15, 3, up		C				M I
-	shladd	rp = r15, 3, rp		C				M I
-	;;
-}{.mmi;	add	r11 = POFF, up		C				M I
-	ld8	r10 = [up], UPD		C				M01
-	mov.i	ar.lc = n		C				I0
-}{.bbb;
-   (p6)	br.dptk	.Lb01
-   (p7)	br.dptk	.Lb10
-   (p8)	br.dptk	.Lb11
-	;; }
-
-.Lb00:
-	ld8	r19 = [up], UPD
-	;;
-	ld8	r16 = [up], UPD
-	;;
-	ld8	r17 = [up], UPD
-	BSH	r8 = r10, tnc
-	br.cloop.dptk	L(gt4)
-	;;
-	FSH	r24 = r10, cnt
-	BSH	r25 = r19, tnc
-	;;
-	FSH	r26 = r19, cnt
-	BSH	r27 = r16, tnc
-	;;
-	FSH	r20 = r16, cnt
-	BSH	r21 = r17, tnc
-	;;
-	or	r14 = r25, r24
-	FSH	r22 = r17, cnt
-	;;
-	or	r15 = r27, r26
-	sub	r31 = -1, r14
-	br	.Lr4
-
-L(gt4):
- {.mmi;	nop	0
-	nop	0
-	FSH	r24 = r10, cnt
-}{.mmi;	ld8	r18 = [up], UPD
-	nop	0
-	BSH	r25 = r19, tnc
-	;; }
- {.mmi;	nop	0
-	nop	0
-	FSH	r26 = r19, cnt
-}{.mmi;	ld8	r19 = [up], UPD
-	nop	0
-	BSH	r27 = r16, tnc
-	;; }
- {.mmi;	nop	0
-	nop	0
-	FSH	r20 = r16, cnt
-}{.mmi;	ld8	r16 = [up], UPD
-	nop	0
-	BSH	r21 = r17, tnc
-	;; }
- {.mmi;	nop	0
-	or	r14 = r25, r24
-	FSH	r22 = r17, cnt
-}{.mib;	ld8	r17 = [up], UPD
-	BSH	r23 = r18, tnc
-	br.cloop.dptk	L(gt8)
-	;; }
- {.mmi;	nop	0
-	or	r15 = r27, r26
-	FSH	r24 = r18, cnt
-}{.mib;	sub	r31 = -1, r14
-	BSH	r25 = r19, tnc
-	br	.Lr8 }
-
-L(gt8):
-	or	r15 = r27, r26
-	FSH	r24 = r18, cnt
-	ld8	r18 = [up], UPD
-	sub	r31 = -1, r14
-	BSH	r25 = r19, tnc
-	br	.LL00
-
-.Lb01:
-	br.cloop.dptk	L(gt1)
-	;;
-	BSH	r8 = r10, tnc
-	FSH	r22 = r10, cnt
-	;;
-	sub	r31 = -1, r22
-	br	.Lr1
-	;;
-L(gt1):
-	ld8	r18 = [up], UPD
-	BSH	r8 = r10, tnc
-	FSH	r22 = r10, cnt
-	;;
-	ld8	r19 = [up], UPD
-	;;
-	ld8	r16 = [up], UPD
-	;;
-	ld8	r17 = [up], UPD
-	BSH	r23 = r18, tnc
-	br.cloop.dptk	L(gt5)
-	;;
-	nop	0
-	FSH	r24 = r18, cnt
-	BSH	r25 = r19, tnc
-	;;
-	nop	0
-	FSH	r26 = r19, cnt
-	BSH	r27 = r16, tnc
-	;;
-	or	r15 = r23, r22
-	FSH	r20 = r16, cnt
-	BSH	r21 = r17, tnc
-	;;
-	or	r14 = r25, r24
-	FSH	r22 = r17, cnt
-	sub	r31 = -1, r15
-	br	.Lr5
-
-L(gt5):
- {.mmi;	nop	0
-	nop	0
-	FSH	r24 = r18, cnt
-}{.mmi;	ld8	r18 = [up], UPD
-	nop	0
-	BSH	r25 = r19, tnc
-	;; }
- {.mmi;	nop	0
-	nop	0
-	FSH	r26 = r19, cnt
-}{.mmi;	ld8	r19 = [up], UPD
-	nop	0
-	BSH	r27 = r16, tnc
-	;; }
- {.mmi;	nop	0
-	or	r15 = r23, r22
-	FSH	r20 = r16, cnt
-}{.mmi;	ld8	r16 = [up], UPD
-	nop	0
-	BSH	r21 = r17, tnc
-	;; }
- {.mmi;	or	r14 = r25, r24
-	sub	r31 = -1, r15
-	FSH	r22 = r17, cnt
-}{.mib;	ld8	r17 = [up], UPD
-	BSH	r23 = r18, tnc
-	br	L(end)
-	;; }
-
-.Lb10:
-	ld8	r17 = [up], UPD
-	br.cloop.dptk	L(gt2)
-	;;
-	BSH	r8 = r10, tnc
-	FSH	r20 = r10, cnt
-	;;
-	BSH	r21 = r17, tnc
-	FSH	r22 = r17, cnt
-	;;
-	or	r14 = r21, r20
-	;;
-	sub	r31 = -1, r14
-	br	.Lr2
-	;;
-L(gt2):
-	ld8	r18 = [up], UPD
-	BSH	r8 = r10, tnc
-	FSH	r20 = r10, cnt
-	;;
-	ld8	r19 = [up], UPD
-	;;
-	ld8	r16 = [up], UPD
-	BSH	r21 = r17, tnc
-	FSH	r22 = r17, cnt
-	;;
-	ld8	r17 = [up], UPD
-	BSH	r23 = r18, tnc
-	br.cloop.dptk	L(gt6)
-	;;
-	nop	0
-	FSH	r24 = r18, cnt
-	BSH	r25 = r19, tnc
-	;;
-	or	r14 = r21, r20
-	FSH	r26 = r19, cnt
-	BSH	r27 = r16, tnc
-	;;
- {.mmi;	nop	0
-	or	r15 = r23, r22
-	FSH	r20 = r16, cnt
-}{.mib;	sub	r31 = -1, r14
-	BSH	r21 = r17, tnc
-	br	.Lr6
-	;; }
-L(gt6):
- {.mmi;	nop	0
-	nop	0
-	FSH	r24 = r18, cnt
-}{.mmi;	ld8	r18 = [up], UPD
-	nop	0
-	BSH	r25 = r19, tnc
-	;; }
- {.mmi; nop   0
-	or	r14 = r21, r20
-	FSH	r26 = r19, cnt
-}{.mmi;	ld8	r19 = [up], UPD
-	nop	0
-	BSH	r27 = r16, tnc
-	;; }
- {.mmi;	or	r15 = r23, r22
-	sub	r31 = -1, r14
-	FSH	r20 = r16, cnt
-}{.mib;	ld8	r16 = [up], UPD
-	BSH	r21 = r17, tnc
-	br	.LL10
-}
-
-.Lb11:
-	ld8	r16 = [up], UPD
-	;;
-	ld8	r17 = [up], UPD
-	BSH	r8 = r10, tnc
-	FSH	r26 = r10, cnt
-	br.cloop.dptk	L(gt3)
-	;;
-	BSH	r27 = r16, tnc
-	;;
-	FSH	r20 = r16, cnt
-	BSH	r21 = r17, tnc
-	;;
-	FSH	r22 = r17, cnt
-	;;
-	or	r15 = r27, r26
-	;;
-	or	r14 = r21, r20
-	sub	r31 = -1, r15
-	br	.Lr3
-	;;
-L(gt3):
-	ld8	r18 = [up], UPD
-	;;
-	ld8	r19 = [up], UPD
-	BSH	r27 = r16, tnc
-	;;
- {.mmi;	nop	0
-	nop	0
-	FSH	r20 = r16, cnt
-}{.mmi;	ld8	r16 = [up], UPD
-	nop	0
-	BSH	r21 = r17, tnc
-	;; }
- {.mmi	nop	0
-	nop	0
-	FSH	r22 = r17, cnt
-}{.mib;	ld8	r17 = [up], UPD
-	BSH	r23 = r18, tnc
-	br.cloop.dptk	L(gt7)
-	;; }
-	or	r15 = r27, r26
-	FSH	r24 = r18, cnt
-	BSH	r25 = r19, tnc
-	;;
- {.mmi;	nop	0
-	or	r14 = r21, r20
-	FSH	r26 = r19, cnt
-}{.mib;	sub	r31 = -1, r15
-	BSH	r27 = r16, tnc
-	br	.Lr7
-}
-L(gt7):
- {.mmi;	nop	0
-	or	r15 = r27, r26
-	FSH	r24 = r18, cnt
-}{.mmi;	ld8	r18 = [up], UPD
-	nop	0
-	BSH	r25 = r19, tnc
-	;; }
- {.mmi;	or	r14 = r21, r20
-	sub	r31 = -1, r15
-	FSH	r26 = r19, cnt
-}{.mib;	ld8	r19 = [up], UPD
-	BSH	r27 = r16, tnc
-	br	.LL11
-}
-
-C *** MAIN LOOP START ***
-	ALIGN(32)
-L(top):
-.LL01:
- {.mmi;	st8	[rp] = r31, UPD		C M2
-	or	r15 = r27, r26		C M3
-	FSH	r24 = r18, cnt		C I0
-}{.mmi;	ld8	r18 = [up], UPD		C M0
-	sub	r31 = -1, r14		C M1
-	BSH	r25 = r19, tnc		C I1
-	;; }
-.LL00:
- {.mmi;	st8	[rp] = r31, UPD
-	or	r14 = r21, r20
-	FSH	r26 = r19, cnt
-}{.mmi;	ld8	r19 = [up], UPD
-	sub	r31 = -1, r15
-	BSH	r27 = r16, tnc
-	;; }
-.LL11:
- {.mmi;	st8	[rp] = r31, UPD
-	or	r15 = r23, r22
-	FSH	r20 = r16, cnt
-}{.mmi;	ld8	r16 = [up], UPD
-	sub	r31 = -1, r14
-	BSH	r21 = r17, tnc
-	;; }
-.LL10:
- {.mmi;	st8	[rp] = r31, UPD
-	or	r14 = r25, r24
-	FSH	r22 = r17, cnt
-}{.mmi;	ld8	r17 = [up], UPD
-	sub	r31 = -1, r15
-	BSH	r23 = r18, tnc
-	;; }
-L(end):	lfetch		[r11], PUPD
-	br.cloop.dptk	L(top)
-C *** MAIN LOOP END ***
-
- {.mmi;	st8	[rp] = r31, UPD
-	or	r15 = r27, r26
-	FSH	r24 = r18, cnt
-}{.mib;	sub	r31 = -1, r14
-	BSH	r25 = r19, tnc
-	nop	0
-	;; }
-.Lr8:
- {.mmi;	st8	[rp] = r31, UPD
-	or	r14 = r21, r20
-	FSH	r26 = r19, cnt
-}{.mib;	sub	r31 = -1, r15
-	BSH	r27 = r16, tnc
-	nop	0
-	;; }
-.Lr7:
- {.mmi;	st8	[rp] = r31, UPD
-	or	r15 = r23, r22
-	FSH	r20 = r16, cnt
-}{.mib;	sub	r31 = -1, r14
-	BSH	r21 = r17, tnc
-	nop	0
-	;; }
-.Lr6:	st8	[rp] = r31, UPD
-	or	r14 = r25, r24
-	FSH	r22 = r17, cnt
-	sub	r31 = -1, r15
-	;;
-.Lr5:	st8	[rp] = r31, UPD
-	or	r15 = r27, r26
-	sub	r31 = -1, r14
-	;;
-.Lr4:	st8	[rp] = r31, UPD
-	or	r14 = r21, r20
-	sub	r31 = -1, r15
-	;;
-.Lr3:	st8	[rp] = r31, UPD
-	sub	r31 = -1, r14
-	;;
-.Lr2:	st8	[rp] = r31, UPD
-	sub	r31 = -1, r22
-	;;
-.Lr1:	st8	[rp] = r31, UPD		C				M23
-	mov	ar.lc = r2		C				I0
-	br.ret.sptk.many b0		C				B
-EPILOGUE(func)
-ASM_END()
diff --git a/gmp/mpn/ia64/mod_34lsub1.asm b/gmp/mpn/ia64/mod_34lsub1.asm
deleted file mode 100644
index edf3602c4c..0000000000
--- a/gmp/mpn/ia64/mod_34lsub1.asm
+++ /dev/null
@@ -1,236 +0,0 @@
-dnl  IA-64 mpn_mod_34lsub1
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2003-2005, 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C           cycles/limb
-C Itanium:      ?
-C Itanium 2:    1
-
-
-C INPUT PARAMETERS
-define(`up', `r32')
-define(`n',  `r33')
-
-C Some useful aliases for registers we use
-define(`u0',`r14') define(`u1',`r15') define(`u2',`r16')
-define(`a0',`r17') define(`a1',`r18') define(`a2',`r19')
-define(`c0',`r20') define(`c1',`r21') define(`c2',`r22')
-
-C This is a fairly simple-minded implementation.  One could approach 0.67 c/l
-C with a more sophisticated implementation.  If we're really crazy, we could
-C super-unroll, storing carries just in predicate registers, then copy them to
-C a general register, and population count them from there.  That'd bring us
-C close to 3 insn/limb, for nearly 0.5 c/l.
-
-C Computing n/3 needs 16 cycles, which is a lot of startup overhead.
-C We therefore use a plain while-style loop:
-C	add		n = -3, n
-C	cmp.le		p9, p0 = 3, n
-C  (p9)	br.cond		.Loop
-C Alternatively, we could table n/3 for, say, n < 256, and predicate the
-C 16-cycle code.
-
-C The summing-up code at the end was written quickly, and could surely be
-C vastly improved.
-
-ASM_START()
-PROLOGUE(mpn_mod_34lsub1)
-	.prologue
-	.save	ar.lc, r2
-	.body
-ifdef(`HAVE_ABI_32',`
-	addp4		up = 0, up		C			M I
-	nop.m		0
-	zxt4		n = n			C			I
-	;;
-')
-
-ifelse(0,1,`
-	movl		r14 = 0xAAAAAAAAAAAAAAAB
-	;;
-	setf.sig	f6 = r14
-	setf.sig	f7 = r33
-	;;
-	xmpy.hu		f6 = f6, f7
-	;;
-	getf.sig	r8 = f6
-	;;
-	shr.u		r8 = r8, 1		C Loop count
-	;;
-	mov.i		ar.lc = r8
-')
-
-	ld8	u0 = [up], 8
-	cmp.ne	p9, p0 = 1, n
-  (p9)	br	L(gt1)
-	;;
-	shr.u	r8 = u0, 48
-	dep.z	r27 = u0, 0, 48
-	;;
-	add	r8 = r8, r27
-	br.ret.sptk.many b0
-
-
-L(gt1):
-.mmi;	nop.m	0
-	mov	a0 = 0
-	add	n = -2, n
-.mmi;	mov	c0 = 0
-	mov	c1 = 0
-	mov	c2 = 0
-	;;
-.mmi;	ld8	u1 = [up], 8
-	mov	a1 = 0
-	cmp.ltu	p6, p0 = r0, r0		C clear p6
-.mmb;	cmp.gt	p9, p0 = 3, n
-	mov	a2 = 0
-  (p9)	br.cond.dptk	L(end)
-	;;
-
-	ALIGN(32)
-L(top):
-.mmi;	ld8	u2 = [up], 8
-  (p6)	add	c0 = 1, c0
-	cmp.ltu	p7, p0 = a0, u0
-.mmb;	sub	a0 = a0, u0
-	add	n = -3, n
-	nop.b	0
-	;;
-.mmi;	ld8	u0 = [up], 8
-  (p7)	add	c1 = 1, c1
-	cmp.ltu	p8, p0 = a1, u1
-.mmb;	sub	a1 = a1, u1
-	cmp.le	p9, p0 = 3, n
-	nop.b	0
-	;;
-.mmi;	ld8	u1 = [up], 8
-  (p8)	add	c2 = 1, c2
-	cmp.ltu	p6, p0 = a2, u2
-.mmb;	sub	a2 = a2, u2
-	nop.m	0
-dnl	br.cloop.dptk	L(top)
-  (p9)	br.cond.dptk	L(top)
-	;;
-
-L(end):
-	cmp.eq	p10, p0 = 0, n
-	cmp.eq	p11, p0 = 1, n
-  (p10)	br	L(0)
-
-L(2):
-.mmi;	ld8	u2 = [up], 8
-  (p6)	add	c0 = 1, c0
-	cmp.ltu	p7, p0 = a0, u0
-.mmb;	sub	a0 = a0, u0
-	nop.m	0
-  (p11)	br	L(1)
-	;;
-	ld8	u0 = [up], 8
-  (p7)	add	c1 = 1, c1
-	cmp.ltu	p8, p0 = a1, u1
-	sub	a1 = a1, u1
-	;;
-  (p8)	add	c2 = 1, c2
-	cmp.ltu	p6, p0 = a2, u2
-	sub	a2 = a2, u2
-	;;
-  (p6)	add	c0 = 1, c0
-	cmp.ltu	p7, p0 = a0, u0
-	sub	a0 = a0, u0
-	;;
-  (p7)	add	c1 = 1, c1
-	br	L(com)
-
-
-L(1):
-  (p7)	add	c1 = 1, c1
-	cmp.ltu	p8, p0 = a1, u1
-	sub	a1 = a1, u1
-	;;
-  (p8)	add	c2 = 1, c2
-	cmp.ltu	p6, p0 = a2, u2
-	sub	a2 = a2, u2
-	;;
-  (p6)	add	c0 = 1, c0
-	br	L(com)
-
-
-L(0):
-  (p6)	add	c0 = 1, c0
-	cmp.ltu	p7, p0 = a0, u0
-	sub	a0 = a0, u0
-	;;
-  (p7)	add	c1 = 1, c1
-	cmp.ltu	p8, p0 = a1, u1
-	sub	a1 = a1, u1
-	;;
-  (p8)	add	c2 = 1, c2
-
-L(com):
-C |     a2    |     a1    |     a0    |
-C |        |        |        |        |
-	shr.u	r24 = a0, 48		C 16 bits
-	shr.u	r25 = a1, 32		C 32 bits
-	shr.u	r26 = a2, 16		C 48 bits
-	;;
-	shr.u	r10 = c0, 48		C 16 bits, always zero
-	shr.u	r11 = c1, 32		C 32 bits
-	shr.u	r30 = c2, 16		C 48 bits
-	;;
-	dep.z	r27 = a0,  0, 48	C 48 bits
-	dep.z	r28 = a1, 16, 32	C 48 bits
-	dep.z	r29 = a2, 32, 16	C 48 bits
-	dep.z	r31 = c0,  0, 48	C 48 bits
-	dep.z	r14 = c1, 16, 32	C 48 bits
-	dep.z	r15 = c2, 32, 16	C 48 bits
-	;;
-.mmi;	add	r24 = r24, r25
-	add	r26 = r26, r27
-	add	r28 = r28, r29
-.mmi;	add	r10 = r10, r11
-	add	r30 = r30, r31
-	add	r14 = r14, r15
-	;;
-	movl	r8 = 0xffffffffffff0
-	add	r24 = r24, r26
-	add	r10 = r10, r30
-	;;
-	add	r24 = r24, r28
-	add	r10 = r10, r14
-	;;
-	sub	r8 = r8, r24
-	;;
-	add	r8 = r8, r10
-	br.ret.sptk.many b0
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/ia64/mode1o.asm b/gmp/mpn/ia64/mode1o.asm
index 14d5e81602..6b3626ebe6 100644
--- a/gmp/mpn/ia64/mode1o.asm
+++ b/gmp/mpn/ia64/mode1o.asm
@@ -1,34 +1,21 @@
 dnl  Itanium-2 mpn_modexact_1c_odd -- mpn by 1 exact remainder.
 
-dnl  Contributed to the GNU project by Kevin Ryde.
-
-dnl  Copyright 2003-2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/ia64/mul_1.asm b/gmp/mpn/ia64/mul_1.asm
index 21bf6d0e14..8df8d93f8e 100644
--- a/gmp/mpn/ia64/mul_1.asm
+++ b/gmp/mpn/ia64/mul_1.asm
@@ -1,35 +1,23 @@
 dnl  IA-64 mpn_mul_1, mpn_mul_1c -- Multiply a limb vector with a limb and
 dnl  store the result in a second limb vector.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2000-2004, 2006, 2007 Free Software Foundation, Inc.
+dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2006, 2007 Free Software
+dnl  Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -553,6 +541,7 @@ C *** MAIN LOOP END ***
    (p6)	cmp.leu		p8, p9 = r24, r17
    (p7)	cmp.ltu		p8, p9 = r24, r17
 	;;
+	.pred.rel "mutex",p8,p9
    (p8)	add		r8 = 1, r8
 	mov.i		ar.lc = r2
 	br.ret.sptk.many b0
diff --git a/gmp/mpn/ia64/mul_2.asm b/gmp/mpn/ia64/mul_2.asm
index 2bbce97267..b0d4ef70a1 100644
--- a/gmp/mpn/ia64/mul_2.asm
+++ b/gmp/mpn/ia64/mul_2.asm
@@ -1,47 +1,39 @@
 dnl  IA-64 mpn_mul_2 -- Multiply a n-limb number with a 2-limb number and store
 dnl  store the result to a (n+1)-limb number.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2004, 2011 Free Software Foundation, Inc.
+dnl  Copyright 2004 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C         cycles/limb
-C Itanium:    ?
-C Itanium 2:  1.5
+C Itanium:    3.15
+C Itanium 2:  1.625
+
+C Note that this is very similar to addmul_2.asm.  If you change this file,
+C please change that file too.
 
 C TODO
 C  * Clean up variable names, and try to decrease the number of distinct
 C    registers used.
-C  * Clean up feed-in code to not require zeroing several registers.
+C  * Cleanup feed-in code to not require zeroing several registers.
 C  * Make sure we don't depend on uninitialized predicate registers.
+C  * We currently cross-jump very aggressively, at the expense of a few cycles
+C    per operation.  Consider changing that.
 C  * Could perhaps save a few cycles by using 1 c/l carry propagation in
 C    wind-down code.
 C  * Ultimately rewrite.  The problem with this code is that it first uses a
@@ -102,519 +94,598 @@ PROLOGUE(mpn_mul_2)
 	.save	ar.lc, r2
 	.body
 
-ifdef(`HAVE_ABI_32',`
-.mmi;		addp4	rp = 0, rp		C			M I
-		addp4	up = 0, up		C			M I
-		addp4	vp = 0, vp		C			M I
-.mmi;		nop	1
-		nop	1
-		zxt4	n = n			C			I
+ifdef(`HAVE_ABI_32',
+`	addp4		rp = 0, rp		C			M I
+	addp4		up = 0, up		C			M I
+	addp4		vp = 0, vp		C			M I
+	zxt4		n = n			C			I
 	;;')
 
-.mmi;		ldf8	ux = [up], 8		C			M
-		ldf8	v0 = [vp], 8		C			M
-		mov	r2 = ar.lc		C			I0
-.mmi;		nop	1			C			M
-		and	r14 = 3, n		C			M I
-		add	n = -2, n		C			M I
-	;;
-.mmi;		ldf8	uy = [up], 8		C			M
-		ldf8	v1 = [vp]		C			M
-		shr.u	n = n, 2		C			I0
-.mmi;		nop	1			C			M
-		cmp.eq	p10, p0 = 1, r14	C			M I
-		cmp.eq	p11, p0 = 2, r14	C			M I
-	;;
-.mmi;		nop	1			C			M
-		cmp.eq	p12, p0 = 3, r14	C			M I
-		mov	ar.lc = n		C			I0
-.bbb;	(p10)	br.dptk	L(b01)			C			B
-	(p11)	br.dptk	L(b10)			C			B
-	(p12)	br.dptk	L(b11)			C			B
-	;;
+{.mmi		C 00
+	ldf8		ux = [up], 8		C			M
+	ldf8		v0 = [vp], 8		C			M
+	mov.i		r2 = ar.lc		C			I0
+}{.mmi
+	nop		0			C			M
+	and		r14 = 3, n		C			M I
+	add		n = -2, n		C			M I
+	;;
+}{.mmi		C 01
+	ldf8		uy = [up], 8		C			M
+	ldf8		v1 = [vp]		C			M
+	shr.u		n = n, 2		C			I
+}{.mmi
+	nop		0			C			M
+	cmp.eq		p10, p0 = 1, r14	C			M I
+	cmp.eq		p11, p0 = 2, r14	C			M I
+	;;
+}{.mmi		C 02
+	nop		0			C			M
+	cmp.eq		p12, p0 = 3, r14	C			M I
+	mov.i		ar.lc = n		C			I0
+}{.bbb
+  (p10) br.dptk		.Lb01			C			B
+  (p11) br.dptk		.Lb10			C			B
+  (p12) br.dptk		.Lb11			C			B
+	;;
+}
 
 	ALIGN(32)
-L(b00):		ldf8	u_1 = [up], 8
-		mov	acc1_2 = 0
-		mov	pr1_2 = 0
-		mov	pr0_3 = 0
-		cmp.ne	p8, p9 = r0, r0
+.Lb00:	ldf8		u_1 = [up], 8
+	mov		acc1_2 = 0
+	mov		pr1_2 = 0
+	mov		pr0_3 = 0
+	cmp.ne		p8, p9 = r0, r0
 	;;
-		xma.l	fp0b_3 = ux, v0, f0
-		cmp.ne	p12, p13 = r0, r0
-		ldf8	u_2 = [up], 8
-		xma.hu	fp1a_3 = ux, v0, f0
-		br.cloop.dptk	L(gt4)
+	xma.l		fp0b_3 = ux, v0, f0
+	cmp.ne		p12, p13 = r0, r0
+	ldf8		u_2 = [up], 8
+	xma.hu		fp1a_3 = ux, v0, f0
+	br.cloop.dptk	.grt4
 
-		xma.l	fp0b_0 = uy, v0, f0
-		xma.hu	fp1a_0 = uy, v0, f0
+	xma.l		fp0b_0 = uy, v0, f0
+	xma.hu		fp1a_0 = uy, v0, f0
 	;;
-		getfsig	acc0 = fp0b_3
-		xma.l	fp1b_3 = ux, v1, fp1a_3
-		xma.hu	fp2a_3 = ux, v1, fp1a_3
+	getf.sig	acc0 = fp0b_3
+	xma.l		fp1b_3 = ux, v1, fp1a_3
+	xma.hu		fp2a_3 = ux, v1, fp1a_3
 	;;
-		xma.l	fp0b_1 = u_1, v0, f0
-		xma.hu	fp1a_1 = u_1, v0, f0
+	xma.l		fp0b_1 = u_1, v0, f0
+	xma.hu		fp1a_1 = u_1, v0, f0
 	;;
-		getfsig	pr0_0 = fp0b_0
-		xma.l	fp1b_0 = uy, v1, fp1a_0
-		xma.hu	fp2a_0 = uy, v1, fp1a_0
+	getf.sig	pr0_0 = fp0b_0
+	xma.l		fp1b_0 = uy, v1, fp1a_0
+	xma.hu		fp2a_0 = uy, v1, fp1a_0
 	;;
-		getfsig	pr1_3 = fp1b_3
-		getfsig	acc1_3 = fp2a_3
-		xma.l	fp0b_2 = u_2, v0, f0
-		xma.hu	fp1a_2 = u_2, v0, f0
-		br	L(cj4)
+	getf.sig	pr1_3 = fp1b_3
+	getf.sig	acc1_3 = fp2a_3
+	xma.l		fp0b_2 = u_2, v0, f0
+	xma.hu		fp1a_2 = u_2, v0, f0
+	br		.Lcj4
 
-L(gt4):		xma.l	fp0b_0 = uy, v0, f0
-		xma.hu	fp1a_0 = uy, v0, f0
+.grt4:	xma.l		fp0b_0 = uy, v0, f0
+	xma.hu		fp1a_0 = uy, v0, f0
 	;;
-		getfsig	acc0 = fp0b_3
-		xma.l	fp1b_3 = ux, v1, fp1a_3
-		ldf8	u_3 = [up], 8
-		xma.hu	fp2a_3 = ux, v1, fp1a_3
+	getf.sig	acc0 = fp0b_3
+	xma.l		fp1b_3 = ux, v1, fp1a_3
+	ldf8		u_3 = [up], 8
+	xma.hu		fp2a_3 = ux, v1, fp1a_3
 	;;
-		xma.l	fp0b_1 = u_1, v0, f0
-		xma.hu	fp1a_1 = u_1, v0, f0
+	xma.l		fp0b_1 = u_1, v0, f0
+	xma.hu		fp1a_1 = u_1, v0, f0
 	;;
-		getfsig	pr0_0 = fp0b_0
-		xma.l	fp1b_0 = uy, v1, fp1a_0
-		xma.hu	fp2a_0 = uy, v1, fp1a_0
+	getf.sig	pr0_0 = fp0b_0
+	xma.l		fp1b_0 = uy, v1, fp1a_0
+	xma.hu		fp2a_0 = uy, v1, fp1a_0
 	;;
-		ldf8	u_0 = [up], 8
-		getfsig	pr1_3 = fp1b_3
-		xma.l	fp0b_2 = u_2, v0, f0
+	ldf8		u_0 = [up], 8
+	getf.sig	pr1_3 = fp1b_3
 	;;
-		getfsig	acc1_3 = fp2a_3
-		xma.hu	fp1a_2 = u_2, v0, f0
-		br	L(00)
+	getf.sig	acc1_3 = fp2a_3
+	xma.l		fp0b_2 = u_2, v0, f0
+	xma.hu		fp1a_2 = u_2, v0, f0
+	br		.LL00
 
 
 	ALIGN(32)
-L(b01):		ldf8	u_0 = [up], 8		C M
-		mov	acc1_1 = 0		C M I
-		mov	pr1_1 = 0		C M I
-		mov	pr0_2 = 0		C M I
-		cmp.ne	p6, p7 = r0, r0		C M I
+.Lb01:	ldf8		u_0 = [up], 8		C M
+	mov		acc1_1 = 0		C M I
+	mov		pr1_1 = 0		C M I
+	mov		pr0_2 = 0		C M I
+	cmp.ne		p6, p7 = r0, r0		C M I
 	;;
-		xma.l	fp0b_2 = ux, v0, f0	C F
-		cmp.ne	p10, p11 = r0, r0	C M I
-		ldf8	u_1 = [up], 8		C M
-		xma.hu	fp1a_2 = ux, v0, f0	C F
+	xma.l		fp0b_2 = ux, v0, f0	C F
+	cmp.ne		p10, p11 = r0, r0	C M I
+	ldf8		u_1 = [up], 8		C M
+	xma.hu		fp1a_2 = ux, v0, f0	C F
 	;;
-		xma.l	fp0b_3 = uy, v0, f0	C F
-		xma.hu	fp1a_3 = uy, v0, f0	C F
+	xma.l		fp0b_3 = uy, v0, f0	C F
+	xma.hu		fp1a_3 = uy, v0, f0	C F
 	;;
-		getfsig	acc0 = fp0b_2		C M
-		xma.l	fp1b_2 = ux, v1,fp1a_2	C F
-		ldf8	u_2 = [up], 8		C M
-		xma.hu	fp2a_2 = ux, v1,fp1a_2	C F
-		br.cloop.dptk	L(gt5)
+	getf.sig	acc0 = fp0b_2		C M
+	xma.l		fp1b_2 = ux, v1,fp1a_2	C F
+	xma.hu		fp2a_2 = ux, v1,fp1a_2	C F
+	ldf8		u_2 = [up], 8		C M
+	br.cloop.dptk	.grt5
 
-		xma.l	fp0b_0 = u_0, v0, f0	C F
-		xma.hu	fp1a_0 = u_0, v0, f0	C F
+	xma.l		fp0b_0 = u_0, v0, f0	C F
+	xma.hu		fp1a_0 = u_0, v0, f0	C F
 	;;
-		getfsig	pr0_3 = fp0b_3		C M
-		xma.l	fp1b_3 = uy, v1,fp1a_3	C F
-		xma.hu	fp2a_3 = uy, v1,fp1a_3	C F
+	getf.sig	pr0_3 = fp0b_3		C M
+	xma.l		fp1b_3 = uy, v1,fp1a_3	C F
+	xma.hu		fp2a_3 = uy, v1,fp1a_3	C F
 	;;
-		getfsig	pr1_2 = fp1b_2		C M
-		getfsig	acc1_2 = fp2a_2		C M
-		xma.l	fp0b_1 = u_1, v0, f0	C F
-		xma.hu	fp1a_1 = u_1, v0, f0	C F
-		br	L(cj5)
+	getf.sig	pr1_2 = fp1b_2		C M
+	getf.sig	acc1_2 = fp2a_2		C M
+	xma.l		fp0b_1 = u_1, v0, f0	C F
+	xma.hu		fp1a_1 = u_1, v0, f0	C F
+	br		.Lcj5
 
-L(gt5):		xma.l	fp0b_0 = u_0, v0, f0
-		xma.hu	fp1a_0 = u_0, v0, f0
+.grt5:	xma.l		fp0b_0 = u_0, v0, f0
+	xma.hu		fp1a_0 = u_0, v0, f0
 	;;
-		getfsig	pr0_3 = fp0b_3
-		xma.l	fp1b_3 = uy, v1, fp1a_3
-		xma.hu	fp2a_3 = uy, v1, fp1a_3
+	getf.sig	pr0_3 = fp0b_3
+	xma.l		fp1b_3 = uy, v1, fp1a_3
+	xma.hu		fp2a_3 = uy, v1, fp1a_3
 	;;
-		ldf8	u_3 = [up], 8
-		getfsig	pr1_2 = fp1b_2
-		xma.l	fp0b_1 = u_1, v0, f0
+	ldf8		u_3 = [up], 8
+	getf.sig	pr1_2 = fp1b_2
 	;;
-		getfsig	acc1_2 = fp2a_2
-		xma.hu	fp1a_1 = u_1, v0, f0
-		br	L(01)
+	getf.sig	acc1_2 = fp2a_2
+	xma.l		fp0b_1 = u_1, v0, f0
+	xma.hu		fp1a_1 = u_1, v0, f0
+	br		.LL01
 
 
+C We have two variants for n = 2.  They turn out to run at exactly the same
+C speed.  But the first, odd variant might allow one cycle to be trimmed.
 	ALIGN(32)
-L(b10):		br.cloop.dptk	L(gt2)
-		xma.l	fp0b_1 = ux, v0, f0
-		xma.hu	fp1a_1 = ux, v0, f0
-	;;
-		xma.l	fp0b_2 = uy, v0, f0
-		xma.hu	fp1a_2 = uy, v0, f0
-	;;
-		stf8	[rp] = fp0b_1, 8
-		xma.l	fp1b_1 = ux, v1, fp1a_1
-		xma.hu	fp2a_1 = ux, v1, fp1a_1
-	;;
-		getfsig	acc0 = fp0b_2
-		xma.l	fp1b_2 = uy, v1, fp1a_2
-		xma.hu	fp2a_2 = uy, v1, fp1a_2
-	;;
-		getfsig	pr1_1 = fp1b_1
-		getfsig	acc1_1 = fp2a_1
-		mov	ar.lc = r2
-		getfsig	pr1_2 = fp1b_2
-		getfsig	r8 = fp2a_2
-	;;
-		add	s0 = pr1_1, acc0
-	;;
-		st8	[rp] = s0, 8
-		cmp.ltu	p8, p9 = s0, pr1_1
-		sub	r31 = -1, acc1_1
-	;;
-		.pred.rel "mutex", p8, p9
-	(p8)	add	acc0 = pr1_2, acc1_1, 1
-	(p9)	add	acc0 = pr1_2, acc1_1
-	(p8)	cmp.leu	p10, p0 = r31, pr1_2
-	(p9)	cmp.ltu	p10, p0 = r31, pr1_2
-	;;
-		st8	[rp] = acc0, 8
-	(p10)	add	r8 = 1, r8
-		br.ret.sptk.many b0
-
-L(gt2):		ldf8	u_3 = [up], 8
-		mov	acc1_0 = 0
-		mov	pr1_0 = 0
-	;;
-		mov	pr0_1 = 0
-		xma.l	fp0b_1 = ux, v0, f0
-		ldf8	u_0 = [up], 8
-		xma.hu	fp1a_1 = ux, v0, f0
-	;;
-		xma.l	fp0b_2 = uy, v0, f0
-		xma.hu	fp1a_2 = uy, v0, f0
-	;;
-		getfsig	acc0 = fp0b_1
-		xma.l	fp1b_1 = ux, v1, fp1a_1
-		xma.hu	fp2a_1 = ux, v1, fp1a_1
-	;;
-		ldf8	u_1 = [up], 8
-		xma.l	fp0b_3 = u_3, v0, f0
-		xma.hu	fp1a_3 = u_3, v0, f0
-	;;
-		getfsig	pr0_2 = fp0b_2
-		xma.l	fp1b_2 = uy, v1, fp1a_2
-		xma.hu	fp2a_2 = uy, v1, fp1a_2
-	;;
-		ldf8	u_2 = [up], 8
-		getfsig	pr1_1 = fp1b_1
-	;;
-.mfi;		getfsig	acc1_1 = fp2a_1
-		xma.l	fp0b_0 = u_0, v0, f0
-		cmp.ne	p8, p9 = r0, r0
-.mfb;		cmp.ne	p12, p13 = r0, r0
-		xma.hu	fp1a_0 = u_0, v0, f0
-		br	L(10)
+ifdef(`',`
+.Lb10:		C 03
+	br.cloop.dptk	.grt2
+		C 04
+		C 05
+		C 06
+	xma.l		fp0b_1 = ux, v0, f0	C 0
+	xma.hu		fp1a_1 = ux, v0, f0	C 1
+	;;	C 07
+	xma.l		fp0b_2 = uy, v0, f0	C 1
+	xma.l		fp1b_1 = ux, v1, f0	C 1
+	;;	C 08
+	xma.hu		fp1a_2 = uy, v0, f0	C 2
+	xma.hu		fp2a_1 = ux, v1, f0	C 2
+	;;	C 09
+	xma.l		fp1b_2 = uy, v1, f0	C 2
+	xma.hu		fp2a_2 = uy, v1, f0	C 3
+	;;	C 10
+	getf.sig	r16 = fp1a_1
+	stf8		[rp] = fp0b_1, 8
+	;;	C 11
+	getf.sig	r17 = fp0b_2
+		C 12
+	getf.sig	r18 = fp1b_1
+		C 13
+	getf.sig	r19 = fp1a_2
+		C 14
+	getf.sig	r20 = fp2a_1
+		C 15
+	getf.sig	r21 = fp1b_2
+	;;	C 16
+	getf.sig	r8 = fp2a_2
+	add		r24 = r16, r17
+	;;	C 17
+	cmp.ltu		p6, p7 = r24, r16
+	add		r26 = r24, r18
+	;;	C 18
+	cmp.ltu		p8, p9 = r26, r24
+	;;	C 19
+	st8		[rp] = r26, 8
+  (p6)	add		r25 = r19, r20, 1
+  (p7)	add		r25 = r19, r20
+	;;	C 20
+  (p8)	add		r27 = r25, r21, 1
+  (p9)	add		r27 = r25, r21
+  (p6)	cmp.leu		p10, p0 = r25, r19
+  (p7)	cmp.ltu		p10, p0 = r25, r19
+	;;	C 21
+  (p10)	add		r8 = 1, r8
+  (p8)	cmp.leu		p12, p0 = r27, r25
+  (p9)	cmp.ltu		p12, p0 = r27, r25
+	;;	C 22
+	st8		[rp] = r27, 8
+	mov.i		ar.lc = r2
+  (p12)	add		r8 = 1, r8
+	br.ret.sptk.many b0
+')
+
+.Lb10:		C 03
+	br.cloop.dptk	.grt2
+		C 04
+		C 05
+		C 06
+	xma.l		fp0b_1 = ux, v0, f0
+	xma.hu		fp1a_1 = ux, v0, f0
+	;;	C 07
+	xma.l		fp0b_2 = uy, v0, f0
+	xma.hu		fp1a_2 = uy, v0, f0
+	;;	C 08
+		C 09
+		C 10
+	stf8		[rp] = fp0b_1, 8
+	xma.l		fp1b_1 = ux, v1, fp1a_1
+	xma.hu		fp2a_1 = ux, v1, fp1a_1
+	;;	C 11
+	getf.sig	acc0 = fp0b_2
+	xma.l		fp1b_2 = uy, v1, fp1a_2
+	xma.hu		fp2a_2 = uy, v1, fp1a_2
+	;;	C 12
+		C 13
+		C 14
+	getf.sig	pr1_1 = fp1b_1
+		C 15
+	getf.sig	acc1_1 = fp2a_1
+		C 16
+	getf.sig	pr1_2 = fp1b_2
+		C 17
+	getf.sig	r8 = fp2a_2
+	;;	C 18
+		C 19
+	add		s0 = pr1_1, acc0
+	;;	C 20
+	st8		[rp] = s0, 8
+	cmp.ltu		p8, p9 = s0, pr1_1
+	sub		r31 = -1, acc1_1
+	;;	C 21
+	.pred.rel "mutex", p8, p9
+  (p8)	add		acc0 = pr1_2, acc1_1, 1
+  (p9)	add		acc0 = pr1_2, acc1_1
+  (p8)	cmp.leu		p10, p0 = r31, pr1_2
+  (p9)	cmp.ltu		p10, p0 = r31, pr1_2
+	;;	C 22
+	st8		[rp] = acc0, 8
+	mov.i		ar.lc = r2
+  (p10)	add		r8 = 1, r8
+	br.ret.sptk.many b0
+
+
+.grt2:	ldf8		u_3 = [up], 8
+	mov		acc1_0 = 0
+	mov		pr1_0 = 0
+	;;
+	mov		pr0_1 = 0
+	xma.l		fp0b_1 = ux, v0, f0
+	ldf8		u_0 = [up], 8
+	xma.hu		fp1a_1 = ux, v0, f0
+	;;
+	xma.l		fp0b_2 = uy, v0, f0
+	xma.hu		fp1a_2 = uy, v0, f0
+	;;
+	getf.sig	acc0 = fp0b_1
+	xma.l		fp1b_1 = ux, v1, fp1a_1
+	xma.hu		fp2a_1 = ux, v1, fp1a_1
+	;;
+	ldf8		u_1 = [up], 8
+	xma.l		fp0b_3 = u_3, v0, f0
+	xma.hu		fp1a_3 = u_3, v0, f0
+	;;
+	getf.sig	pr0_2 = fp0b_2
+	xma.l		fp1b_2 = uy, v1, fp1a_2
+	xma.hu		fp2a_2 = uy, v1, fp1a_2
+	;;
+	ldf8		u_2 = [up], 8
+	getf.sig	pr1_1 = fp1b_1
+	;;
+	getf.sig	acc1_1 = fp2a_1
+	xma.l		fp0b_0 = u_0, v0, f0
+	cmp.ne		p8, p9 = r0, r0
+	cmp.ne		p12, p13 = r0, r0
+	xma.hu		fp1a_0 = u_0, v0, f0
+	br		.LL10
 
 
 	ALIGN(32)
-L(b11):		mov	acc1_3 = 0
-		mov	pr1_3 = 0
-		mov	pr0_0 = 0
-		ldf8	u_2 = [up], 8
-		cmp.ne	p6, p7 = r0, r0
-		br.cloop.dptk	L(gt3)
+.Lb11:	mov		acc1_3 = 0
+	mov		pr1_3 = 0
+	mov		pr0_0 = 0
+	cmp.ne		p6, p7 = r0, r0
+	;;
+	ldf8		u_2 = [up], 8
+	br.cloop.dptk	.grt3
 	;;
-		xma.l	fp0b_0 = ux, v0, f0
-		xma.hu	fp1a_0 = ux, v0, f0
+	xma.l		fp0b_0 = ux, v0, f0
+	xma.hu		fp1a_0 = ux, v0, f0
 	;;
-		cmp.ne	p10, p11 = r0, r0
-		xma.l	fp0b_1 = uy, v0, f0
-		xma.hu	fp1a_1 = uy, v0, f0
+	cmp.ne		p10, p11 = r0, r0
+	xma.l		fp0b_1 = uy, v0, f0
+	xma.hu		fp1a_1 = uy, v0, f0
 	;;
-		getfsig	acc0 = fp0b_0
-		xma.l	fp1b_0 = ux, v1, fp1a_0
-		xma.hu	fp2a_0 = ux, v1, fp1a_0
+	getf.sig	acc0 = fp0b_0
+	xma.l		fp1b_0 = ux, v1, fp1a_0
+	xma.hu		fp2a_0 = ux, v1, fp1a_0
 	;;
-		xma.l	fp0b_2 = u_2, v0, f0
-		xma.hu	fp1a_2 = u_2, v0, f0
+	xma.l		fp0b_2 = u_2, v0, f0
+	xma.hu		fp1a_2 = u_2, v0, f0
 	;;
-		getfsig	pr0_1 = fp0b_1
-		xma.l	fp1b_1 = uy, v1, fp1a_1
-		xma.hu	fp2a_1 = uy, v1, fp1a_1
+	getf.sig	pr0_1 = fp0b_1
+	xma.l		fp1b_1 = uy, v1, fp1a_1
+	xma.hu		fp2a_1 = uy, v1, fp1a_1
 	;;
-		getfsig	pr1_0 = fp1b_0
-		getfsig	acc1_0 = fp2a_0
-		br	L(cj3)
+	getf.sig	pr1_0 = fp1b_0
+	getf.sig	acc1_0 = fp2a_0
+	br		.Lcj3
 
-L(gt3):		xma.l	fp0b_0 = ux, v0, f0
-		cmp.ne	p10, p11 = r0, r0
-		ldf8	u_3 = [up], 8
-		xma.hu	fp1a_0 = ux, v0, f0
+.grt3:	xma.l		fp0b_0 = ux, v0, f0
+	cmp.ne		p10, p11 = r0, r0
+	ldf8		u_3 = [up], 8
+	xma.hu		fp1a_0 = ux, v0, f0
 	;;
-		xma.l	fp0b_1 = uy, v0, f0
-		xma.hu	fp1a_1 = uy, v0, f0
+	xma.l		fp0b_1 = uy, v0, f0
+	xma.hu		fp1a_1 = uy, v0, f0
 	;;
-		getfsig	acc0 = fp0b_0
-		xma.l	fp1b_0 = ux, v1, fp1a_0
-		ldf8	u_0 = [up], 8
-		xma.hu	fp2a_0 = ux, v1, fp1a_0
+	getf.sig	acc0 = fp0b_0
+	xma.l		fp1b_0 = ux, v1, fp1a_0
+	ldf8		u_0 = [up], 8
+	xma.hu		fp2a_0 = ux, v1, fp1a_0
 	;;
-		xma.l	fp0b_2 = u_2, v0, f0
-		xma.hu	fp1a_2 = u_2, v0, f0
+	xma.l		fp0b_2 = u_2, v0, f0
+	xma.hu		fp1a_2 = u_2, v0, f0
 	;;
-		getfsig	pr0_1 = fp0b_1
-		xma.l	fp1b_1 = uy, v1, fp1a_1
-		xma.hu	fp2a_1 = uy, v1, fp1a_1
+	getf.sig	pr0_1 = fp0b_1
+	xma.l		fp1b_1 = uy, v1, fp1a_1
+	xma.hu		fp2a_1 = uy, v1, fp1a_1
 	;;
-		ldf8	u_1 = [up], 8
-		getfsig	pr1_0 = fp1b_0
+	ldf8		u_1 = [up], 8
+	getf.sig	pr1_0 = fp1b_0
 	;;
-		getfsig	acc1_0 = fp2a_0
-		xma.l	fp0b_3 = u_3, v0, f0
-		xma.hu	fp1a_3 = u_3, v0, f0
-		br	L(11)
+	getf.sig	acc1_0 = fp2a_0
+	xma.l		fp0b_3 = u_3, v0, f0
+	xma.hu		fp1a_3 = u_3, v0, f0
+	br		.LL11
 
 
 C *** MAIN LOOP START ***
 	ALIGN(32)
-L(top):						C 00
-		.pred.rel "mutex", p8, p9
-		.pred.rel "mutex", p12, p13
-		ldf8	u_3 = [up], 8
-		getfsig	pr1_2 = fp1b_2
-	(p8)	cmp.leu	p6, p7 = acc0, pr0_1
-	(p9)	cmp.ltu	p6, p7 = acc0, pr0_1
-	(p12)	cmp.leu	p10, p11 = s0, pr1_0
-	(p13)	cmp.ltu	p10, p11 = s0, pr1_0
+.Loop:						C 00
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr0_3 = fp0b_3
+	xma.l		fp1b_3 = u_3, v1, fp1a_3
+  (p12)	add		s0 = pr1_0, acc0, 1
+  (p13)	add		s0 = pr1_0, acc0
+	xma.hu		fp2a_3 = u_3, v1, fp1a_3
 	;;					C 01
-		.pred.rel "mutex", p6, p7
-		getfsig	acc1_2 = fp2a_2
-		st8	[rp] = s0, 8
-		xma.l	fp0b_1 = u_1, v0, f0
-	(p6)	add	acc0 = pr0_2, acc1_0, 1
-	(p7)	add	acc0 = pr0_2, acc1_0
-		xma.hu	fp1a_1 = u_1, v0, f0
+	.pred.rel "mutex", p8, p9
+	.pred.rel "mutex", p12, p13
+	ldf8		u_3 = [up], 8
+	getf.sig	pr1_2 = fp1b_2
+  (p8)	cmp.leu		p6, p7 = acc0, pr0_1
+  (p9)	cmp.ltu		p6, p7 = acc0, pr0_1
+  (p12)	cmp.leu		p10, p11 = s0, pr1_0
+  (p13)	cmp.ltu		p10, p11 = s0, pr1_0
 	;;					C 02
-L(01):
-		.pred.rel "mutex", p10, p11
-		getfsig	pr0_0 = fp0b_0
-		xma.l	fp1b_0 = u_0, v1, fp1a_0
-	(p10)	add	s0 = pr1_1, acc0, 1
-	(p11)	add	s0 = pr1_1, acc0
-		xma.hu	fp2a_0 = u_0, v1, fp1a_0
-		nop	1
+	.pred.rel "mutex", p6, p7
+	getf.sig	acc1_2 = fp2a_2
+	st8		[rp] = s0, 8
+	xma.l		fp0b_1 = u_1, v0, f0
+  (p6)	add		acc0 = pr0_2, acc1_0, 1
+  (p7)	add		acc0 = pr0_2, acc1_0
+	xma.hu		fp1a_1 = u_1, v0, f0
 	;;					C 03
-		.pred.rel "mutex", p6, p7
-		.pred.rel "mutex", p10, p11
-		ldf8	u_0 = [up], 8
-		getfsig	pr1_3 = fp1b_3
-	(p6)	cmp.leu	p8, p9 = acc0, pr0_2
-	(p7)	cmp.ltu	p8, p9 = acc0, pr0_2
-	(p10)	cmp.leu	p12, p13 = s0, pr1_1
-	(p11)	cmp.ltu	p12, p13 = s0, pr1_1
+.LL01:
+	.pred.rel "mutex", p10, p11
+	getf.sig	pr0_0 = fp0b_0
+	xma.l		fp1b_0 = u_0, v1, fp1a_0
+  (p10)	add		s0 = pr1_1, acc0, 1
+  (p11)	add		s0 = pr1_1, acc0
+	xma.hu		fp2a_0 = u_0, v1, fp1a_0
 	;;					C 04
-		.pred.rel "mutex", p8, p9
-		getfsig	acc1_3 = fp2a_3
-		st8	[rp] = s0, 8
-		xma.l	fp0b_2 = u_2, v0, f0
-	(p8)	add	acc0 = pr0_3, acc1_1, 1
-	(p9)	add	acc0 = pr0_3, acc1_1
-		xma.hu	fp1a_2 = u_2, v0, f0
+	.pred.rel "mutex", p6, p7
+	.pred.rel "mutex", p10, p11
+	ldf8		u_0 = [up], 8
+	getf.sig	pr1_3 = fp1b_3
+  (p6)	cmp.leu		p8, p9 = acc0, pr0_2
+  (p7)	cmp.ltu		p8, p9 = acc0, pr0_2
+  (p10)	cmp.leu		p12, p13 = s0, pr1_1
+  (p11)	cmp.ltu		p12, p13 = s0, pr1_1
 	;;					C 05
-L(00):
-		.pred.rel "mutex", p12, p13
-		getfsig	pr0_1 = fp0b_1
-		xma.l	fp1b_1 = u_1, v1, fp1a_1
-	(p12)	add	s0 = pr1_2, acc0, 1
-	(p13)	add	s0 = pr1_2, acc0
-		xma.hu	fp2a_1 = u_1, v1, fp1a_1
-		nop	1
+	.pred.rel "mutex", p8, p9
+	getf.sig	acc1_3 = fp2a_3
+	st8		[rp] = s0, 8
+	xma.l		fp0b_2 = u_2, v0, f0
+  (p8)	add		acc0 = pr0_3, acc1_1, 1
+  (p9)	add		acc0 = pr0_3, acc1_1
+	xma.hu		fp1a_2 = u_2, v0, f0
 	;;					C 06
-		.pred.rel "mutex", p8, p9
-		.pred.rel "mutex", p12, p13
-		ldf8	u_1 = [up], 8
-		getfsig	pr1_0 = fp1b_0
-	(p8)	cmp.leu	p6, p7 = acc0, pr0_3
-	(p9)	cmp.ltu	p6, p7 = acc0, pr0_3
-	(p12)	cmp.leu	p10, p11 = s0, pr1_2
-	(p13)	cmp.ltu	p10, p11 = s0, pr1_2
+.LL00:
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr0_1 = fp0b_1
+	xma.l		fp1b_1 = u_1, v1, fp1a_1
+  (p12)	add		s0 = pr1_2, acc0, 1
+  (p13)	add		s0 = pr1_2, acc0
+	xma.hu		fp2a_1 = u_1, v1, fp1a_1
 	;;					C 07
-		.pred.rel "mutex", p6, p7
-		getfsig	acc1_0 = fp2a_0
-		st8	[rp] = s0, 8
-		xma.l	fp0b_3 = u_3, v0, f0
-	(p6)	add	acc0 = pr0_0, acc1_2, 1
-	(p7)	add	acc0 = pr0_0, acc1_2
-		xma.hu	fp1a_3 = u_3, v0, f0
+	.pred.rel "mutex", p8, p9
+	.pred.rel "mutex", p12, p13
+	ldf8		u_1 = [up], 8
+	getf.sig	pr1_0 = fp1b_0
+  (p8)	cmp.leu		p6, p7 = acc0, pr0_3
+  (p9)	cmp.ltu		p6, p7 = acc0, pr0_3
+  (p12)	cmp.leu		p10, p11 = s0, pr1_2
+  (p13)	cmp.ltu		p10, p11 = s0, pr1_2
 	;;					C 08
-L(11):
-		.pred.rel "mutex", p10, p11
-		getfsig	pr0_2 = fp0b_2
-		xma.l	fp1b_2 = u_2, v1, fp1a_2
-	(p10)	add	s0 = pr1_3, acc0, 1
-	(p11)	add	s0 = pr1_3, acc0
-		xma.hu	fp2a_2 = u_2, v1, fp1a_2
-		nop	1
+	.pred.rel "mutex", p6, p7
+	getf.sig	acc1_0 = fp2a_0
+	st8		[rp] = s0, 8
+	xma.l		fp0b_3 = u_3, v0, f0
+  (p6)	add		acc0 = pr0_0, acc1_2, 1
+  (p7)	add		acc0 = pr0_0, acc1_2
+	xma.hu		fp1a_3 = u_3, v0, f0
 	;;					C 09
-		.pred.rel "mutex", p6, p7
-		.pred.rel "mutex", p10, p11
-		ldf8	u_2 = [up], 8
-		getfsig	pr1_1 = fp1b_1
-	(p6)	cmp.leu	p8, p9 = acc0, pr0_0
-	(p7)	cmp.ltu	p8, p9 = acc0, pr0_0
-	(p10)	cmp.leu	p12, p13 = s0, pr1_3
-	(p11)	cmp.ltu	p12, p13 = s0, pr1_3
+.LL11:
+	.pred.rel "mutex", p10, p11
+	getf.sig	pr0_2 = fp0b_2
+	xma.l		fp1b_2 = u_2, v1, fp1a_2
+  (p10)	add		s0 = pr1_3, acc0, 1
+  (p11)	add		s0 = pr1_3, acc0
+	xma.hu		fp2a_2 = u_2, v1, fp1a_2
 	;;					C 10
-		.pred.rel "mutex", p8, p9
-		getfsig	acc1_1 = fp2a_1
-		st8	[rp] = s0, 8
-		xma.l	fp0b_0 = u_0, v0, f0
-	(p8)	add	acc0 = pr0_1, acc1_3, 1
-	(p9)	add	acc0 = pr0_1, acc1_3
-		xma.hu	fp1a_0 = u_0, v0, f0
+	.pred.rel "mutex", p6, p7
+	.pred.rel "mutex", p10, p11
+	ldf8		u_2 = [up], 8
+	getf.sig	pr1_1 = fp1b_1
+  (p6)	cmp.leu		p8, p9 = acc0, pr0_0
+  (p7)	cmp.ltu		p8, p9 = acc0, pr0_0
+  (p10)	cmp.leu		p12, p13 = s0, pr1_3
+  (p11)	cmp.ltu		p12, p13 = s0, pr1_3
 	;;					C 11
-L(10):
-		.pred.rel "mutex", p12, p13
-		getfsig	pr0_3 = fp0b_3
-		xma.l	fp1b_3 = u_3, v1, fp1a_3
-	(p12)	add	s0 = pr1_0, acc0, 1
-	(p13)	add	s0 = pr1_0, acc0
-		xma.hu	fp2a_3 = u_3, v1, fp1a_3
-		br.cloop.dptk	L(top)
+	.pred.rel "mutex", p8, p9
+	getf.sig	acc1_1 = fp2a_1
+	st8		[rp] = s0, 8
+	xma.l		fp0b_0 = u_0, v0, f0
+  (p8)	add		acc0 = pr0_1, acc1_3, 1
+  (p9)	add		acc0 = pr0_1, acc1_3
+	xma.hu		fp1a_0 = u_0, v0, f0
+.LL10:	br.cloop.dptk	.Loop			C 12
 	;;
 C *** MAIN LOOP END ***
 
-		.pred.rel "mutex", p8, p9
-		.pred.rel "mutex", p12, p13
-.mmi;		getfsig	pr1_2 = fp1b_2
-		st8	[rp] = s0, 8
-	(p8)	cmp.leu	p6, p7 = acc0, pr0_1
-.mmi;	(p9)	cmp.ltu	p6, p7 = acc0, pr0_1
-	(p12)	cmp.leu	p10, p11 = s0, pr1_0
-	(p13)	cmp.ltu	p10, p11 = s0, pr1_0
-	;;
-		.pred.rel "mutex", p6, p7
-.mfi;		getfsig	acc1_2 = fp2a_2
-		xma.l	fp0b_1 = u_1, v0, f0
-		nop	1
-.mmf;	(p6)	add	acc0 = pr0_2, acc1_0, 1
-	(p7)	add	acc0 = pr0_2, acc1_0
-		xma.hu	fp1a_1 = u_1, v0, f0
-	;;
-L(cj5):
-		.pred.rel "mutex", p10, p11
-.mfi;		getfsig	pr0_0 = fp0b_0
-		xma.l	fp1b_0 = u_0, v1, fp1a_0
-	(p10)	add	s0 = pr1_1, acc0, 1
-.mfi;	(p11)	add	s0 = pr1_1, acc0
-		xma.hu	fp2a_0 = u_0, v1, fp1a_0
-		nop	1
-	;;
-		.pred.rel "mutex", p6, p7
-		.pred.rel "mutex", p10, p11
-.mmi;		getfsig	pr1_3 = fp1b_3
-		st8	[rp] = s0, 8
-	(p6)	cmp.leu	p8, p9 = acc0, pr0_2
-.mmi;	(p7)	cmp.ltu	p8, p9 = acc0, pr0_2
-	(p10)	cmp.leu	p12, p13 = s0, pr1_1
-	(p11)	cmp.ltu	p12, p13 = s0, pr1_1
-	;;
-		.pred.rel "mutex", p8, p9
-.mfi;		getfsig	acc1_3 = fp2a_3
-		xma.l	fp0b_2 = u_2, v0, f0
-		nop	1
-.mmf;	(p8)	add	acc0 = pr0_3, acc1_1, 1
-	(p9)	add	acc0 = pr0_3, acc1_1
-		xma.hu	fp1a_2 = u_2, v0, f0
-	;;
-L(cj4):
-		.pred.rel "mutex", p12, p13
-.mfi;		getfsig	pr0_1 = fp0b_1
-		xma.l	fp1b_1 = u_1, v1, fp1a_1
-	(p12)	add	s0 = pr1_2, acc0, 1
-.mfi;	(p13)	add	s0 = pr1_2, acc0
-		xma.hu	fp2a_1 = u_1, v1, fp1a_1
-		nop	1
-	;;
-		.pred.rel "mutex", p8, p9
-		.pred.rel "mutex", p12, p13
-.mmi;		getfsig	pr1_0 = fp1b_0
-		st8	[rp] = s0, 8
-	(p8)	cmp.leu	p6, p7 = acc0, pr0_3
-.mmi;	(p9)	cmp.ltu	p6, p7 = acc0, pr0_3
-	(p12)	cmp.leu	p10, p11 = s0, pr1_2
-	(p13)	cmp.ltu	p10, p11 = s0, pr1_2
-	;;
-		.pred.rel "mutex", p6, p7
-.mmi;		getfsig	acc1_0 = fp2a_0
-	(p6)	add	acc0 = pr0_0, acc1_2, 1
-	(p7)	add	acc0 = pr0_0, acc1_2
-	;;
-L(cj3):
-		.pred.rel "mutex", p10, p11
-.mfi;		getfsig	pr0_2 = fp0b_2
-		xma.l	fp1b_2 = u_2, v1, fp1a_2
-	(p10)	add	s0 = pr1_3, acc0, 1
-.mfi;	(p11)	add	s0 = pr1_3, acc0
-		xma.hu	fp2a_2 = u_2, v1, fp1a_2
-		nop	1
-	;;
-		.pred.rel "mutex", p6, p7
-		.pred.rel "mutex", p10, p11
-.mmi;		getfsig	pr1_1 = fp1b_1
-		st8	[rp] = s0, 8
-	(p6)	cmp.leu	p8, p9 = acc0, pr0_0
-.mmi;	(p7)	cmp.ltu	p8, p9 = acc0, pr0_0
-	(p10)	cmp.leu	p12, p13 = s0, pr1_3
-	(p11)	cmp.ltu	p12, p13 = s0, pr1_3
-	;;
-		.pred.rel "mutex", p8, p9
-.mmi;		getfsig	acc1_1 = fp2a_1
-	(p8)	add	acc0 = pr0_1, acc1_3, 1
-	(p9)	add	acc0 = pr0_1, acc1_3
-	;;
-		.pred.rel "mutex", p12, p13
-.mmi;	(p12)	add	s0 = pr1_0, acc0, 1
-	(p13)	add	s0 = pr1_0, acc0
-		nop	1
-	;;
-		.pred.rel "mutex", p8, p9
-		.pred.rel "mutex", p12, p13
-.mmi;		getfsig	pr1_2 = fp1b_2
-		st8	[rp] = s0, 8
-	(p8)	cmp.leu	p6, p7 = acc0, pr0_1
-.mmi;	(p9)	cmp.ltu	p6, p7 = acc0, pr0_1
-	(p12)	cmp.leu	p10, p11 = s0, pr1_0
-	(p13)	cmp.ltu	p10, p11 = s0, pr1_0
-	;;
-		.pred.rel "mutex", p6, p7
-.mmi;		getfsig	r8 = fp2a_2
-	(p6)	add	acc0 = pr0_2, acc1_0, 1
-	(p7)	add	acc0 = pr0_2, acc1_0
-	;;
-		.pred.rel "mutex", p10, p11
-.mmi;	(p10)	add	s0 = pr1_1, acc0, 1
-	(p11)	add	s0 = pr1_1, acc0
-	(p6)	cmp.leu	p8, p9 = acc0, pr0_2
-	;;
-		.pred.rel "mutex", p10, p11
-.mmi;	(p7)	cmp.ltu	p8, p9 = acc0, pr0_2
-	(p10)	cmp.leu	p12, p13 = s0, pr1_1
-	(p11)	cmp.ltu	p12, p13 = s0, pr1_1
-	;;
-		.pred.rel "mutex", p8, p9
-.mmi;		st8	[rp] = s0, 8
-	(p8)	add	acc0 = pr1_2, acc1_1, 1
-	(p9)	add	acc0 = pr1_2, acc1_1
-	;;
-		.pred.rel "mutex", p8, p9
-.mmi;	(p8)	cmp.leu	p10, p11 = acc0, pr1_2
-	(p9)	cmp.ltu	p10, p11 = acc0, pr1_2
-	(p12)	add	acc0 = 1, acc0
-	;;
-.mmi;		st8	[rp] = acc0, 8
-	(p12)	cmpeqor	p10, p0 = 0, acc0
-		nop	1
-	;;
-.mib;	(p10)	add	r8 = 1, r8
-		mov	ar.lc = r2
-		br.ret.sptk.many b0
+.Lcj6:
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr0_3 = fp0b_3
+	xma.l		fp1b_3 = u_3, v1, fp1a_3
+  (p12)	add		s0 = pr1_0, acc0, 1
+  (p13)	add		s0 = pr1_0, acc0
+	xma.hu		fp2a_3 = u_3, v1, fp1a_3
+	;;
+	.pred.rel "mutex", p8, p9
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr1_2 = fp1b_2
+  (p8)	cmp.leu		p6, p7 = acc0, pr0_1
+  (p9)	cmp.ltu		p6, p7 = acc0, pr0_1
+  (p12)	cmp.leu		p10, p11 = s0, pr1_0
+  (p13)	cmp.ltu		p10, p11 = s0, pr1_0
+	;;
+	.pred.rel "mutex", p6, p7
+	getf.sig	acc1_2 = fp2a_2
+	st8		[rp] = s0, 8
+	xma.l		fp0b_1 = u_1, v0, f0
+  (p6)	add		acc0 = pr0_2, acc1_0, 1
+  (p7)	add		acc0 = pr0_2, acc1_0
+	xma.hu		fp1a_1 = u_1, v0, f0
+	;;
+.Lcj5:
+	.pred.rel "mutex", p10, p11
+	getf.sig	pr0_0 = fp0b_0
+	xma.l		fp1b_0 = u_0, v1, fp1a_0
+  (p10)	add		s0 = pr1_1, acc0, 1
+  (p11)	add		s0 = pr1_1, acc0
+	xma.hu		fp2a_0 = u_0, v1, fp1a_0
+	;;
+	.pred.rel "mutex", p6, p7
+	.pred.rel "mutex", p10, p11
+	getf.sig	pr1_3 = fp1b_3
+  (p6)	cmp.leu		p8, p9 = acc0, pr0_2
+  (p7)	cmp.ltu		p8, p9 = acc0, pr0_2
+  (p10)	cmp.leu		p12, p13 = s0, pr1_1
+  (p11)	cmp.ltu		p12, p13 = s0, pr1_1
+	;;
+	.pred.rel "mutex", p8, p9
+	getf.sig	acc1_3 = fp2a_3
+	st8		[rp] = s0, 8
+	xma.l		fp0b_2 = u_2, v0, f0
+  (p8)	add		acc0 = pr0_3, acc1_1, 1
+  (p9)	add		acc0 = pr0_3, acc1_1
+	xma.hu		fp1a_2 = u_2, v0, f0
+	;;
+.Lcj4:
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr0_1 = fp0b_1
+	xma.l		fp1b_1 = u_1, v1, fp1a_1
+  (p12)	add		s0 = pr1_2, acc0, 1
+  (p13)	add		s0 = pr1_2, acc0
+	xma.hu		fp2a_1 = u_1, v1, fp1a_1
+	;;
+	.pred.rel "mutex", p8, p9
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr1_0 = fp1b_0
+  (p8)	cmp.leu		p6, p7 = acc0, pr0_3
+  (p9)	cmp.ltu		p6, p7 = acc0, pr0_3
+  (p12)	cmp.leu		p10, p11 = s0, pr1_2
+  (p13)	cmp.ltu		p10, p11 = s0, pr1_2
+	;;
+	.pred.rel "mutex", p6, p7
+	getf.sig	acc1_0 = fp2a_0
+	st8		[rp] = s0, 8
+  (p6)	add		acc0 = pr0_0, acc1_2, 1
+  (p7)	add		acc0 = pr0_0, acc1_2
+	;;
+.Lcj3:
+	.pred.rel "mutex", p10, p11
+	getf.sig	pr0_2 = fp0b_2
+	xma.l		fp1b_2 = u_2, v1, fp1a_2
+  (p10)	add		s0 = pr1_3, acc0, 1
+  (p11)	add		s0 = pr1_3, acc0
+	xma.hu		fp2a_2 = u_2, v1, fp1a_2
+	;;
+	.pred.rel "mutex", p6, p7
+	.pred.rel "mutex", p10, p11
+	getf.sig	pr1_1 = fp1b_1
+  (p6)	cmp.leu		p8, p9 = acc0, pr0_0
+  (p7)	cmp.ltu		p8, p9 = acc0, pr0_0
+  (p10)	cmp.leu		p12, p13 = s0, pr1_3
+  (p11)	cmp.ltu		p12, p13 = s0, pr1_3
+	;;
+	.pred.rel "mutex", p8, p9
+	getf.sig	acc1_1 = fp2a_1
+	st8		[rp] = s0, 8
+  (p8)	add		acc0 = pr0_1, acc1_3, 1
+  (p9)	add		acc0 = pr0_1, acc1_3
+	;;
+	.pred.rel "mutex", p12, p13
+  (p12)	add		s0 = pr1_0, acc0, 1
+  (p13)	add		s0 = pr1_0, acc0
+	;;
+	.pred.rel "mutex", p8, p9
+	.pred.rel "mutex", p12, p13
+	getf.sig	pr1_2 = fp1b_2
+  (p8)	cmp.leu		p6, p7 = acc0, pr0_1
+  (p9)	cmp.ltu		p6, p7 = acc0, pr0_1
+  (p12)	cmp.leu		p10, p11 = s0, pr1_0
+  (p13)	cmp.ltu		p10, p11 = s0, pr1_0
+	;;
+	.pred.rel "mutex", p6, p7
+	getf.sig	acc1_2 = fp2a_2
+	st8		[rp] = s0, 8
+  (p6)	add		acc0 = pr0_2, acc1_0, 1
+  (p7)	add		acc0 = pr0_2, acc1_0
+	;;
+	.pred.rel "mutex", p10, p11
+  (p10)	add		s0 = pr1_1, acc0, 1
+  (p11)	add		s0 = pr1_1, acc0
+	;;
+	.pred.rel "mutex", p6, p7
+	.pred.rel "mutex", p10, p11
+  (p6)	cmp.leu		p8, p9 = acc0, pr0_2
+  (p7)	cmp.ltu		p8, p9 = acc0, pr0_2
+  (p10)	cmp.leu		p12, p13 = s0, pr1_1
+  (p11)	cmp.ltu		p12, p13 = s0, pr1_1
+	;;
+	.pred.rel "mutex", p8, p9
+	st8		[rp] = s0, 8
+  (p8)	add		acc0 = pr1_2, acc1_1, 1
+  (p9)	add		acc0 = pr1_2, acc1_1
+	;;
+	.pred.rel "mutex", p8, p9
+  (p8)	cmp.leu		p10, p11 = acc0, pr1_2
+  (p9)	cmp.ltu		p10, p11 = acc0, pr1_2
+  (p12)	add		acc0 = 1, acc0
+	;;
+	st8		[rp] = acc0, 8
+  (p12)	cmp.eq.or	p10, p0 = 0, acc0
+	mov		r8 = acc1_2
+	;;
+	.pred.rel "mutex", p10, p11
+  (p10)	add		r8 = 1, r8
+	mov.i		ar.lc = r2
+	br.ret.sptk.many b0
 EPILOGUE()
 ASM_END()
diff --git a/gmp/mpn/ia64/popcount.asm b/gmp/mpn/ia64/popcount.asm
index c0b5c5c1cf..a02bf4346c 100644
--- a/gmp/mpn/ia64/popcount.asm
+++ b/gmp/mpn/ia64/popcount.asm
@@ -1,34 +1,22 @@
 dnl  IA-64 mpn_popcount -- mpn population count.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2000-2005 Free Software Foundation, Inc.
+dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -50,7 +38,6 @@ PROLOGUE(mpn_popcount)
 	.prologue
 ifdef(`HAVE_ABI_32',
 `	addp4		up = 0, up		C			M I
-	nop.m		0
 	zxt4		n = n			C			I
 	;;
 ')
diff --git a/gmp/mpn/ia64/rsh1aors_n.asm b/gmp/mpn/ia64/rsh1aors_n.asm
index 3c7defb0ba..366b5c50bb 100644
--- a/gmp/mpn/ia64/rsh1aors_n.asm
+++ b/gmp/mpn/ia64/rsh1aors_n.asm
@@ -1,34 +1,21 @@
 dnl  IA-64 mpn_rsh1add_n/mpn_rsh1sub_n -- rp[] = (up[] +- vp[]) >> 1.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2003-2005 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -78,8 +65,6 @@ ifdef(`HAVE_ABI_32',`
 	addp4		rp = 0, rp		C			M I
 	addp4		up = 0, up		C			M I
 	addp4		vp = 0, vp		C			M I
-	nop.m		0
-	nop.m		0
 	zxt4		n = n			C			I
 	;;
 ')
diff --git a/gmp/mpn/ia64/sec_tabselect.asm b/gmp/mpn/ia64/sec_tabselect.asm
deleted file mode 100644
index f116ea3843..0000000000
--- a/gmp/mpn/ia64/sec_tabselect.asm
+++ /dev/null
@@ -1,150 +0,0 @@
-dnl  IA-64 mpn_sec_tabselect.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C           cycles/limb
-C Itanium:       ?
-C Itanium 2:     2.5
-
-C NOTES
-C  * Using software pipelining could trivially yield 2 c/l without unrolling,
-C    or 1+epsilon with unrolling.  (This code was modelled after the powerpc64
-C    code, for simplicity.)
-
-C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
-define(`rp',     `r32')
-define(`tp',     `r33')
-define(`n',      `r34')
-define(`nents',  `r35')
-define(`which',  `r36')
-
-define(`mask',   `r8')
-
-define(`rp1',     `r32')
-define(`tp1',     `r33')
-define(`rp2',     `r14')
-define(`tp2',     `r15')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_sec_tabselect)
-	.prologue
-	.save	ar.lc, r2
-	.body
-ifdef(`HAVE_ABI_32',`
-.mmi;	addp4	rp = 0, rp		C			M I
-	addp4	tp = 0, tp		C			M I
-	zxt4	n = n			C			I
-.mii;	nop	0
-	zxt4	nents = nents		C			I
-	zxt4	which = which		C			I
-	;;
-')
-.mmi;	add	rp2 = 8, rp1
-	add	tp2 = 8, tp1
-	add	r6 = -2, n
-	;;
-.mmi;	cmp.eq	p10, p0 = 1, n
-	and	r9 = 1, n		C set cr0 for use in inner loop
-	shr.u	r6 = r6, 1		C inner loop count
-	;;
-.mmi;	cmp.eq	p8, p0 = 0, r9
-	sub	which = nents, which
-	shl	n = n, 3
-	;;
-
-L(outer):
-.mmi	cmp.eq	p6, p7 = which, nents	C are we at the selected table entry?
-	nop	0
-	mov	ar.lc = r6		C			I0
-	;;
-.mmb;
-  (p6)	mov	mask = -1
-  (p7)	mov	mask = 0
-  (p8)	br.dptk	L(top)			C branch to loop entry if n even
-	;;
-
-.mmi;	ld8	r16 = [tp1], 8
-	add	tp2 = 8, tp2
-	nop	0
-	;;
-.mmi;	ld8	r18 = [rp1]
-	and	r16 = r16, mask
-	nop	0
-	;;
-.mmi;	andcm	r18 = r18, mask
-	;;
-	or	r16 = r16, r18
-	nop	0
-	;;
-.mmb;	st8	[rp1] = r16, 8
-	add	rp2 = 8, rp2
-  (p10)	br.dpnt	L(end)
-
-	ALIGN(32)
-L(top):
-.mmi;	ld8	r16 = [tp1], 16
-	ld8	r17 = [tp2], 16
-	nop	0
-	;;
-.mmi;	ld8	r18 = [rp1]
-	and	r16 = r16, mask
-	nop	0
-.mmi;	ld8	r19 = [rp2]
-	and	r17 = r17, mask
-	nop	0
-	;;
-.mmi;	andcm	r18 = r18, mask
-	andcm	r19 = r19, mask
-	nop	0
-	;;
-.mmi;	or	r16 = r16, r18
-	or	r17 = r17, r19
-	nop	0
-	;;
-.mmb;	st8	[rp1] = r16, 16
-	st8	[rp2] = r17, 16
-	br.cloop.dptk	L(top)
-	;;
-L(end):
-.mmi;	sub	rp1 = rp1, n		C move rp back to beginning
-	sub	rp2 = rp2, n		C move rp back to beginning
-	cmp.ne	p9, p0 = 1, nents
-.mmb;	add	nents = -1, nents
-	nop	0
-  (p9)	br.dptk	L(outer)
-	;;
-
-.mib;	nop	0
-	nop	0
-	br.ret.sptk.many b0
-EPILOGUE()
diff --git a/gmp/mpn/ia64/sqr_diag_addlsh1.asm b/gmp/mpn/ia64/sqr_diag_addlsh1.asm
deleted file mode 100644
index f9288298b3..0000000000
--- a/gmp/mpn/ia64/sqr_diag_addlsh1.asm
+++ /dev/null
@@ -1,144 +0,0 @@
-dnl  IA-64 mpn_sqr_diag_addlsh1
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2010, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C           cycles/limb
-C Itanium:      ?
-C Itanium 2:    2	Unrolling could bring it to 1.5 + epsilon
-
-C Exact performance table.  The 2nd line is this code, the 3rd line is ctop-
-C less code.  In an assembly sqr_basecase, the ctop-full numbers will become a
-C few cycles better since we can mitigate the many I0 instructions.
-C
-C 1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20
-C -  20  22  24  26  28  30  32  34  36  38  40  42  44  46  48  50  52  54  56 Needs updating
-C -  13  16  17  18  20  21  23  25  26  30  31  31  33  34  36  38  39  42  43
-
-C We should keep in mind that this code takes linear time in a O(n^2) context
-C and that it will only be used under SQR_TOOM2_THRESHOLD, which might become
-C around 60.  Keeping overhead down for smallish operands (< 10) is more
-C important than optimal cycle counts.
-
-C TODO
-C  * Make sure we don't depend on uninitialised r-registers, f-registers, or
-C  * p-registers.
-C  * Optimise by doing first two loop iterations in function header.
-
-C INPUT PARAMETERS
-define(`rp_param', `r32')  define(`rp', `r14')		C size: 2n
-define(`tp_param', `r33')  define(`tp', `r15')		C size: 2n - 2
-define(`up_param', `r34')  define(`up', `r31')		C size: n
-define(`n',  `r35')
-
-
-ASM_START()
-PROLOGUE(mpn_sqr_diag_addlsh1)
-
-	.prologue
-	.save	ar.pfs, r2
-	.save	ar.lc, r3
-	.body
-
-.mmi;		alloc	r2 = ar.pfs, 4,24,0,24	C			M
-		nop	4711
-		mov	r3 = ar.lc		C			I0
-.mmi;		mov	tp = tp_param		C			M I
-		mov	up = up_param		C			M I
-		mov	rp = rp_param		C			M I
-	;;
-.mmi;		ld8	r36 = [tp], 8		C			M
-		add	r20 = -2, n		C			M I
-		mov	r9 = ar.ec		C			I0
-	;;
-.mmi;		ld8	r32 = [tp], 8		C			M
-		mov	r16 = 0			C			M I
-		mov	ar.ec = 7		C			I0
-	;;
-.mmi;		nop	4711
-		mov	r44 = 0			C			M I
-		mov	ar.lc = r20		C			I0
-	;;
-.mii;		mov	r33 = 0
-		mov	r10 = pr		C			I0
-		mov	pr.rot = 0x30000	C			I0
-	;;
-		br.cexit.spnt.few.clr	L(end)
-
-dnl *** MAIN LOOP START ***
-	ALIGN(32)
-L(top):
-.mfi;	(p18)	ldf8	f33 = [up], 8		C			M
-	(p20)	xma.l	f36 = f35, f35, f42	C			F
-	(p41)	cmpequc	p50, p0 = -1, r44	C			M I
-.mfi;		setfsig	f40 = r16		C			M23
-	(p20)	xma.hu	f38 = f35, f35, f42	C			F
-	(p23)	add	r50 = r41, r49		C			M I
-	;;
-.mmi;	(p16)	ld8	r36 = [tp], 8		C			M
-	(p23)	cmpltu	p40, p0 = r50, r41	C cyout hi		M I
-	(p19)	shrp	r45 = r38, r35, 63	C non-critical		I0
-.mmi;	(p21)	getfsig	r39 = f39		C hi			M2
-	(p24)	st8	[rp] = r51, 8		C hi			M23
-	(p41)	add	r44 = 1, r44		C			M I
-	;;
-.mmi;	(p16)	ld8	r32 = [tp], 8		C			M
-	(p50)	cmpeqor	p40, p0 = -1, r50	C cyout hi		M I
-	(p17)	shrp	r16 = r33, r37, 63	C critical		I0
-.mmi;	(p21)	getfsig	r42 = f37		C lo			M2
-	(p23)	st8	[rp] = r44, 8		C lo			M23
-	(p50)	add	r50 = 1, r50		C			M I
-	;;
-		br.ctop.sptk.few.clr L(top)	C			B
-dnl *** MAIN LOOP END ***
-	;;
-L(end):
-.mmi;		nop	4711
-	(p41)	add	r44 = 1, r44		C			M I
-		shr.u	r48 = r39, 63		C			I0
-	;;
-.mmi;		st8	[rp] = r51, 8		C			M23
-	(p41)	cmpequc	p6, p0 = 0, r44		C			M I
-		add	r50 = r41, r48		C			M I
-	;;
-.mmi;		st8	[rp] = r44, 8		C			M23
-	(p6)	add	r50 = 1, r50		C			M I
-		mov	ar.lc = r3		C			I0
-	;;
-.mii;		st8	[rp] = r50		C			M23
-		mov	ar.ec = r9		C			I0
-		mov	pr = r10		C			I0
-	;;
-.mib;		nop	4711
-		mov	ar.pfs = r2		C			I0
-		br.ret.sptk.many b0		C			B
-EPILOGUE()
diff --git a/gmp/mpn/ia64/sqr_diagonal.asm b/gmp/mpn/ia64/sqr_diagonal.asm
new file mode 100644
index 0000000000..50307d4bb5
--- /dev/null
+++ b/gmp/mpn/ia64/sqr_diagonal.asm
@@ -0,0 +1,79 @@
+dnl  IA-64 mpn_sqr_diagonal.  Helper for sqr_basecase.
+
+dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C         cycles/limb
+C Itanium:    4
+C Itanium 2:  2
+
+C TODO
+C  * Perhaps avoid ctop loop.  Unfortunately, a cloop loop running at 1 c/l
+C    would need prohibitive 8-way unrolling.
+C  * Instead of messing too much with this, write a nifty mpn_sqr_basecase.
+
+C INPUT PARAMETERS
+C rp = r32	destination; one low and one high limb are stored per source limb
+C sp = r33	source limbs
+C n = r34	number of source limbs; ar.lc is loaded with n-1
+C Each source limb is squared; the low limb goes to rp[2i], the high to rp[2i+1].
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+	.prologue
+	.save	ar.lc, r2		C unwind info: caller ar.lc is held in r2
+	.save	pr, r15			C unwind info: caller predicates are held in r15
+	.body
+ifdef(`HAVE_ABI_32',
+`	addp4	r32 = 0, r32		C 32-bit ABI: widen the rp pointer
+	addp4	r33 = 0, r33		C 32-bit ABI: widen the sp pointer
+	zxt4	r34 = r34		C 32-bit ABI: zero-extend n
+	;;
+')
+	ldf8		f32 = [r33], 8		C M	load sp[0] early
+	mov		r2 = ar.lc		C I0	save ar.lc, restored before return
+	mov		r14 = ar.ec		C I0	save ar.ec, restored before return
+	mov		r15 = pr		C I0	save predicates, restored before return
+	add		r19 = -1, r34		C M I	decr n
+	add		r18 = 8, r32		C M I	rp for high limb
+	;;
+	mov		ar.lc = r19		C I0	loop count = n-1
+	mov		ar.ec = 5		C I0	epilogue count
+	mov		pr.rot = 1<<16		C I0	p16 = 1, p17-p63 = 0
+	;;
+	br.cexit.spnt	.Ldone			C B	NOTE(review): with ar.ec = 5 this looks never taken and only rotates for the early load - confirm
+	;;
+	ALIGN(32)
+.Loop:		C stages: p16 load, p19 square, p21 store; a value in fN is read as fN+1 after each rotation
+  (p16)	ldf8		f32 = [r33], 8		C M	load next source limb
+  (p19)	xma.l		f36 = f35, f35, f0	C F	low limb of the square
+  (p21)	stf8		[r32] = f38, 16		C M2 M3	store low limb, advance two limbs
+  (p19)	xma.hu		f40 = f35, f35, f0	C F	high limb of the square
+  (p21)	stf8		[r18] = f42, 16		C M2 M3	store high limb, advance two limbs
+	br.ctop.dptk	.Loop			C B
+	;;
+.Ldone:
+	stf8		[r32] = f38		C M2 M3	store the last low limb
+	stf8		[r18] = f42		C M2 M3	store the last high limb
+	mov		ar.ec = r14		C I0	restore ar.ec
+	;;
+	mov		pr = r15, 0x1ffff	C I0	restore predicates under mask 0x1ffff
+	mov		ar.lc = r2		C I0	restore ar.lc
+	br.ret.sptk.many b0			C B
+EPILOGUE(mpn_sqr_diagonal)
+ASM_END()
diff --git a/gmp/mpn/ia64/submul_1.asm b/gmp/mpn/ia64/submul_1.asm
index cb2a5525b5..ae46e55d75 100644
--- a/gmp/mpn/ia64/submul_1.asm
+++ b/gmp/mpn/ia64/submul_1.asm
@@ -1,35 +1,22 @@
 dnl  IA-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
 dnl  result from a second limb vector.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2000-2004 Free Software Foundation, Inc.
+dnl  Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/lisp/gmpasm-mode.el b/gmp/mpn/lisp/gmpasm-mode.el
index 06b74bd6ce..31a9b48cbe 100644
--- a/gmp/mpn/lisp/gmpasm-mode.el
+++ b/gmp/mpn/lisp/gmpasm-mode.el
@@ -1,33 +1,22 @@
 ;;; gmpasm-mode.el -- GNU MP asm and m4 editing mode.
 
 
-;; Copyright 1999-2002 Free Software Foundation, Inc.
-
-;;   This file is part of the GNU MP Library.
-;;   
-;;   The GNU MP Library is free software; you can redistribute it and/or modify
-;;   it under the terms of either:
-;;   
-;;     * the GNU Lesser General Public License as published by the Free
-;;       Software Foundation; either version 3 of the License, or (at your
-;;       option) any later version.
-;;   
-;;   or
-;;   
-;;     * the GNU General Public License as published by the Free Software
-;;       Foundation; either version 2 of the License, or (at your option) any
-;;       later version.
-;;   
-;;   or both in parallel, as here.
-;;   
-;;   The GNU MP Library is distributed in the hope that it will be useful, but
-;;   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;;   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-;;   for more details.
-;;   
-;;   You should have received copies of the GNU General Public License and the
-;;   GNU Lesser General Public License along with the GNU MP Library.  If not,
-;;   see https://www.gnu.org/licenses/.
+;; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+;;
+;; This file is part of the GNU MP Library.
+;;
+;; The GNU MP Library is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU Lesser General Public License as published by
+;; the Free Software Foundation; either version 3 of the License, or (at your
+;; option) any later version.
+;;
+;; The GNU MP Library is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU Lesser General Public License
+;; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 ;;; Commentary:
diff --git a/gmp/mpn/m4-ccas b/gmp/mpn/m4-ccas
index 16d80c6f51..984e8e9b0e 100755
--- a/gmp/mpn/m4-ccas
+++ b/gmp/mpn/m4-ccas
@@ -4,31 +4,20 @@
 
 # Copyright 2001 Free Software Foundation, Inc.
 #
-#  This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or
-#
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 # Usage: m4-ccas --m4=M4 CC ... file.asm ...
diff --git a/gmp/mpn/m68k/README b/gmp/mpn/m68k/README
index 5261564df2..8838f8d41f 100644
--- a/gmp/mpn/m68k/README
+++ b/gmp/mpn/m68k/README
@@ -3,28 +3,17 @@ Copyright 2001, 2003, 2004 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/m68k/aors_n.asm b/gmp/mpn/m68k/aors_n.asm
index f7d379ec01..da9bb415b2 100644
--- a/gmp/mpn/m68k/aors_n.asm
+++ b/gmp/mpn/m68k/aors_n.asm
@@ -1,32 +1,22 @@
 dnl  mc68020 mpn_add_n, mpn_sub_n -- add or subtract limb vectors
 
-dnl  Copyright 1992, 1994, 1996, 1999-2003, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002, 2003, 2005 Free
+dnl  Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/m68k/gmp-mparam.h b/gmp/mpn/m68k/gmp-mparam.h
index 9ac7b41019..c623046535 100644
--- a/gmp/mpn/m68k/gmp-mparam.h
+++ b/gmp/mpn/m68k/gmp-mparam.h
@@ -1,47 +1,36 @@
 /* gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 2000-2004 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
 
 /* 25MHz 68040 */
 
 /* Generated by tuneup.c, 2004-02-05, gcc 3.2 */
 
-#define MUL_TOOM22_THRESHOLD             14
-#define MUL_TOOM33_THRESHOLD             90
+#define MUL_KARATSUBA_THRESHOLD          14
+#define MUL_TOOM3_THRESHOLD              90
 
 #define SQR_BASECASE_THRESHOLD            5
-#define SQR_TOOM2_THRESHOLD              28
+#define SQR_KARATSUBA_THRESHOLD          28
 #define SQR_TOOM3_THRESHOLD              98
 
 #define DIV_SB_PREINV_THRESHOLD       MP_SIZE_T_MAX  /* never */
diff --git a/gmp/mpn/m68k/lshift.asm b/gmp/mpn/m68k/lshift.asm
index f202abfe43..9d7a5ed0f6 100644
--- a/gmp/mpn/m68k/lshift.asm
+++ b/gmp/mpn/m68k/lshift.asm
@@ -1,32 +1,22 @@
 dnl  mc68020 mpn_lshift -- mpn left shift.
 
-dnl  Copyright 1996, 1999-2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1996, 1999, 2000, 2001, 2002, 2003 Free Software Foundation,
+dnl  Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -114,14 +104,14 @@ ifelse(scale_available_p,1,`
 	bcs	L(L1)
 	subql	#1, s_size
 
-L(Loop):
+L(Loop:)
 	movel	M(-,s_ptr), d2
 	movel	d2, d3
 	lsrl	d5, d3
 	orl	d3, d1
 	movel	d1, M(-,res_ptr)
 	lsll	cnt, d2
-L(L1):
+L(L1:)
 	movel	M(-,s_ptr), d1
 	movel	d1, d3
 	lsrl	d5, d3
@@ -133,7 +123,7 @@ L(L1):
 	subl	#0x10000, s_size
 	bcc	L(Loop)
 
-L(Lend):
+L(Lend:)
 	movel	d1, M(-,res_ptr)	C store least significant limb
 
 C Restore used registers from stack frame.
diff --git a/gmp/mpn/m68k/m68k-defs.m4 b/gmp/mpn/m68k/m68k-defs.m4
index 15289f676f..17a345998a 100644
--- a/gmp/mpn/m68k/m68k-defs.m4
+++ b/gmp/mpn/m68k/m68k-defs.m4
@@ -2,33 +2,22 @@ divert(-1)
 
 dnl  m4 macros for 68k assembler.
 
-dnl  Copyright 2001-2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  The default m4 `#' commenting interferes with the assembler syntax for
diff --git a/gmp/mpn/m68k/mc68020/aorsmul_1.asm b/gmp/mpn/m68k/mc68020/aorsmul_1.asm
index 4ee30ad9b3..17866602f8 100644
--- a/gmp/mpn/m68k/mc68020/aorsmul_1.asm
+++ b/gmp/mpn/m68k/mc68020/aorsmul_1.asm
@@ -1,32 +1,22 @@
 dnl  mc68020 mpn_addmul_1, mpn_submul_1 -- add or subtract mpn multiple.
 
-dnl  Copyright 1992, 1994, 1996, 1999-2002, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl  Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -55,7 +45,6 @@ define(s1_ptr,  `a1')
 define(s1_size, `d2')
 define(s2_limb, `d4')
 
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
 
 PROLOGUE(M4_function_1)
 
diff --git a/gmp/mpn/m68k/mc68020/mul_1.asm b/gmp/mpn/m68k/mc68020/mul_1.asm
index f5fbb3063b..d24f6d1d9a 100644
--- a/gmp/mpn/m68k/mc68020/mul_1.asm
+++ b/gmp/mpn/m68k/mc68020/mul_1.asm
@@ -1,32 +1,22 @@
 dnl  mc68020 mpn_mul_1 -- mpn by limb multiply
 
-dnl  Copyright 1992, 1994, 1996, 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl  Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/m68k/mc68020/udiv.asm b/gmp/mpn/m68k/mc68020/udiv.asm
index aadeab999a..ebc1ef26db 100644
--- a/gmp/mpn/m68k/mc68020/udiv.asm
+++ b/gmp/mpn/m68k/mc68020/udiv.asm
@@ -1,32 +1,21 @@
 dnl  mc68020 mpn_udiv_qrnnd -- 2x1 limb division
 
-dnl  Copyright 1999-2001 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/m68k/mc68020/umul.asm b/gmp/mpn/m68k/mc68020/umul.asm
index f19314e9bb..4d6e8a8eb8 100644
--- a/gmp/mpn/m68k/mc68020/umul.asm
+++ b/gmp/mpn/m68k/mc68020/umul.asm
@@ -1,32 +1,21 @@
 dnl  mc68020 mpn_umul_ppmm -- limb by limb multiplication
 
-dnl  Copyright 1999-2001 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/m68k/rshift.asm b/gmp/mpn/m68k/rshift.asm
index 21b5f89f48..1bf58ac310 100644
--- a/gmp/mpn/m68k/rshift.asm
+++ b/gmp/mpn/m68k/rshift.asm
@@ -1,32 +1,22 @@
 dnl  mc68020 mpn_rshift -- mpn right shift.
 
-dnl  Copyright 1996, 1999-2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1996, 1999, 2000, 2001, 2002, 2003 Free Software Foundation,
+dnl  Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -89,7 +79,7 @@ ifelse(scale_available_p,1,`
 	cmpl	s_ptr, a2
 	bls	L(Lspecial)		C jump if s_ptr >= res_ptr + s_size
 
-L(Lnormal):
+L(Lnormal:)
 	moveql	#32, d5
 	subl	cnt, d5
 	movel	M(s_ptr,+), d2
@@ -104,14 +94,14 @@ L(Lnormal):
 	bcs	L(L1)
 	subql	#1, s_size
 
-L(Loop):
+L(Loop:)
 	movel	M(s_ptr,+), d2
 	movel	d2, d3
 	lsll	d5, d3
 	orl	d3, d1
 	movel	d1, M(res_ptr,+)
 	lsrl	cnt, d2
-L(L1):
+L(L1:)
 	movel	M(s_ptr,+), d1
 	movel	d1, d3
 	lsll	d5, d3
@@ -123,7 +113,7 @@ L(L1):
 	subl	#0x10000, s_size
 	bcc	L(Loop)
 
-L(Lend):
+L(Lend:)
 	movel	d1, M(res_ptr)	C store most significant limb
 
 C Restore used registers from stack frame.
@@ -134,7 +124,7 @@ C We loop from most significant end of the arrays, which is only permissable
 C if the source and destination don't overlap, since the function is
 C documented to work for overlapping source and destination.
 
-L(Lspecial):
+L(Lspecial:)
 ifelse(scale_available_p,1,`
 	lea	M(s_ptr,s_size,l,4), s_ptr
 	lea	M(res_ptr,s_size,l,4), res_ptr
@@ -151,11 +141,11 @@ ifelse(scale_available_p,1,`
 	bcc	L(LL1)
 	subql	#1, s_size
 
-L(LLoop):
+L(LLoop:)
 	movel	M(-,s_ptr), d2
 	roxrl	#1, d2
 	movel	d2, M(-,res_ptr)
-L(LL1):
+L(LL1:)
 	movel	M(-,s_ptr), d2
 	roxrl	#1, d2
 	movel	d2, M(-,res_ptr)
@@ -167,7 +157,7 @@ L(LL1):
 	addl	d0, d0		C restore cy
 	bra	L(LLoop)
 
-L(LLend):
+L(LLend:)
 C Restore used registers from stack frame.
 	moveml	M(sp,+), d2-d6/a2
 	rts
diff --git a/gmp/mpn/m68k/t-m68k-defs.pl b/gmp/mpn/m68k/t-m68k-defs.pl
index 91c21fa1f8..226afc5449 100644
--- a/gmp/mpn/m68k/t-m68k-defs.pl
+++ b/gmp/mpn/m68k/t-m68k-defs.pl
@@ -2,31 +2,20 @@
 
 # Copyright 2001, 2003 Free Software Foundation, Inc.
 #
-#  This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or
-#
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 # Usage:  perl t-m68k-defs.pl [-t]
diff --git a/gmp/mpn/m88k/README b/gmp/mpn/m88k/README
index 1b51e83079..046e3bf19a 100644
--- a/gmp/mpn/m88k/README
+++ b/gmp/mpn/m88k/README
@@ -3,28 +3,17 @@ Copyright 2003 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/m88k/add_n.s b/gmp/mpn/m88k/add_n.s
index dbdb22f888..db2fffff3e 100644
--- a/gmp/mpn/m88k/add_n.s
+++ b/gmp/mpn/m88k/add_n.s
@@ -3,31 +3,20 @@
 
 ; Copyright 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
 
-;  This file is part of the GNU MP Library.
-;
-;  The GNU MP Library is free software; you can redistribute it and/or modify
-;  it under the terms of either:
-;
-;    * the GNU Lesser General Public License as published by the Free
-;      Software Foundation; either version 3 of the License, or (at your
-;      option) any later version.
-;
-;  or
-;
-;    * the GNU General Public License as published by the Free Software
-;      Foundation; either version 2 of the License, or (at your option) any
-;      later version.
-;
-;  or both in parallel, as here.
-;
-;  The GNU MP Library is distributed in the hope that it will be useful, but
-;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-;  for more details.
-;
-;  You should have received copies of the GNU General Public License and the
-;  GNU Lesser General Public License along with the GNU MP Library.  If not,
-;  see https://www.gnu.org/licenses/.
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 ; INPUT PARAMETERS
diff --git a/gmp/mpn/m88k/mc88110/add_n.S b/gmp/mpn/m88k/mc88110/add_n.S
index c3b12b3cd0..3b627c01a7 100644
--- a/gmp/mpn/m88k/mc88110/add_n.S
+++ b/gmp/mpn/m88k/mc88110/add_n.S
@@ -3,31 +3,20 @@
 
 ; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
 
-;  This file is part of the GNU MP Library.
-;
-;  The GNU MP Library is free software; you can redistribute it and/or modify
-;  it under the terms of either:
-;
-;    * the GNU Lesser General Public License as published by the Free
-;      Software Foundation; either version 3 of the License, or (at your
-;      option) any later version.
-;
-;  or
-;
-;    * the GNU General Public License as published by the Free Software
-;      Foundation; either version 2 of the License, or (at your option) any
-;      later version.
-;
-;  or both in parallel, as here.
-;
-;  The GNU MP Library is distributed in the hope that it will be useful, but
-;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-;  for more details.
-;
-;  You should have received copies of the GNU General Public License and the
-;  GNU Lesser General Public License along with the GNU MP Library.  If not,
-;  see https://www.gnu.org/licenses/.
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 ; INPUT PARAMETERS
diff --git a/gmp/mpn/m88k/mc88110/addmul_1.s b/gmp/mpn/m88k/mc88110/addmul_1.s
index 321221f23c..f41283395d 100644
--- a/gmp/mpn/m88k/mc88110/addmul_1.s
+++ b/gmp/mpn/m88k/mc88110/addmul_1.s
@@ -3,31 +3,20 @@
 
 ; Copyright 1996, 2000 Free Software Foundation, Inc.
 
-;  This file is part of the GNU MP Library.
-;
-;  The GNU MP Library is free software; you can redistribute it and/or modify
-;  it under the terms of either:
-;
-;    * the GNU Lesser General Public License as published by the Free
-;      Software Foundation; either version 3 of the License, or (at your
-;      option) any later version.
-;
-;  or
-;
-;    * the GNU General Public License as published by the Free Software
-;      Foundation; either version 2 of the License, or (at your option) any
-;      later version.
-;
-;  or both in parallel, as here.
-;
-;  The GNU MP Library is distributed in the hope that it will be useful, but
-;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-;  for more details.
-;
-;  You should have received copies of the GNU General Public License and the
-;  GNU Lesser General Public License along with the GNU MP Library.  If not,
-;  see https://www.gnu.org/licenses/.
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 ; INPUT PARAMETERS
diff --git a/gmp/mpn/m88k/mc88110/mul_1.s b/gmp/mpn/m88k/mc88110/mul_1.s
index 28fd14b77b..e8e88790a7 100644
--- a/gmp/mpn/m88k/mc88110/mul_1.s
+++ b/gmp/mpn/m88k/mc88110/mul_1.s
@@ -3,31 +3,20 @@
 
 ; Copyright 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
 
-;  This file is part of the GNU MP Library.
-;
-;  The GNU MP Library is free software; you can redistribute it and/or modify
-;  it under the terms of either:
-;
-;    * the GNU Lesser General Public License as published by the Free
-;      Software Foundation; either version 3 of the License, or (at your
-;      option) any later version.
-;
-;  or
-;
-;    * the GNU General Public License as published by the Free Software
-;      Foundation; either version 2 of the License, or (at your option) any
-;      later version.
-;
-;  or both in parallel, as here.
-;
-;  The GNU MP Library is distributed in the hope that it will be useful, but
-;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-;  for more details.
-;
-;  You should have received copies of the GNU General Public License and the
-;  GNU Lesser General Public License along with the GNU MP Library.  If not,
-;  see https://www.gnu.org/licenses/.
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 ; INPUT PARAMETERS
diff --git a/gmp/mpn/m88k/mc88110/sub_n.S b/gmp/mpn/m88k/mc88110/sub_n.S
index f0a8ecb3f0..a21a2cc0c0 100644
--- a/gmp/mpn/m88k/mc88110/sub_n.S
+++ b/gmp/mpn/m88k/mc88110/sub_n.S
@@ -3,31 +3,20 @@
 
 ; Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
 
-;  This file is part of the GNU MP Library.
-;
-;  The GNU MP Library is free software; you can redistribute it and/or modify
-;  it under the terms of either:
-;
-;    * the GNU Lesser General Public License as published by the Free
-;      Software Foundation; either version 3 of the License, or (at your
-;      option) any later version.
-;
-;  or
-;
-;    * the GNU General Public License as published by the Free Software
-;      Foundation; either version 2 of the License, or (at your option) any
-;      later version.
-;
-;  or both in parallel, as here.
-;
-;  The GNU MP Library is distributed in the hope that it will be useful, but
-;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-;  for more details.
-;
-;  You should have received copies of the GNU General Public License and the
-;  GNU Lesser General Public License along with the GNU MP Library.  If not,
-;  see https://www.gnu.org/licenses/.
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 ; INPUT PARAMETERS
diff --git a/gmp/mpn/m88k/mul_1.s b/gmp/mpn/m88k/mul_1.s
index c8abdc0b7f..5c385bd351 100644
--- a/gmp/mpn/m88k/mul_1.s
+++ b/gmp/mpn/m88k/mul_1.s
@@ -3,31 +3,20 @@
 
 ; Copyright 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
 
-;  This file is part of the GNU MP Library.
-;
-;  The GNU MP Library is free software; you can redistribute it and/or modify
-;  it under the terms of either:
-;
-;    * the GNU Lesser General Public License as published by the Free
-;      Software Foundation; either version 3 of the License, or (at your
-;      option) any later version.
-;
-;  or
-;
-;    * the GNU General Public License as published by the Free Software
-;      Foundation; either version 2 of the License, or (at your option) any
-;      later version.
-;
-;  or both in parallel, as here.
-;
-;  The GNU MP Library is distributed in the hope that it will be useful, but
-;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-;  for more details.
-;
-;  You should have received copies of the GNU General Public License and the
-;  GNU Lesser General Public License along with the GNU MP Library.  If not,
-;  see https://www.gnu.org/licenses/.
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 ; INPUT PARAMETERS
diff --git a/gmp/mpn/m88k/sub_n.s b/gmp/mpn/m88k/sub_n.s
index 2bd8f09ca3..9ea78ff3a1 100644
--- a/gmp/mpn/m88k/sub_n.s
+++ b/gmp/mpn/m88k/sub_n.s
@@ -3,31 +3,20 @@
 
 ; Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
 
-;  This file is part of the GNU MP Library.
-;
-;  The GNU MP Library is free software; you can redistribute it and/or modify
-;  it under the terms of either:
-;
-;    * the GNU Lesser General Public License as published by the Free
-;      Software Foundation; either version 3 of the License, or (at your
-;      option) any later version.
-;
-;  or
-;
-;    * the GNU General Public License as published by the Free Software
-;      Foundation; either version 2 of the License, or (at your option) any
-;      later version.
-;
-;  or both in parallel, as here.
-;
-;  The GNU MP Library is distributed in the hope that it will be useful, but
-;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-;  for more details.
-;
-;  You should have received copies of the GNU General Public License and the
-;  GNU Lesser General Public License along with the GNU MP Library.  If not,
-;  see https://www.gnu.org/licenses/.
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+; License for more details.
+
+; You should have received a copy of the GNU Lesser General Public License
+; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 ; INPUT PARAMETERS
diff --git a/gmp/mpn/minithres/gmp-mparam.h b/gmp/mpn/minithres/gmp-mparam.h
index 1b8f311516..cf5970b7d1 100644
--- a/gmp/mpn/minithres/gmp-mparam.h
+++ b/gmp/mpn/minithres/gmp-mparam.h
@@ -1,109 +1,64 @@
 /* Minimal values gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 2000, 2006, 2008-2010, 2012 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2006, 2008 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* The values in this file are not currently minimal.
    Trimming them further would be good.  */
 
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          2
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         3
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         4
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      1
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD            3
-
-#define MUL_TOOM22_THRESHOLD                 8
-#define MUL_TOOM33_THRESHOLD                20
-#define MUL_TOOM44_THRESHOLD                24
-#define MUL_TOOM6H_THRESHOLD                70 /* FIXME */
-#define MUL_TOOM8H_THRESHOLD                86
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      50 /* FIXME */
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      50 /* FIXME */
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      50 /* FIXME */
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      50 /* FIXME */
-
-#define SQR_BASECASE_THRESHOLD               0
-#define SQR_TOOM2_THRESHOLD                  8
-#define SQR_TOOM3_THRESHOLD                 20
-#define SQR_TOOM4_THRESHOLD                 24
-#define SQR_TOOM6H_THRESHOLD                70 /* FIXME */
-#define SQR_TOOM8H_THRESHOLD                86
-
-#define MULMOD_BNM1_THRESHOLD            10
-#define SQRMOD_BNM1_THRESHOLD            10
-
-#define MUL_FFT_TABLE  {64, 256, 1024, 4096, 8192, 65536, 0}
+#define MUL_KARATSUBA_THRESHOLD           8
+#define MUL_TOOM3_THRESHOLD              20
+#define MUL_TOOM44_THRESHOLD             24
+
+#define SQR_BASECASE_THRESHOLD            0
+#define SQR_KARATSUBA_THRESHOLD           8
+#define SQR_TOOM3_THRESHOLD              20
+#define SQR_TOOM4_THRESHOLD              24
+
+#define MULLOW_BASECASE_THRESHOLD         0
+#define MULLOW_DC_THRESHOLD               2
+#define MULLOW_MUL_N_THRESHOLD            4
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                  6
+#define POWM_THRESHOLD                    4
+
+#define HGCD_THRESHOLD                   10
+#define GCD_DC_THRESHOLD                 20
+#define GCDEXT_SCHOENHAGE_THRESHOLD      20
+#define JACOBI_BASE_METHOD                1
+
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define USE_PREINV_DIVREM_1               1  /* native */
+#define USE_PREINV_MOD_1                  1
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD              4
+#define GET_STR_PRECOMPUTE_THRESHOLD     10
+#define SET_STR_THRESHOLD                64
+#define SET_STR_PRECOMPUTE_THRESHOLD    100
+
+#define MUL_FFT_TABLE  {64-1, 256-1, 1024-1, 4096-1, 8192-1, 65536-1, 0}
 #define MUL_FFT_MODF_THRESHOLD  65
 #define MUL_FFT_THRESHOLD      200
 
-#define SQR_FFT_TABLE  {64, 256, 1024, 4096, 8192, 65536, 0}
+#define SQR_FFT_TABLE  {64-1, 256-1, 1024-1, 4096-1, 8192-1, 65536-1, 0}
 #define SQR_FFT_MODF_THRESHOLD  65
 #define SQR_FFT_THRESHOLD      200
-
-#define MULLO_BASECASE_THRESHOLD             0
-#define MULLO_DC_THRESHOLD                   2
-#define MULLO_MUL_N_THRESHOLD                4
-
-#define DC_DIV_QR_THRESHOLD                  6
-#define DC_DIVAPPR_Q_THRESHOLD               6
-#define DC_BDIV_QR_THRESHOLD                 4
-#define DC_BDIV_Q_THRESHOLD                  4
-
-#define INV_MULMOD_BNM1_THRESHOLD            2
-#define INV_NEWTON_THRESHOLD                 6
-#define INV_APPR_THRESHOLD                   4
-
-#define BINV_NEWTON_THRESHOLD                6
-#define REDC_1_TO_REDC_N_THRESHOLD           9
-
-#define MU_DIV_QR_THRESHOLD                  8
-#define MU_DIVAPPR_Q_THRESHOLD               8
-#define MUPI_DIV_QR_THRESHOLD                8
-#define MU_BDIV_QR_THRESHOLD                 8
-#define MU_BDIV_Q_THRESHOLD                  8
-
-#define MATRIX22_STRASSEN_THRESHOLD          2
-#define HGCD_THRESHOLD                      10
-#define GCD_DC_THRESHOLD                    20
-#define GCDEXT_SCHOENHAGE_THRESHOLD         20
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                 4
-#define GET_STR_PRECOMPUTE_THRESHOLD        10
-#define SET_STR_THRESHOLD                   64
-#define SET_STR_PRECOMPUTE_THRESHOLD       100
-
-#define FAC_ODD_THRESHOLD                    0  /* always */
-#define FAC_DSC_THRESHOLD                   70
diff --git a/gmp/mpn/mips32/add_n.asm b/gmp/mpn/mips32/add_n.asm
index e7d4c48f48..f7dc7efab9 100644
--- a/gmp/mpn/mips32/add_n.asm
+++ b/gmp/mpn/mips32/add_n.asm
@@ -4,30 +4,19 @@ dnl  sum in a third limb vector.
 dnl  Copyright 1995, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/mips32/addmul_1.asm b/gmp/mpn/mips32/addmul_1.asm
index 9aa9e163ce..f43e3c638b 100644
--- a/gmp/mpn/mips32/addmul_1.asm
+++ b/gmp/mpn/mips32/addmul_1.asm
@@ -4,30 +4,19 @@ dnl  the product to a second limb vector.
 dnl  Copyright 1992, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/mips32/gmp-mparam.h b/gmp/mpn/mips32/gmp-mparam.h
index 986135df96..d86fd3f019 100644
--- a/gmp/mpn/mips32/gmp-mparam.h
+++ b/gmp/mpn/mips32/gmp-mparam.h
@@ -1,45 +1,35 @@
 /* gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
 
 /* Generated by tuneup.c, 2002-02-20, gcc 2.95 (R3000) */
 
-#define MUL_TOOM22_THRESHOLD             20
-#define MUL_TOOM33_THRESHOLD             50
+#define MUL_KARATSUBA_THRESHOLD          20
+#define MUL_TOOM3_THRESHOLD              50
 
 #define SQR_BASECASE_THRESHOLD            7
-#define SQR_TOOM2_THRESHOLD              57
+#define SQR_KARATSUBA_THRESHOLD          57
 #define SQR_TOOM3_THRESHOLD              78
 
 #define DIV_SB_PREINV_THRESHOLD           0  /* always */
diff --git a/gmp/mpn/mips32/lshift.asm b/gmp/mpn/mips32/lshift.asm
index 6a58bb4579..8a27951775 100644
--- a/gmp/mpn/mips32/lshift.asm
+++ b/gmp/mpn/mips32/lshift.asm
@@ -3,30 +3,19 @@ dnl  MIPS32 mpn_lshift -- Left shift.
 dnl  Copyright 1995, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/mips32/mips-defs.m4 b/gmp/mpn/mips32/mips-defs.m4
index 5fa89eca35..a30e8df090 100644
--- a/gmp/mpn/mips32/mips-defs.m4
+++ b/gmp/mpn/mips32/mips-defs.m4
@@ -3,33 +3,22 @@ divert(-1)
 dnl  m4 macros for MIPS assembly code (both 32-bit and 64-bit).
 
 
-dnl  Copyright 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
 dnl
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
 dnl
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  Usage: ASM_START()
diff --git a/gmp/mpn/mips32/mips.m4 b/gmp/mpn/mips32/mips.m4
index 8b49e575e4..37c6ca8f72 100644
--- a/gmp/mpn/mips32/mips.m4
+++ b/gmp/mpn/mips32/mips.m4
@@ -3,33 +3,22 @@ divert(-1)
 dnl  m4 macros for MIPS assembly code.
 
 
-dnl  Copyright 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
 dnl
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
 dnl
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  Usage: ASM_START()
diff --git a/gmp/mpn/mips32/mul_1.asm b/gmp/mpn/mips32/mul_1.asm
index 4337bc2bd4..1e1a275f66 100644
--- a/gmp/mpn/mips32/mul_1.asm
+++ b/gmp/mpn/mips32/mul_1.asm
@@ -4,30 +4,19 @@ dnl  the product in a second limb vector.
 dnl  Copyright 1992, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/mips32/rshift.asm b/gmp/mpn/mips32/rshift.asm
index 4b54510408..23d1e780e6 100644
--- a/gmp/mpn/mips32/rshift.asm
+++ b/gmp/mpn/mips32/rshift.asm
@@ -3,30 +3,19 @@ dnl  MIPS32 mpn_rshift -- Right shift.
 dnl  Copyright 1995, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/mips32/sub_n.asm b/gmp/mpn/mips32/sub_n.asm
index a962ce1b79..ed41271676 100644
--- a/gmp/mpn/mips32/sub_n.asm
+++ b/gmp/mpn/mips32/sub_n.asm
@@ -4,30 +4,19 @@ dnl  store difference in a third limb vector.
 dnl  Copyright 1995, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/mips32/submul_1.asm b/gmp/mpn/mips32/submul_1.asm
index 335722b4e5..4e43654e0a 100644
--- a/gmp/mpn/mips32/submul_1.asm
+++ b/gmp/mpn/mips32/submul_1.asm
@@ -4,30 +4,19 @@ dnl  subtract the product from a second limb vector.
 dnl  Copyright 1992, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/mips32/umul.asm b/gmp/mpn/mips32/umul.asm
index 1ced0eb883..04ecbe5095 100644
--- a/gmp/mpn/mips32/umul.asm
+++ b/gmp/mpn/mips32/umul.asm
@@ -3,30 +3,19 @@ dnl  MIPS32 umul_ppmm -- longlong.h support.
 dnl  Copyright 1999, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/mips64/README b/gmp/mpn/mips64/README
index 7ddd0e572c..65a1af1668 100644
--- a/gmp/mpn/mips64/README
+++ b/gmp/mpn/mips64/README
@@ -3,28 +3,17 @@ Copyright 1996 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
@@ -41,7 +30,7 @@ RELEVANT OPTIMIZATION ISSUES
 
    On the R4600, branches takes a single cycle
 
-   On the R8000, branches often take no noticeable cycles, as they are
+   On the R8000, branches often take no noticeable cycles, as they are
    executed in a separate function unit..
 
 2. The R4000 and R4400 have a load latency of 4 cycles.
diff --git a/gmp/mpn/mips64/add_n.asm b/gmp/mpn/mips64/add_n.asm
index 6856407efd..1a3978c3f9 100644
--- a/gmp/mpn/mips64/add_n.asm
+++ b/gmp/mpn/mips64/add_n.asm
@@ -1,33 +1,22 @@
 dnl  MIPS64 mpn_add_n -- Add two limb vectors of the same length > 0 and store
 dnl  sum in a third limb vector.
 
-dnl  Copyright 1995, 2000-2002, 2011 Free Software Foundation, Inc.
+dnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -38,17 +27,6 @@ C s2_ptr	$6
 C size		$7
 
 ASM_START()
-PROLOGUE(mpn_add_nc)
-	ld	$10,0($5)
-	ld	$11,0($6)
-
-	daddiu	$7,$7,-1
-	and	$9,$7,4-1	C number of limbs in first loop
-	beq	$9,$0,.L0	C if multiple of 4 limbs, skip first loop
-	 move	$2,$8
-	b	.Loop0
-	 dsubu	$7,$7,$9
-EPILOGUE()
 PROLOGUE(mpn_add_n)
 	ld	$10,0($5)
 	ld	$11,0($6)
@@ -131,4 +109,4 @@ PROLOGUE(mpn_add_n)
 	sd	$11,0($4)
 	j	$31
 	or	$2,$2,$8
-EPILOGUE()
+EPILOGUE(mpn_add_n)
diff --git a/gmp/mpn/mips64/addmul_1.asm b/gmp/mpn/mips64/addmul_1.asm
index 8ff0976e25..a116298a76 100644
--- a/gmp/mpn/mips64/addmul_1.asm
+++ b/gmp/mpn/mips64/addmul_1.asm
@@ -1,33 +1,23 @@
 dnl  MIPS64 mpn_addmul_1 -- Multiply a limb vector with a single limb and add
 dnl  the product to a second limb vector.
 
-dnl  Copyright 1992, 1994, 1995, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/mips64/gmp-mparam.h b/gmp/mpn/mips64/gmp-mparam.h
index b7fcf24a41..d189e895c5 100644
--- a/gmp/mpn/mips64/gmp-mparam.h
+++ b/gmp/mpn/mips64/gmp-mparam.h
@@ -1,45 +1,35 @@
 /* gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2004 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
 
 
 /* Generated by tuneup.c, 2004-02-10, gcc 3.2 & MIPSpro C 7.2.1 (R1x000) */
 
-#define MUL_TOOM22_THRESHOLD             16
-#define MUL_TOOM33_THRESHOLD             89
+#define MUL_KARATSUBA_THRESHOLD          16
+#define MUL_TOOM3_THRESHOLD              89
 
 #define SQR_BASECASE_THRESHOLD            6
-#define SQR_TOOM2_THRESHOLD              32
+#define SQR_KARATSUBA_THRESHOLD          32
 #define SQR_TOOM3_THRESHOLD              98
 
 #define DIV_SB_PREINV_THRESHOLD           0  /* always */
diff --git a/gmp/mpn/mips64/lshift.asm b/gmp/mpn/mips64/lshift.asm
index 3440eaf80b..16da93c5ab 100644
--- a/gmp/mpn/mips64/lshift.asm
+++ b/gmp/mpn/mips64/lshift.asm
@@ -1,32 +1,21 @@
 dnl  MIPS64 mpn_lshift -- Left shift.
 
-dnl  Copyright 1995, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/mips64/mul_1.asm b/gmp/mpn/mips64/mul_1.asm
index 77acf0ac25..d16e08d594 100644
--- a/gmp/mpn/mips64/mul_1.asm
+++ b/gmp/mpn/mips64/mul_1.asm
@@ -1,33 +1,23 @@
 dnl  MIPS64 mpn_mul_1 -- Multiply a limb vector with a single limb and store
 dnl  the product in a second limb vector.
 
-dnl  Copyright 1992, 1994, 1995, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/mips64/rshift.asm b/gmp/mpn/mips64/rshift.asm
index 9253cb51d8..5294875621 100644
--- a/gmp/mpn/mips64/rshift.asm
+++ b/gmp/mpn/mips64/rshift.asm
@@ -1,32 +1,21 @@
 dnl  MIPS64 mpn_rshift -- Right shift.
 
-dnl  Copyright 1995, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/mips64/sqr_diagonal.asm b/gmp/mpn/mips64/sqr_diagonal.asm
index dcb87dc21f..511a7552c9 100644
--- a/gmp/mpn/mips64/sqr_diagonal.asm
+++ b/gmp/mpn/mips64/sqr_diagonal.asm
@@ -3,30 +3,19 @@ dnl  MIPS64 mpn_sqr_diagonal.
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  INPUT PARAMETERS
diff --git a/gmp/mpn/mips64/sub_n.asm b/gmp/mpn/mips64/sub_n.asm
index 6a698976eb..b28c1ced9c 100644
--- a/gmp/mpn/mips64/sub_n.asm
+++ b/gmp/mpn/mips64/sub_n.asm
@@ -1,33 +1,22 @@
 dnl  MIPS64 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
 dnl  store difference in a third limb vector.
 
-dnl  Copyright 1995, 2000-2002, 2011 Free Software Foundation, Inc.
+dnl  Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -38,17 +27,6 @@ C s2_ptr	$6
 C size		$7
 
 ASM_START()
-PROLOGUE(mpn_sub_nc)
-	ld	$10,0($5)
-	ld	$11,0($6)
-
-	daddiu	$7,$7,-1
-	and	$9,$7,4-1	C number of limbs in first loop
-	beq	$9,$0,.L0	C if multiple of 4 limbs, skip first loop
-	 move	$2,$8
-	b	.Loop0
-	 dsubu	$7,$7,$9
-EPILOGUE()
 PROLOGUE(mpn_sub_n)
 	ld	$10,0($5)
 	ld	$11,0($6)
@@ -131,4 +109,4 @@ PROLOGUE(mpn_sub_n)
 	sd	$11,0($4)
 	j	$31
 	or	$2,$2,$8
-EPILOGUE()
+EPILOGUE(mpn_sub_n)
diff --git a/gmp/mpn/mips64/submul_1.asm b/gmp/mpn/mips64/submul_1.asm
index 089589cd73..11e17370c0 100644
--- a/gmp/mpn/mips64/submul_1.asm
+++ b/gmp/mpn/mips64/submul_1.asm
@@ -1,33 +1,23 @@
 dnl  MIPS64 mpn_submul_1 -- Multiply a limb vector with a single limb and
 dnl  subtract the product from a second limb vector.
 
-dnl  Copyright 1992, 1994, 1995, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/mips64/umul.asm b/gmp/mpn/mips64/umul.asm
index b9aac57591..1792d97fdb 100644
--- a/gmp/mpn/mips64/umul.asm
+++ b/gmp/mpn/mips64/umul.asm
@@ -3,30 +3,19 @@ dnl  MIPS64 umul_ppmm -- longlong.h support.
 dnl  Copyright 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/ns32k/add_n.s b/gmp/mpn/ns32k/add_n.s
new file mode 100644
index 0000000000..962cc1657b
--- /dev/null
+++ b/gmp/mpn/ns32k/add_n.s
@@ -0,0 +1,44 @@
+# ns32000 __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+	.align 1
+.globl ___gmpn_add_n			# exported entry point
+___gmpn_add_n:				# operands are read from the stack: RES 16(sp), S1 20(sp), S2 24(sp), limb count 28(sp)
+	save	[r3,r4,r5]		# preserve r3-r5, which serve as index and pointers below
+	negd	28(sp),r3		# r3 = -(limb count); loop index that counts up towards 0
+	movd	r3,r0			# r0 = copy of the negated count
+	lshd	2,r0			# r0 = -(count * 4): negated vector length in bytes (4-byte limbs)
+	movd	24(sp),r4		# r4 = S2 pointer
+	subd	r0,r4			# r4 -> to end of S2 (subtracting the negative length adds it)
+	movd	20(sp),r5		# r5 = S1 pointer
+	subd	r0,r5			# r5 -> to end of S1
+	movd	16(sp),r2		# r2 = RES pointer
+	subd	r0,r2			# r2 -> to end of RES
+	subd	r0,r0			# cy = 0
+
+Loop:	movd	r5[r3:d],r0		# r0 = S1 limb at (negative) index r3, counted back from the end
+	addcd	r4[r3:d],r0		# r0 += matching S2 limb + carry left by the previous limb
+	movd	r0,r2[r3:d]		# store the sum limb into RES
+	acbd	1,r3,Loop		# r3 += 1 and loop until it reaches 0; relies on the carry flag surviving into the next addcd
+
+	scsd	r0			# r0 = cy (carry out of the most significant limb).
+	restore	[r5,r4,r3]		# pop the registers saved on entry
+	ret	0			# return to caller; the carry stays in r0
diff --git a/gmp/mpn/ns32k/addmul_1.s b/gmp/mpn/ns32k/addmul_1.s
new file mode 100644
index 0000000000..1dd8791be3
--- /dev/null
+++ b/gmp/mpn/ns32k/addmul_1.s
@@ -0,0 +1,46 @@
+# ns32000 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+	.align 1
+.globl ___gmpn_addmul_1			# exported entry point
+___gmpn_addmul_1:			# operands are read from the stack: RES 16(sp), S1 20(sp), limb count 24(sp), s2_limb 28(sp)
+	save	[r3,r4,r5,r6,r7]	# preserve r3-r7; NOTE(review): five registers are pushed here, but the operand offsets below (16..28(sp)) are the ones ns32k/add_n.s uses after pushing only three - confirm they still address the intended arguments
+	negd	24(sp),r4		# r4 = -(limb count); loop index that counts up towards 0
+	movd	r4,r0			# r0 = copy of the negated count
+	lshd	2,r0			# r0 = -(count * 4): negated vector length in bytes (4-byte limbs)
+	movd	20(sp),r5		# r5 = S1 pointer
+	subd	r0,r5			# r5 -> to end of S1 (subtracting the negative length adds it)
+	movd	16(sp),r6		# r6 = RES pointer
+	subd	r0,r6			# r6 -> to end of RES
+	subd	r0,r0			# r0 = 0, cy = 0
+	movd	28(sp),r7		# r7 = s2_limb
+
+Loop:	movd	r5[r4:d],r2		# r2 = S1 limb at (negative) index r4, counted back from the end
+	meid	r7,r2			# r2 = low_prod, r3 = high_prod
+	addcd	r0,r2			# r2 = low_prod + cy_limb (+ carry flag left by the previous iteration)
+	movd	r3,r0			# r0 = new cy_limb
+	addcd	0,r0			# cy_limb += carry out of the low_prod addition
+	addd	r2,r6[r4:d]		# RES limb += r2; the carry this produces is consumed by the next addcd
+	acbd	1,r4,Loop		# r4 += 1 and loop until it reaches 0; relies on the carry flag surviving the branch
+
+	addcd	0,r0			# fold the carry from the last RES addition into cy_limb
+	restore	[r7,r6,r5,r4,r3]	# pop the registers saved on entry
+	ret	0			# return to caller; the final cy_limb stays in r0
diff --git a/gmp/mpn/ns32k/mul_1.s b/gmp/mpn/ns32k/mul_1.s
new file mode 100644
index 0000000000..abc911e0c9
--- /dev/null
+++ b/gmp/mpn/ns32k/mul_1.s
@@ -0,0 +1,45 @@
+# ns32000 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+	.align 1
+.globl ___gmpn_mul_1			# exported entry point
+___gmpn_mul_1:				# operands are read from the stack: RES 16(sp), S1 20(sp), limb count 24(sp), s2_limb 28(sp)
+	save	[r3,r4,r5,r6,r7]	# preserve r3-r7; NOTE(review): five registers are pushed here, but the operand offsets below (16..28(sp)) are the ones ns32k/add_n.s uses after pushing only three - confirm they still address the intended arguments
+	negd	24(sp),r4		# r4 = -(limb count); loop index that counts up towards 0
+	movd	r4,r0			# r0 = copy of the negated count
+	lshd	2,r0			# r0 = -(count * 4): negated vector length in bytes (4-byte limbs)
+	movd	20(sp),r5		# r5 = S1 pointer
+	subd	r0,r5			# r5 -> to end of S1 (subtracting the negative length adds it)
+	movd	16(sp),r6		# r6 = RES pointer
+	subd	r0,r6			# r6 -> to end of RES
+	subd	r0,r0			# r0 = 0, cy = 0
+	movd	28(sp),r7		# r7 = s2_limb
+
+Loop:	movd	r5[r4:d],r2		# r2 = S1 limb at (negative) index r4, counted back from the end
+	meid	r7,r2			# r2 = low_prod, r3 = high_prod
+	addcd	r0,r2			# r2 = low_prod + cy_limb (+ carry flag left by the previous iteration)
+	movd	r3,r0			# r0 = new cy_limb
+	movd	r2,r6[r4:d]		# store the result limb into RES
+	acbd	1,r4,Loop		# r4 += 1 and loop until it reaches 0; relies on the carry flag surviving into the next addcd
+
+	addcd	0,r0			# fold the last pending carry flag into cy_limb
+	restore	[r7,r6,r5,r4,r3]	# pop the registers saved on entry
+	ret	0			# return to caller; the final cy_limb stays in r0
diff --git a/gmp/mpn/ns32k/sub_n.s b/gmp/mpn/ns32k/sub_n.s
new file mode 100644
index 0000000000..5252ddf5c6
--- /dev/null
+++ b/gmp/mpn/ns32k/sub_n.s
@@ -0,0 +1,44 @@
+# ns32000 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+	.align 1
+.globl ___gmpn_sub_n			# exported entry point
+___gmpn_sub_n:				# operands are read from the stack: RES 16(sp), S1 20(sp), S2 24(sp), limb count 28(sp)
+	save	[r3,r4,r5]		# preserve r3-r5, which serve as index and pointers below
+	negd	28(sp),r3		# r3 = -(limb count); loop index that counts up towards 0
+	movd	r3,r0			# r0 = copy of the negated count
+	lshd	2,r0			# r0 = -(count * 4): negated vector length in bytes (4-byte limbs)
+	movd	24(sp),r4		# r4 = S2 pointer
+	subd	r0,r4			# r4 -> to end of S2 (subtracting the negative length adds it)
+	movd	20(sp),r5		# r5 = S1 pointer
+	subd	r0,r5			# r5 -> to end of S1
+	movd	16(sp),r2		# r2 = RES pointer
+	subd	r0,r2			# r2 -> to end of RES
+	subd	r0,r0			# cy = 0
+
+Loop:	movd	r5[r3:d],r0		# r0 = S1 limb at (negative) index r3, counted back from the end
+	subcd	r4[r3:d],r0		# r0 -= matching S2 limb + borrow left by the previous limb
+	movd	r0,r2[r3:d]		# store the difference limb into RES
+	acbd	1,r3,Loop		# r3 += 1 and loop until it reaches 0; relies on the carry flag surviving into the next subcd
+
+	scsd	r0			# r0 = cy (borrow out of the most significant limb).
+	restore	[r5,r4,r3]		# pop the registers saved on entry
+	ret	0			# return to caller; the borrow stays in r0
diff --git a/gmp/mpn/ns32k/submul_1.s b/gmp/mpn/ns32k/submul_1.s
new file mode 100644
index 0000000000..7a0ba9a73c
--- /dev/null
+++ b/gmp/mpn/ns32k/submul_1.s
@@ -0,0 +1,46 @@
+# ns32000 __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright 1992, 1994, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+	.align 1
+.globl ___gmpn_submul_1			# exported entry point
+___gmpn_submul_1:			# operands are read from the stack: RES 16(sp), S1 20(sp), limb count 24(sp), s2_limb 28(sp)
+	save	[r3,r4,r5,r6,r7]	# preserve r3-r7; NOTE(review): five registers are pushed here, but the operand offsets below (16..28(sp)) are the ones ns32k/sub_n.s uses after pushing only three - confirm they still address the intended arguments
+	negd	24(sp),r4		# r4 = -(limb count); loop index that counts up towards 0
+	movd	r4,r0			# r0 = copy of the negated count
+	lshd	2,r0			# r0 = -(count * 4): negated vector length in bytes (4-byte limbs)
+	movd	20(sp),r5		# r5 = S1 pointer
+	subd	r0,r5			# r5 -> to end of S1 (subtracting the negative length adds it)
+	movd	16(sp),r6		# r6 = RES pointer
+	subd	r0,r6			# r6 -> to end of RES
+	subd	r0,r0			# r0 = 0, cy = 0
+	movd	28(sp),r7		# r7 = s2_limb
+
+Loop:	movd	r5[r4:d],r2		# r2 = S1 limb at (negative) index r4, counted back from the end
+	meid	r7,r2			# r2 = low_prod, r3 = high_prod
+	addcd	r0,r2			# r2 = low_prod + cy_limb (+ borrow flag left by the previous iteration)
+	movd	r3,r0			# r0 = new cy_limb
+	addcd	0,r0			# cy_limb += carry out of the low_prod addition
+	subd	r2,r6[r4:d]		# RES limb -= r2; the borrow this produces is consumed by the next addcd
+	acbd	1,r4,Loop		# r4 += 1 and loop until it reaches 0; relies on the carry flag surviving the branch
+
+	addcd	0,r0			# fold the borrow from the last RES subtraction into cy_limb
+	restore	[r7,r6,r5,r4,r3]	# pop the registers saved on entry
+	ret	0			# return to caller; the final cy_limb stays in r0
diff --git a/gmp/mpn/pa32/README b/gmp/mpn/pa32/README
index 4323390c9b..72158d30ea 100644
--- a/gmp/mpn/pa32/README
+++ b/gmp/mpn/pa32/README
@@ -3,28 +3,17 @@ Copyright 1996, 1999, 2001, 2002, 2004 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/pa32/add_n.asm b/gmp/mpn/pa32/add_n.asm
index 46f39377ea..1bb27ae883 100644
--- a/gmp/mpn/pa32/add_n.asm
+++ b/gmp/mpn/pa32/add_n.asm
@@ -1,33 +1,22 @@
 dnl  HP-PA mpn_add_n -- Add two limb vectors of the same length > 0 and store
 dnl  sum in a third limb vector.
 
-dnl  Copyright 1992, 1994, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/gmp-mparam.h b/gmp/mpn/pa32/gmp-mparam.h
index 377efcb156..005539c0d7 100644
--- a/gmp/mpn/pa32/gmp-mparam.h
+++ b/gmp/mpn/pa32/gmp-mparam.h
@@ -1,61 +1,53 @@
 /* HP-PA 1.0 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2002, 2012 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
 /* These values are for the PA7100 using GCC.  */
 /* Generated by tuneup.c, 2000-10-27. */
 
-#ifndef MUL_TOOM22_THRESHOLD
-#define MUL_TOOM22_THRESHOLD      30
+#ifndef MUL_KARATSUBA_THRESHOLD
+#define MUL_KARATSUBA_THRESHOLD   30
 #endif
-#ifndef MUL_TOOM33_THRESHOLD
-#define MUL_TOOM33_THRESHOLD     141
+#ifndef MUL_TOOM3_THRESHOLD
+#define MUL_TOOM3_THRESHOLD      141
 #endif
 
-#ifndef SQR_TOOM2_THRESHOLD
-#define SQR_TOOM2_THRESHOLD       59
+#ifndef SQR_KARATSUBA_THRESHOLD
+#define SQR_KARATSUBA_THRESHOLD   59
 #endif
 #ifndef SQR_TOOM3_THRESHOLD
 #define SQR_TOOM3_THRESHOLD      177
 #endif
 
 #ifndef DIV_DC_THRESHOLD
-#define DIV_DC_THRESHOLD         108
+#define DIV_DC_THRESHOLD             108
 #endif
 
 #ifndef POWM_THRESHOLD
 #define POWM_THRESHOLD            18
 #endif
 
-#ifndef GCDEXT_THRESHOLD
+#ifndef GCD_ACCEL_THRESHOLD
+#define GCD_ACCEL_THRESHOLD       46
+#endif
 #define GCDEXT_THRESHOLD          33
 #endif
diff --git a/gmp/mpn/pa32/hppa1_1/addmul_1.asm b/gmp/mpn/pa32/hppa1_1/addmul_1.asm
index ec2f2198e8..c50e4e10f7 100644
--- a/gmp/mpn/pa32/hppa1_1/addmul_1.asm
+++ b/gmp/mpn/pa32/hppa1_1/addmul_1.asm
@@ -1,33 +1,23 @@
 dnl  HP-PA 1.1 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
 dnl  result to a second limb vector.
 
-dnl  Copyright 1992-1994, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa1_1/gmp-mparam.h b/gmp/mpn/pa32/hppa1_1/gmp-mparam.h
index 1261b24c83..5ced745486 100644
--- a/gmp/mpn/pa32/hppa1_1/gmp-mparam.h
+++ b/gmp/mpn/pa32/hppa1_1/gmp-mparam.h
@@ -1,43 +1,33 @@
 /* HP-PA 1.1 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2002, 2004 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
 /* Generated by tuneup.c, 2004-02-07, gcc 2.8 (pa7100/100MHz) */
 
-#define MUL_TOOM22_THRESHOLD             30
-#define MUL_TOOM33_THRESHOLD             89
+#define MUL_KARATSUBA_THRESHOLD          30
+#define MUL_TOOM3_THRESHOLD              89
 
 #define SQR_BASECASE_THRESHOLD            4
-#define SQR_TOOM2_THRESHOLD              55
+#define SQR_KARATSUBA_THRESHOLD          55
 #define SQR_TOOM3_THRESHOLD             101
 
 #define DIV_SB_PREINV_THRESHOLD           0  /* always */
diff --git a/gmp/mpn/pa32/hppa1_1/mul_1.asm b/gmp/mpn/pa32/hppa1_1/mul_1.asm
index 6e60c2f61f..9e17c2d023 100644
--- a/gmp/mpn/pa32/hppa1_1/mul_1.asm
+++ b/gmp/mpn/pa32/hppa1_1/mul_1.asm
@@ -1,33 +1,23 @@
 dnl  HP-PA 1.1 mpn_mul_1 -- Multiply a limb vector with a limb and store the
 dnl  result in a second limb vector.
 
-dnl  Copyright 1992-1994, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa1_1/pa7100/add_n.asm b/gmp/mpn/pa32/hppa1_1/pa7100/add_n.asm
index b96d403826..326a133984 100644
--- a/gmp/mpn/pa32/hppa1_1/pa7100/add_n.asm
+++ b/gmp/mpn/pa32/hppa1_1/pa7100/add_n.asm
@@ -2,33 +2,23 @@ dnl  HP-PA mpn_add_n -- Add two limb vectors of the same length > 0 and store
 dnl  sum in a third limb vector.  Optimized for the PA7100, where is runs at
 dnl  4.25 cycles/limb.
 
-dnl  Copyright 1992, 1994, 2000-2003 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 2000, 2001, 2002, 2003 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa1_1/pa7100/addmul_1.asm b/gmp/mpn/pa32/hppa1_1/pa7100/addmul_1.asm
index fb16100d83..57f4d76745 100644
--- a/gmp/mpn/pa32/hppa1_1/pa7100/addmul_1.asm
+++ b/gmp/mpn/pa32/hppa1_1/pa7100/addmul_1.asm
@@ -1,33 +1,22 @@
 dnl  HP-PA 7100/7200 mpn_addmul_1 -- Multiply a limb vector with a limb and
 dnl  add the result to a second limb vector.
 
-dnl  Copyright 1995, 2000-2003 Free Software Foundation, Inc.
+dnl  Copyright 1995, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa1_1/pa7100/lshift.asm b/gmp/mpn/pa32/hppa1_1/pa7100/lshift.asm
index d65db2a76b..f6b4068cfa 100644
--- a/gmp/mpn/pa32/hppa1_1/pa7100/lshift.asm
+++ b/gmp/mpn/pa32/hppa1_1/pa7100/lshift.asm
@@ -1,33 +1,23 @@
 dnl  HP-PA  mpn_lshift -- Shift a number left.
 dnl  Optimized for the PA7100, where is runs at 3.25 cycles/limb.
 
-dnl  Copyright 1992, 1994, 2000-2003 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 2000, 2001, 2002, 2003 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa1_1/pa7100/rshift.asm b/gmp/mpn/pa32/hppa1_1/pa7100/rshift.asm
index f7896fc949..ed7313b5fc 100644
--- a/gmp/mpn/pa32/hppa1_1/pa7100/rshift.asm
+++ b/gmp/mpn/pa32/hppa1_1/pa7100/rshift.asm
@@ -1,33 +1,23 @@
 dnl  HP-PA  mpn_rshift -- Shift a number right.
 dnl  Optimized for the PA7100, where is runs at 3.25 cycles/limb.
 
-dnl  Copyright 1992, 1994, 2000-2003 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 2000, 2001, 2002, 2003 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa1_1/pa7100/sub_n.asm b/gmp/mpn/pa32/hppa1_1/pa7100/sub_n.asm
index df3f6e8b81..38ea0e197e 100644
--- a/gmp/mpn/pa32/hppa1_1/pa7100/sub_n.asm
+++ b/gmp/mpn/pa32/hppa1_1/pa7100/sub_n.asm
@@ -2,33 +2,23 @@ dnl  HP-PA mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
 dnl  store difference in a third limb vector.  Optimized for the PA7100, where
 dnl  is runs at 4.25 cycles/limb.
 
-dnl  Copyright 1992, 1994, 2000-2003 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 2000, 2001, 2002, 2003 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa1_1/pa7100/submul_1.asm b/gmp/mpn/pa32/hppa1_1/pa7100/submul_1.asm
index 5ea08cbee5..aee9d9033e 100644
--- a/gmp/mpn/pa32/hppa1_1/pa7100/submul_1.asm
+++ b/gmp/mpn/pa32/hppa1_1/pa7100/submul_1.asm
@@ -1,33 +1,22 @@
 dnl  HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and
 dnl  subtract the result from a second limb vector.
 
-dnl  Copyright 1995, 2000-2003 Free Software Foundation, Inc.
+dnl  Copyright 1995, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa1_1/sqr_diagonal.asm b/gmp/mpn/pa32/hppa1_1/sqr_diagonal.asm
index 1c7a18e37d..4eba989276 100644
--- a/gmp/mpn/pa32/hppa1_1/sqr_diagonal.asm
+++ b/gmp/mpn/pa32/hppa1_1/sqr_diagonal.asm
@@ -3,30 +3,19 @@ dnl  HP-PA 1.1 32-bit mpn_sqr_diagonal.
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa1_1/submul_1.asm b/gmp/mpn/pa32/hppa1_1/submul_1.asm
index a9b11d24a8..c6bc38394b 100644
--- a/gmp/mpn/pa32/hppa1_1/submul_1.asm
+++ b/gmp/mpn/pa32/hppa1_1/submul_1.asm
@@ -1,33 +1,23 @@
 dnl  HP-PA 1.1 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
 dnl  the result from a second limb vector.
 
-dnl  Copyright 1992-1994, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1993, 1994, 2000, 2001, 2002 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa1_1/udiv.asm b/gmp/mpn/pa32/hppa1_1/udiv.asm
index 626ecd202b..e6a9927edf 100644
--- a/gmp/mpn/pa32/hppa1_1/udiv.asm
+++ b/gmp/mpn/pa32/hppa1_1/udiv.asm
@@ -4,30 +4,19 @@ dnl  This version runs fast on PA 7000 and later.
 dnl  Copyright 1993, 1994, 2000, 2001, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa1_1/umul.asm b/gmp/mpn/pa32/hppa1_1/umul.asm
index 18b923cd5a..7f1cb93949 100644
--- a/gmp/mpn/pa32/hppa1_1/umul.asm
+++ b/gmp/mpn/pa32/hppa1_1/umul.asm
@@ -1,30 +1,19 @@
 dnl  Copyright 1999, 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa2_0/add_n.asm b/gmp/mpn/pa32/hppa2_0/add_n.asm
index 8d881b8b08..685c4c91ae 100644
--- a/gmp/mpn/pa32/hppa2_0/add_n.asm
+++ b/gmp/mpn/pa32/hppa2_0/add_n.asm
@@ -1,33 +1,22 @@
 dnl  HP-PA 2.0 32-bit mpn_add_n -- Add two limb vectors of the same length > 0
 dnl  and store sum in a third limb vector.
 
-dnl  Copyright 1997, 1998, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1997, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa2_0/gmp-mparam.h b/gmp/mpn/pa32/hppa2_0/gmp-mparam.h
index 6016274714..c356b4acce 100644
--- a/gmp/mpn/pa32/hppa2_0/gmp-mparam.h
+++ b/gmp/mpn/pa32/hppa2_0/gmp-mparam.h
@@ -1,167 +1,73 @@
 /* gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2002, 2009, 2010 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2009 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 552 MHz PA8600 (gcc61.fsffrance.org) */
-
-#define DIVREM_1_NORM_THRESHOLD              3
-#define DIVREM_1_UNNORM_THRESHOLD            3
-#define MOD_1_NORM_THRESHOLD                 3
-#define MOD_1_UNNORM_THRESHOLD               4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         11
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          8
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        22
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     28
-#define USE_PREINV_DIVREM_1                  1
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           36
-
-#define MUL_TOOM22_THRESHOLD                18
-#define MUL_TOOM33_THRESHOLD                65
-#define MUL_TOOM44_THRESHOLD               166
-#define MUL_TOOM6H_THRESHOLD               202
-#define MUL_TOOM8H_THRESHOLD               333
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     105
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     138
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     102
-
-#define SQR_BASECASE_THRESHOLD               7
-#define SQR_TOOM2_THRESHOLD                 55
-#define SQR_TOOM3_THRESHOLD                 93
-#define SQR_TOOM4_THRESHOLD                250
-#define SQR_TOOM6_THRESHOLD                306
-#define SQR_TOOM8_THRESHOLD                527
-
-#define MULMOD_BNM1_THRESHOLD               13
-#define SQRMOD_BNM1_THRESHOLD               15
-
-#define MUL_FFT_MODF_THRESHOLD             244  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    244, 5}, {      8, 4}, {     17, 5}, {     13, 6}, \
-    {      7, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     13, 7}, {      7, 6}, {     17, 7}, {      9, 6}, \
-    {     19, 7}, {     11, 6}, {     24, 7}, {     13, 8}, \
-    {      7, 7}, {     19, 8}, {     11, 7}, {     25, 8}, \
-    {     15, 7}, {     33, 8}, {     23, 9}, {     15, 8}, \
-    {     39, 9}, {     23,10}, {     15, 9}, {     31, 8}, \
-    {     67, 9}, {     39, 8}, {     79, 9}, {     47,10}, \
-    {     31, 9}, {     71, 8}, {    143, 9}, {     79,10}, \
-    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255, 9}, {    135, 8}, {    271, 9}, {    143,10}, \
-    {     79, 9}, {    159, 8}, {    319, 9}, {    175, 8}, \
-    {    351,10}, {     95, 9}, {    191, 8}, {    383, 9}, \
-    {    207,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511, 9}, {    271,10}, {    143, 9}, {    287, 8}, \
-    {    575,10}, {    159, 9}, {    319,10}, {    175, 9}, \
-    {    351,11}, {     95,10}, {    191, 9}, {    383,10}, \
-    {    207, 9}, {    415,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,10}, {    271, 9}, {    543, 8}, \
-    {   1087,10}, {    287, 9}, {    575,10}, {    303,11}, \
-    {    159,10}, {    351, 9}, {    703, 8}, {   1407,11}, \
-    {    191,10}, {    415, 9}, {    831,11}, {    223, 9}, \
-    {    895,10}, {    479,12}, {    127,11}, {    255,10}, \
-    {    543, 9}, {   1087,11}, {    287,10}, {    607, 9}, \
-    {   1215,11}, {    351,10}, {    703, 9}, {   1407,12}, \
-    {    191,11}, {    415,10}, {    831,11}, {    479,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 107
-#define MUL_FFT_THRESHOLD                 2112
-
-#define SQR_FFT_MODF_THRESHOLD             240  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    240, 5}, {      8, 4}, {     17, 5}, {     19, 6}, \
-    {     17, 7}, {      9, 6}, {     20, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
-    {     11, 7}, {     25, 8}, {     15, 7}, {     33, 8}, \
-    {     19, 7}, {     39, 8}, {     23, 9}, {     15, 8}, \
-    {     39, 9}, {     23,10}, {     15, 9}, {     31, 8}, \
-    {     63, 9}, {     47,10}, {     31, 9}, {     63, 8}, \
-    {    127, 9}, {     71, 8}, {    143, 9}, {     79,10}, \
-    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255, 7}, {    511, 9}, {    135, 8}, {    271, 9}, \
-    {    143,10}, {     79, 9}, {    159, 8}, {    319, 9}, \
-    {    175, 8}, {    351, 7}, {    703,10}, {     95, 9}, \
-    {    191, 8}, {    383, 9}, {    207,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    511, 9}, {    271,10}, \
-    {    143, 9}, {    287, 8}, {    575,10}, {    159, 9}, \
-    {    319,10}, {    175, 9}, {    351, 8}, {    703,11}, \
-    {     95,10}, {    191, 9}, {    383,10}, {    207, 9}, \
-    {    415,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543, 8}, {   1087,10}, \
-    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
-    {    639,10}, {    351, 9}, {    703, 8}, {   1407,11}, \
-    {    191,10}, {    415, 9}, {    831,11}, {    223, 8}, \
-    {   1791,10}, {    479, 9}, {    959,12}, {    127,11}, \
-    {    255,10}, {    543,11}, {    287,10}, {    607,11}, \
-    {    319,10}, {    639,11}, {    351,10}, {    703, 9}, \
-    {   1407,12}, {    191,11}, {    415,10}, {    831,11}, \
-    {    479,10}, {    959,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 109
-#define SQR_FFT_THRESHOLD                 1600
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                 116
-#define MULLO_MUL_N_THRESHOLD             3574
-
-#define DC_DIV_QR_THRESHOLD                100
-#define DC_DIVAPPR_Q_THRESHOLD             348
-#define DC_BDIV_QR_THRESHOLD               109
-#define DC_BDIV_Q_THRESHOLD                254
-
-#define INV_MULMOD_BNM1_THRESHOLD           34
-#define INV_NEWTON_THRESHOLD               276
-#define INV_APPR_THRESHOLD                 276
-
-#define BINV_NEWTON_THRESHOLD              278
-#define REDC_1_TO_REDC_N_THRESHOLD          78
-
-#define MU_DIV_QR_THRESHOLD                979
-#define MU_DIVAPPR_Q_THRESHOLD             263
-#define MUPI_DIV_QR_THRESHOLD              102
-#define MU_BDIV_QR_THRESHOLD               807
-#define MU_BDIV_Q_THRESHOLD               1187
-
-#define MATRIX22_STRASSEN_THRESHOLD         11
-#define HGCD_THRESHOLD                     100
-#define GCD_DC_THRESHOLD                   379
-#define GCDEXT_DC_THRESHOLD                249
-#define JACOBI_BASE_METHOD                   2
-
-#define GET_STR_DC_THRESHOLD                 7
-#define GET_STR_PRECOMPUTE_THRESHOLD        16
-#define SET_STR_DC_THRESHOLD               270
-#define SET_STR_PRECOMPUTE_THRESHOLD       782
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2009-03-05, gcc 4.3 */
+
+#define MUL_KARATSUBA_THRESHOLD          15
+#define MUL_TOOM3_THRESHOLD              98
+#define MUL_TOOM44_THRESHOLD            158
+
+#define SQR_BASECASE_THRESHOLD            6
+#define SQR_KARATSUBA_THRESHOLD          48
+#define SQR_TOOM3_THRESHOLD              97
+#define SQR_TOOM4_THRESHOLD             232
+
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD              92
+#define MULLOW_MUL_N_THRESHOLD          363
+
+#define DIV_SB_PREINV_THRESHOLD           4
+#define DIV_DC_THRESHOLD                 92
+#define POWM_THRESHOLD                  142
+
+#define MATRIX22_STRASSEN_THRESHOLD      17
+#define HGCD_THRESHOLD                  100
+#define GCD_DC_THRESHOLD                365
+#define GCDEXT_DC_THRESHOLD             339
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           3
+#define DIVREM_1_UNNORM_THRESHOLD         5
+#define MOD_1_NORM_THRESHOLD              4
+#define MOD_1_UNNORM_THRESHOLD            5
+#define MOD_1_1_THRESHOLD                 6
+#define MOD_1_2_THRESHOLD                 9
+#define MOD_1_4_THRESHOLD                24
+#define USE_PREINV_DIVREM_1               1
+#define USE_PREINV_MOD_1                  1
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD      MP_SIZE_T_MAX  /* never */
+
+#define GET_STR_DC_THRESHOLD              8
+#define GET_STR_PRECOMPUTE_THRESHOLD     13
+#define SET_STR_DC_THRESHOLD            224
+#define SET_STR_PRECOMPUTE_THRESHOLD    702
+
+#define MUL_FFT_TABLE  { 272, 672, 896, 2560, 6144, 24576, 98304, 393216, 0 }
+#define MUL_FFT_MODF_THRESHOLD          232
+#define MUL_FFT_THRESHOLD              1792
+
+#define SQR_FFT_TABLE  { 304, 672, 1152, 2560, 10240, 24576, 98304, 393216, 0 }
+#define SQR_FFT_MODF_THRESHOLD          232
+#define SQR_FFT_THRESHOLD              1792
diff --git a/gmp/mpn/pa32/hppa2_0/sqr_diagonal.asm b/gmp/mpn/pa32/hppa2_0/sqr_diagonal.asm
index c55112fac5..3493c87322 100644
--- a/gmp/mpn/pa32/hppa2_0/sqr_diagonal.asm
+++ b/gmp/mpn/pa32/hppa2_0/sqr_diagonal.asm
@@ -3,30 +3,19 @@ dnl  HP-PA 32-bit mpn_sqr_diagonal optimized for the PA8x00.
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/hppa2_0/sub_n.asm b/gmp/mpn/pa32/hppa2_0/sub_n.asm
index 47b3163fe3..b0aefb4abb 100644
--- a/gmp/mpn/pa32/hppa2_0/sub_n.asm
+++ b/gmp/mpn/pa32/hppa2_0/sub_n.asm
@@ -1,33 +1,22 @@
 dnl  HP-PA 2.0 32-bit mpn_sub_n -- Subtract two limb vectors of the same
 dnl  length > 0 and store difference in a third limb vector.
 
-dnl  Copyright 1997, 1998, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1997, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/lshift.asm b/gmp/mpn/pa32/lshift.asm
index 5ea497c1f1..2128fbeed4 100644
--- a/gmp/mpn/pa32/lshift.asm
+++ b/gmp/mpn/pa32/lshift.asm
@@ -1,32 +1,21 @@
 dnl  HP-PA  mpn_lshift -- Shift a number left.
 
-dnl  Copyright 1992, 1994, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/pa-defs.m4 b/gmp/mpn/pa32/pa-defs.m4
index b26e715fc5..affaef897a 100644
--- a/gmp/mpn/pa32/pa-defs.m4
+++ b/gmp/mpn/pa32/pa-defs.m4
@@ -3,37 +3,26 @@ divert(-1)
 dnl  m4 macros for HPPA assembler.
 
 dnl  Copyright 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  hppa assembler comments are introduced with ";".
 dnl
-dnl  For cooperation with cpp, apparently lines "# 123" set the line number,
+dnl  For cooperation with cpp, aparently lines "# 123" set the line number,
 dnl  and other lines starting with a "#" are ignored.
 
 changecom(;)
diff --git a/gmp/mpn/pa32/rshift.asm b/gmp/mpn/pa32/rshift.asm
index c5eac830c9..238b0be7ed 100644
--- a/gmp/mpn/pa32/rshift.asm
+++ b/gmp/mpn/pa32/rshift.asm
@@ -1,32 +1,21 @@
 dnl  HP-PA  mpn_rshift -- Shift a number right.
 
-dnl  Copyright 1992, 1994, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/sub_n.asm b/gmp/mpn/pa32/sub_n.asm
index 9c71655b98..d07ebb5bb6 100644
--- a/gmp/mpn/pa32/sub_n.asm
+++ b/gmp/mpn/pa32/sub_n.asm
@@ -1,33 +1,22 @@
 dnl  HP-PA mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
 dnl  store difference in a third limb vector.
 
-dnl  Copyright 1992, 1994, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa32/udiv.asm b/gmp/mpn/pa32/udiv.asm
index addbf41ef5..86886e484c 100644
--- a/gmp/mpn/pa32/udiv.asm
+++ b/gmp/mpn/pa32/udiv.asm
@@ -1,33 +1,22 @@
 dnl  HP-PA  __udiv_qrnnd division support, used from longlong.h.
 dnl  This version runs fast on pre-PA7000 CPUs.
 
-dnl  Copyright 1993, 1994, 2000-2002 Free Software Foundation, Inc.
+dnl  Copyright 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa64/README b/gmp/mpn/pa64/README
index a51ce028a4..6234a407f2 100644
--- a/gmp/mpn/pa64/README
+++ b/gmp/mpn/pa64/README
@@ -3,28 +3,17 @@ Copyright 1999, 2001, 2002, 2004 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/pa64/add_n.asm b/gmp/mpn/pa64/add_n.asm
new file mode 100644
index 0000000000..1c2055590c
--- /dev/null
+++ b/gmp/mpn/pa64/add_n.asm
@@ -0,0 +1,93 @@
+dnl  HP-PA 2.0 mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl  store sum in a third limb vector.
+
+dnl  Copyright 1997, 2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  This runs at 2 cycles/limb on PA8000 and 1.6875 cycles/limb on PA8500.  It
+dnl  should be possible to reach the cache bandwidth 1.5 cycles/limb at least
+dnl  with PA8500.  The problem now is stalling of the first ADD,DC after LDO,
+dnl  where the processor gets confused about where carry comes from.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`vp',`%r24')
+define(`n',`%r23')
+
+ifdef(`HAVE_ABI_2_0w',
+`       .level  2.0w
+',`     .level  2.0
+')
+PROLOGUE(mpn_add_n)
+	sub		%r0, n, %r22		C r22 = -n
+	depw,z		%r22, 30, 3, %r28	C r28 = 2 * (-n & 7)
+	depw,z		%r22, 28, 3, %r22	C r22 = 8 * (-n & 7)
+	sub		up, %r22, up		C offset up
+	sub		vp, %r22, vp		C offset vp
+	sub		rp, %r22, rp		C offset rp
+	blr		%r28, %r0		C branch into loop
+	add		%r0, %r0, %r0		C reset carry
+
+LDEF(loop)
+	ldd		0(up), %r20		C load 64-bit limb from up
+	ldd		0(vp), %r31		C load 64-bit limb from vp
+	add,dc		%r20, %r31, %r20	C add limbs plus incoming carry
+	std		%r20, 0(rp)		C store sum limb to rp
+LDEF(7)	ldd		8(up), %r21		C presumably the blr entry when n mod 8 = 7 (likewise 6..1 below)
+	ldd		8(vp), %r19
+	add,dc		%r21, %r19, %r21
+	std		%r21, 8(rp)
+LDEF(6)	ldd		16(up), %r20
+	ldd		16(vp), %r31
+	add,dc		%r20, %r31, %r20
+	std		%r20, 16(rp)
+LDEF(5)	ldd		24(up), %r21
+	ldd		24(vp), %r19
+	add,dc		%r21, %r19, %r21
+	std		%r21, 24(rp)
+LDEF(4)	ldd		32(up), %r20
+	ldd		32(vp), %r31
+	add,dc		%r20, %r31, %r20
+	std		%r20, 32(rp)
+LDEF(3)	ldd		40(up), %r21
+	ldd		40(vp), %r19
+	add,dc		%r21, %r19, %r21
+	std		%r21, 40(rp)
+LDEF(2)	ldd		48(up), %r20
+	ldd		48(vp), %r31
+	add,dc		%r20, %r31, %r20
+	std		%r20, 48(rp)
+LDEF(1)	ldd		56(up), %r21
+	ldd		56(vp), %r19
+	add,dc		%r21, %r19, %r21
+	ldo		64(up), up		C advance up by 64 bytes (8 limbs)
+	std		%r21, 56(rp)
+	ldo		64(vp), vp		C advance vp by 64 bytes (8 limbs)
+	addib,>		-8, n, L(loop)		C n -= 8, loop again while n > 0
+	ldo		64(rp), rp		C advance rp by 64 bytes (follows the branch, like the carry reset after blr)
+
+	add,dc		%r0, %r0, %r29		C r29 = 0 + 0 + carry, i.e. the carry out
+	bve		(%r2)			C branch to the address in r2 (presumably the return pointer)
+ifdef(`HAVE_ABI_2_0w',
+`	copy		%r29, %r28
+',`	ldi		0, %r28
+')
+EPILOGUE(mpn_add_n)
diff --git a/gmp/mpn/pa64/addmul_1.asm b/gmp/mpn/pa64/addmul_1.asm
index 2cb9af9f14..4e76546050 100644
--- a/gmp/mpn/pa64/addmul_1.asm
+++ b/gmp/mpn/pa64/addmul_1.asm
@@ -1,33 +1,22 @@
 dnl  HP-PA 2.0 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and
 dnl  add the result to a second limb vector.
 
-dnl  Copyright 1998-2000, 2002, 2003 Free Software Foundation, Inc.
+dnl  Copyright 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa64/aors_n.asm b/gmp/mpn/pa64/aors_n.asm
deleted file mode 100644
index ab4536fefb..0000000000
--- a/gmp/mpn/pa64/aors_n.asm
+++ /dev/null
@@ -1,130 +0,0 @@
-dnl  HP-PA 2.0 mpn_add_n, mpn_sub_n
-
-dnl  Copyright 1997, 2000, 2002, 2003, 2009, 2010 Free Software Foundation,
-dnl  Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-dnl  This runs at 2 cycles/limb on PA8000 and 1.6875 cycles/limb on PA8500.  It
-dnl  should be possible to reach the cache bandwidth 1.5 cycles/limb at least
-dnl  with PA8500.  The problem now is stalling of the first ADD,DC after LDO,
-dnl  where the processor gets confused about where carry comes from.
-
-include(`../config.m4')
-
-dnl INPUT PARAMETERS
-define(`rp',`%r26')
-define(`up',`%r25')
-define(`vp',`%r24')
-define(`n',`%r23')
-
-ifdef(`OPERATION_add_n', `
-	define(ADCSBC,	      `add,dc')
-	define(INITCY,	      `addi -1,%r22,%r0')
-	define(func,	      mpn_add_n)
-	define(func_nc,	      mpn_add_nc)')
-ifdef(`OPERATION_sub_n', `
-	define(ADCSBC,	      `sub,db')
-	define(INITCY,	      `subi 0,%r22,%r0')
-	define(func,	      mpn_sub_n)
-	define(func_nc,	      mpn_sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-ifdef(`HAVE_ABI_2_0w',
-`       .level  2.0w
-',`     .level  2.0
-')
-PROLOGUE(func_nc)
-ifdef(`HAVE_ABI_2_0w',
-`	b		L(com)
-	nop
-',`	b		L(com)
-	ldw		-52(%r30), %r22
-')
-EPILOGUE()
-PROLOGUE(func)
-	ldi		0, %r22
-LDEF(com)
-	sub		%r0, n, %r21
-	depw,z		%r21, 30, 3, %r28	C r28 = 2 * (-n & 7)
-	depw,z		%r21, 28, 3, %r21	C r21 = 8 * (-n & 7)
-	sub		up, %r21, up		C offset up
-	sub		vp, %r21, vp		C offset vp
-	sub		rp, %r21, rp		C offset rp
-	blr		%r28, %r0		C branch into loop
-	INITCY
-
-LDEF(loop)
-	ldd		0(up), %r20
-	ldd		0(vp), %r31
-	ADCSBC		%r20, %r31, %r20
-	std		%r20, 0(rp)
-LDEF(7)	ldd		8(up), %r21
-	ldd		8(vp), %r19
-	ADCSBC		%r21, %r19, %r21
-	std		%r21, 8(rp)
-LDEF(6)	ldd		16(up), %r20
-	ldd		16(vp), %r31
-	ADCSBC		%r20, %r31, %r20
-	std		%r20, 16(rp)
-LDEF(5)	ldd		24(up), %r21
-	ldd		24(vp), %r19
-	ADCSBC		%r21, %r19, %r21
-	std		%r21, 24(rp)
-LDEF(4)	ldd		32(up), %r20
-	ldd		32(vp), %r31
-	ADCSBC		%r20, %r31, %r20
-	std		%r20, 32(rp)
-LDEF(3)	ldd		40(up), %r21
-	ldd		40(vp), %r19
-	ADCSBC		%r21, %r19, %r21
-	std		%r21, 40(rp)
-LDEF(2)	ldd		48(up), %r20
-	ldd		48(vp), %r31
-	ADCSBC		%r20, %r31, %r20
-	std		%r20, 48(rp)
-LDEF(1)	ldd		56(up), %r21
-	ldd		56(vp), %r19
-	ADCSBC		%r21, %r19, %r21
-	ldo		64(up), up
-	std		%r21, 56(rp)
-	ldo		64(vp), vp
-	addib,>		-8, n, L(loop)
-	ldo		64(rp), rp
-
-	add,dc		%r0, %r0, %r29
-ifdef(`OPERATION_sub_n',`
-	subi		1, %r29, %r29
-')
-	bve		(%r2)
-ifdef(`HAVE_ABI_2_0w',
-`	copy		%r29, %r28
-',`	ldi		0, %r28
-')
-EPILOGUE()
diff --git a/gmp/mpn/pa64/aorslsh1_n.asm b/gmp/mpn/pa64/aorslsh1_n.asm
index 2a55ddea30..b2cca7a356 100644
--- a/gmp/mpn/pa64/aorslsh1_n.asm
+++ b/gmp/mpn/pa64/aorslsh1_n.asm
@@ -3,30 +3,19 @@ dnl  PA64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
 dnl  Copyright 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa64/gmp-mparam.h b/gmp/mpn/pa64/gmp-mparam.h
index c2719c3c89..aa92cb9191 100644
--- a/gmp/mpn/pa64/gmp-mparam.h
+++ b/gmp/mpn/pa64/gmp-mparam.h
@@ -1,247 +1,72 @@
 /* gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2004, 2008-2010 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008
+Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
 
 /* 440MHz PA8200 */
 
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD         10
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        14
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_2_PI2_THRESHOLD              21
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
-
-#define MUL_TOOM22_THRESHOLD                31
-#define MUL_TOOM33_THRESHOLD               114
-#define MUL_TOOM44_THRESHOLD               179
-#define MUL_TOOM6H_THRESHOLD               222
-#define MUL_TOOM8H_THRESHOLD               296
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     130
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     229
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     129
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      54
-
-#define SQR_BASECASE_THRESHOLD               5
-#define SQR_TOOM2_THRESHOLD                 58
-#define SQR_TOOM3_THRESHOLD                153
-#define SQR_TOOM4_THRESHOLD                278
-#define SQR_TOOM6_THRESHOLD                  0  /* always */
-#define SQR_TOOM8_THRESHOLD                  0  /* always */
-
-#define MULMID_TOOM42_THRESHOLD             56
-
-#define MULMOD_BNM1_THRESHOLD               15
-#define SQRMOD_BNM1_THRESHOLD               19
-
-#define POWM_SEC_TABLE  2,23,228,1084
-
-#define MUL_FFT_MODF_THRESHOLD             336  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    336, 5}, {     11, 4}, {     23, 5}, {     21, 6}, \
-    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
-    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     31, 8}, {     19, 7}, {     39, 8}, \
-    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
-    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     51,10}, \
-    {     31, 9}, {     67,10}, {     39, 9}, {     79,10}, \
-    {     47, 9}, {     95,10}, {     55,11}, {     31,10}, \
-    {     63, 9}, {    127,10}, {     71, 8}, {    287,10}, \
-    {     79,11}, {     47,10}, {     95, 9}, {    191, 8}, \
-    {    383, 7}, {    767,10}, {    103, 9}, {    207, 8}, \
-    {    415, 7}, {    831,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    543, 7}, {   1087, 6}, \
-    {   2175,10}, {    143, 9}, {    287, 8}, {    575,11}, \
-    {     79, 9}, {    319, 8}, {    639, 7}, {   1279, 9}, \
-    {    335, 8}, {    671,10}, {    175, 9}, {    351, 8}, \
-    {    703,11}, {     95,10}, {    191, 9}, {    383, 8}, \
-    {    767,10}, {    207, 9}, {    415, 8}, {    831, 7}, \
-    {   1663,11}, {    111,10}, {    223, 9}, {    447, 8}, \
-    {    895,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    543, 8}, {   1087, 7}, {   2175,10}, {    287, 9}, \
-    {    575, 8}, {   1215, 7}, {   2431,10}, {    319, 9}, \
-    {    639, 8}, {   1279,10}, {    335, 9}, {    671, 8}, \
-    {   1343, 9}, {    703, 8}, {   1407,12}, {     95,11}, \
-    {    191,10}, {    383,11}, {    207, 9}, {    831, 8}, \
-    {   1663,11}, {    223,10}, {    447, 9}, {    959,13}, \
-    {     63,12}, {    127,11}, {    255, 8}, {   2047,11}, \
-    {    271,10}, {    543, 9}, {   1087, 8}, {   2175,11}, \
-    {    287,10}, {    575, 9}, {   1215, 8}, {   2431,11}, \
-    {    319,10}, {    671, 9}, {   1343, 8}, {   2687,11}, \
-    {    351,10}, {    703, 9}, {   1471, 8}, {   2943,12}, \
-    {    191,11}, {    383, 8}, {   3071,11}, {    415,10}, \
-    {    831, 9}, {   1663,11}, {    479,10}, {    959, 9}, \
-    {   1919, 8}, {   3839,13}, {    127,12}, {    255,11}, \
-    {    543,10}, {   1087, 9}, {   2175,12}, {    287,11}, \
-    {    607,10}, {   1215, 9}, {   2431, 8}, {   4863,12}, \
-    {    319,11}, {    671,10}, {   1343,13}, {    191, 9}, \
-    {   3071,12}, {    415,11}, {    831,10}, {   1663, 8}, \
-    {   6655, 9}, {   3455,12}, {    447, 9}, {   3583,13}, \
-    {    255,12}, {    511,11}, {   1023,10}, {   2175,13}, \
-    {    319,11}, {   1279,12}, {    671,10}, {   2815,12}, \
-    {    735,10}, {   2943, 9}, {   5887,13}, {    383,12}, \
-    {    767,11}, {   1535,10}, {   3071,13}, {    447,10}, \
-    {   3583,12}, {    959,13}, {    511,12}, {   1087,13}, \
-    {    639,12}, {   1343,13}, {    767,11}, {   3071,13}, \
-    {    831,12}, {   1663,11}, {   3455,10}, {   6911,13}, \
-    {    895,14}, {    511,13}, {   1023,12}, {   2047,13}, \
-    {   1087,12}, {   2303,13}, {   1215,12}, {   2431,14}, \
-    {    639,13}, {   1279,12}, {   2559,13}, {   1343,12}, \
-    {   2687,11}, {   5375,13}, {   1407,12}, {   2815,11}, \
-    {   5631,12}, {   2943,13}, {   1535,12}, {   3199,13}, \
-    {   1663,12}, {   3327,13}, {   1727,14}, {    895,13}, \
-    {   1791,12}, {   3583,13}, {   1919,15}, {    511,14}, \
-    {   1023,13}, {   2047,12}, {   4095,14}, {   1151,13}, \
-    {   2431,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
-    {   2815,12}, {   5631,15}, {    767,14}, {   1535,13}, \
-    {   3071,14}, {   1663,13}, {   3327,14}, {   1791,13}, \
-    {   3583,14}, {   1919,15}, {   1023,14}, {   2303,13}, \
-    {   4607,14}, {   2431,13}, {   4863,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 252
-#define MUL_FFT_THRESHOLD                 2368
-
-#define SQR_FFT_MODF_THRESHOLD             284  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    284, 5}, {      9, 4}, {     21, 5}, {     21, 6}, \
-    {     11, 5}, {     23, 6}, {     25, 7}, {     25, 8}, \
-    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
-    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
-    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     63, 8}, {    255, 7}, {    511,10}, \
-    {     71, 8}, {    287, 7}, {    575,10}, {     79,11}, \
-    {     47,10}, {     95, 9}, {    191, 8}, {    383, 7}, \
-    {    767,10}, {    103, 9}, {    207, 8}, {    415,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    543, 7}, {   1087, 8}, {    575, 7}, {   1151,11}, \
-    {     79, 8}, {    639, 7}, {   1279, 9}, {    335, 8}, \
-    {    671, 7}, {   1343,10}, {    175, 8}, {    703, 7}, \
-    {   1407,11}, {     95,10}, {    191, 9}, {    383, 8}, \
-    {    767,10}, {    207, 9}, {    415, 8}, {    831, 7}, \
-    {   1663, 9}, {    447, 8}, {    895,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    543, 8}, {   1087, 7}, \
-    {   2175, 9}, {    575, 8}, {   1151,10}, {    303, 9}, \
-    {    607, 8}, {   1215, 7}, {   2431,10}, {    319, 9}, \
-    {    639, 8}, {   1279, 9}, {    671, 8}, {   1343, 7}, \
-    {   2687,10}, {    351, 9}, {    703, 8}, {   1407,12}, \
-    {     95,11}, {    191,10}, {    383, 9}, {    767,11}, \
-    {    207,10}, {    415, 9}, {    831, 8}, {   1663,11}, \
-    {    223,10}, {    447, 9}, {    895,13}, {     63,11}, \
-    {    255,10}, {    543, 8}, {   2175,11}, {    287,10}, \
-    {    575, 9}, {   1151,10}, {    607, 9}, {   1215, 8}, \
-    {   2431,11}, {    319, 9}, {   1279,10}, {    671, 9}, \
-    {   1343, 8}, {   2687,11}, {    351,10}, {    703, 9}, \
-    {   1407,10}, {    735,12}, {    191,11}, {    383,10}, \
-    {    831, 9}, {   1663,12}, {    223,11}, {    447,10}, \
-    {    895,11}, {    479, 9}, {   1919, 8}, {   3839,12}, \
-    {    255,11}, {    511,10}, {   1023,11}, {    543,10}, \
-    {   1087, 9}, {   2175,12}, {    287,11}, {    575,10}, \
-    {   1151,11}, {    607,10}, {   1215, 9}, {   2431, 8}, \
-    {   4863,10}, {   1279,11}, {    671,10}, {   1343, 9}, \
-    {   2687,12}, {    351,11}, {    703,10}, {   1407,11}, \
-    {    735,13}, {    191, 9}, {   3071, 7}, {  12287,11}, \
-    {    799,12}, {    415,11}, {    831,10}, {   1663,12}, \
-    {    447, 8}, {   7167,12}, {    479, 9}, {   3839,14}, \
-    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
-    {    543,10}, {   2175, 9}, {   4607,11}, {   1215,10}, \
-    {   2431,11}, {   1279,10}, {   2559,13}, {    383,12}, \
-    {    767,11}, {   1535,12}, {    799,10}, {   3199, 9}, \
-    {   6399,12}, {    895,13}, {    511,12}, {   1023,11}, \
-    {   2047,12}, {   1087,13}, {    575,12}, {   1151,10}, \
-    {   4607,13}, {    639,12}, {   1279,11}, {   2687,14}, \
-    {    383,13}, {    767,11}, {   3071,12}, {   1599,13}, \
-    {    895,12}, {   1791,11}, {   3583,13}, {    959,15}, \
-    {    255,12}, {   2175,13}, {   1215,14}, {    639,13}, \
-    {   1279,12}, {   2559,13}, {   1343,12}, {   2687,13}, \
-    {   1471,11}, {   5887,14}, {    767,13}, {   1535,12}, \
-    {   3071,13}, {   1599,12}, {   3199,13}, {   1663,12}, \
-    {   3327,13}, {   1727,14}, {    895,13}, {   1791,12}, \
-    {   3583,15}, {    511,14}, {   1023,13}, {   2175,14}, \
-    {   1151,12}, {   4607,13}, {   2431,14}, {   1279,13}, \
-    {   2687,14}, {   1407,13}, {   2815,15}, {    767,13}, \
-    {   3199,14}, {   1663,13}, {   3327,14}, {   1791,13}, \
-    {   3583,14}, {   1919,15}, {   1023,14}, {   2047,13}, \
-    {   4095,14}, {   2303,13}, {   4607,14}, {   2431,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 257
-#define SQR_FFT_THRESHOLD                 1856
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                 113
-#define MULLO_MUL_N_THRESHOLD             4658
-
-#define DC_DIV_QR_THRESHOLD                123
-#define DC_DIVAPPR_Q_THRESHOLD             372
-#define DC_BDIV_QR_THRESHOLD               142
-#define DC_BDIV_Q_THRESHOLD                312
-
-#define INV_MULMOD_BNM1_THRESHOLD           58
-#define INV_NEWTON_THRESHOLD               315
-#define INV_APPR_THRESHOLD                 315
-
-#define BINV_NEWTON_THRESHOLD              360
-#define REDC_1_TO_REDC_N_THRESHOLD         101
-
-#define MU_DIV_QR_THRESHOLD                979
-#define MU_DIVAPPR_Q_THRESHOLD            1142
-#define MUPI_DIV_QR_THRESHOLD               93
-#define MU_BDIV_QR_THRESHOLD               889
-#define MU_BDIV_Q_THRESHOLD               1187
-
-#define MATRIX22_STRASSEN_THRESHOLD          9
-#define HGCD_THRESHOLD                     234
-#define HGCD_APPR_THRESHOLD                300
-#define HGCD_REDUCE_THRESHOLD             1553
-#define GCD_DC_THRESHOLD                   684
-#define GCDEXT_DC_THRESHOLD                525
-#define JACOBI_BASE_METHOD                   2
-
-#define GET_STR_DC_THRESHOLD                21
-#define GET_STR_PRECOMPUTE_THRESHOLD        24
-#define SET_STR_DC_THRESHOLD              1951
-#define SET_STR_PRECOMPUTE_THRESHOLD      4034
+/* Generated by tuneup.c, 2009-01-04, system compiler */
+
+#define MUL_KARATSUBA_THRESHOLD          30
+#define MUL_TOOM3_THRESHOLD             114
+#define MUL_TOOM44_THRESHOLD            244
+
+#define SQR_BASECASE_THRESHOLD            4
+#define SQR_KARATSUBA_THRESHOLD          58
+#define SQR_TOOM3_THRESHOLD             174
+#define SQR_TOOM4_THRESHOLD             312
+
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD             142
+#define MULLOW_MUL_N_THRESHOLD          507
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                124
+#define POWM_THRESHOLD                  224
+
+#define MATRIX22_STRASSEN_THRESHOLD      11
+#define HGCD_THRESHOLD                  294
+#define GCD_DC_THRESHOLD                913
+#define GCDEXT_DC_THRESHOLD             830
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD         0  /* always */
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define USE_PREINV_DIVREM_1               1
+#define USE_PREINV_MOD_1                  1
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             23
+#define GET_STR_PRECOMPUTE_THRESHOLD     26
+#define SET_STR_DC_THRESHOLD           2743
+#define SET_STR_PRECOMPUTE_THRESHOLD   5147
+
+#define MUL_FFT_TABLE  { 400, 800, 1600, 2816, 7168, 20480, 81920, 327680, 0 }
+#define MUL_FFT_MODF_THRESHOLD          280
+#define MUL_FFT_THRESHOLD              1664
+
+#define SQR_FFT_TABLE  { 368, 800, 1728, 3328, 7168, 20480, 81920, 327680, 786432, 0 }
+#define SQR_FFT_MODF_THRESHOLD          264
+#define SQR_FFT_THRESHOLD              1632
diff --git a/gmp/mpn/pa64/lshift.asm b/gmp/mpn/pa64/lshift.asm
index c0fc2921c1..0dceba20c1 100644
--- a/gmp/mpn/pa64/lshift.asm
+++ b/gmp/mpn/pa64/lshift.asm
@@ -3,30 +3,19 @@ dnl  HP-PA 2.0 mpn_lshift -- Left shift.
 dnl  Copyright 1997, 2000, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  This runs at 1.5 cycles/limb on PA8000 and 1.0 cycles/limb on PA8500.
diff --git a/gmp/mpn/pa64/mul_1.asm b/gmp/mpn/pa64/mul_1.asm
index 6935c23ccd..fbb5f174ae 100644
--- a/gmp/mpn/pa64/mul_1.asm
+++ b/gmp/mpn/pa64/mul_1.asm
@@ -1,33 +1,22 @@
 dnl  HP-PA 2.0 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and store
 dnl  the result in a second limb vector.
 
-dnl  Copyright 1998-2000, 2002, 2003 Free Software Foundation, Inc.
+dnl  Copyright 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa64/rshift.asm b/gmp/mpn/pa64/rshift.asm
index cfc242ea9c..80470c9892 100644
--- a/gmp/mpn/pa64/rshift.asm
+++ b/gmp/mpn/pa64/rshift.asm
@@ -3,30 +3,19 @@ dnl  HP-PA 2.0 mpn_rshift -- Right shift.
 dnl  Copyright 1997, 2000, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  This runs at 1.5 cycles/limb on PA8000 and 1.0 cycles/limb on PA8500.
diff --git a/gmp/mpn/pa64/sqr_diagonal.asm b/gmp/mpn/pa64/sqr_diagonal.asm
index f6fadc93c6..73c64b06ed 100644
--- a/gmp/mpn/pa64/sqr_diagonal.asm
+++ b/gmp/mpn/pa64/sqr_diagonal.asm
@@ -1,32 +1,21 @@
 dnl  HP-PA 2.0 64-bit mpn_sqr_diagonal.
 
-dnl  Copyright 2001-2003 Free Software Foundation, Inc.
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  This code runs at 7.25 cycles/limb on PA8000 and 7.75 cycles/limb on
diff --git a/gmp/mpn/pa64/sub_n.asm b/gmp/mpn/pa64/sub_n.asm
new file mode 100644
index 0000000000..8ad524da01
--- /dev/null
+++ b/gmp/mpn/pa64/sub_n.asm
@@ -0,0 +1,93 @@
+dnl  HP-PA 2.0 mpn_sub_n -- Subtract two limb vectors of the same length > 0
+dnl  and store difference in a third limb vector.
+
+dnl  Copyright 1997, 2000, 2002, 2003 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+dnl  This runs at 2 cycles/limb on PA8000 and 1.6875 cycles/limb on PA8500.  It
+dnl  should be possible to reach the cache bandwidth 1.5 cycles/limb at least
+dnl  with PA8500.  The problem now is stalling of the first SUB,DB after LDO,
+dnl  where the processor gets confused about where carry comes from.
+
+include(`../config.m4')
+
+dnl INPUT PARAMETERS
+define(`rp',`%r26')
+define(`up',`%r25')
+define(`vp',`%r24')
+define(`n',`%r23')
+
+ifdef(`HAVE_ABI_2_0w',
+`       .level  2.0w
+',`     .level  2.0
+')
+PROLOGUE(mpn_sub_n)
+	sub		%r0, n, %r22		C r22 = -n
+	depw,z		%r22, 30, 3, %r28	C r28 = 2 * (-n & 7)
+	depw,z		%r22, 28, 3, %r22	C r22 = 8 * (-n & 7)
+	sub		up, %r22, up		C offset up
+	sub		vp, %r22, vp		C offset vp
+	blr		%r28, %r0		C branch into loop
+	sub		rp, %r22, rp		C offset rp and set carry (blr delay slot)
+
+LDEF(loop)
+	ldd		0(up), %r20		C entry when n mod 8 = 0; load up limb
+	ldd		0(vp), %r31		C load matching vp limb
+	sub,db		%r20, %r31, %r20	C r20 = up limb - vp limb - borrow
+	std		%r20, 0(rp)		C store difference limb
+LDEF(7)	ldd		8(up), %r21		C entry when n mod 8 = 7
+	ldd		8(vp), %r19
+	sub,db		%r21, %r19, %r21
+	std		%r21, 8(rp)
+LDEF(6)	ldd		16(up), %r20		C entry when n mod 8 = 6
+	ldd		16(vp), %r31
+	sub,db		%r20, %r31, %r20
+	std		%r20, 16(rp)
+LDEF(5)	ldd		24(up), %r21		C entry when n mod 8 = 5
+	ldd		24(vp), %r19
+	sub,db		%r21, %r19, %r21
+	std		%r21, 24(rp)
+LDEF(4)	ldd		32(up), %r20		C entry when n mod 8 = 4
+	ldd		32(vp), %r31
+	sub,db		%r20, %r31, %r20
+	std		%r20, 32(rp)
+LDEF(3)	ldd		40(up), %r21		C entry when n mod 8 = 3
+	ldd		40(vp), %r19
+	sub,db		%r21, %r19, %r21
+	std		%r21, 40(rp)
+LDEF(2)	ldd		48(up), %r20		C entry when n mod 8 = 2
+	ldd		48(vp), %r31
+	sub,db		%r20, %r31, %r20
+	std		%r20, 48(rp)
+LDEF(1)	ldd		56(up), %r21		C entry when n mod 8 = 1
+	ldd		56(vp),%r19
+	sub,db		%r21, %r19, %r21
+	ldo		64(up), up		C advance up by 64 bytes (8 limbs)
+	std		%r21, 56(rp)
+	ldo		64(vp), vp		C advance vp by 64 bytes (8 limbs)
+	addib,>		-8, n, L(loop)		C n -= 8, loop while n > 0
+	ldo		64(rp), rp		C advance rp by 64 bytes (addib delay slot)
+
+	add,dc		%r0, %r0, %r29		C r29 = carry bit left by the last sub,db
+	subi		1, %r29, %r29		C r29 = 1 - carry, i.e. the borrow out
+	bve		(%r2)			C return via r2; the next insn is the delay slot
+ifdef(`HAVE_ABI_2_0w',
+`	copy		%r29, %r28
+',`	ldi		0, %r28
+')
+EPILOGUE(mpn_sub_n)
diff --git a/gmp/mpn/pa64/submul_1.asm b/gmp/mpn/pa64/submul_1.asm
index f8a1968e45..40678239fa 100644
--- a/gmp/mpn/pa64/submul_1.asm
+++ b/gmp/mpn/pa64/submul_1.asm
@@ -1,33 +1,22 @@
 dnl  HP-PA 2.0 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
 dnl  subtract the result from a second limb vector.
 
-dnl  Copyright 1998-2000, 2002, 2003 Free Software Foundation, Inc.
+dnl  Copyright 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa64/udiv.asm b/gmp/mpn/pa64/udiv.asm
index 1380a85932..3775783a05 100644
--- a/gmp/mpn/pa64/udiv.asm
+++ b/gmp/mpn/pa64/udiv.asm
@@ -1,32 +1,21 @@
 dnl  HP-PA 2.0 64-bit mpn_udiv_qrnnd_r.
 
-dnl  Copyright 2001-2003 Free Software Foundation, Inc.
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/pa64/umul.asm b/gmp/mpn/pa64/umul.asm
index c3341ecfe6..635e44fea0 100644
--- a/gmp/mpn/pa64/umul.asm
+++ b/gmp/mpn/pa64/umul.asm
@@ -1,36 +1,26 @@
 dnl  Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  Optimizations:
 dnl  * Avoid skip instructions
 dnl  * Put carry-generating and carry-consuming insns consecutively
-dnl  * Don't allocate any stack, "home" positions for parameters could be used.
+dnl  * Don't allocate any stack, "home" positions for parameters could be
+dnl    used.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/power/add_n.asm b/gmp/mpn/power/add_n.asm
index 6d6ca73da9..4fcafab7e1 100644
--- a/gmp/mpn/power/add_n.asm
+++ b/gmp/mpn/power/add_n.asm
@@ -1,32 +1,22 @@
 dnl  IBM POWER mpn_add_n -- Add two limb vectors of equal, non-zero length.
 
-dnl  Copyright 1992, 1994-1996, 1999-2001, 2005 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2005 Free Software
+dnl  Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  INPUT PARAMETERS
diff --git a/gmp/mpn/power/addmul_1.asm b/gmp/mpn/power/addmul_1.asm
index 76d8df3c76..fcda2c1263 100644
--- a/gmp/mpn/power/addmul_1.asm
+++ b/gmp/mpn/power/addmul_1.asm
@@ -1,33 +1,22 @@
 dnl  IBM POWER mpn_addmul_1 -- Multiply a limb vector with a limb and add the
 dnl  result to a second limb vector.
 
-dnl  Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  INPUT PARAMETERS
diff --git a/gmp/mpn/power/gmp-mparam.h b/gmp/mpn/power/gmp-mparam.h
index 7cb36f963e..f9b10e6a47 100644
--- a/gmp/mpn/power/gmp-mparam.h
+++ b/gmp/mpn/power/gmp-mparam.h
@@ -1,40 +1,29 @@
 /* POWER gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 2002-2004 Free Software Foundation, Inc.
+Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* Generated by tuneup.c, 2003-02-10, gcc 3.2, POWER2 66.7MHz */
 
-#define MUL_TOOM22_THRESHOLD             12
-#define MUL_TOOM33_THRESHOLD             75
+#define MUL_KARATSUBA_THRESHOLD          12
+#define MUL_TOOM3_THRESHOLD              75
 
 #define SQR_BASECASE_THRESHOLD            7
-#define SQR_TOOM2_THRESHOLD              28
+#define SQR_KARATSUBA_THRESHOLD          28
 #define SQR_TOOM3_THRESHOLD              86
 
 #define DIV_SB_PREINV_THRESHOLD       MP_SIZE_T_MAX  /* never */
diff --git a/gmp/mpn/power/lshift.asm b/gmp/mpn/power/lshift.asm
index efa210556d..a4adb7aad5 100644
--- a/gmp/mpn/power/lshift.asm
+++ b/gmp/mpn/power/lshift.asm
@@ -1,32 +1,21 @@
 dnl  IBM POWER mpn_lshift -- Shift a number left.
 
-dnl  Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  INPUT PARAMETERS
diff --git a/gmp/mpn/power/mul_1.asm b/gmp/mpn/power/mul_1.asm
index 38b7b66be0..bd33942adf 100644
--- a/gmp/mpn/power/mul_1.asm
+++ b/gmp/mpn/power/mul_1.asm
@@ -1,33 +1,22 @@
 dnl  IBM POWER mpn_mul_1 -- Multiply a limb vector with a limb and store the
 dnl  result in a second limb vector.
 
-dnl  Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  INPUT PARAMETERS
diff --git a/gmp/mpn/power/rshift.asm b/gmp/mpn/power/rshift.asm
index 1d1815ccb5..4645015ccd 100644
--- a/gmp/mpn/power/rshift.asm
+++ b/gmp/mpn/power/rshift.asm
@@ -1,32 +1,21 @@
 dnl  IBM POWER mpn_rshift -- Shift a number right.
 
-dnl  Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  INPUT PARAMETERS
diff --git a/gmp/mpn/power/sdiv.asm b/gmp/mpn/power/sdiv.asm
index 4a9ed143b8..7a798022cd 100644
--- a/gmp/mpn/power/sdiv.asm
+++ b/gmp/mpn/power/sdiv.asm
@@ -1,30 +1,19 @@
 dnl  Copyright 1999, 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/power/sub_n.asm b/gmp/mpn/power/sub_n.asm
index 390c802d8b..d34415d7e4 100644
--- a/gmp/mpn/power/sub_n.asm
+++ b/gmp/mpn/power/sub_n.asm
@@ -1,33 +1,23 @@
 dnl  IBM POWER mpn_sub_n -- Subtract two limb vectors of equal, non-zero
 dnl  length.
 
-dnl  Copyright 1992, 1994-1996, 1999-2001, 2005 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2005 Free Software
+dnl  Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  INPUT PARAMETERS
diff --git a/gmp/mpn/power/submul_1.asm b/gmp/mpn/power/submul_1.asm
index 1788e0d4f4..3c3492d00c 100644
--- a/gmp/mpn/power/submul_1.asm
+++ b/gmp/mpn/power/submul_1.asm
@@ -1,33 +1,22 @@
 dnl  IBM POWER mpn_submul_1 -- Multiply a limb vector with a limb and subtract
 dnl  the result from a second limb vector.
 
-dnl  Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  INPUT PARAMETERS
diff --git a/gmp/mpn/power/umul.asm b/gmp/mpn/power/umul.asm
index 5a0599e21d..996f2e6cb1 100644
--- a/gmp/mpn/power/umul.asm
+++ b/gmp/mpn/power/umul.asm
@@ -1,30 +1,19 @@
 dnl  Copyright 1999, 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/750/com.asm b/gmp/mpn/powerpc32/750/com.asm
deleted file mode 100644
index 1b8b574b9c..0000000000
--- a/gmp/mpn/powerpc32/750/com.asm
+++ /dev/null
@@ -1,79 +0,0 @@
-dnl  PowerPC 750 mpn_com -- mpn bitwise one's complement
-
-dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C                cycles/limb
-C 603e:            ?
-C 604e:            3.0
-C 75x (G3):        2.0
-C 7400,7410 (G4):  2.0
-C 744x,745x (G4+): 3.0
-
-C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
-C
-C This loop form is necessary for the claimed speed.
-
-ASM_START()
-PROLOGUE(mpn_com)
-
-	C r3	dst
-	C r4	src
-	C r5	size
-
-	mtctr	r5		C size
-	lwz	r5, 0(r4)	C src low limb
-
-	sub	r4, r4, r3	C src-dst
-	subi	r3, r3, 4	C dst-4
-
-	addi	r4, r4, 8	C src-dst+8
-	bdz	L(one)
-
-L(top):
-	C r3	&dst[i-1]
-	C r4	src-dst
-	C r5	src[i]
-	C r6	scratch
-
-	not	r6, r5		C ~src[i]
-	lwzx	r5, r4,r3	C src[i+1]
-
-	stwu	r6, 4(r3)	C dst[i]
-	bdnz	L(top)
-
-L(one):
-	not	r6, r5
-
-	stw	r6, 4(r3)	C dst[size-1]
-	blr
-
-EPILOGUE()
diff --git a/gmp/mpn/powerpc32/750/com_n.asm b/gmp/mpn/powerpc32/750/com_n.asm
new file mode 100644
index 0000000000..02fc4b6587
--- /dev/null
+++ b/gmp/mpn/powerpc32/750/com_n.asm
@@ -0,0 +1,68 @@
+dnl  PowerPC 750 mpn_com_n -- mpn bitwise one's complement
+
+dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C                cycles/limb
+C 603e:            ?
+C 604e:            3.0
+C 75x (G3):        2.0
+C 7400,7410 (G4):  2.0
+C 744x,745x (G4+): 3.0
+
+C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C
+C This loop form is necessary for the claimed speed.
+
+ASM_START()
+PROLOGUE(mpn_com_n)
+
+	C r3	dst
+	C r4	src
+	C r5	size
+
+	mtctr	r5		C ctr = size (loop counter)
+	lwz	r5, 0(r4)	C src low limb, loaded ahead of the loop
+
+	sub	r4, r4, r3	C src-dst
+	subi	r3, r3, 4	C dst-4, pre-biased for the stwu below
+
+	addi	r4, r4, 8	C src-dst+8
+	bdz	L(one)		C --ctr == 0 means size 1: skip the loop
+
+L(top):
+	C r3	&dst[i-1]
+	C r4	src-dst+8, so r4+r3 = &src[i+1]
+	C r5	src[i]
+	C r6	scratch
+
+	not	r6, r5		C ~src[i]
+	lwzx	r5, r4,r3	C src[i+1], fetched before dst[i] is stored
+
+	stwu	r6, 4(r3)	C dst[i], and r3 advances by one limb
+	bdnz	L(top)		C --ctr, repeat while non-zero
+
+L(one):
+	not	r6, r5		C ~src[size-1], the limb still pending
+
+	stw	r6, 4(r3)	C dst[size-1]
+	blr			C return to caller
+
+EPILOGUE()
diff --git a/gmp/mpn/powerpc32/750/gmp-mparam.h b/gmp/mpn/powerpc32/750/gmp-mparam.h
index 3667e8596d..448f2676df 100644
--- a/gmp/mpn/powerpc32/750/gmp-mparam.h
+++ b/gmp/mpn/powerpc32/750/gmp-mparam.h
@@ -1,35 +1,24 @@
 /* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 2002, 2004, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2002, 2004 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
 
 /* This file is used for 75x (G3) and for 7400/7410 (G4), both which have
@@ -37,156 +26,49 @@ see https://www.gnu.org/licenses/.  */
 
 /* 450 MHz PPC 7400 */
 
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_NORM_THRESHOLD                 3
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         11
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        18
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     38
-#define USE_PREINV_DIVREM_1                  1
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
-
-#define MUL_TOOM22_THRESHOLD                10
-#define MUL_TOOM33_THRESHOLD                38
-#define MUL_TOOM44_THRESHOLD                99
-#define MUL_TOOM6H_THRESHOLD               141
-#define MUL_TOOM8H_THRESHOLD               212
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      69
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      65
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      66
-
-#define SQR_BASECASE_THRESHOLD               4
-#define SQR_TOOM2_THRESHOLD                 18
-#define SQR_TOOM3_THRESHOLD                 57
-#define SQR_TOOM4_THRESHOLD                142
-#define SQR_TOOM6_THRESHOLD                173
-#define SQR_TOOM8_THRESHOLD                309
-
-#define MULMOD_BNM1_THRESHOLD                9
-#define SQRMOD_BNM1_THRESHOLD               11
-
-#define MUL_FFT_MODF_THRESHOLD             220  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    220, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
-    {      8, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     13, 7}, {      7, 6}, {     17, 7}, {      9, 6}, \
-    {     19, 7}, {     11, 6}, {     23, 7}, {     13, 8}, \
-    {      7, 7}, {     19, 8}, {     11, 7}, {     23, 9}, \
-    {      7, 8}, {     15, 7}, {     33, 8}, {     19, 7}, \
-    {     39, 8}, {     23, 9}, {     15, 8}, {     39, 9}, \
-    {     23, 8}, {     47,10}, {     15, 9}, {     31, 8}, \
-    {     67, 9}, {     55,10}, {     31, 9}, {     63, 8}, \
-    {    127, 7}, {    255, 9}, {     71, 8}, {    143, 7}, \
-    {    287, 9}, {     79,10}, {     47, 9}, {     95,11}, \
-    {     31,10}, {     63, 9}, {    127, 8}, {    255, 9}, \
-    {    143, 8}, {    287,10}, {     79, 9}, {    159, 8}, \
-    {    319, 9}, {    175, 8}, {    351, 7}, {    703,10}, \
-    {     95, 9}, {    191, 8}, {    383, 9}, {    207,10}, \
-    {    111,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    143, 9}, {    287, 8}, {    575,10}, {    159, 9}, \
-    {    319,10}, {    175, 9}, {    351, 8}, {    703,11}, \
-    {     95,10}, {    191, 9}, {    383,10}, {    207, 9}, \
-    {    415, 8}, {    831,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,10}, {    271, 9}, {    543,10}, \
-    {    287, 9}, {    575,11}, {    159,10}, {    351, 9}, \
-    {    703, 8}, {   1407,11}, {    191,10}, {    415, 9}, \
-    {    831,11}, {    223,10}, {    447, 9}, {    895,12}, \
-    {    127,11}, {    255,10}, {    543,11}, {    287,10}, \
-    {    575,11}, {    351,10}, {    703, 9}, {   1407,12}, \
-    {    191,11}, {    415,10}, {    831,11}, {    447,10}, \
-    {    895,13}, {    127,12}, {    255,11}, {    543,10}, \
-    {   1087,11}, {    575,12}, {    319,11}, {    703,10}, \
-    {   1407,12}, {    383,11}, {    831,12}, {    447,11}, \
-    {    895,10}, {   1791,11}, {    959,13}, {    255,12}, \
-    {    511,11}, {   1087,12}, {    575,11}, {   1215,12}, \
-    {    703,11}, {   1407,13}, {    383,12}, {    895,11}, \
-    {   1791,12}, {    959,14}, {    255,13}, {    511,12}, \
-    {   1215,13}, {    639,12}, {   1407,13}, {    895,12}, \
-    {   1919,14}, {    511,13}, {   1023,12}, {   2047,13}, \
-    {   1151,12}, {   2303,13}, {   1407,14}, {    767,13}, \
-    {   1919,10}, {  15359,12}, {   4096,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 154
-#define MUL_FFT_THRESHOLD                 2688
-
-#define SQR_FFT_MODF_THRESHOLD             184  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    184, 5}, {      6, 4}, {     13, 5}, {     13, 6}, \
-    {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
-    {     16, 7}, {      9, 6}, {     19, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
-    {     11, 7}, {     25, 9}, {      7, 8}, {     15, 7}, \
-    {     31, 8}, {     19, 7}, {     39, 8}, {     27, 9}, \
-    {     15, 8}, {     39, 9}, {     23,10}, {     15, 9}, \
-    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
-    {     47, 8}, {     95,10}, {     31, 9}, {     63, 8}, \
-    {    127, 7}, {    255, 9}, {     71, 8}, {    143, 7}, \
-    {    287, 9}, {     79, 8}, {    159,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255, 9}, {    143, 8}, {    287, 7}, {    575,10}, \
-    {     79, 9}, {    159, 8}, {    319, 9}, {    175, 8}, \
-    {    351,10}, {     95, 9}, {    191, 8}, {    383, 9}, \
-    {    207,10}, {    111,11}, {     63,10}, {    127, 9}, \
-    {    255,10}, {    143, 9}, {    287, 8}, {    575,10}, \
-    {    159, 9}, {    319,10}, {    175, 9}, {    351,11}, \
-    {     95,10}, {    191, 9}, {    383,10}, {    207, 9}, \
-    {    415, 8}, {    831,10}, {    223,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    351, 9}, {    703,11}, \
-    {    191,10}, {    415, 9}, {    831,11}, {    223,10}, \
-    {    447, 9}, {    895,12}, {    127,11}, {    255,10}, \
-    {    511,11}, {    287,10}, {    575,11}, {    319,10}, \
-    {    639,11}, {    351,10}, {    703, 9}, {   1407,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
-    {    831,11}, {    447,10}, {    895,13}, {    127,12}, \
-    {    255,11}, {    511,10}, {   1023,11}, {    575,12}, \
-    {    319,11}, {    703,10}, {   1407,12}, {    383,11}, \
-    {    831,12}, {    447,11}, {    895,10}, {   1791,11}, \
-    {    959,13}, {    255,12}, {    511,11}, {   1023,12}, \
-    {    575,11}, {   1215,12}, {    703,11}, {   1407,13}, \
-    {    383,12}, {    895,11}, {   1791,12}, {    959,14}, \
-    {    255,13}, {    511,12}, {   1215,13}, {    639,12}, \
-    {   1471,13}, {    767,12}, {   1535,13}, {    895,12}, \
-    {   1919,14}, {    511,13}, {   1151,12}, {   2431,13}, \
-    {   1407,14}, {    767,13}, {   1919,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 152
-#define SQR_FFT_THRESHOLD                 1728
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  33
-#define MULLO_MUL_N_THRESHOLD             5240
-
-#define DC_DIV_QR_THRESHOLD                 31
-#define DC_DIVAPPR_Q_THRESHOLD             108
-#define DC_BDIV_QR_THRESHOLD                35
-#define DC_BDIV_Q_THRESHOLD                 88
-
-#define INV_MULMOD_BNM1_THRESHOLD           42
-#define INV_NEWTON_THRESHOLD               149
-#define INV_APPR_THRESHOLD                 125
-
-#define BINV_NEWTON_THRESHOLD              156
-#define REDC_1_TO_REDC_N_THRESHOLD          39
-
-#define MU_DIV_QR_THRESHOLD                807
-#define MU_DIVAPPR_Q_THRESHOLD             807
-#define MUPI_DIV_QR_THRESHOLD               66
-#define MU_BDIV_QR_THRESHOLD               667
-#define MU_BDIV_Q_THRESHOLD                807
-
-#define MATRIX22_STRASSEN_THRESHOLD         11
-#define HGCD_THRESHOLD                      87
-#define GCD_DC_THRESHOLD                   233
-#define GCDEXT_DC_THRESHOLD                198
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        28
-#define SET_STR_DC_THRESHOLD               390
-#define SET_STR_PRECOMPUTE_THRESHOLD       814
+/* Generated by tuneup.c, 2008-12-23, gcc 4.0 */
+
+#define MUL_KARATSUBA_THRESHOLD          10
+#define MUL_TOOM3_THRESHOLD              41
+#define MUL_TOOM44_THRESHOLD             88
+
+#define SQR_BASECASE_THRESHOLD            4
+#define SQR_KARATSUBA_THRESHOLD          18
+#define SQR_TOOM3_THRESHOLD              57
+#define SQR_TOOM4_THRESHOLD              88
+
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD              32
+#define MULLOW_MUL_N_THRESHOLD          194
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 33
+#define POWM_THRESHOLD                   48
+
+#define MATRIX22_STRASSEN_THRESHOLD      13
+#define HGCD_THRESHOLD                   91
+#define GCD_DC_THRESHOLD                256
+#define GCDEXT_DC_THRESHOLD             256
+#define JACOBI_BASE_METHOD                1
+
+#define DIVREM_1_NORM_THRESHOLD           0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD         0  /* always */
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define USE_PREINV_DIVREM_1               1
+#define USE_PREINV_MOD_1                  1
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             13
+#define GET_STR_PRECOMPUTE_THRESHOLD     27
+#define SET_STR_DC_THRESHOLD            390
+#define SET_STR_PRECOMPUTE_THRESHOLD    814
+
+#define MUL_FFT_TABLE  { 240, 608, 896, 2560, 6144, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD          232
+#define MUL_FFT_THRESHOLD              1792
+
+#define SQR_FFT_TABLE  { 240, 544, 896, 2560, 6144, 24576, 0 }
+#define SQR_FFT_MODF_THRESHOLD          216
+#define SQR_FFT_THRESHOLD              1792
diff --git a/gmp/mpn/powerpc32/750/lshift.asm b/gmp/mpn/powerpc32/750/lshift.asm
index 3a1c1a7212..9298793f27 100644
--- a/gmp/mpn/powerpc32/750/lshift.asm
+++ b/gmp/mpn/powerpc32/750/lshift.asm
@@ -3,30 +3,19 @@ dnl  PowerPC 750 mpn_lshift -- mpn left shift.
 dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/750/rshift.asm b/gmp/mpn/powerpc32/750/rshift.asm
index 4825fee618..944e8690a7 100644
--- a/gmp/mpn/powerpc32/750/rshift.asm
+++ b/gmp/mpn/powerpc32/750/rshift.asm
@@ -3,30 +3,19 @@ dnl  PowerPC 750 mpn_rshift -- mpn right shift.
 dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/README b/gmp/mpn/powerpc32/README
index 887e78b290..43aca466c2 100644
--- a/gmp/mpn/powerpc32/README
+++ b/gmp/mpn/powerpc32/README
@@ -3,28 +3,17 @@ Copyright 2002, 2005 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/powerpc32/addlsh1_n.asm b/gmp/mpn/powerpc32/addlsh1_n.asm
index 71645c3ec3..db627a0a31 100644
--- a/gmp/mpn/powerpc32/addlsh1_n.asm
+++ b/gmp/mpn/powerpc32/addlsh1_n.asm
@@ -3,30 +3,19 @@ dnl  PowerPC-32 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
 dnl  Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/addmul_1.asm b/gmp/mpn/powerpc32/addmul_1.asm
index 7f47ab2ce7..6260691b34 100644
--- a/gmp/mpn/powerpc32/addmul_1.asm
+++ b/gmp/mpn/powerpc32/addmul_1.asm
@@ -1,33 +1,23 @@
 dnl  PowerPC-32 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
 dnl  result to a second limb vector.
 
-dnl  Copyright 1995, 1997, 1998, 2000-2003, 2005 Free Software Foundation, Inc.
+dnl  Copyright 1995, 1997, 1998, 2000, 2001, 2002, 2003, 2005 Free Software
+dnl  Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/aix.m4 b/gmp/mpn/powerpc32/aix.m4
index fde20200b2..81199c78d4 100644
--- a/gmp/mpn/powerpc32/aix.m4
+++ b/gmp/mpn/powerpc32/aix.m4
@@ -1,33 +1,22 @@
 divert(-1)
 dnl  m4 macros for AIX 32-bit assembly.
 
-dnl  Copyright 2000-2002, 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 define(`ASM_START',
 `	.toc')
diff --git a/gmp/mpn/powerpc32/aors_n.asm b/gmp/mpn/powerpc32/aors_n.asm
index 25ece0966e..f9e9b50d52 100644
--- a/gmp/mpn/powerpc32/aors_n.asm
+++ b/gmp/mpn/powerpc32/aors_n.asm
@@ -3,44 +3,30 @@ dnl  PowerPC-32 mpn_add_n and mpn_sub_n.
 dnl  Copyright 2002, 2005, 2007 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C                   cycles/limb
-C 603e:                  ?
-C 604e:                  ?		old: 3.25
-C 75x (G3):              ?		old: 3.5
-C 7400,7410 (G4):        3.25
-C 744x,745x (G4+):       4
-C POWER3/PPC630          2
-C POWER4/PPC970          2.4
-C POWER5                 2.75
-C POWER6               40-140
-C POWER7                 3
+C                cycles/limb
+C 603e:              ?
+C 604e:              ?		old: 3.25
+C 75x (G3):          ?		old: 3.5
+C 7400,7410 (G4):    3.25
+C 744x,745x (G4+):   4
+C power4/ppc970:     ?		old: 2.0
+C power5:            ?		old: 2.5
 
 C INPUT PARAMETERS
 define(`rp',	`r3')
diff --git a/gmp/mpn/powerpc32/bdiv_dbm1c.asm b/gmp/mpn/powerpc32/bdiv_dbm1c.asm
index 72b2c482e4..41870fbe8a 100644
--- a/gmp/mpn/powerpc32/bdiv_dbm1c.asm
+++ b/gmp/mpn/powerpc32/bdiv_dbm1c.asm
@@ -3,30 +3,19 @@ dnl  PPC32 mpn_bdiv_dbm1c.
 dnl  Copyright 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/darwin.m4 b/gmp/mpn/powerpc32/darwin.m4
index db4226800b..b76103a8ca 100644
--- a/gmp/mpn/powerpc32/darwin.m4
+++ b/gmp/mpn/powerpc32/darwin.m4
@@ -2,44 +2,31 @@ divert(-1)
 dnl  m4 macros for Mac OS 32-bit assembly.
 
 dnl  Copyright 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 define(`ASM_START',`')
 
-dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo[,toc])
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
 dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
 dnl
 
 define(`PROLOGUE_cpu',
-m4_assert_numargs_range(1,2)
-`ifelse(`$2',toc,,
-`ifelse(`$2',,,`m4_error(`Unrecognised PROLOGUE parameter')')')dnl
-	.text
+m4_assert_numargs(1)
+`	.text
 	.globl	$1
 	.align	3
 $1:')
diff --git a/gmp/mpn/powerpc32/diveby3.asm b/gmp/mpn/powerpc32/diveby3.asm
index 288a7d30ac..cf11a19824 100644
--- a/gmp/mpn/powerpc32/diveby3.asm
+++ b/gmp/mpn/powerpc32/diveby3.asm
@@ -1,32 +1,21 @@
 dnl  PowerPC-32 mpn_divexact_by3 -- mpn by 3 exact division
 
 dnl  Copyright 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/divrem_2.asm b/gmp/mpn/powerpc32/divrem_2.asm
index c6e64efe23..3261cbd727 100644
--- a/gmp/mpn/powerpc32/divrem_2.asm
+++ b/gmp/mpn/powerpc32/divrem_2.asm
@@ -1,32 +1,21 @@
 dnl  PPC-32 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
 
-dnl  Copyright 2007, 2008, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -95,9 +84,9 @@ C Compute di from d1
 	bge-	cr7, L(9)
 	add	r0, r0, r10
 	cmplw	cr7, r0, r10
-	cmplw	cr6, r6, r0
+	cmplw	cr6, r0, r6
 	addi	r31, r31, -1		C q1--
-	crorc	28, 28, 25
+	cror	28, 28, 25
 	bc+	12, 28, L(9)
 	addi	r31, r31, -1		C q1--
 	add	r0, r0, r10
@@ -112,9 +101,9 @@ L(9):	subf	r0, r6, r0
 	bge-	cr7, L(13)
 	add	r0, r0, r10
 	cmplw	cr7, r0, r10
-	cmplw	cr6, r11, r0
+	cmplw	cr6, r0, r11
 	addi	r6, r6, -1		C q0--
-	crorc	28, 28, 25
+	cror	28, 28, 25
 	bc+	12, 28, L(13)
 C	add	r0, r0, r10		C final remainder
 	addi	r6, r6, -1		C q0--
diff --git a/gmp/mpn/powerpc32/eabi.m4 b/gmp/mpn/powerpc32/eabi.m4
index cd7633c633..20f9a2f327 100644
--- a/gmp/mpn/powerpc32/eabi.m4
+++ b/gmp/mpn/powerpc32/eabi.m4
@@ -2,32 +2,21 @@ divert(-1)
 dnl  m4 macros for powerpc32 eABI assembly.
 
 dnl  Copyright 2003, 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 define(`ASM_START',`')
 
diff --git a/gmp/mpn/powerpc32/elf.m4 b/gmp/mpn/powerpc32/elf.m4
index a64a1271ff..ab1559ebd4 100644
--- a/gmp/mpn/powerpc32/elf.m4
+++ b/gmp/mpn/powerpc32/elf.m4
@@ -2,43 +2,31 @@ divert(-1)
 dnl  m4 macros for powerpc32 GNU/Linux assembly.
 
 dnl  Copyright 2003, 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 define(`ASM_START',`')
 
-dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo[,toc])
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
 dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
 dnl
 
 define(`PROLOGUE_cpu',
-m4_assert_numargs_range(1,2)
-`ifelse(`$2',toc,,
-`ifelse(`$2',,,`m4_error(`Unrecognised PROLOGUE parameter')')')dnl
+m4_assert_numargs(1)
+	`
 	.section	".text"
 	.align	3
 	.globl	$1
diff --git a/gmp/mpn/powerpc32/gmp-mparam.h b/gmp/mpn/powerpc32/gmp-mparam.h
index 784a6d7b74..1676317a40 100644
--- a/gmp/mpn/powerpc32/gmp-mparam.h
+++ b/gmp/mpn/powerpc32/gmp-mparam.h
@@ -1,36 +1,25 @@
 /* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2004, 2008-2010, 2014 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009
+Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
 
 /* This file is supposed to be used for 604, 604e, 744x/745x/747x (G4+), i.e.,
@@ -42,176 +31,53 @@ see https://www.gnu.org/licenses/.  */
    7400/7410 (G4), both which have much slower multiply instructions.  */
 
 /* 1417 MHz PPC 7447A */
-/* FFT tuning limit = 12500000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.6 */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      1
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          8
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        49
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     18
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              2
-#define DIV_QR_1_UNNORM_THRESHOLD            1
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           69
-
-#define MUL_TOOM22_THRESHOLD                14
-#define MUL_TOOM33_THRESHOLD                73
-#define MUL_TOOM44_THRESHOLD               106
-#define MUL_TOOM6H_THRESHOLD               156
-#define MUL_TOOM8H_THRESHOLD               236
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      71
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      72
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      82
-
-#define SQR_BASECASE_THRESHOLD               0  /* always */
-#define SQR_TOOM2_THRESHOLD                 22
-#define SQR_TOOM3_THRESHOLD                 74
-#define SQR_TOOM4_THRESHOLD                130
-#define SQR_TOOM6_THRESHOLD                189
-#define SQR_TOOM8_THRESHOLD                284
-
-#define MULMID_TOOM42_THRESHOLD             32
-
-#define MULMOD_BNM1_THRESHOLD                9
-#define SQRMOD_BNM1_THRESHOLD               14
-
-#define MUL_FFT_MODF_THRESHOLD             284  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    284, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
-    {     17, 7}, {      9, 6}, {     20, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
-    {     11, 7}, {     25, 9}, {      7, 8}, {     15, 7}, \
-    {     33, 8}, {     19, 7}, {     39, 8}, {     23, 7}, \
-    {     47, 8}, {     27, 9}, {     15, 8}, {     39, 9}, \
-    {     23, 8}, {     47,10}, {     15, 9}, {     31, 8}, \
-    {     67, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
-    {     95,10}, {     31, 9}, {     71, 8}, {    143, 9}, \
-    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
-    {     63, 9}, {    127, 8}, {    255, 9}, {    135, 8}, \
-    {    271, 9}, {    143,10}, {     79, 9}, {    159, 8}, \
-    {    319, 9}, {    175,10}, {     95, 9}, {    191, 8}, \
-    {    383, 9}, {    207, 8}, {    415,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    511, 9}, {    271,10}, \
-    {    143, 9}, {    287, 8}, {    575,10}, {    159, 9}, \
-    {    319,10}, {    175,11}, {     95,10}, {    191, 9}, \
-    {    383,10}, {    207, 9}, {    415, 8}, {    831,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
-    {    271, 9}, {    543, 8}, {   1087,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    319, 9}, {    639,10}, \
-    {    351, 9}, {    703,11}, {    191,10}, {    415, 9}, \
-    {    831,11}, {    223,10}, {    447, 9}, {    895,10}, \
-    {    479, 9}, {    959,12}, {    127,11}, {    255,10}, \
-    {    543, 9}, {   1087,11}, {    287,10}, {    575,11}, \
-    {    319,10}, {    639,11}, {    351,10}, {    703, 9}, \
-    {   1407,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,10}, {    831,11}, {    447,10}, {    895,11}, \
-    {    479,10}, {    959,13}, {    127,12}, {    255,11}, \
-    {    543,10}, {   1087,11}, {    607,12}, {    319,11}, \
-    {    639,10}, {   1279,11}, {    703,10}, {   1407,12}, \
-    {    383,11}, {    831,12}, {    447,11}, {    959,13}, \
-    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
-    {   1215,10}, {   2431,12}, {    639,11}, {   1279,12}, \
-    {    703,11}, {   1407,13}, {    383,12}, {    895,11}, \
-    {   1791,12}, {    959,14}, {    255,13}, {    511,12}, \
-    {   1215,11}, {   2431,13}, {    639,12}, {   1471,13}, \
-    {    767,12}, {   1599,13}, {    895,12}, {   1919,14}, \
-    {    511,13}, {   1023,12}, {   2111,13}, {   1151,12}, \
-    {   2431,13}, {   1407,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 160
-#define MUL_FFT_THRESHOLD                 3712
-
-#define SQR_FFT_MODF_THRESHOLD             248  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    248, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     17, 7}, {      9, 6}, {     20, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
-    {     11, 7}, {     25, 9}, {      7, 8}, {     15, 7}, \
-    {     33, 8}, {     19, 7}, {     39, 8}, {     27, 9}, \
-    {     15, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
-    {     15, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95,10}, {     31, 9}, \
-    {     63, 8}, {    127, 9}, {     71, 8}, {    143, 9}, \
-    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
-    {     63, 9}, {    127, 8}, {    255, 7}, {    511, 9}, \
-    {    143,10}, {     79, 9}, {    159, 8}, {    319, 9}, \
-    {    175, 8}, {    351,10}, {     95, 9}, {    191, 8}, \
-    {    383, 9}, {    207, 8}, {    415, 7}, {    831,11}, \
-    {     63,10}, {    127, 9}, {    255, 8}, {    511,10}, \
-    {    143, 9}, {    287, 8}, {    575,10}, {    159, 9}, \
-    {    319,10}, {    175, 9}, {    351,11}, {     95,10}, \
-    {    191, 9}, {    383,10}, {    207, 9}, {    415, 8}, \
-    {    831,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    319, 9}, {    639,10}, \
-    {    351, 9}, {    703,11}, {    191,10}, {    383, 9}, \
-    {    767,10}, {    415, 9}, {    831,11}, {    223,10}, \
-    {    447, 9}, {    895,12}, {    127,11}, {    255,10}, \
-    {    543,11}, {    287,10}, {    607,11}, {    319,10}, \
-    {    639,11}, {    351,10}, {    703, 9}, {   1407,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
-    {    831,11}, {    447,10}, {    895,11}, {    479,13}, \
-    {    127,12}, {    255,11}, {    543,10}, {   1087,11}, \
-    {    607,12}, {    319,11}, {    639,10}, {   1279,11}, \
-    {    703,10}, {   1407,12}, {    383,11}, {    831,12}, \
-    {    447,11}, {    959,13}, {    255,12}, {    511,11}, \
-    {   1087,12}, {    575,11}, {   1215,12}, {    639,11}, \
-    {   1279,12}, {    703,11}, {   1407,13}, {    383,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1215,13}, \
-    {    639,12}, {   1471,13}, {    767,12}, {   1599,13}, \
-    {    895,12}, {   1919,14}, {    511,13}, {   1023,12}, \
-    {   2111,13}, {   1151,12}, {   2431,13}, {   1407,14}, \
-    {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 154
-#define SQR_FFT_THRESHOLD                 2688
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  45
-#define MULLO_MUL_N_THRESHOLD             6633
-
-#define DC_DIV_QR_THRESHOLD                 44
-#define DC_DIVAPPR_Q_THRESHOLD             142
-#define DC_BDIV_QR_THRESHOLD                54
-#define DC_BDIV_Q_THRESHOLD                124
-
-#define INV_MULMOD_BNM1_THRESHOLD           43
-#define INV_NEWTON_THRESHOLD               179
-#define INV_APPR_THRESHOLD                 157
-
-#define BINV_NEWTON_THRESHOLD              214
-#define REDC_1_TO_REDC_N_THRESHOLD          55
-
-#define MU_DIV_QR_THRESHOLD                998
-#define MU_DIVAPPR_Q_THRESHOLD            1078
-#define MUPI_DIV_QR_THRESHOLD               84
-#define MU_BDIV_QR_THRESHOLD               872
-#define MU_BDIV_Q_THRESHOLD               1078
-
-#define POWM_SEC_TABLE  1,19,102,428,1378
-
-#define MATRIX22_STRASSEN_THRESHOLD         12
-#define HGCD_THRESHOLD                     120
-#define HGCD_APPR_THRESHOLD                166
-#define HGCD_REDUCE_THRESHOLD             1679
-#define GCD_DC_THRESHOLD                   339
-#define GCDEXT_DC_THRESHOLD                273
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        27
-#define SET_STR_DC_THRESHOLD               781
-#define SET_STR_PRECOMPUTE_THRESHOLD      1505
-
-#define FAC_DSC_THRESHOLD                  141
-#define FAC_ODD_THRESHOLD                   29
+
+/* Generated by tuneup.c, 2009-01-14, gcc 4.3 */
+
+#define MUL_KARATSUBA_THRESHOLD          14
+#define MUL_TOOM3_THRESHOLD              73
+#define MUL_TOOM44_THRESHOLD            106
+
+#define SQR_BASECASE_THRESHOLD            4
+#define SQR_KARATSUBA_THRESHOLD          24
+#define SQR_TOOM3_THRESHOLD              77
+#define SQR_TOOM4_THRESHOLD             130
+
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD              52
+#define MULLOW_MUL_N_THRESHOLD          292
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 46
+#define POWM_THRESHOLD                   87
+
+#define MATRIX22_STRASSEN_THRESHOLD      15
+#define HGCD_THRESHOLD                  127
+#define GCD_DC_THRESHOLD                361
+#define GCDEXT_DC_THRESHOLD             382
+#define JACOBI_BASE_METHOD                1
+
+#define DIVREM_1_NORM_THRESHOLD           0  /* always */
+#define DIVREM_1_UNNORM_THRESHOLD         0  /* always */
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1_THRESHOLD                 7
+#define MOD_1_2_THRESHOLD                21
+#define MOD_1_4_THRESHOLD                68
+#define USE_PREINV_DIVREM_1               1
+#define USE_PREINV_MOD_1                  1
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             22
+#define GET_STR_PRECOMPUTE_THRESHOLD     42
+#define SET_STR_DC_THRESHOLD            788
+#define SET_STR_PRECOMPUTE_THRESHOLD   1554
+
+#define MUL_FFT_TABLE  { 304, 672, 1152, 2560, 6144, 24576, 0 }
+#define MUL_FFT_MODF_THRESHOLD          320
+#define MUL_FFT_THRESHOLD              2816
+
+#define SQR_FFT_TABLE  { 272, 672, 1152, 2560, 10240, 24576, 0 }
+#define SQR_FFT_MODF_THRESHOLD          288
+#define SQR_FFT_THRESHOLD              2304
diff --git a/gmp/mpn/powerpc32/invert_limb.asm b/gmp/mpn/powerpc32/invert_limb.asm
deleted file mode 100644
index 612bfe523c..0000000000
--- a/gmp/mpn/powerpc32/invert_limb.asm
+++ /dev/null
@@ -1,142 +0,0 @@
-dnl  PowerPC-32 mpn_invert_limb -- Invert a normalized limb.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		 cycles/limb
-C 603e:		      ?
-C 604e:		      ?
-C 75x (G3):	      ?
-C 7400,7410 (G4):     ?
-C 744x,745x (G4+):   32
-C power4/ppc970:      ?
-C power5:	      ?
-
-EXTERN(approx_tab)
-
-ASM_START()
-PROLOGUE(mpn_invert_limb)
-	rlwinm	r6, r3, 11, 22, 30	C extract bits 30..22 to pos 2^1
-	srwi	r10, r3, 11		C extract bits 31..11
-	LEA(	r9, approx_tab)		C N.B. clobbers r0 for ELF and Darwin
-	lhzx	r9, r9, r6		C w2
-	addi	r0, r10, 1
-	mullw	r11, r9, r9
-	slwi	r9, r9, 4
-	mulhwu	r7, r11, r0
-	rlwinm	r11, r3, 0, 31, 31	C extract bit 0
-	addi	r0, r9, -1
-	srwi	r9, r3, 1		C d >> 1
-	subf	r0, r7, r0		C w1
-	add	r9, r9, r11		C d31
-	mullw	r9, r0, r9		C w1 * d31
-	srwi	r10, r0, 1		C w1 >> 1
-	neg	r11, r11
-	and	r11, r10, r11
-	subf	r11, r9, r11
-	mulhwu	r9, r11, r0
-	slwi	r0, r0, 15
-	srwi	r9, r9, 1
-	add	r0, r9, r0		C w0
-	mullw	r10, r0, r3
-	mulhwu	r9, r0, r3
-	addc	r11, r10, r3
-	adde	r3, r9, r3
-	subf	r3, r3, r0
-	blr
-EPILOGUE()
-
-DEF_OBJECT(approx_tab)
-	.short 0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
-	.short 0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
-	.short 0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
-	.short 0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
-	.short 0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
-	.short 0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
-	.short 0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
-	.short 0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
-	.short 0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
-	.short 0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
-	.short 0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
-	.short 0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
-	.short 0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
-	.short 0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
-	.short 0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
-	.short 0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
-	.short 0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
-	.short 0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
-	.short 0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
-	.short 0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
-	.short 0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
-	.short 0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
-	.short 0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
-	.short 0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
-	.short 0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
-	.short 0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
-	.short 0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
-	.short 0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
-	.short 0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
-	.short 0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
-	.short 0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
-	.short 0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
-	.short 0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
-	.short 0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
-	.short 0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
-	.short 0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
-	.short 0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
-	.short 0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
-	.short 0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
-	.short 0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
-	.short 0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
-	.short 0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
-	.short 0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
-	.short 0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
-	.short 0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
-	.short 0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
-	.short 0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
-	.short 0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
-	.short 0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
-	.short 0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
-	.short 0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
-	.short 0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
-	.short 0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
-	.short 0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
-	.short 0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
-	.short 0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
-	.short 0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
-	.short 0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
-	.short 0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
-	.short 0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
-	.short 0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
-	.short 0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
-	.short 0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
-	.short 0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
-END_OBJECT(approx_tab)
-ASM_END()
diff --git a/gmp/mpn/powerpc32/lshift.asm b/gmp/mpn/powerpc32/lshift.asm
index 948f8c6cf3..e306173146 100644
--- a/gmp/mpn/powerpc32/lshift.asm
+++ b/gmp/mpn/powerpc32/lshift.asm
@@ -1,32 +1,22 @@
 dnl  PowerPC-32 mpn_lshift -- Shift a number left.
 
-dnl  Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc.
+dnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005 Free Software
+dnl  Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -48,7 +38,7 @@ C cnt	r6
 
 ASM_START()
 PROLOGUE(mpn_lshift)
-	cmpwi	cr0, r5, 30	C more than 30 limbs?
+	cmpwi	cr0, r5, 12	C more than 12 limbs?
 	slwi	r0, r5, 2
 	add	r4, r4, r0	C make r4 point at end of s1
 	add	r7, r3, r0	C make r7 point at end of res
@@ -163,4 +153,4 @@ L(loopU):
 	stw	r12, -20(r7)
 	lmw	r24, -32(r1)	C restore registers
 	blr
-EPILOGUE()
+EPILOGUE(mpn_lshift)
diff --git a/gmp/mpn/powerpc32/lshiftc.asm b/gmp/mpn/powerpc32/lshiftc.asm
deleted file mode 100644
index 61606d1b66..0000000000
--- a/gmp/mpn/powerpc32/lshiftc.asm
+++ /dev/null
@@ -1,168 +0,0 @@
-dnl  PowerPC-32 mpn_lshiftc.
-
-dnl  Copyright 1995, 1998, 2000, 2002-2005, 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                cycles/limb
-C 603e:            ?
-C 604e:            3.0
-C 75x (G3):        3.0
-C 7400,7410 (G4):  3.0
-C 7445,7455 (G4+): 2.5
-C 7447,7457 (G4+): 2.25
-C power4/ppc970:   2.5
-C power5:          2.5
-
-C INPUT PARAMETERS
-C rp	r3
-C up	r4
-C n	r5
-C cnt	r6
-
-ASM_START()
-PROLOGUE(mpn_lshiftc)
-	cmpwi	cr0, r5, 30	C more than 30 limbs?
-	slwi	r0, r5, 2
-	add	r4, r4, r0	C make r4 point at end of s1
-	add	r7, r3, r0	C make r7 point at end of res
-	bgt	L(BIG)		C branch if more than 12 limbs
-
-	mtctr	r5		C copy size into CTR
-	subfic	r8, r6, 32
-	lwzu	r11, -4(r4)	C load first s1 limb
-	srw	r3, r11, r8	C compute function return value
-	bdz	L(end1)
-
-L(oop):	lwzu	r10, -4(r4)
-	slw	r9, r11, r6
-	srw	r12, r10, r8
-	nor	r9, r9, r12
-	stwu	r9, -4(r7)
-	bdz	L(end2)
-	lwzu	r11, -4(r4)
-	slw	r9, r10, r6
-	srw	r12, r11, r8
-	nor	r9, r9, r12
-	stwu	r9, -4(r7)
-	bdnz	L(oop)
-
-L(end1):
-	slw	r0, r11, r6
-	nor	r0, r0, r0
-	stw	r0, -4(r7)
-	blr
-L(end2):
-	slw	r0, r10, r6
-	nor	r0, r0, r0
-	stw	r0, -4(r7)
-	blr
-
-L(BIG):
-	stmw	r24, -32(r1)	C save registers we are supposed to preserve
-	lwzu	r9, -4(r4)
-	subfic	r8, r6, 32
-	srw	r3, r9, r8	C compute function return value
-	slw	r0, r9, r6
-	addi	r5, r5, -1
-
-	andi.	r10, r5, 3	C count for spill loop
-	beq	L(e)
-	mtctr	r10
-	lwzu	r28, -4(r4)
-	bdz	L(xe0)
-
-L(loop0):
-	slw	r12, r28, r6
-	srw	r24, r28, r8
-	lwzu	r28, -4(r4)
-	nor	r24, r0, r24
-	stwu	r24, -4(r7)
-	mr	r0, r12
-	bdnz	L(loop0)	C taken at most once!
-
-L(xe0):	slw	r12, r28, r6
-	srw	r24, r28, r8
-	nor	r24, r0, r24
-	stwu	r24, -4(r7)
-	mr	r0, r12
-
-L(e):	srwi	r5, r5, 2	C count for unrolled loop
-	addi	r5, r5, -1
-	mtctr	r5
-	lwz	r28, -4(r4)
-	lwz	r29, -8(r4)
-	lwz	r30, -12(r4)
-	lwzu	r31, -16(r4)
-
-L(loopU):
-	slw	r9, r28, r6
-	srw	r24, r28, r8
-	lwz	r28, -4(r4)
-	slw	r10, r29, r6
-	srw	r25, r29, r8
-	lwz	r29, -8(r4)
-	slw	r11, r30, r6
-	srw	r26, r30, r8
-	lwz	r30, -12(r4)
-	slw	r12, r31, r6
-	srw	r27, r31, r8
-	lwzu	r31, -16(r4)
-	nor	r24, r0, r24
-	stw	r24, -4(r7)
-	nor	r25, r9, r25
-	stw	r25, -8(r7)
-	nor	r26, r10, r26
-	stw	r26, -12(r7)
-	nor	r27, r11, r27
-	stwu	r27, -16(r7)
-	mr	r0, r12
-	bdnz	L(loopU)
-
-	slw	r9, r28, r6
-	srw	r24, r28, r8
-	slw	r10, r29, r6
-	srw	r25, r29, r8
-	slw	r11, r30, r6
-	srw	r26, r30, r8
-	slw	r12, r31, r6
-	srw	r27, r31, r8
-	nor	r24, r0, r24
-	stw	r24, -4(r7)
-	nor	r25, r9, r25
-	stw	r25, -8(r7)
-	nor	r26, r10, r26
-	stw	r26, -12(r7)
-	nor	r27, r11, r27
-	stw	r27, -16(r7)
-	nor	r12, r12, r12
-	stw	r12, -20(r7)
-	lmw	r24, -32(r1)	C restore registers
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc32/mod_34lsub1.asm b/gmp/mpn/powerpc32/mod_34lsub1.asm
index 6d7fe4d089..fa0f0139ee 100644
--- a/gmp/mpn/powerpc32/mod_34lsub1.asm
+++ b/gmp/mpn/powerpc32/mod_34lsub1.asm
@@ -3,30 +3,19 @@ dnl  PowerPC-32 mpn_mod_34lsub1 -- mpn remainder mod 2^24-1.
 dnl  Copyright 2002, 2003, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/mode1o.asm b/gmp/mpn/powerpc32/mode1o.asm
index e8a6b5e28a..ba9a393b09 100644
--- a/gmp/mpn/powerpc32/mode1o.asm
+++ b/gmp/mpn/powerpc32/mode1o.asm
@@ -1,32 +1,21 @@
 dnl  PowerPC-32 mpn_modexact_1_odd -- mpn by limb exact remainder.
 
 dnl  Copyright 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/mul_1.asm b/gmp/mpn/powerpc32/mul_1.asm
index e42087cfa8..e6f44e21d9 100644
--- a/gmp/mpn/powerpc32/mul_1.asm
+++ b/gmp/mpn/powerpc32/mul_1.asm
@@ -5,30 +5,19 @@ dnl  Copyright 1995, 1997, 2000, 2002, 2003, 2005 Free Software Foundation,
 dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/p3-p7/aors_n.asm b/gmp/mpn/powerpc32/p3-p7/aors_n.asm
deleted file mode 100644
index c44df8fa50..0000000000
--- a/gmp/mpn/powerpc32/p3-p7/aors_n.asm
+++ /dev/null
@@ -1,186 +0,0 @@
-dnl  PowerPC-32 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
-
-dnl  Copyright 1999-2001, 2003-2005, 2007, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                   cycles/limb
-C POWER3/PPC630          1.5
-C POWER4/PPC970          2
-C POWER5                 2
-C POWER6                 2.78
-C POWER7               2.15-2.87
-
-C This code is based on powerpc64/aors_n.asm.
-
-C INPUT PARAMETERS
-C rp	r3
-C up	r4
-C vp	r5
-C n	r6
-
-ifdef(`OPERATION_add_n',`
-  define(ADDSUBC,	adde)
-  define(ADDSUB,	addc)
-  define(func,		mpn_add_n)
-  define(func_nc,	mpn_add_nc)
-  define(GENRVAL,	`addi	r3, r3, 1')
-  define(SETCBR,	`addic	r0, $1, -1')
-  define(CLRCB,		`addic	r0, r0, 0')
-')
-ifdef(`OPERATION_sub_n',`
-  define(ADDSUBC,	subfe)
-  define(ADDSUB,	subfc)
-  define(func,		mpn_sub_n)
-  define(func_nc,	mpn_sub_nc)
-  define(GENRVAL,	`neg	r3, r3')
-  define(SETCBR,	`subfic	r0, $1, 0')
-  define(CLRCB,		`addic	r0, r1, -1')
-')
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-ASM_START()
-PROLOGUE(func_nc)
-	SETCBR(r7)
-	b	L(ent)
-EPILOGUE()
-
-PROLOGUE(func)
-	CLRCB
-L(ent):	stw	r31, -4(r1)
-	stw	r30, -8(r1)
-	stw	r29, -12(r1)
-	stw	r28, -16(r1)
-
-	rlwinm.	r0, r6, 0,30,31	C r0 = n & 3, set cr0
-	cmpwi	cr6, r0, 2
-	addi	r6, r6, 3	C compute count...
-	srwi	r6, r6, 2	C ...for ctr
-	mtctr	r6		C copy count into ctr
-	beq	cr0, L(b00)
-	blt	cr6, L(b01)
-	beq	cr6, L(b10)
-
-L(b11):	lwz	r8, 0(r4)	C load s1 limb
-	lwz	r9, 0(r5)	C load s2 limb
-	lwz	r10, 4(r4)	C load s1 limb
-	lwz	r11, 4(r5)	C load s2 limb
-	lwz	r12, 8(r4)	C load s1 limb
-	addi	r4, r4, 12
-	lwz	r0, 8(r5)	C load s2 limb
-	addi	r5, r5, 12
-	ADDSUBC	r29, r9, r8
-	ADDSUBC	r30, r11, r10
-	ADDSUBC	r31, r0, r12
-	stw	r29, 0(r3)
-	stw	r30, 4(r3)
-	stw	r31, 8(r3)
-	addi	r3, r3, 12
-	bdnz	L(go)
-	b	L(ret)
-
-L(b01):	lwz	r12, 0(r4)	C load s1 limb
-	addi	r4, r4, 4
-	lwz	r0, 0(r5)	C load s2 limb
-	addi	r5, r5, 4
-	ADDSUBC	r31, r0, r12	C add
-	stw	r31, 0(r3)
-	addi	r3, r3, 4
-	bdnz	L(go)
-	b	L(ret)
-
-L(b10):	lwz	r10, 0(r4)	C load s1 limb
-	lwz	r11, 0(r5)	C load s2 limb
-	lwz	r12, 4(r4)	C load s1 limb
-	addi	r4, r4, 8
-	lwz	r0, 4(r5)	C load s2 limb
-	addi	r5, r5, 8
-	ADDSUBC	r30, r11, r10	C add
-	ADDSUBC	r31, r0, r12	C add
-	stw	r30, 0(r3)
-	stw	r31, 4(r3)
-	addi	r3, r3, 8
-	bdnz	L(go)
-	b	L(ret)
-
-L(b00):	C INITCY		C clear/set cy
-L(go):	lwz	r6, 0(r4)	C load s1 limb
-	lwz	r7, 0(r5)	C load s2 limb
-	lwz	r8, 4(r4)	C load s1 limb
-	lwz	r9, 4(r5)	C load s2 limb
-	lwz	r10, 8(r4)	C load s1 limb
-	lwz	r11, 8(r5)	C load s2 limb
-	lwz	r12, 12(r4)	C load s1 limb
-	lwz	r0, 12(r5)	C load s2 limb
-	bdz	L(end)
-
-	addi	r4, r4, 16
-	addi	r5, r5, 16
-
-	ALIGN(16)
-L(top):	ADDSUBC	r28, r7, r6
-	lwz	r6, 0(r4)	C load s1 limb
-	lwz	r7, 0(r5)	C load s2 limb
-	ADDSUBC	r29, r9, r8
-	lwz	r8, 4(r4)	C load s1 limb
-	lwz	r9, 4(r5)	C load s2 limb
-	ADDSUBC	r30, r11, r10
-	lwz	r10, 8(r4)	C load s1 limb
-	lwz	r11, 8(r5)	C load s2 limb
-	ADDSUBC	r31, r0, r12
-	lwz	r12, 12(r4)	C load s1 limb
-	lwz	r0, 12(r5)	C load s2 limb
-	stw	r28, 0(r3)
-	addi	r4, r4, 16
-	stw	r29, 4(r3)
-	addi	r5, r5, 16
-	stw	r30, 8(r3)
-	stw	r31, 12(r3)
-	addi	r3, r3, 16
-	bdnz	L(top)		C decrement ctr and loop back
-
-L(end):	ADDSUBC	r28, r7, r6
-	ADDSUBC	r29, r9, r8
-	ADDSUBC	r30, r11, r10
-	ADDSUBC	r31, r0, r12
-	stw	r28, 0(r3)
-	stw	r29, 4(r3)
-	stw	r30, 8(r3)
-	stw	r31, 12(r3)
-
-L(ret):	lwz	r31, -4(r1)
-	lwz	r30, -8(r1)
-	lwz	r29, -12(r1)
-	lwz	r28, -16(r1)
-
-	subfe	r3, r0, r0	C -cy
-	GENRVAL
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc32/p3/gmp-mparam.h b/gmp/mpn/powerpc32/p3/gmp-mparam.h
deleted file mode 100644
index 33826956a2..0000000000
--- a/gmp/mpn/powerpc32/p3/gmp-mparam.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2004, 2008-2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 450 MHz POWER3 */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        12
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        18
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
-#define USE_PREINV_DIVREM_1                  1
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
-
-#define MUL_TOOM22_THRESHOLD                10
-#define MUL_TOOM33_THRESHOLD                38
-#define MUL_TOOM44_THRESHOLD                58
-#define MUL_TOOM6H_THRESHOLD               129
-#define MUL_TOOM8H_THRESHOLD               212
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      63
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      59
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      64
-
-#define SQR_BASECASE_THRESHOLD               0  /* always */
-#define SQR_TOOM2_THRESHOLD                 14
-#define SQR_TOOM3_THRESHOLD                 53
-#define SQR_TOOM4_THRESHOLD                 76
-#define SQR_TOOM6_THRESHOLD                106
-#define SQR_TOOM8_THRESHOLD                284
-
-#define MULMOD_BNM1_THRESHOLD                9
-#define SQRMOD_BNM1_THRESHOLD                9
-
-#define MUL_FFT_MODF_THRESHOLD             220  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    220, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
-    {      9, 5}, {     19, 6}, {     13, 7}, {      7, 6}, \
-    {     16, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
-    {     11, 7}, {     23, 9}, {      7, 8}, {     15, 7}, \
-    {     33, 8}, {     23, 9}, {     15, 8}, {     35, 9}, \
-    {     23,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
-    {     39, 8}, {     79, 9}, {     47,10}, {     31, 9}, \
-    {     63, 8}, {    127, 9}, {     71, 8}, {    143, 9}, \
-    {     79,10}, {     47,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255, 9}, {    143,10}, {     79, 9}, \
-    {    159, 8}, {    319, 9}, {    175, 8}, {    351,10}, \
-    {     95, 9}, {    191, 8}, {    383,10}, {    111,11}, \
-    {     63,10}, {    127, 9}, {    255,10}, {    143, 9}, \
-    {    287, 8}, {    575,10}, {    159, 9}, {    319,10}, \
-    {    175, 9}, {    351,11}, {     95,10}, {    191, 9}, \
-    {    383,10}, {    207, 9}, {    415,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    351, 9}, {    703, 8}, \
-    {   1407,11}, {    191,10}, {    415,11}, {    223,10}, \
-    {    447, 9}, {    895,12}, {   4096,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 82
-#define MUL_FFT_THRESHOLD                 2688
-
-#define SQR_FFT_MODF_THRESHOLD             176  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    176, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
-    {     13, 7}, {      7, 6}, {     16, 7}, {      9, 6}, \
-    {     19, 7}, {     11, 6}, {     23, 7}, {     13, 8}, \
-    {      7, 7}, {     19, 8}, {     11, 7}, {     23, 9}, \
-    {      7, 8}, {     15, 7}, {     31, 8}, {     23, 9}, \
-    {     15, 8}, {     39, 9}, {     23,10}, {     15, 9}, \
-    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
-    {     47, 8}, {     95,10}, {     31, 9}, {     63, 8}, \
-    {    127, 9}, {     71, 8}, {    143, 7}, {    287, 6}, \
-    {    575, 9}, {     79, 8}, {    159,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255, 9}, {    143, 8}, {    287, 7}, {    575,10}, \
-    {     79, 9}, {    159, 8}, {    319, 9}, {    175,10}, \
-    {     95, 9}, {    191, 8}, {    383,10}, {    111, 9}, \
-    {    223,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    143, 9}, {    287, 8}, {    575,10}, {    159, 9}, \
-    {    319,10}, {    175,11}, {     95,10}, {    191, 9}, \
-    {    383,10}, {    223,12}, {     63,11}, {    127,10}, \
-    {    287, 9}, {    575,11}, {    159,10}, {    351, 9}, \
-    {    703, 8}, {   1407,11}, {    191,10}, {    383,11}, \
-    {    223,10}, {    447, 9}, {    895,12}, {   4096,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 87
-#define SQR_FFT_THRESHOLD                 1728
-
-#define MULLO_BASECASE_THRESHOLD             2
-#define MULLO_DC_THRESHOLD                  33
-#define MULLO_MUL_N_THRESHOLD             5240
-
-#define DC_DIV_QR_THRESHOLD                 32
-#define DC_DIVAPPR_Q_THRESHOLD             123
-#define DC_BDIV_QR_THRESHOLD                34
-#define DC_BDIV_Q_THRESHOLD                 84
-
-#define INV_MULMOD_BNM1_THRESHOLD           42
-#define INV_NEWTON_THRESHOLD               129
-#define INV_APPR_THRESHOLD                 124
-
-#define BINV_NEWTON_THRESHOLD              148
-#define REDC_1_TO_REDC_N_THRESHOLD          38
-
-#define MU_DIV_QR_THRESHOLD                748
-#define MU_DIVAPPR_Q_THRESHOLD             748
-#define MUPI_DIV_QR_THRESHOLD               59
-#define MU_BDIV_QR_THRESHOLD               562
-#define MU_BDIV_Q_THRESHOLD                654
-
-#define MATRIX22_STRASSEN_THRESHOLD         11
-#define HGCD_THRESHOLD                      76
-#define GCD_DC_THRESHOLD                   205
-#define GCDEXT_DC_THRESHOLD                174
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        27
-#define SET_STR_DC_THRESHOLD               181
-#define SET_STR_PRECOMPUTE_THRESHOLD       525
diff --git a/gmp/mpn/powerpc32/p4/gmp-mparam.h b/gmp/mpn/powerpc32/p4/gmp-mparam.h
deleted file mode 100644
index 20830a0bd7..0000000000
--- a/gmp/mpn/powerpc32/p4/gmp-mparam.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2004, 2008-2011, 2014 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-/* 1800 MHz PowerPC-970 */
-/* FFT tuning limit = 10000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.0 */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      1
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         9
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        42
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     14
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD            1
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           45
-
-#define MUL_TOOM22_THRESHOLD                20
-#define MUL_TOOM33_THRESHOLD                73
-#define MUL_TOOM44_THRESHOLD               130
-#define MUL_TOOM6H_THRESHOLD               222
-#define MUL_TOOM8H_THRESHOLD               333
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     107
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     108
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      89
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      92
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     100
-
-#define SQR_BASECASE_THRESHOLD               5
-#define SQR_TOOM2_THRESHOLD                 30
-#define SQR_TOOM3_THRESHOLD                 85
-#define SQR_TOOM4_THRESHOLD                160
-#define SQR_TOOM6_THRESHOLD                197
-#define SQR_TOOM8_THRESHOLD                357
-
-#define MULMID_TOOM42_THRESHOLD             32
-
-#define MULMOD_BNM1_THRESHOLD               15
-#define SQRMOD_BNM1_THRESHOLD               16
-
-#define MUL_FFT_MODF_THRESHOLD             444  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    444, 5}, {     17, 6}, {      9, 5}, {     21, 6}, \
-    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
-    {     24, 7}, {     13, 6}, {     28, 7}, {     15, 6}, \
-    {     31, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
-    {     15, 7}, {     33, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
-    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
-    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
-    {     47, 8}, {     95,10}, {     31, 9}, {     63, 8}, \
-    {    127, 9}, {     79,10}, {     47, 9}, {     95,11}, \
-    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
-    {    167,10}, {     95, 9}, {    191, 8}, {    383,10}, \
-    {    111,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511,10}, {    143, 9}, {    287, 8}, {    575, 9}, \
-    {    303,10}, {    159, 9}, {    319,11}, {     95,10}, \
-    {    191, 9}, {    383,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,10}, {    271, 9}, {    543, 8}, \
-    {   1087,10}, {    287, 9}, {    575,10}, {    303,11}, \
-    {    159,10}, {    335, 9}, {    671, 8}, {   1343,10}, \
-    {    351, 9}, {    703,11}, {    191,10}, {    383, 9}, \
-    {    767,10}, {    415, 9}, {    831,11}, {    223,10}, \
-    {    447,12}, {    127,11}, {    255,10}, {    543, 9}, \
-    {   1087,11}, {    287,10}, {    607, 9}, {   1215,11}, \
-    {    319,10}, {    671, 9}, {   1343,11}, {    351,10}, \
-    {    703, 9}, {   1407,12}, {    191,11}, {    383,10}, \
-    {    767,11}, {    415,10}, {    831,11}, {    447,13}, \
-    {    127,12}, {    255,11}, {    543,10}, {   1087,11}, \
-    {    607,10}, {   1215,12}, {    319,11}, {    671,10}, \
-    {   1343,11}, {    703,10}, {   1407,11}, {    735,12}, \
-    {    383,11}, {    767,10}, {   1535,11}, {    831,12}, \
-    {    447,10}, {   1791,11}, {    959,13}, {    255,12}, \
-    {    511,11}, {   1087,12}, {    575,11}, {   1215,10}, \
-    {   2431,12}, {    639,11}, {   1343,12}, {    703,11}, \
-    {   1407,13}, {    383,12}, {    767,11}, {   1535,12}, \
-    {    831,11}, {   1727,10}, {   3455,11}, {   1791,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1215,11}, \
-    {   2431,13}, {    639,12}, {   1471,13}, {    767,12}, \
-    {   1727,11}, {   3455,12}, {   1791,14}, {    511,13}, \
-    {   1151,12}, {   2431,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 157
-#define MUL_FFT_THRESHOLD                 6784
-
-#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    340, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     21, 7}, {     11, 6}, {     24, 7}, {     13, 6}, \
-    {     28, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
-    {     15, 7}, {     33, 8}, {     19, 7}, {     39, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
-    {     39, 9}, {     23, 8}, {     47,10}, {     15, 9}, \
-    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
-    {     47,10}, {     31, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255, 9}, {    135,10}, {     79, 9}, {    159, 8}, \
-    {    319,10}, {     95, 9}, {    191, 8}, {    383, 9}, \
-    {    207,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511, 9}, {    271,10}, {    143, 9}, {    287, 8}, \
-    {    575, 9}, {    303, 8}, {    607,10}, {    159, 9}, \
-    {    319,10}, {    175,11}, {     95,10}, {    191, 9}, \
-    {    383,10}, {    207,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,10}, {    271, 9}, {    543, 8}, \
-    {   1087,10}, {    287, 9}, {    575,10}, {    303, 9}, \
-    {    607,11}, {    159,10}, {    319, 9}, {    639,10}, \
-    {    335, 9}, {    671,10}, {    351, 9}, {    703,11}, \
-    {    191,10}, {    383, 9}, {    767,10}, {    415, 9}, \
-    {    831,11}, {    223,10}, {    447,12}, {    127,11}, \
-    {    255,10}, {    543, 9}, {   1087,11}, {    287,10}, \
-    {    607, 9}, {   1215,11}, {    319,10}, {    671,11}, \
-    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
-    {    767,11}, {    415,10}, {    831,11}, {    479,13}, \
-    {    127,12}, {    255,11}, {    543,10}, {   1087,11}, \
-    {    607,10}, {   1215,12}, {    319,11}, {    671,10}, \
-    {   1343,11}, {    703,10}, {   1407,11}, {    735,12}, \
-    {    383,11}, {    831,12}, {    447,11}, {    959,13}, \
-    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
-    {   1215,12}, {    639,11}, {   1343,12}, {    703,11}, \
-    {   1407,13}, {    383,12}, {    831,11}, {   1727,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1215,13}, \
-    {    639,12}, {   1471,13}, {    767,12}, {   1727,13}, \
-    {    895,12}, {   1919,14}, {    511,13}, {   1023,12}, \
-    {   2111,13}, {   1151,12}, {   2431,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 150
-#define SQR_FFT_THRESHOLD                 4736
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  55
-#define MULLO_MUL_N_THRESHOLD            13463
-
-#define DC_DIV_QR_THRESHOLD                 50
-#define DC_DIVAPPR_Q_THRESHOLD             196
-#define DC_BDIV_QR_THRESHOLD                51
-#define DC_BDIV_Q_THRESHOLD                166
-
-#define INV_MULMOD_BNM1_THRESHOLD           50
-#define INV_NEWTON_THRESHOLD               226
-#define INV_APPR_THRESHOLD                 202
-
-#define BINV_NEWTON_THRESHOLD              228
-#define REDC_1_TO_REDC_N_THRESHOLD          67
-
-#define MU_DIV_QR_THRESHOLD               1187
-#define MU_DIVAPPR_Q_THRESHOLD            1308
-#define MUPI_DIV_QR_THRESHOLD              114
-#define MU_BDIV_QR_THRESHOLD               998
-#define MU_BDIV_Q_THRESHOLD               1142
-
-#define POWM_SEC_TABLE  3,28,78,480,1099
-
-#define MATRIX22_STRASSEN_THRESHOLD          9
-#define HGCD_THRESHOLD                      93
-#define HGCD_APPR_THRESHOLD                109
-#define HGCD_REDUCE_THRESHOLD             2479
-#define GCD_DC_THRESHOLD                   379
-#define GCDEXT_DC_THRESHOLD                273
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                11
-#define GET_STR_PRECOMPUTE_THRESHOLD        24
-#define SET_STR_DC_THRESHOLD               381
-#define SET_STR_PRECOMPUTE_THRESHOLD      1002
-
-#define FAC_DSC_THRESHOLD                  179
-#define FAC_ODD_THRESHOLD                   28
diff --git a/gmp/mpn/powerpc32/p5/gmp-mparam.h b/gmp/mpn/powerpc32/p5/gmp-mparam.h
deleted file mode 100644
index faa1e81da4..0000000000
--- a/gmp/mpn/powerpc32/p5/gmp-mparam.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2004, 2008-2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 1650 MHz POWER5 */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      1
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          8
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         9
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        50
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     18
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           61
-
-#define MUL_TOOM22_THRESHOLD                22
-#define MUL_TOOM33_THRESHOLD                57
-#define MUL_TOOM44_THRESHOLD               130
-#define MUL_TOOM6H_THRESHOLD               189
-#define MUL_TOOM8H_THRESHOLD               309
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      99
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      83
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      88
-
-#define SQR_BASECASE_THRESHOLD               6
-#define SQR_TOOM2_THRESHOLD                 40
-#define SQR_TOOM3_THRESHOLD                 77
-#define SQR_TOOM4_THRESHOLD                124
-#define SQR_TOOM6_THRESHOLD                140
-#define SQR_TOOM8_THRESHOLD                238
-
-#define MULMID_TOOM42_THRESHOLD             40
-
-#define MULMOD_BNM1_THRESHOLD               15
-#define SQRMOD_BNM1_THRESHOLD               16
-
-#define POWM_SEC_TABLE  4,29,252,840,2080
-
-#define MUL_FFT_MODF_THRESHOLD             412  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    412, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     12, 5}, {     25, 6}, {     21, 7}, {     11, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     21, 8}, \
-    {     11, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
-    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
-    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
-    {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
-    {     39, 8}, {     79, 9}, {     55,10}, {     31, 9}, \
-    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
-    {     63, 9}, {    135,10}, {     79, 9}, {    159,10}, \
-    {     95,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    143, 9}, {    287,10}, {    159,11}, {     95,10}, \
-    {    191,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543,10}, {    287,11}, \
-    {    159,10}, {    335, 9}, {    671,10}, {    351, 9}, \
-    {    703,11}, {    191,10}, {    383, 9}, {    767,10}, \
-    {    415, 9}, {    831,11}, {    223,12}, {   4096,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 71
-#define MUL_FFT_THRESHOLD                 4736
-
-#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    340, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     21, 7}, {     11, 6}, {     24, 7}, {     13, 6}, \
-    {     27, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
-    {     15, 7}, {     33, 8}, {     19, 7}, {     39, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
-    {     39, 9}, {     23, 8}, {     47,10}, {     15, 9}, \
-    {     31, 8}, {     67, 9}, {     47,10}, {     31, 9}, \
-    {     71,10}, {     47,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255, 9}, {    135,10}, {     79, 9}, \
-    {    159,10}, {     95, 9}, {    191,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    511, 9}, {    271,10}, \
-    {    143, 9}, {    287, 8}, {    575, 9}, {    303,10}, \
-    {    159,11}, {     95,10}, {    191,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
-    {    543,10}, {    287, 9}, {    575,10}, {    303,11}, \
-    {    159,10}, {    319, 9}, {    639,10}, {    335, 9}, \
-    {    671,10}, {    351,11}, {    191,10}, {    383, 9}, \
-    {    767,10}, {    415,11}, {    223,10}, {    447,12}, \
-    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 76
-#define SQR_FFT_THRESHOLD                 3712
-
-#define MULLO_BASECASE_THRESHOLD             2
-#define MULLO_DC_THRESHOLD                  68
-#define MULLO_MUL_N_THRESHOLD             9236
-
-#define DC_DIV_QR_THRESHOLD                 69
-#define DC_DIVAPPR_Q_THRESHOLD             220
-#define DC_BDIV_QR_THRESHOLD                75
-#define DC_BDIV_Q_THRESHOLD                188
-
-#define INV_MULMOD_BNM1_THRESHOLD           54
-#define INV_NEWTON_THRESHOLD               230
-#define INV_APPR_THRESHOLD                 230
-
-#define BINV_NEWTON_THRESHOLD              278
-#define REDC_1_TO_REDC_N_THRESHOLD          87
-
-#define MU_DIV_QR_THRESHOLD               1210
-#define MU_DIVAPPR_Q_THRESHOLD            1308
-#define MUPI_DIV_QR_THRESHOLD              106
-#define MU_BDIV_QR_THRESHOLD              1017
-#define MU_BDIV_Q_THRESHOLD               1210
-
-#define MATRIX22_STRASSEN_THRESHOLD         14
-#define HGCD_THRESHOLD                     110
-#define HGCD_APPR_THRESHOLD                138
-#define HGCD_REDUCE_THRESHOLD             2578
-#define GCD_DC_THRESHOLD                   408
-#define GCDEXT_DC_THRESHOLD                298
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                13
-#define GET_STR_PRECOMPUTE_THRESHOLD        24
-#define SET_STR_DC_THRESHOLD               527
-#define SET_STR_PRECOMPUTE_THRESHOLD      1090
diff --git a/gmp/mpn/powerpc32/p6/gmp-mparam.h b/gmp/mpn/powerpc32/p6/gmp-mparam.h
deleted file mode 100644
index c9504b63b3..0000000000
--- a/gmp/mpn/powerpc32/p6/gmp-mparam.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2004, 2008-2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 3500 MHz POWER6 */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 3
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
-
-#define MUL_TOOM22_THRESHOLD                19
-#define MUL_TOOM33_THRESHOLD                55
-#define MUL_TOOM44_THRESHOLD                88
-#define MUL_TOOM6H_THRESHOLD               137
-#define MUL_TOOM8H_THRESHOLD               181
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      57
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      56
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      57
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      56
-
-#define SQR_BASECASE_THRESHOLD               0  /* always */
-#define SQR_TOOM2_THRESHOLD                 30
-#define SQR_TOOM3_THRESHOLD                 56
-#define SQR_TOOM4_THRESHOLD                130
-#define SQR_TOOM6_THRESHOLD                189
-#define SQR_TOOM8_THRESHOLD                296
-
-#define MULMID_TOOM42_THRESHOLD             26
-
-#define MULMOD_BNM1_THRESHOLD                7
-#define SQRMOD_BNM1_THRESHOLD               12
-
-#define POWM_SEC_TABLE  2,26,127,453,1068
-
-#define MUL_FFT_MODF_THRESHOLD             212  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    212, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
-    {     13, 7}, {      7, 6}, {     16, 7}, {      9, 6}, \
-    {     19, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
-    {     11, 7}, {     25, 9}, {      7, 8}, {     15, 7}, \
-    {     31, 8}, {     19, 7}, {     39, 8}, {     23, 9}, \
-    {     15, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
-    {     15, 9}, {     31, 8}, {     63, 9}, {     39, 8}, \
-    {     79, 9}, {     47,10}, {     31, 9}, {     63, 8}, \
-    {    127, 9}, {     71, 8}, {    143, 7}, {    287, 9}, \
-    {     79,10}, {     47,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255, 7}, {    511, 9}, {    143, 8}, \
-    {    287,10}, {     79, 9}, {    159, 8}, {    319, 9}, \
-    {    175, 8}, {    351,10}, {     95, 9}, {    191, 8}, \
-    {    383, 9}, {    207,10}, {    111,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    511,10}, {    143, 9}, \
-    {    287, 8}, {    575,10}, {    159, 9}, {    319,10}, \
-    {    175, 9}, {    351,11}, {     95,10}, {    191, 9}, \
-    {    383,10}, {    207, 9}, {    415,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    351, 9}, {    703,11}, \
-    {    191,10}, {    415, 9}, {    831,11}, {    223,10}, \
-    {    447,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 89
-#define MUL_FFT_THRESHOLD                 1728
-
-#define SQR_FFT_MODF_THRESHOLD             184  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    184, 5}, {      6, 4}, {     13, 5}, {     13, 6}, \
-    {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
-    {     16, 7}, {      9, 6}, {     19, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
-    {     11, 7}, {     23, 9}, {      7, 8}, {     23, 9}, \
-    {     15, 8}, {     39, 9}, {     23,10}, {     15, 9}, \
-    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
-    {     47,10}, {     31, 9}, {     63, 8}, {    127, 7}, \
-    {    255, 9}, {     71, 8}, {    143, 7}, {    287, 6}, \
-    {    575, 9}, {     79,10}, {     47,11}, {     31,10}, \
-    {     63, 9}, {    127, 8}, {    255, 9}, {    143, 8}, \
-    {    287, 7}, {    575,10}, {     79, 9}, {    159, 8}, \
-    {    319, 9}, {    175, 8}, {    351,10}, {     95, 9}, \
-    {    191, 8}, {    383, 9}, {    207,10}, {    111, 9}, \
-    {    223,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    143, 9}, {    287, 8}, {    575,10}, {    159, 9}, \
-    {    319,10}, {    175, 9}, {    351,11}, {     95,10}, \
-    {    191, 9}, {    383,10}, {    207, 9}, {    415,10}, \
-    {    223,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    287, 9}, {    575,11}, {    159,10}, \
-    {    351, 9}, {    703, 8}, {   1407,11}, {    191,10}, \
-    {    415,11}, {    223,10}, {    447, 9}, {    895,12}, \
-    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 92
-#define SQR_FFT_THRESHOLD                 1600
-
-#define MULLO_BASECASE_THRESHOLD             2
-#define MULLO_DC_THRESHOLD                  57
-#define MULLO_MUL_N_THRESHOLD             3176
-
-#define DC_DIV_QR_THRESHOLD                 52
-#define DC_DIVAPPR_Q_THRESHOLD             187
-#define DC_BDIV_QR_THRESHOLD                64
-#define DC_BDIV_Q_THRESHOLD                146
-
-#define INV_MULMOD_BNM1_THRESHOLD           68
-#define INV_NEWTON_THRESHOLD               182
-#define INV_APPR_THRESHOLD                 182
-
-#define BINV_NEWTON_THRESHOLD              186
-#define REDC_1_TO_REDC_N_THRESHOLD          60
-
-#define MU_DIV_QR_THRESHOLD                924
-#define MU_DIVAPPR_Q_THRESHOLD             807
-#define MUPI_DIV_QR_THRESHOLD               73
-#define MU_BDIV_QR_THRESHOLD               667
-#define MU_BDIV_Q_THRESHOLD                823
-
-#define MATRIX22_STRASSEN_THRESHOLD          8
-#define HGCD_THRESHOLD                      61
-#define HGCD_APPR_THRESHOLD                 50
-#define HGCD_REDUCE_THRESHOLD              974
-#define GCD_DC_THRESHOLD                   195
-#define GCDEXT_DC_THRESHOLD                134
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                 9
-#define GET_STR_PRECOMPUTE_THRESHOLD        21
-#define SET_STR_DC_THRESHOLD               190
-#define SET_STR_PRECOMPUTE_THRESHOLD       411
diff --git a/gmp/mpn/powerpc32/p7/gmp-mparam.h b/gmp/mpn/powerpc32/p7/gmp-mparam.h
deleted file mode 100644
index 35bb61dca2..0000000000
--- a/gmp/mpn/powerpc32/p7/gmp-mparam.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2004, 2008-2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 3550 MHz POWER7/T4 */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      1
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        34
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           34
-
-#define MUL_TOOM22_THRESHOLD                20
-#define MUL_TOOM33_THRESHOLD                89
-#define MUL_TOOM44_THRESHOLD               130
-#define MUL_TOOM6H_THRESHOLD               286
-#define MUL_TOOM8H_THRESHOLD               363
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     121
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      89
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     113
-
-#define SQR_BASECASE_THRESHOLD               4
-#define SQR_TOOM2_THRESHOLD                 50
-#define SQR_TOOM3_THRESHOLD                 89
-#define SQR_TOOM4_THRESHOLD                154
-#define SQR_TOOM6_THRESHOLD                222
-#define SQR_TOOM8_THRESHOLD                381
-
-#define MULMID_TOOM42_THRESHOLD             40
-
-#define MULMOD_BNM1_THRESHOLD               18
-#define SQRMOD_BNM1_THRESHOLD               17
-
-#define POWM_SEC_TABLE  4,35,225,780,2212
-
-#define MUL_FFT_MODF_THRESHOLD             476  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    476, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     12, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     14, 5}, {     29, 6}, {     21, 7}, {     11, 6}, \
-    {     25, 7}, {     13, 6}, {     29, 7}, {     15, 6}, \
-    {     31, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
-    {     39, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
-    {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
-    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
-    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
-    {     79, 9}, {    159,10}, {     95,11}, {     63,10}, \
-    {    159,11}, {     95,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,10}, {    271, 9}, {    543, 8}, \
-    {   1087,11}, {    159,10}, {    319, 9}, {    639,10}, \
-    {    335, 9}, {    671, 8}, {   1343,10}, {    351,11}, \
-    {    191,10}, {    415, 9}, {    831,10}, {    431,11}, \
-    {    223,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 77
-#define MUL_FFT_THRESHOLD                 5312
-
-#define SQR_FFT_MODF_THRESHOLD             344  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    344, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     21, 7}, {     11, 6}, {     24, 7}, {     13, 6}, \
-    {     27, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
-    {     11, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
-    {     19, 7}, {     39, 8}, {     27, 9}, {     15, 8}, \
-    {     39, 9}, {     23, 8}, {     47,10}, {     15, 9}, \
-    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
-    {     47,10}, {     31, 9}, {     79,10}, {     47,11}, \
-    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
-    {    159,10}, {     95, 9}, {    191,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    511, 9}, {    271,10}, \
-    {    143, 9}, {    287, 8}, {    575, 9}, {    303,10}, \
-    {    159,11}, {     95,10}, {    191,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
-    {    543, 8}, {   1087,10}, {    287, 9}, {    575,10}, \
-    {    303,11}, {    159,10}, {    319, 9}, {    639,10}, \
-    {    335, 9}, {    671,10}, {    351, 9}, {    703,11}, \
-    {    191,10}, {    383, 9}, {    767,10}, {    415, 9}, \
-    {    831,11}, {    223,10}, {    447,12}, {   4096,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 79
-#define SQR_FFT_THRESHOLD                 3712
-
-#define MULLO_BASECASE_THRESHOLD             2
-#define MULLO_DC_THRESHOLD                  34
-#define MULLO_MUL_N_THRESHOLD            10323
-
-#define DC_DIV_QR_THRESHOLD                 52
-#define DC_DIVAPPR_Q_THRESHOLD             202
-#define DC_BDIV_QR_THRESHOLD                68
-#define DC_BDIV_Q_THRESHOLD                152
-
-#define INV_MULMOD_BNM1_THRESHOLD           66
-#define INV_NEWTON_THRESHOLD               226
-#define INV_APPR_THRESHOLD                 189
-
-#define BINV_NEWTON_THRESHOLD              292
-#define REDC_1_TO_REDC_N_THRESHOLD          79
-
-#define MU_DIV_QR_THRESHOLD               1442
-#define MU_DIVAPPR_Q_THRESHOLD            1442
-#define MUPI_DIV_QR_THRESHOLD               91
-#define MU_BDIV_QR_THRESHOLD              1308
-#define MU_BDIV_Q_THRESHOLD               1442
-
-#define MATRIX22_STRASSEN_THRESHOLD         16
-#define HGCD_THRESHOLD                     126
-#define HGCD_APPR_THRESHOLD                139
-#define HGCD_REDUCE_THRESHOLD             2681
-#define GCD_DC_THRESHOLD                   573
-#define GCDEXT_DC_THRESHOLD                448
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                 9
-#define GET_STR_PRECOMPUTE_THRESHOLD        20
-#define SET_STR_DC_THRESHOLD               834
-#define SET_STR_PRECOMPUTE_THRESHOLD      1888
diff --git a/gmp/mpn/powerpc32/powerpc-defs.m4 b/gmp/mpn/powerpc32/powerpc-defs.m4
index 0c142a2e0c..33cf97e387 100644
--- a/gmp/mpn/powerpc32/powerpc-defs.m4
+++ b/gmp/mpn/powerpc32/powerpc-defs.m4
@@ -3,32 +3,21 @@ divert(-1)
 dnl  m4 macros for PowerPC assembler (32 and 64 bit).
 
 dnl  Copyright 2000, 2002, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
diff --git a/gmp/mpn/powerpc32/rshift.asm b/gmp/mpn/powerpc32/rshift.asm
index cb0046d5ee..b069a93d12 100644
--- a/gmp/mpn/powerpc32/rshift.asm
+++ b/gmp/mpn/powerpc32/rshift.asm
@@ -1,32 +1,22 @@
 dnl  PowerPC-32 mpn_rshift -- Shift a number right.
 
-dnl  Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc.
+dnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005 Free Software
+dnl  Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -48,7 +38,7 @@ C cnt	r6
 
 ASM_START()
 PROLOGUE(mpn_rshift)
-	cmpwi	cr0, r5, 30	C more than 30 limbs?
+	cmpwi	cr0, r5, 12	C more than 12 limbs?
 	addi	r7, r3, -4	C dst-4
 	bgt	L(BIG)		C branch if more than 12 limbs
 
@@ -161,4 +151,4 @@ L(loopU):
 	stw	r12, 20(r7)
 	lmw	r24, -32(r1)	C restore registers
 	blr
-EPILOGUE()
+EPILOGUE(mpn_rshift)
diff --git a/gmp/mpn/powerpc32/sec_tabselect.asm b/gmp/mpn/powerpc32/sec_tabselect.asm
deleted file mode 100644
index a3f24d5678..0000000000
--- a/gmp/mpn/powerpc32/sec_tabselect.asm
+++ /dev/null
@@ -1,141 +0,0 @@
-dnl  PowerPC-32 mpn_sec_tabselect.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                   cycles/limb
-C 603e:			 ?
-C 604e:			 ?
-C 75x (G3):		 ?
-C 7400,7410 (G4):	 2.5
-C 744x,745x (G4+):	 2.0
-C power4/ppc970:	 2.0
-C power5:		 ?
-
-define(`rp',     `r3')
-define(`tp',     `r4')
-define(`n',      `r5')
-define(`nents',  `r6')
-define(`which',  `r7')
-
-define(`i',      `r8')
-define(`j',      `r9')
-define(`stride', `r12')
-define(`mask',   `r11')
-
-
-ASM_START()
-PROLOGUE(mpn_sec_tabselect)
-	addic.	j, n, -4		C outer loop induction variable
-	stmw	r27, -32(r1)
-	slwi	stride, n, 2
-
-	blt	cr0, L(outer_end)
-L(outer_top):
-	mtctr	nents
-	mr	r10, tp
-	li	r28, 0
-	li	r29, 0
-	li	r30, 0
-	li	r31, 0
-	addic.	j, j, -4		C outer loop induction variable
-	mr	i, which
-
-	ALIGN(16)
-L(top):	addic	i, i, -1		C set carry iff i != 0
-	subfe	mask, mask, mask
-	lwz	r0, 0(tp)
-	lwz	r27, 4(tp)
-	and	r0, r0, mask
-	and	r27, r27, mask
-	or	r28, r28, r0
-	or	r29, r29, r27
-	lwz	r0, 8(tp)
-	lwz	r27, 12(tp)
-	and	r0, r0, mask
-	and	r27, r27, mask
-	or	r30, r30, r0
-	or	r31, r31, r27
-	add	tp, tp, stride
-	bdnz	L(top)
-
-	stw	r28, 0(rp)
-	stw	r29, 4(rp)
-	stw	r30, 8(rp)
-	stw	r31, 12(rp)
-	addi	tp, r10, 16
-	addi	rp, rp, 16
-	bge	cr0, L(outer_top)
-L(outer_end):
-
-	andi.	r0, n, 2
-	beq	cr0, L(b0x)
-L(b1x):	mtctr	nents
-	mr	r10, tp
-	li	r28, 0
-	li	r29, 0
-	mr	i, which
-	ALIGN(16)
-L(tp2):	addic	i, i, -1
-	subfe	mask, mask, mask
-	lwz	r0, 0(tp)
-	lwz	r27, 4(tp)
-	and	r0, r0, mask
-	and	r27, r27, mask
-	or	r28, r28, r0
-	or	r29, r29, r27
-	add	tp, tp, stride
-	bdnz	L(tp2)
-	stw	r28, 0(rp)
-	stw	r29, 4(rp)
-	addi	tp, r10, 8
-	addi	rp, rp, 8
-
-L(b0x):	andi.	r0, n, 1
-	beq	cr0, L(b00)
-L(b01):	mtctr	nents
-	mr	r10, tp
-	li	r28, 0
-	mr	i, which
-	ALIGN(16)
-L(tp1):	addic	i, i, -1
-	subfe	mask, mask, mask
-	lwz	r0, 0(tp)
-	and	r0, r0, mask
-	or	r28, r28, r0
-	add	tp, tp, stride
-	bdnz	L(tp1)
-	stw	r28, 0(rp)
-
-L(b00):	lmw	r27, -32(r1)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc32/sqr_diag_addlsh1.asm b/gmp/mpn/powerpc32/sqr_diag_addlsh1.asm
deleted file mode 100644
index f7aba33ee5..0000000000
--- a/gmp/mpn/powerpc32/sqr_diag_addlsh1.asm
+++ /dev/null
@@ -1,80 +0,0 @@
-dnl  PowerPC-32 mpn_sqr_diag_addlsh1.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                cycles/limb
-C 603e			 ?
-C 604e			 ?
-C 75x (G3)		 ?
-C 7400,7410 (G4)	 ?
-C 744x,745x (G4+)	 6
-C power4/ppc970		 ?
-C power5		 ?
-
-C This has been feebly optimised for 7447 but not for any other CPU.
-
-define(`rp',	r3)
-define(`tp',	r4)
-define(`up',	r5)
-define(`n',	r6)
-
-ASM_START()
-PROLOGUE(mpn_sqr_diag_addlsh1)
-	addi	n, n, -1
-	addi	tp, tp, -4
-	mtctr	n
-	lwz	r0, 0(up)
-	li	r10, 0
-	mullw	r7, r0, r0
-	stw	r7, 0(rp)
-	mulhwu	r6, r0, r0
-	addic	r31, r31, 0	C clear CF
-
-	ALIGN(16)
-L(top):	lwzu	r0, 4(up)
-	mullw	r7, r0, r0
-	lwz	r8, 4(tp)
-	lwzu	r9, 8(tp)
-	rlwimi	r10, r8, 1,0,30
-	srwi	r11, r8, 31
-	rlwimi	r11, r9, 1,0,30
-	adde	r10, r10, r6
-	adde	r11, r11, r7
-	stw	r10, 4(rp)
-	srwi	r10, r9, 31
-	mulhwu	r6, r0, r0
-	stwu	r11, 8(rp)
-	bdnz	L(top)
-
-	adde	r10, r10, r6
-	stw	r10, 4(rp)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc32/sqr_diagonal.asm b/gmp/mpn/powerpc32/sqr_diagonal.asm
new file mode 100644
index 0000000000..d315349f63
--- /dev/null
+++ b/gmp/mpn/powerpc32/sqr_diagonal.asm
@@ -0,0 +1,103 @@
+dnl  PowerPC-32 mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C                cycles/limb
+C 603e:             ?
+C 604e:             4.0
+C 75x (G3):        10.5
+C 7400,7410 (G4):  10.5
+C 744x,745x (G4+):  4.0
+C power4/ppc970:    8.6
+C power5:           7.0
+
+C INPUT PARAMETERS
+C rp	r3
+C up	r4
+C n	r5
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+	lwz	r6,0(r4)
+	mtctr	r5
+
+	addi	r3,r3,-4
+	bdz	L(end1)
+
+	lwzu	r7,4(r4)
+	mullw	r9,r6,r6
+	mulhwu	r11,r6,r6
+	bdz	L(end2)
+
+	lwzu	r6,4(r4)
+	mullw	r8,r7,r7
+	mulhwu	r10,r7,r7
+	bdz	L(ende)
+
+L(loop):
+	lwzu	r7,4(r4)
+	stw	r9,4(r3)
+	mullw	r9,r6,r6
+	stwu	r11,8(r3)
+	mulhwu	r11,r6,r6
+	bdz	L(endo)
+	lwzu	r6,4(r4)
+	stw	r8,4(r3)
+	mullw	r8,r7,r7
+	stwu	r10,8(r3)
+	mulhwu	r10,r7,r7
+	bdnz	L(loop)
+
+L(ende):
+	stw	r9,4(r3)
+	mullw	r9,r6,r6
+	stw	r11,8(r3)
+	mulhwu	r11,r6,r6
+	stw	r8,12(r3)
+	stw	r10,16(r3)
+	stw	r9,20(r3)
+	stw	r11,24(r3)
+	blr
+L(endo):
+	stw	r8,4(r3)
+	mullw	r8,r7,r7
+	stw	r10,8(r3)
+	mulhwu	r10,r7,r7
+	stw	r9,12(r3)
+	stw	r11,16(r3)
+	stw	r8,20(r3)
+	stw	r10,24(r3)
+	blr
+
+L(end2):
+	mullw	r8,r7,r7
+	stw	r9,4(r3)
+	mulhwu	r10,r7,r7
+	stw	r11,8(r3)
+	stw	r8,12(r3)
+	stw	r10,16(r3)
+	blr
+L(end1):
+	mullw	r9,r6,r6
+	mulhwu	r11,r6,r6
+	stw	r9,4(r3)
+	stw	r11,8(r3)
+	blr
+EPILOGUE(mpn_sqr_diagonal)
diff --git a/gmp/mpn/powerpc32/sublsh1_n.asm b/gmp/mpn/powerpc32/sublsh1_n.asm
index 6dc6460016..c8711d09a6 100644
--- a/gmp/mpn/powerpc32/sublsh1_n.asm
+++ b/gmp/mpn/powerpc32/sublsh1_n.asm
@@ -3,30 +3,19 @@ dnl  PowerPC-32 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
 dnl  Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/submul_1.asm b/gmp/mpn/powerpc32/submul_1.asm
index 9fcdaa291b..ae40bb4473 100644
--- a/gmp/mpn/powerpc32/submul_1.asm
+++ b/gmp/mpn/powerpc32/submul_1.asm
@@ -5,30 +5,19 @@ dnl  Copyright 1995, 1997, 1998, 2000, 2002, 2005 Free Software Foundation,
 dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/umul.asm b/gmp/mpn/powerpc32/umul.asm
index a5811e1651..400f009337 100644
--- a/gmp/mpn/powerpc32/umul.asm
+++ b/gmp/mpn/powerpc32/umul.asm
@@ -1,32 +1,21 @@
-dnl  PowerPC-32 umul_ppmm -- support for longlong.h
+dnl PowerPC-32 umul_ppmm -- support for longlong.h
 
-dnl  Copyright 2000, 2001 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
+dnl Copyright 2000, 2001 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+dnl General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/vmx/copyd.asm b/gmp/mpn/powerpc32/vmx/copyd.asm
index 6aac6b8389..e345eef01f 100644
--- a/gmp/mpn/powerpc32/vmx/copyd.asm
+++ b/gmp/mpn/powerpc32/vmx/copyd.asm
@@ -3,30 +3,19 @@ dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_copyd.
 dnl  Copyright 2006 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -48,7 +37,7 @@ C    read-modify-write tricks.
 C  * The VMX code is used from the smallest sizes it handles, but measurements
 C    show a large speed bump at the cutoff points.  Small copying (perhaps
 C    using some read-modify-write technique) should be optimized.
-C  * Make a mpn_com based on this code.
+C  * Make a mpn_com_n based on this code.
 
 define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))
 define(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))
diff --git a/gmp/mpn/powerpc32/vmx/copyi.asm b/gmp/mpn/powerpc32/vmx/copyi.asm
index a97a0fa6dc..b6b2e7ea8d 100644
--- a/gmp/mpn/powerpc32/vmx/copyi.asm
+++ b/gmp/mpn/powerpc32/vmx/copyi.asm
@@ -3,30 +3,19 @@ dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_copyi.
 dnl  Copyright 2006 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -48,7 +37,7 @@ C    read-modify-write tricks.
 C  * The VMX code is used from the smallest sizes it handles, but measurements
 C    show a large speed bump at the cutoff points.  Small copying (perhaps
 C    using some read-modify-write technique) should be optimized.
-C  * Make a mpn_com based on this code.
+C  * Make a mpn_com_n based on this code.
 
 define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))
 define(`LIMBS_PER_VR',  eval(16/GMP_LIMB_BYTES))
diff --git a/gmp/mpn/powerpc32/vmx/logops_n.asm b/gmp/mpn/powerpc32/vmx/logops_n.asm
index d656d3b73f..7ed731e483 100644
--- a/gmp/mpn/powerpc32/vmx/logops_n.asm
+++ b/gmp/mpn/powerpc32/vmx/logops_n.asm
@@ -5,30 +5,19 @@ dnl  logical operations.
 dnl  Copyright 2006 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc32/vmx/mod_34lsub1.asm b/gmp/mpn/powerpc32/vmx/mod_34lsub1.asm
index 9b7e4f1a50..8aee6f81de 100644
--- a/gmp/mpn/powerpc32/vmx/mod_34lsub1.asm
+++ b/gmp/mpn/powerpc32/vmx/mod_34lsub1.asm
@@ -1,32 +1,21 @@
 dnl  PowerPC-32 mpn_mod_34lsub1 -- mpn remainder mod 2^24-1.
 
-dnl  Copyright 2002, 2003, 2005-2007, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2002, 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
@@ -146,15 +135,15 @@ L(large):
 
 	andi.	r7, up, 15
 	vxor	a0, v0, v0
-	lis	r9, 0xaaaa
+	lis	r0, 0xaaaa
 	vxor	a1, v0, v0
-	ori	r9, r9, 0xaaab
+	ori	r0, r0, 0xaaab
 	vxor	a2, v0, v0
 	li	r5, 16
 	vxor	c0, v0, v0
 	li	r6, 32
 	vxor	c1, v0, v0
-	LEAL(	r11, cnsts)		C CAUTION clobbers r0 for elf, darwin
+	LEAL(	r11, cnsts)
 	vxor	c2, v0, v0
 	vxor	z, v0, v0
 
@@ -169,7 +158,7 @@ L(large):
 	vsldoi	a2, z, a2, 12
 
 	addi	n, n, 9
-	mulhwu	r0, n, r9
+	mulhwu	r0, n, r0
 	srwi	r0, r0, 3		C r0 = floor(n/12)
 	mtctr	r0
 
@@ -185,7 +174,7 @@ L(na4):	bne	cr7, L(na8)
 	vsldoi	a1, z, a1, 8
 
 	addi	n, n, 6
-	mulhwu	r0, n, r9
+	mulhwu	r0, n, r0
 	srwi	r0, r0, 3		C r0 = floor(n/12)
 	mtctr	r0
 
@@ -199,7 +188,7 @@ L(na8):
 	vsldoi	a0, z, a0, 4
 
 	addi	n, n, 3
-	mulhwu	r0, n, r9
+	mulhwu	r0, n, r0
 	srwi	r0, r0, 3		C r0 = floor(n/12)
 	mtctr	r0
 
@@ -208,7 +197,7 @@ L(na8):
 	b	L(0)
 
 L(aligned16):
-	mulhwu	r0, n, r9
+	mulhwu	r0, n, r0
 	srwi	r0, r0, 3		C r0 = floor(n/12)
 	mtctr	r0
 
diff --git a/gmp/mpn/powerpc32/vmx/popcount.asm b/gmp/mpn/powerpc32/vmx/popcount.asm
index 943c92d127..62fcaaee4a 100644
--- a/gmp/mpn/powerpc32/vmx/popcount.asm
+++ b/gmp/mpn/powerpc32/vmx/popcount.asm
@@ -3,32 +3,26 @@ dnl  PowerPC-32/VMX mpn_popcount.
 dnl  Copyright 2006 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 MULFUNC_PROLOGUE(mpn_popcount)
 include_mpn(`powerpc64/vmx/popcount.asm')
+
+C                   cycles/limb
+C 7400,7410 (G4):       2.75
+C 744x,745x (G4+):      2.25
+C 970 (G5):             5.3
diff --git a/gmp/mpn/powerpc64/README b/gmp/mpn/powerpc64/README
index 50dd3995c3..757357b4d8 100644
--- a/gmp/mpn/powerpc64/README
+++ b/gmp/mpn/powerpc64/README
@@ -1,30 +1,19 @@
-Copyright 1999-2001, 2003-2005 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
@@ -124,7 +113,7 @@ Memory:		  2 ld/st.  Stores go to the L2 cache, which can sustain just
 		  one store per cycle.
 		  L1 load latency: to gregs 3-4 cycles, to fregs 5-6 cycles.
 		  Operations that modify the address register might be split
-		  to use also an integer issue slot.
+		  to use also a an integer issue slot.
 Simple integer:	  2 operations every cycle, latency 2.
 Integer multiply: 2 operations every 6th cycle, latency 7 cycles.
 Integer divide:	  ?
@@ -150,7 +139,7 @@ Problem is to get 32-bit or 16-bit words to the fp registers.  Only 64-bit fp
 memops copies bits without fiddling with them.  We might therefore need to
 load to integer registers with zero extension, store as 64 bits into temp
 space, and then load to fp regs.  Alternatively, load directly to fp space
-and add well-chosen constants to get cancellation.  (Other part after given by
+and add well-chosen constants to get cancelation.  (Other part after given by
 subsequent subtraction.)
 
 Possible code mix for load-via-intregs variant:
diff --git a/gmp/mpn/powerpc64/aix.m4 b/gmp/mpn/powerpc64/aix.m4
index bf6517d69d..589686a868 100644
--- a/gmp/mpn/powerpc64/aix.m4
+++ b/gmp/mpn/powerpc64/aix.m4
@@ -1,53 +1,42 @@
 divert(-1)
 dnl  m4 macros for AIX 64-bit assembly.
 
-dnl  Copyright 2000-2002, 2005, 2006, 2010, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
+dnl  Copyright 2000, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 define(`ASM_START',
-	`.machine	"any"
+	`.machine	"ppc64"
 	.toc')
 
-dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo[,toc])
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
 dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
 dnl
 dnl  Don't want ELF style .size in the epilogue.
 
 define(`PROLOGUE_cpu',
-m4_assert_numargs_range(1,2)
-`ifelse(`$2',toc,,
-`ifelse(`$2',,,`m4_error(`Unrecognised PROLOGUE parameter')')')dnl
+m4_assert_numargs(1)
+	`
 	.globl	$1
 	.globl	.$1
 	.csect	[DS], 3
 $1:
 	.llong	.$1, TOC[tc0], 0
-	.csect	.$1[PR], 6
+	.csect	[PR]
+	.align	4
 .$1:')
 
 define(`EPILOGUE_cpu',
@@ -92,6 +81,4 @@ define(`CALL',
 
 define(`ASM_END', `TOC_ENTRY')
 
-undefine(`EXTRA_REGISTER')
-
 divert
diff --git a/gmp/mpn/powerpc64/com.asm b/gmp/mpn/powerpc64/com.asm
deleted file mode 100644
index 074b7ff6e4..0000000000
--- a/gmp/mpn/powerpc64/com.asm
+++ /dev/null
@@ -1,136 +0,0 @@
-dnl  PowerPC-64 mpn_com.
-
-dnl  Copyright 2004, 2005, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                  cycles/limb
-C POWER3/PPC630          ?
-C POWER4/PPC970          1.25
-C POWER5                 ?
-C POWER6                 1.32
-C POWER7                 1.13
-
-C INPUT PARAMETERS
-define(`rp',	`r3')
-define(`up',	`r4')
-define(`n',	`r5')
-
-ASM_START()
-PROLOGUE(mpn_com)
-
-ifdef(`HAVE_ABI_mode32',
-`	rldicl	n, n, 0,32')
-
-	cmpdi	cr0, n, 4
-	blt	L(sml)
-
-	addi	r10, n, 4
-	srdi	r10, r10, 3
-	mtctr	r10
-
-	andi.	r0, n, 1
-	rlwinm	r11, n, 0,30,30
-	rlwinm	r12, n, 0,29,29
-	cmpdi	cr6, r11, 0
-	cmpdi	cr7, r12, 0
-
-	beq	cr0, L(xx0)
-L(xx1):	ld	r6, 0(up)
-	addi	up, up, 8
-	nor	r6, r6, r6
-	std	r6, 0(rp)
-	addi	rp, rp, 8
-
-L(xx0):	bne	cr6, L(x10)
-L(x00):	ld	r6, 0(r4)
-	ld	r7, 8(r4)
-	bne	cr7, L(100)
-L(000):	addi	rp, rp, -32
-	b	L(lo0)
-L(100):	addi	up, up, -32
-	b	L(lo4)
-L(x10):	ld	r8, 0(r4)
-	ld	r9, 8(r4)
-	bne	cr7, L(110)
-L(010):	addi	up, up, 16
-	addi	rp, rp, -16
-	b	L(lo2)
-L(110):	addi	up, up, -16
-	addi	rp, rp, -48
-	b	L(lo6)
-
-L(sml):	mtctr	n
-L(t):	ld	r6, 0(up)
-	addi	up, up, 8
-	nor	r6, r6, r6
-	std	r6, 0(rp)
-	addi	rp, rp, 8
-	bdnz	L(t)
-	blr
-
-	ALIGN(32)
-L(top):	nor	r6, r6, r6
-	nor	r7, r7, r7
-	std	r6, 0(rp)
-	std	r7, 8(rp)
-L(lo2):	ld	r6, 0(up)
-	ld	r7, 8(up)
-	nor	r8, r8, r8
-	nor	r9, r9, r9
-	std	r8, 16(rp)
-	std	r9, 24(rp)
-L(lo0):	ld	r8, 16(up)
-	ld	r9, 24(up)
-	nor	r6, r6, r6
-	nor	r7, r7, r7
-	std	r6, 32(rp)
-	std	r7, 40(rp)
-L(lo6):	ld	r6, 32(up)
-	ld	r7, 40(up)
-	nor	r8, r8, r8
-	nor	r9, r9, r9
-	std	r8, 48(rp)
-	std	r9, 56(rp)
-	addi	rp, rp, 64
-L(lo4):	ld	r8, 48(up)
-	ld	r9, 56(up)
-	addi	up, up, 64
-	bdnz	L(top)
-
-L(end):	nor	r6, r6, r6
-	nor	r7, r7, r7
-	std	r6, 0(rp)
-	std	r7, 8(rp)
-	nor	r8, r8, r8
-	nor	r9, r9, r9
-	std	r8, 16(rp)
-	std	r9, 24(rp)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/com_n.asm b/gmp/mpn/powerpc64/com_n.asm
new file mode 100644
index 0000000000..0c43d06cfe
--- /dev/null
+++ b/gmp/mpn/powerpc64/com_n.asm
@@ -0,0 +1,74 @@
+dnl  PowerPC-64 mpn_com_n.
+
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		cycles/limb
+C POWER3/PPC630:     1?
+C POWER4/PPC970:     1.6
+
+C TODO
+C  * 8-way unrolling brings timing down to about 1.3 cycles/limb.
+
+C INPUT PARAMETERS
+C rp	r3
+C up	r4
+C n	r5
+
+ASM_START()
+PROLOGUE(mpn_com_n)
+	rldic.	r0, r5, 3, 59	C r0 = (r5 & 3) << 3; cr0 = (n == 4t)?
+	cmpldi	cr6, r0, 16	C cr6 = (n cmp 4t + 2)?
+
+	addi	r5, r5, 3	C compute...
+ifdef(`HAVE_ABI_mode32',
+`	rldicl	r5, r5, 62,34',	C ...branch count
+`	rldicl	r5, r5, 62, 2')	C ...branch count
+	mtctr	r5
+
+	add	r4, r4, r0	C offset up
+	add	r3, r3, r0	C offset rp
+
+	beq	cr0, L(L00)
+	blt	cr6, L(L01)
+	beq	cr6, L(L10)
+	b	L(L11)
+
+L(L00):	addi	r4, r4, 32
+	addi	r3, r3, 32
+
+	ALIGN(16)
+L(oop):	ld	r6, -32(r4)
+	nor	r6, r6, r6
+	std	r6, -32(r3)
+L(L11):	ld	r6, -24(r4)
+	nor	r6, r6, r6
+	std	r6, -24(r3)
+L(L10):	ld	r6, -16(r4)
+	nor	r6, r6, r6
+	std	r6, -16(r3)
+L(L01):	ld	r6, -8(r4)
+	nor	r6, r6, r6
+	addi	r4, r4, 32
+	std	r6, -8(r3)
+	addi	r3, r3, 32
+	bdnz	L(oop)
+
+	blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc64/copyd.asm b/gmp/mpn/powerpc64/copyd.asm
index c6ce9309f1..6a46a433c9 100644
--- a/gmp/mpn/powerpc64/copyd.asm
+++ b/gmp/mpn/powerpc64/copyd.asm
@@ -3,39 +3,25 @@ dnl  PowerPC-64 mpn_copyd
 dnl  Copyright 2004, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C                  cycles/limb
-C POWER3/PPC630          1
-C POWER4/PPC970          1
-C POWER5                 ?
-C POWER6                 ?
-C POWER7                 1.4
+C		cycles/limb
+C POWER3/PPC630:     1
+C POWER4/PPC970:     1
 
 C INPUT PARAMETERS
 C rp	r3
diff --git a/gmp/mpn/powerpc64/copyi.asm b/gmp/mpn/powerpc64/copyi.asm
index 9a86cb21cc..5cb7e48565 100644
--- a/gmp/mpn/powerpc64/copyi.asm
+++ b/gmp/mpn/powerpc64/copyi.asm
@@ -3,39 +3,25 @@ dnl  PowerPC-64 mpn_copyi.
 dnl  Copyright 2004, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C                  cycles/limb
-C POWER3/PPC630          1
-C POWER4/PPC970          1
-C POWER5                 ?
-C POWER6                 ?
-C POWER7                 1.4
+C		cycles/limb
+C POWER3/PPC630:     1
+C POWER4/PPC970:     1
 
 C INPUT PARAMETERS
 C rp	r3
diff --git a/gmp/mpn/powerpc64/darwin.m4 b/gmp/mpn/powerpc64/darwin.m4
index a3180e48fd..10055be13a 100644
--- a/gmp/mpn/powerpc64/darwin.m4
+++ b/gmp/mpn/powerpc64/darwin.m4
@@ -2,48 +2,35 @@ divert(-1)
 dnl  m4 macros for Mac OS 64-bit assembly.
 
 dnl  Copyright 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
 dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 define(`ASM_START',`')
 
-dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo[,toc])
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
 dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
 dnl
 
 define(`DARWIN')
 
 define(`PROLOGUE_cpu',
-m4_assert_numargs_range(1,2)
-`ifelse(`$2',toc,,
-`ifelse(`$2',,,`m4_error(`Unrecognised PROLOGUE parameter')')')dnl
-	.text
+m4_assert_numargs(1)
+`	.text
 	.globl	$1
-	.align	5
+	.align	4
 $1:')
 
 define(`EPILOGUE_cpu',
@@ -114,6 +101,4 @@ define(`CALL',
 
 define(`ASM_END', `dnl')
 
-define(`EXTRA_REGISTER', r2)
-
 divert
diff --git a/gmp/mpn/powerpc64/elf.m4 b/gmp/mpn/powerpc64/elf.m4
index ddb5a8ed79..e6da11f90c 100644
--- a/gmp/mpn/powerpc64/elf.m4
+++ b/gmp/mpn/powerpc64/elf.m4
@@ -2,60 +2,31 @@ divert(-1)
 dnl  m4 macros for powerpc64 GNU/Linux assembly.
 
 dnl  Copyright 2003, 2005, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
-define(`ASM_START',
-`ifdef(`ELFv2_ABI',
-`
-	.abiversion 2
-')')
+define(`ASM_START',`')
 
-dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo[,toc])
+dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
 dnl          EPILOGUE_cpu(GSYM_PREFIX`'foo)
 dnl
 
 define(`PROLOGUE_cpu',
-m4_assert_numargs_range(1,2)
-`ifelse(`$2',toc,,
-`ifelse(`$2',,,`m4_error(`Unrecognised PROLOGUE parameter')')')dnl
-ifdef(`ELFv2_ABI',
-`
-	.globl	$1
-	.type	$1, @function
-	.section	".text"
-	.align	5
-$1:
-ifelse(`$2',toc,`
-0:	addis	2, 12, (.TOC.-0b)@ha
-	addi	2, 2, (.TOC.-0b)@l
-	.localentry $1, .-$1
-',)
-',`
+m4_assert_numargs(1)
+	`
 	.globl	$1
 	.globl	.$1
 	.section	".opd","aw"
@@ -65,17 +36,12 @@ $1:
 	.size	$1, 24
 	.type	.$1, @function
 	.section	".text"
-	.align	5
-.$1:
-')')
+	.align	4
+.$1:')
 
 define(`EPILOGUE_cpu',
 m4_assert_numargs(1)
-`ifdef(`ELFv2_ABI',`
-	.size	$1, .-$1
-',`
-	.size	.$1, .-.$1
-')')
+`	.size	.$1, .-.$1')
 
 define(`TOC_ENTRY', `')
 
@@ -118,6 +84,4 @@ define(`CALL',
 
 define(`ASM_END', `TOC_ENTRY')
 
-undefine(`EXTRA_REGISTER')
-
 divert
diff --git a/gmp/mpn/powerpc64/gmp-mparam.h b/gmp/mpn/powerpc64/gmp-mparam.h
new file mode 100644
index 0000000000..e0ab478e3e
--- /dev/null
+++ b/gmp/mpn/powerpc64/gmp-mparam.h
@@ -0,0 +1,63 @@
+/* PowerPC-64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1995, 1999, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define BITS_PER_MP_LIMB 64  /* limb width in bits; equals 8 * BYTES_PER_MP_LIMB */
+#define BYTES_PER_MP_LIMB 8  /* limb width in bytes */
+
+
+/* ???MHz ppc630 -- clock speed of the tuning machine was not recorded */
+
+/* Generated by tuneup.c, 2004-02-10, gcc "2.9" */
+
+#define MUL_KARATSUBA_THRESHOLD           8  /* presumably an operand size in limbs, as for the other *_THRESHOLD values -- confirm against tuneup.c */
+#define MUL_TOOM3_THRESHOLD              41
+
+#define SQR_BASECASE_THRESHOLD            0  /* always */
+#define SQR_KARATSUBA_THRESHOLD          14
+#define SQR_TOOM3_THRESHOLD              48
+
+#define DIV_SB_PREINV_THRESHOLD           0
+#define DIV_DC_THRESHOLD                 28
+#define POWM_THRESHOLD                   40
+
+#define HGCD_THRESHOLD                   56
+#define GCD_ACCEL_THRESHOLD               3
+#define GCD_DC_THRESHOLD                408
+#define JACOBI_BASE_METHOD                1  /* a method selector rather than a size threshold, judging by the name -- confirm */
+
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define USE_PREINV_DIVREM_1               0  /* looks like a 0/1 switch, not a threshold -- confirm */
+#define USE_PREINV_MOD_1                  1  /* looks like a 0/1 switch, not a threshold -- confirm */
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             14
+#define GET_STR_PRECOMPUTE_THRESHOLD     22
+#define SET_STR_THRESHOLD              1815
+
+#define MUL_FFT_TABLE  { 272, 544, 1344, 2304, 5120, 20480, 49152, 0 }  /* increasing sizes, terminated by 0 */
+#define MUL_FFT_MODF_THRESHOLD          216
+#define MUL_FFT_THRESHOLD              1408
+
+#define SQR_FFT_TABLE  { 272, 608, 1344, 2304, 7168, 20480, 49152, 0 }  /* increasing sizes, terminated by 0 */
+#define SQR_FFT_MODF_THRESHOLD          200
+#define SQR_FFT_THRESHOLD              1408
diff --git a/gmp/mpn/powerpc64/logops_n.asm b/gmp/mpn/powerpc64/logops_n.asm
index 2fa6985d7a..917b59f455 100644
--- a/gmp/mpn/powerpc64/logops_n.asm
+++ b/gmp/mpn/powerpc64/logops_n.asm
@@ -1,42 +1,28 @@
 dnl  PowerPC-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
 dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
 
-dnl  Copyright 2003-2005 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C                  cycles/limb
-C POWER3/PPC630          1.75
-C POWER4/PPC970          2.10
-C POWER5                 ?
-C POWER6                 ?
-C POWER7                 1.75
+C		cycles/limb
+C POWER3/PPC630:     1.75
+C POWER4/PPC970:     2.10
 
 C   n	   POWER3/PPC630   POWER4/PPC970
 C     1	       15.00	       15.33
diff --git a/gmp/mpn/powerpc64/lshift.asm b/gmp/mpn/powerpc64/lshift.asm
index 880944a4ae..41e5ddd8e5 100644
--- a/gmp/mpn/powerpc64/lshift.asm
+++ b/gmp/mpn/powerpc64/lshift.asm
@@ -1,207 +1,116 @@
 dnl  PowerPC-64 mpn_lshift -- rp[] = up[] << cnt
 
-dnl  Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 
-include(`../config.m4')
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
-C                   cycles/limb
-C POWER3/PPC630          ?
-C POWER4/PPC970          ?
-C POWER5                 2.25
-C POWER6                 9.75
-C POWER7                 2.15
+include(`../config.m4')
 
-C TODO
-C  * Try to reduce the number of needed live registers
-C  * Micro-optimise header code
-C  * Keep in synch with rshift.asm and lshiftc.asm
+C		cycles/limb
+C POWER3/PPC630:     1.5
+C POWER4/PPC970:     3.0
 
 C INPUT PARAMETERS
-define(`rp',  `r3')
-define(`up',  `r4')
-define(`n',   `r5')
-define(`cnt', `r6')
+define(`rp',`r3')
+define(`up',`r4')
+define(`n',`r5')
+define(`cnt',`r6')
+
+define(`tnc',`r5')
+define(`v0',`r0')
+define(`v1',`r7')
+define(`u0',`r8')
+define(`u1',`r9')
+define(`h0',`r10')
+define(`h1',`r11')
 
-define(`tnc',`r0')
-define(`u0',`r30')
-define(`u1',`r31')
-define(`retval',`r5')
 
 ASM_START()
 PROLOGUE(mpn_lshift)
-	std	r31, -8(r1)
-	std	r30, -16(r1)
-	subfic	tnc, cnt, 64
-	sldi	r7, n, 3	C byte count corresponding to n
-	add	up, up, r7	C up = up + n
-	add	rp, rp, r7	C rp = rp + n
-	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
-	cmpdi	cr6, r30, 2
-	addi	r31, n, 3	C compute count...
-	ld	r10, -8(up)	C load 1st limb for b00...b11
-	srd	retval, r10, tnc
 ifdef(`HAVE_ABI_mode32',
-`	rldicl	r31, r31, 62,34',	C ...branch count
-`	srdi	r31, r31, 2')	C ...for ctr
-	mtctr	r31		C copy count into ctr
-	beq	cr0, L(b00)
-	blt	cr6, L(b01)
-	ld	r11, -16(up)	C load 2nd limb for b10 and b11
-	beq	cr6, L(b10)
-
-	ALIGN(16)
-L(b11):	sld	r8, r10, cnt
-	srd	r9, r11, tnc
-	ld	u1, -24(up)
-	addi	up, up, -24
-	sld	r12, r11, cnt
-	srd	r7, u1, tnc
-	addi	rp, rp, 16
-	bdnz	L(gt3)
-
-	or	r11, r8, r9
-	sld	r8, u1, cnt
-	b	L(cj3)
-
-	ALIGN(16)
-L(gt3):	ld	u0, -8(up)
-	or	r11, r8, r9
-	sld	r8, u1, cnt
-	srd	r9, u0, tnc
-	ld	u1, -16(up)
-	or	r10, r12, r7
-	b	L(L11)
-
-	ALIGN(32)
-L(b10):	sld	r12, r10, cnt
-	addi	rp, rp, 24
-	srd	r7, r11, tnc
-	bdnz	L(gt2)
-
-	sld	r8, r11, cnt
-	or	r10, r12, r7
-	b	L(cj2)
-
-L(gt2):	ld	u0, -24(up)
-	sld	r8, r11, cnt
-	srd	r9, u0, tnc
-	ld	u1, -32(up)
-	or	r10, r12, r7
-	sld	r12, u0, cnt
-	srd	r7, u1, tnc
-	ld	u0, -40(up)
-	or	r11, r8, r9
-	addi	up, up, -16
-	b	L(L10)
-
-	ALIGN(16)
-L(b00):	ld	u1, -16(up)
-	sld	r12, r10, cnt
-	srd	r7, u1, tnc
-	ld	u0, -24(up)
-	sld	r8, u1, cnt
-	srd	r9, u0, tnc
-	ld	u1, -32(up)
-	or	r10, r12, r7
-	sld	r12, u0, cnt
-	srd	r7, u1, tnc
-	addi	rp, rp, 8
-	bdz	L(cj4)
-
-L(gt4):	addi	up, up, -32
-	ld	u0, -8(up)
-	or	r11, r8, r9
-	b	L(L00)
-
-	ALIGN(16)
-L(b01):	bdnz	L(gt1)
-	sld	r8, r10, cnt
-	std	r8, -8(rp)
-	b	L(ret)
-
-L(gt1):	ld	u0, -16(up)
-	sld	r8, r10, cnt
-	srd	r9, u0, tnc
-	ld	u1, -24(up)
-	sld	r12, u0, cnt
-	srd	r7, u1, tnc
-	ld	u0, -32(up)
-	or	r11, r8, r9
-	sld	r8, u1, cnt
-	srd	r9, u0, tnc
-	ld	u1, -40(up)
-	addi	up, up, -40
-	or	r10, r12, r7
-	bdz	L(end)
-
-	ALIGN(32)
-L(top):	sld	r12, u0, cnt
-	srd	r7, u1, tnc
-	ld	u0, -8(up)
-	std	r11, -8(rp)
-	or	r11, r8, r9
-L(L00):	sld	r8, u1, cnt
-	srd	r9, u0, tnc
-	ld	u1, -16(up)
-	std	r10, -16(rp)
-	or	r10, r12, r7
-L(L11):	sld	r12, u0, cnt
-	srd	r7, u1, tnc
-	ld	u0, -24(up)
-	std	r11, -24(rp)
-	or	r11, r8, r9
-L(L10):	sld	r8, u1, cnt
-	srd	r9, u0, tnc
-	ld	u1, -32(up)
-	addi	up, up, -32
-	std	r10, -32(rp)
-	addi	rp, rp, -32
-	or	r10, r12, r7
-	bdnz	L(top)
-
-	ALIGN(32)
-L(end):	sld	r12, u0, cnt
-	srd	r7, u1, tnc
-	std	r11, -8(rp)
-L(cj4):	or	r11, r8, r9
-	sld	r8, u1, cnt
-	std	r10, -16(rp)
-L(cj3):	or	r10, r12, r7
-	std	r11, -24(rp)
-L(cj2):	std	r10, -32(rp)
-	std	r8, -40(rp)
-
-L(ret):	ld	r31, -8(r1)
-	ld	r30, -16(r1)
+`	rldicl	r7, r5, 0, 32	C zero extend n
+	mtctr	r7',		C copy n to count register
+`	mtctr	n')		C copy n to count register
+
+ifdef(`HAVE_ABI_mode32',
+`	rldic	r0, n, 3, 32',	C byte count corresponding to n
+`	rldicr	r0, n, 3, 60')	C byte count corresponding to n
+
+	add	rp, rp, r0	C rp = rp + 8*n bytes, one past the last limb
+	add	up, up, r0	C up = up + 8*n bytes, one past the last limb
+	addi	rp, rp, 8	C rp now points 16 bytes beyond the last limb, so -16(rp) is rp[n-1]
+	addi	up, up, -8	C up now points to last limb
+	subfic	tnc, cnt, 64	C tnc = 64 - cnt; tnc shares r5 with n, which is not read again
+
+	ld	u0, 0(up)	C u0 = up[n-1]
+	sld	h0, u0, cnt	C h0 = upper part of result limb n-1
+	srd	r12, u0, tnc	C return value: bits shifted out of up[n-1]
+	bdz	L(1)		C jump for n = 1
+
+	ld	u1, -8(up)	C u1 = up[n-2]
+	bdz	L(2)		C jump for n = 2
+
+	ldu	u0, -16(up)	C u0 = up[n-3]; up -= 16
+	bdz	L(end)		C jump for n = 3
+
+L(oop):	srd	v1, u1, tnc	C v1 = bits of u1 that spill into the limb above
+	sld	h1, u1, cnt	C h1 = upper part of the next lower result limb
+	ld	u1, -8(up)	C load the next lower source limb
+	or	h0, v1, h0	C complete the pending result limb
+	stdu	h0, -16(rp)	C store it and step rp down by 2 limbs
+
+	bdz	L(exit)	C count exhausted: u0 then u1 remain to be drained
+
+	srd	v0, u0, tnc	C second half: same steps with the u0/h0 and u1/h1 roles swapped
+	sld	h0, u0, cnt
+	ldu	u0, -16(up)	C load the next lower source limb; up -= 16
+	or	h1, v0, h1
+	std	h1, -8(rp)	C rp was already stepped by the stdu above
+
+	bdnz	L(oop)	C count exhausted on fall through: u1 then u0 remain
+
+L(end):	srd	v1, u1, tnc	C drain u1 first, then u0
+	sld	h1, u1, cnt
+	or	h0, v1, h0
+	stdu	h0, -16(rp)
+	srd	v0, u0, tnc
+	sld	h0, u0, cnt
+	or	h1, v0, h1
+	std	h1, -8(rp)
+L(1):	std	h0, -16(rp)	C store the final, lowest-addressed result limb
+ifdef(`HAVE_ABI_mode32',
+`	srdi	r3, r12, 32
+	mr	r4, r12
+',`	mr	r3, r12
+')
+	blr	C mode32 returns r12 split as r3 = upper 32 bits, r4 = r12; otherwise r3 = r12
+
+L(exit):	srd	v0, u0, tnc	C drain u0 first, then u1
+	sld	h0, u0, cnt
+	or	h1, v0, h1
+	std	h1, -8(rp)
+L(2):	srd	v1, u1, tnc	C also the n = 2 entry: h0 and u1 are pending
+	sld	h1, u1, cnt
+	or	h0, v1, h0
+	stdu	h0, -16(rp)
+	std	h1, -8(rp)	C final, lowest-addressed result limb; vacated low bits are zero
 ifdef(`HAVE_ABI_mode32',
+`	srdi	r3, r12, 32
+	mr	r4, r12
+',`	mr	r3, r12
+')
 	blr
 EPILOGUE()
diff --git a/gmp/mpn/powerpc64/lshiftc.asm b/gmp/mpn/powerpc64/lshiftc.asm
deleted file mode 100644
index 7cf6a83428..0000000000
--- a/gmp/mpn/powerpc64/lshiftc.asm
+++ /dev/null
@@ -1,210 +0,0 @@
-dnl  PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt
-
-dnl  Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                   cycles/limb
-C POWER3/PPC630          ?
-C POWER4/PPC970          ?
-C POWER5                 2.25
-C POWER6                 9.5
-C POWER7                 2.15
-
-C TODO
-C  * Try to reduce the number of needed live registers
-C  * Micro-optimise header code
-C  * Keep in synch with lshift.asm and rshift.asm
-C  * Could the long-scheduled std insns be less scheduled?
-
-C INPUT PARAMETERS
-define(`rp',  `r3')
-define(`up',  `r4')
-define(`n',   `r5')
-define(`cnt', `r6')
-
-define(`tnc',`r0')
-define(`u0',`r30')
-define(`u1',`r31')
-define(`retval',`r5')
-
-ASM_START()
-PROLOGUE(mpn_lshiftc)
-	std	r31, -8(r1)
-	std	r30, -16(r1)
-	subfic	tnc, cnt, 64
-	sldi	r7, n, 3	C byte count corresponding to n
-	add	up, up, r7	C up = up + n
-	add	rp, rp, r7	C rp = rp + n
-	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
-	cmpdi	cr6, r30, 2
-	addi	r31, n, 3	C compute count...
-	ld	r10, -8(up)	C load 1st limb for b00...b11
-	srd	retval, r10, tnc
-	srdi	r31, r31, 2	C ...for ctr
-	mtctr	r31		C copy count into ctr
-	beq	cr0, L(b00)
-	blt	cr6, L(b01)
-	ld	r11, -16(up)	C load 2nd limb for b10 and b11
-	beq	cr6, L(b10)
-
-	ALIGN(16)
-L(b11):	sld	r8, r10, cnt
-	srd	r9, r11, tnc
-	ld	u1, -24(up)
-	addi	up, up, -24
-	sld	r12, r11, cnt
-	srd	r7, u1, tnc
-	addi	rp, rp, 16
-	bdnz	L(gt3)
-
-	nor	r11, r8, r9
-	sld	r8, u1, cnt
-	nor	r8, r8, r8
-	b	L(cj3)
-
-	ALIGN(16)
-L(gt3):	ld	u0, -8(up)
-	nor	r11, r8, r9
-	sld	r8, u1, cnt
-	srd	r9, u0, tnc
-	ld	u1, -16(up)
-	nor	r10, r12, r7
-	b	L(L11)
-
-	ALIGN(32)
-L(b10):	sld	r12, r10, cnt
-	addi	rp, rp, 24
-	srd	r7, r11, tnc
-	bdnz	L(gt2)
-
-	sld	r8, r11, cnt
-	nor	r10, r12, r7
-	nor	r8, r8, r8
-	b	L(cj2)
-
-L(gt2):	ld	u0, -24(up)
-	sld	r8, r11, cnt
-	srd	r9, u0, tnc
-	ld	u1, -32(up)
-	nor	r10, r12, r7
-	sld	r12, u0, cnt
-	srd	r7, u1, tnc
-	ld	u0, -40(up)
-	nor	r11, r8, r9
-	addi	up, up, -16
-	b	L(L10)
-
-	ALIGN(16)
-L(b00):	ld	u1, -16(up)
-	sld	r12, r10, cnt
-	srd	r7, u1, tnc
-	ld	u0, -24(up)
-	sld	r8, u1, cnt
-	srd	r9, u0, tnc
-	ld	u1, -32(up)
-	nor	r10, r12, r7
-	sld	r12, u0, cnt
-	srd	r7, u1, tnc
-	addi	rp, rp, 8
-	bdz	L(cj4)
-
-L(gt4):	addi	up, up, -32
-	ld	u0, -8(up)
-	nor	r11, r8, r9
-	b	L(L00)
-
-	ALIGN(16)
-L(b01):	bdnz	L(gt1)
-	sld	r8, r10, cnt
-	nor	r8, r8, r8
-	std	r8, -8(rp)
-	b	L(ret)
-
-L(gt1):	ld	u0, -16(up)
-	sld	r8, r10, cnt
-	srd	r9, u0, tnc
-	ld	u1, -24(up)
-	sld	r12, u0, cnt
-	srd	r7, u1, tnc
-	ld	u0, -32(up)
-	nor	r11, r8, r9
-	sld	r8, u1, cnt
-	srd	r9, u0, tnc
-	ld	u1, -40(up)
-	addi	up, up, -40
-	nor	r10, r12, r7
-	bdz	L(end)
-
-	ALIGN(32)
-L(top):	sld	r12, u0, cnt
-	srd	r7, u1, tnc
-	ld	u0, -8(up)
-	std	r11, -8(rp)
-	nor	r11, r8, r9
-L(L00):	sld	r8, u1, cnt
-	srd	r9, u0, tnc
-	ld	u1, -16(up)
-	std	r10, -16(rp)
-	nor	r10, r12, r7
-L(L11):	sld	r12, u0, cnt
-	srd	r7, u1, tnc
-	ld	u0, -24(up)
-	std	r11, -24(rp)
-	nor	r11, r8, r9
-L(L10):	sld	r8, u1, cnt
-	srd	r9, u0, tnc
-	ld	u1, -32(up)
-	addi	up, up, -32
-	std	r10, -32(rp)
-	addi	rp, rp, -32
-	nor	r10, r12, r7
-	bdnz	L(top)
-
-	ALIGN(32)
-L(end):	sld	r12, u0, cnt
-	srd	r7, u1, tnc
-	std	r11, -8(rp)
-L(cj4):	nor	r11, r8, r9
-	sld	r8, u1, cnt
-	std	r10, -16(rp)
-	nor	r8, r8, r8
-L(cj3):	nor	r10, r12, r7
-	std	r11, -24(rp)
-L(cj2):	std	r10, -32(rp)
-	std	r8, -40(rp)
-
-L(ret):	ld	r31, -8(r1)
-	ld	r30, -16(r1)
-ifdef(`HAVE_ABI_mode32',
-`	srdi	r3, retval, 32
-	mr	r4, retval
-',`	mr	r3, retval')
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode32/add_n.asm b/gmp/mpn/powerpc64/mode32/add_n.asm
index 1da8087fe1..4c62041e73 100644
--- a/gmp/mpn/powerpc64/mode32/add_n.asm
+++ b/gmp/mpn/powerpc64/mode32/add_n.asm
@@ -1,33 +1,22 @@
 dnl  PowerPC-64/mode32 mpn_add_n -- Add two limb vectors of the same length > 0
 dnl  and store sum in a third limb vector.
 
-dnl  Copyright 1999-2001, 2003, 2005 Free Software Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc64/mode32/addmul_1.asm b/gmp/mpn/powerpc64/mode32/addmul_1.asm
index bdc39512ac..41a90781a5 100644
--- a/gmp/mpn/powerpc64/mode32/addmul_1.asm
+++ b/gmp/mpn/powerpc64/mode32/addmul_1.asm
@@ -1,33 +1,22 @@
 dnl  PowerPC-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add
 dnl  the result to a second limb vector.
 
-dnl  Copyright 1999-2001, 2003, 2005 Free Software Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc64/mode32/mul_1.asm b/gmp/mpn/powerpc64/mode32/mul_1.asm
index 3a17e98797..091be4d272 100644
--- a/gmp/mpn/powerpc64/mode32/mul_1.asm
+++ b/gmp/mpn/powerpc64/mode32/mul_1.asm
@@ -1,33 +1,22 @@
 dnl  PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and add
 dnl  the result to a second limb vector.
 
-dnl  Copyright 1999-2001, 2003, 2005 Free Software Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc64/mode32/p4/gmp-mparam.h b/gmp/mpn/powerpc64/mode32/p4/gmp-mparam.h
deleted file mode 100644
index a7271381c5..0000000000
--- a/gmp/mpn/powerpc64/mode32/p4/gmp-mparam.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/* PowerPC-64 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 2008, 2009, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-/* 1800 MHz PPC970 */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      1
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         6
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        46
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     14
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_2_PI2_THRESHOLD              12
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           90
-
-#define MUL_TOOM22_THRESHOLD                16
-#define MUL_TOOM33_THRESHOLD                57
-#define MUL_TOOM44_THRESHOLD                94
-#define MUL_TOOM6H_THRESHOLD               125
-#define MUL_TOOM8H_THRESHOLD               187
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      99
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      61
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      56
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      70
-
-#define SQR_BASECASE_THRESHOLD               4
-#define SQR_TOOM2_THRESHOLD                 30
-#define SQR_TOOM3_THRESHOLD                 98
-#define SQR_TOOM4_THRESHOLD                136
-#define SQR_TOOM6_THRESHOLD                180
-#define SQR_TOOM8_THRESHOLD                272
-
-#define MULMID_TOOM42_THRESHOLD             34
-
-#define MULMOD_BNM1_THRESHOLD               12
-#define SQRMOD_BNM1_THRESHOLD               13
-
-#define MUL_FFT_MODF_THRESHOLD             244  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    244, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
-    {     15, 7}, {      8, 6}, {     17, 7}, {      9, 6}, \
-    {     19, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
-    {      9, 7}, {     20, 8}, {     11, 7}, {     23, 8}, \
-    {     13, 7}, {     29, 8}, {     19, 9}, {     11, 8}, \
-    {     27,10}, {      7, 9}, {     15, 8}, {     33, 9}, \
-    {     19, 8}, {     39, 9}, {     23, 8}, {     47, 9}, \
-    {     27,10}, {     15, 9}, {     39,10}, {     23, 9}, \
-    {     47,11}, {     15,10}, {     31, 9}, {     67,10}, \
-    {     39, 9}, {     83,10}, {     47, 9}, {     95, 8}, \
-    {    191, 9}, {     99,10}, {     55,11}, {     31,10}, \
-    {     63, 9}, {    127, 8}, {    255,10}, {     71, 9}, \
-    {    143, 8}, {    287,10}, {     79, 9}, {    159, 8}, \
-    {    319,11}, {     47,10}, {     95, 9}, {    191, 8}, \
-    {    383,10}, {    103,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    511,10}, {    143, 9}, \
-    {    287,11}, {     79,10}, {    159, 9}, {    319, 8}, \
-    {    639,10}, {    175, 9}, {    351, 8}, {    703,11}, \
-    {     95,10}, {    191, 9}, {    383, 8}, {    767,10}, \
-    {    207, 9}, {    415,10}, {    223, 9}, {    447,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,11}, \
-    {    143,10}, {    287, 9}, {    575, 8}, {   1151,11}, \
-    {    159,10}, {    319, 9}, {    639,11}, {    175,10}, \
-    {    351, 9}, {    703,12}, {     95,11}, {    191,10}, \
-    {    383, 9}, {    767,11}, {    207,10}, {    415, 9}, \
-    {    831,11}, {    223,10}, {    447,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 106
-#define MUL_FFT_THRESHOLD                 2688
-
-#define SQR_FFT_MODF_THRESHOLD             212  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    212, 5}, {     13, 6}, {     15, 7}, {      8, 6}, \
-    {     17, 7}, {      9, 6}, {     19, 7}, {     13, 8}, \
-    {      7, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
-    {     11, 7}, {     23, 8}, {     13, 7}, {     27, 9}, \
-    {      7, 8}, {     21, 9}, {     11, 8}, {     25,10}, \
-    {      7, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
-    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
-    {     15,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
-    {     67,10}, {     39, 9}, {     79, 8}, {    159,10}, \
-    {     47, 9}, {     95, 8}, {    191,11}, {     31,10}, \
-    {     63, 9}, {    127, 8}, {    255,10}, {     71, 9}, \
-    {    143, 8}, {    287,10}, {     79, 9}, {    159, 8}, \
-    {    319,11}, {     47, 9}, {    191, 8}, {    383,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511,10}, {    143, 9}, {    287, 8}, {    575,11}, \
-    {     79,10}, {    159, 9}, {    319, 8}, {    639,10}, \
-    {    175, 9}, {    351, 8}, {    703,10}, {    191, 9}, \
-    {    383, 8}, {    767,10}, {    207, 9}, {    415,11}, \
-    {    111,10}, {    223,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    319, 9}, {    639,11}, \
-    {    175,10}, {    351, 9}, {    703, 8}, {   1407,11}, \
-    {    191,10}, {    383, 9}, {    767,11}, {    207,10}, \
-    {    415,11}, {    223,10}, {    447,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 102
-#define SQR_FFT_THRESHOLD                 1984
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  55
-#define MULLO_MUL_N_THRESHOLD             5240
-
-#define DC_DIV_QR_THRESHOLD                 27
-#define DC_DIVAPPR_Q_THRESHOLD             108
-#define DC_BDIV_QR_THRESHOLD                51
-#define DC_BDIV_Q_THRESHOLD                126
-
-#define INV_MULMOD_BNM1_THRESHOLD           38
-#define INV_NEWTON_THRESHOLD               129
-#define INV_APPR_THRESHOLD                 116
-
-#define BINV_NEWTON_THRESHOLD              198
-#define REDC_1_TO_REDC_N_THRESHOLD          51
-
-#define MU_DIV_QR_THRESHOLD                807
-#define MU_DIVAPPR_Q_THRESHOLD             807
-#define MUPI_DIV_QR_THRESHOLD               54
-#define MU_BDIV_QR_THRESHOLD               748
-#define MU_BDIV_Q_THRESHOLD                872
-
-#define POWM_SEC_TABLE  4,35,152,780,2145
-
-#define MATRIX22_STRASSEN_THRESHOLD         11
-#define HGCD_THRESHOLD                     104
-#define HGCD_APPR_THRESHOLD                118
-#define HGCD_REDUCE_THRESHOLD             1329
-#define GCD_DC_THRESHOLD                   268
-#define GCDEXT_DC_THRESHOLD                241
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                 9
-#define GET_STR_PRECOMPUTE_THRESHOLD        18
-#define SET_STR_DC_THRESHOLD               996
-#define SET_STR_PRECOMPUTE_THRESHOLD      2170
-
-#define FAC_DSC_THRESHOLD                  442
-#define FAC_ODD_THRESHOLD                   26
diff --git a/gmp/mpn/powerpc64/mode32/sqr_diagonal.asm b/gmp/mpn/powerpc64/mode32/sqr_diagonal.asm
deleted file mode 100644
index ff5f4b3cfb..0000000000
--- a/gmp/mpn/powerpc64/mode32/sqr_diagonal.asm
+++ /dev/null
@@ -1,117 +0,0 @@
-dnl  PowerPC-64 mpn_sqr_diagonal.
-
-dnl  Copyright 2001-2003, 2005, 2006, 20010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		    cycles/limb
-C POWER3/PPC630		18
-C POWER4/PPC970		 ?
-C POWER5		 7.25
-C POWER6		 9.5
-
-C INPUT PARAMETERS
-define(`rp',  r3)
-define(`up',  r4)
-define(`n',   r5)
-
-ASM_START()
-PROLOGUE(mpn_sqr_diagonal)
-ifdef(`HAVE_ABI_mode32',
-`	rldicl	n, n, 0, 32')		C zero extend n
-
-	rldicl.	r0, n, 0,62		C r0 = n & 3, set cr0
-	addi	n, n, 3			C compute count...
-	cmpdi	cr6, r0, 2
-	srdi	n, n, 2			C ...for ctr
-	mtctr	n			C copy count into ctr
-	beq	cr0, L(b00)
-	blt	cr6, L(b01)
-	beq	cr6, L(b10)
-
-L(b11):	ld	r0, 0(up)
-	ld	r10, 8(up)
-	ld	r12, 16(up)
-	addi	rp, rp, -16
-	mulld	r7, r0, r0
-	mulhdu	r8, r0, r0
-	mulld	r9, r10, r10
-	mulhdu	r10, r10, r10
-	mulld	r11, r12, r12
-	mulhdu	r12, r12, r12
-	addi	up, up, 24
-	b	L(11)
-
-	ALIGN(16)
-L(b01):	ld	r0, 0(up)
-	addi	rp, rp, -48
-	addi	up, up, 8
-	mulld	r11, r0, r0
-	mulhdu	r12, r0, r0
-	b	L(01)
-
-	ALIGN(16)
-L(b10):	ld	r0, 0(up)
-	ld	r12, 8(up)
-	addi	rp, rp, -32
-	addi	up, up, 16
-	mulld	r9, r0, r0
-	mulhdu	r10, r0, r0
-	mulld	r11, r12, r12
-	mulhdu	r12, r12, r12
-	b	L(10)
-
-	ALIGN(32)
-L(b00):
-L(top):	ld	r0, 0(up)
-	ld	r8, 8(up)
-	ld	r10, 16(up)
-	ld	r12, 24(up)
-	mulld	r5, r0, r0
-	mulhdu	r6, r0, r0
-	mulld	r7, r8, r8
-	mulhdu	r8, r8, r8
-	mulld	r9, r10, r10
-	mulhdu	r10, r10, r10
-	mulld	r11, r12, r12
-	mulhdu	r12, r12, r12
-	addi	up, up, 32
-	std	r5, 0(rp)
-	std	r6, 8(rp)
-L(11):	std	r7, 16(rp)
-	std	r8, 24(rp)
-L(10):	std	r9, 32(rp)
-	std	r10, 40(rp)
-L(01):	std	r11, 48(rp)
-	std	r12, 56(rp)
-	addi	rp, rp, 64
-	bdnz	L(top)
-
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode32/sub_n.asm b/gmp/mpn/powerpc64/mode32/sub_n.asm
index 6fdc1d4719..5bcc4a47b5 100644
--- a/gmp/mpn/powerpc64/mode32/sub_n.asm
+++ b/gmp/mpn/powerpc64/mode32/sub_n.asm
@@ -1,33 +1,22 @@
 dnl  PowerPC-64/mode32 mpn_sub_n -- Subtract two limb vectors of the same
 dnl  length and store difference in a third limb vector.
 
-dnl  Copyright 1999-2001, 2003, 2005 Free Software Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc64/mode32/submul_1.asm b/gmp/mpn/powerpc64/mode32/submul_1.asm
index 22601c417e..44ac326994 100644
--- a/gmp/mpn/powerpc64/mode32/submul_1.asm
+++ b/gmp/mpn/powerpc64/mode32/submul_1.asm
@@ -1,33 +1,22 @@
 dnl  PowerPC-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
 dnl  the result from a second limb vector.
 
-dnl  Copyright 1999-2001, 2003, 2005 Free Software Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc64/mode64/addlsh1_n.asm b/gmp/mpn/powerpc64/mode64/addlsh1_n.asm
new file mode 100644
index 0000000000..15182e1024
--- /dev/null
+++ b/gmp/mpn/powerpc64/mode64/addlsh1_n.asm
@@ -0,0 +1,82 @@
+dnl  PowerPC-64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		cycles/limb
+C POWER3/PPC630:     2		(1.5 c/l should be possible)
+C POWER4/PPC970:     4		(2.0 c/l should be possible)
+
+C INPUT PARAMETERS
+C rp	r3
+C up	r4
+C vp	r5
+C n	r6
+
+define(`rp',`r3')
+define(`up',`r4')
+define(`vp',`r5')
+
+define(`s0',`r6')
+define(`s1',`r7')
+define(`u0',`r8')
+define(`v0',`r10')
+define(`v1',`r11')
+
+ASM_START()
+PROLOGUE(mpn_addlsh1_n)
+	mtctr	r6		C copy n in ctr; r6 is reused as s0 afterwards
+	addic	r31, r31, 0	C clear cy (adds 0, so r31 keeps its value)
+
+	ld	v0, 0(vp)	C load first v limb
+	ld	u0, 0(up)	C load first u limb
+	addi	up, up, -8	C bias up by -8 to suit the ldu 16(up) in the loop
+	addi	rp, rp, -8	C bias rp by -8 to suit the 8(rp) and stdu 16(rp) stores
+	sldi	s1, v0, 1	C s1 = first v limb shifted left one bit
+	bdz	L(end)		C If done, skip loop
+
+L(oop):	ld	v1, 8(vp)	C load v limb
+	adde	s1, s1, u0	C add limbs with cy, set cy
+	std	s1, 8(rp)	C store result limb
+	srdi	s0, v0, 63	C shift down previous v limb
+	ldu	u0, 16(up)	C load u limb and update up
+	rldimi	s0, v1, 1, 0	C left shift v limb and merge with prev v limb
+
+	bdz	L(exit)		C decrement ctr and exit if done
+
+	ldu	v0, 16(vp)	C load v limb and update vp
+	adde	s0, s0, u0	C add limbs with cy, set cy
+	stdu	s0, 16(rp)	C store result limb and update rp
+	srdi	s1, v1, 63	C shift down previous v limb
+	ld	u0, 8(up)	C load u limb
+	rldimi	s1, v0, 1, 0	C left shift v limb and merge with prev v limb
+
+	bdnz	L(oop)		C decrement ctr and loop back
+
+L(end):	adde	r7, s1, u0	C last limb pending in s1: add u limb with cy
+	std	r7, 8(rp)	C store last result limb
+	srdi	r3, v0, 63	C bit shifted out of the last v limb
+	addze	r3, r3		C return value = shifted-out bit + cy
+	blr
+L(exit):	adde	r7, s0, u0	C last limb pending in s0: add u limb with cy
+	std	r7, 16(rp)	C store last result limb
+	srdi	r3, v1, 63	C bit shifted out of the last v limb
+	addze	r3, r3		C return value = shifted-out bit + cy
+	blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/addmul_1.asm b/gmp/mpn/powerpc64/mode64/addmul_1.asm
new file mode 100644
index 0000000000..cadab3adf8
--- /dev/null
+++ b/gmp/mpn/powerpc64/mode64/addmul_1.asm
@@ -0,0 +1,185 @@
+dnl  PowerPC-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl  the result to a second limb vector.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		cycles/limb
+C POWER3/PPC630:    6-18
+C POWER4/PPC970:     8
+C POWER5:            8
+
+C TODO
+C  * Reduce the number of registers used.  Some mul destination registers could
+C    be coalesced.
+C  * Delay std for preserving registers, and suppress them for n=1.
+C  * Write faster feed-in code.  If nothing else, avoid one or two up updates.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`vl', `r6')
+
+ASM_START()
+PROLOGUE(mpn_addmul_1)
+	std	r31, -8(r1)	C save r26..r31 at negative offsets from r1
+	std	r30, -16(r1)	C (all six are reloaded at L(ret))
+	std	r29, -24(r1)
+	std	r28, -32(r1)
+	std	r27, -40(r1)
+	std	r26, -48(r1)
+
+	rldicl.	r0, n, 0,62	C r0 = n & 3, set cr0
+	cmpdi	cr6, r0, 2	C cr6 = compare (n & 3) with 2
+	addi	n, n, 3		C compute count...
+	srdi	n, n, 2		C ...for ctr
+	mtctr	n		C copy count into ctr
+	beq	cr0, L(b00)	C n & 3 == 0
+	blt	cr6, L(b01)	C n & 3 == 1
+	beq	cr6, L(b10)	C n & 3 == 2, else fall through for n & 3 == 3
+
+L(b11):	ld	r26, 0(up)	C feed-in for n & 3 == 3: handle one limb here
+	ld	r28, 0(rp)
+	addi	up, up, 8
+	nop
+	mulld	r0, r26, r6	C low half of u limb times r6 (vl)
+	mulhdu	r12, r26, r6	C high half becomes the carry limb r12
+	addc	r0, r0, r28	C add the rp limb, set cy
+	std	r0, 0(rp)
+	addi	rp, rp, 8
+	b	L(fic)
+
+L(b00):	ld	r26, 0(up)	C feed-in for n & 3 == 0: handle two limbs here
+	ld	r27, 8(up)
+	ld	r28, 0(rp)
+	ld	r29, 8(rp)
+	addi	up, up, 16
+	nop
+	mulld	r0, r26, r6
+	mulhdu	r5, r26, r6
+	mulld	r7, r27, r6
+	mulhdu	r8, r27, r6
+	addc	r7, r7, r5	C chain high half of product 0 into low half of product 1
+	addze	r12, r8		C carry limb r12 = high half of product 1 + cy
+	addc	r0, r0, r28	C add the rp limbs, starting a new cy chain
+	std	r0, 0(rp)
+	adde	r7, r7, r29
+	std	r7, 8(rp)
+	addi	rp, rp, 16
+	b	L(fic)
+
+L(b01):	bdnz	L(gt1)		C n & 3 == 1: decrement ctr, branch if n > 1
+	ld	r26, 0(up)	C n == 1: single limb, no loop
+	ld	r28, 0(rp)
+	mulld	r0, r26, r6
+	mulhdu	r8, r26, r6	C high half in r8, as L(ret) expects
+	addc	r0, r0, r28
+	std	r0, 0(rp)
+	b	L(ret)
+L(gt1):	ld	r26, 0(up)	C n & 3 == 1 and n > 1: handle three limbs here
+	ld	r27, 8(up)
+	mulld	r0, r26, r6
+	mulhdu	r5, r26, r6
+	ld	r26, 16(up)
+	ld	r28, 0(rp)
+	mulld	r7, r27, r6
+	mulhdu	r8, r27, r6
+	ld	r29, 8(rp)
+	ld	r30, 16(rp)
+	mulld	r9, r26, r6
+	mulhdu	r10, r26, r6
+	addc	r7, r7, r5	C chain the product halves together
+	adde	r9, r9, r8
+	addze	r12, r10	C carry limb r12 = top high half + cy
+	addc	r0, r0, r28	C add the rp limbs, starting a new cy chain
+	std	r0, 0(rp)
+	adde	r7, r7, r29
+	std	r7, 8(rp)
+	adde	r9, r9, r30
+	std	r9, 16(rp)
+	addi	up, up, 24
+	addi	rp, rp, 24
+	b	L(fic)
+
+L(b10):	addic	r0, r0, 0	C n & 3 == 2: clear cy (adds 0 to r0)
+	li	r12, 0		C cy_limb = 0
+L(fic):	ld	r26, 0(up)	C preload two u limbs for L(top) or L(end)
+	ld	r27, 8(up)
+	addi	up, up, 16
+	bdz	L(end)		C decrement ctr, skip the loop when it reaches 0
+				C registers dying
+L(top):	mulld	r0, r26, r6	C
+	mulhdu	r5, r26, r6	C 26
+	ld	r26, 0(up)	C
+	ld	r28, 0(rp)	C
+	mulld	r7, r27, r6	C
+	mulhdu	r8, r27, r6	C 27
+	ld	r27, 8(up)	C
+	ld	r29, 8(rp)	C
+	adde	r0, r0, r12	C 0 12
+	adde	r7, r7, r5	C 5 7
+	mulld	r9, r26, r6	C
+	mulhdu	r10, r26, r6	C 26
+	ld	r26, 16(up)	C
+	ld	r30, 16(rp)	C
+	mulld	r11, r27, r6	C
+	mulhdu	r12, r27, r6	C 27
+	ld	r27, 24(up)	C
+	ld	r31, 24(rp)	C
+	adde	r9, r9, r8	C 8 9
+	adde	r11, r11, r10	C 10 11
+	addze	r12, r12	C 12
+	addc	r0, r0, r28	C 0 28
+	std	r0, 0(rp)	C 0
+	adde	r7, r7, r29	C 7 29
+	std	r7, 8(rp)	C 7
+	adde	r9, r9, r30	C 9 30
+	std	r9, 16(rp)	C 9
+	adde	r11, r11, r31	C 11 31
+	std	r11, 24(rp)	C 11
+	addi	up, up, 32	C
+	addi	rp, rp, 32	C
+	bdnz	L(top)		C
+
+L(end):	mulld	r0, r26, r6	C wind-down: the two preloaded u limbs
+	mulhdu	r5, r26, r6
+	ld	r28, 0(rp)
+	nop
+	mulld	r7, r27, r6
+	mulhdu	r8, r27, r6
+	ld	r29, 8(rp)
+	nop
+	adde	r0, r0, r12	C add incoming carry limb with cy
+	adde	r7, r7, r5
+	addze	r8, r8		C fold cy into the top high half
+	addc	r0, r0, r28	C add the rp limbs, starting a new cy chain
+	std	r0, 0(rp)
+	adde	r7, r7, r29
+	std	r7, 8(rp)
+L(ret):	addze	r3, r8		C return value = r8 + cy
+	ld	r31, -8(r1)	C reload the registers saved at entry
+	ld	r30, -16(r1)
+	ld	r29, -24(r1)
+	ld	r28, -32(r1)
+	ld	r27, -40(r1)
+	ld	r26, -48(r1)
+	blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/aors_n.asm b/gmp/mpn/powerpc64/mode64/aors_n.asm
index 0e8474fdcc..42b6d79472 100644
--- a/gmp/mpn/powerpc64/mode64/aors_n.asm
+++ b/gmp/mpn/powerpc64/mode64/aors_n.asm
@@ -1,41 +1,56 @@
 dnl  PowerPC-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
 
-dnl  Copyright 1999-2001, 2003-2005, 2007, 2011 Free Software Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007 Free Software
+dnl  Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C                   cycles/limb
-C POWER3/PPC630          1.5
-C POWER4/PPC970          2
-C POWER5                 2
-C POWER6                 2.63
-C POWER7               2.25-2.87
+C		cycles/limb
+C POWER3/PPC630:     1.5
+C POWER4/PPC970:     2
+
+C   n	   POWER3/PPC630   POWER4/PPC970
+C     1	       17.00	       19.00
+C     2		9.00	       10.49
+C     3		5.33		7.66
+C     4		4.50		5.14
+C     5		4.20		4.80
+C     6		3.83		4.33
+C     7		3.00		3.99
+C     8		2.87		3.55
+C     9		2.89		3.40
+C    10		2.60		3.42
+C    11		2.45		3.15
+C    12		2.41		2.99
+C    13		2.46		3.01
+C    14		2.42		2.97
+C    15		2.20		2.85
+C    50		1.78		2.44
+C   100		1.83		2.20
+C   200		1.55		2.12
+C   400		1.53		2.05
+C  1000		1.98		2.02#
+C  2000		1.50#		2.04
+C  4000		2.55		2.50
+C  8000		2.70		2.45
+C 16000		2.65		5.94
+C 32000		2.62	       16.41
+C 64000		2.73	       18.94
 
 C This code is a little bit slower for POWER3/PPC630 than the simple code used
 C previously, but it is much faster for POWER4/PPC970.  The reason for the
@@ -147,8 +162,7 @@ L(go):	ld	r6, 0(r4)	C load s1 limb
 	addi	r4, r4, 32
 	addi	r5, r5, 32
 
-	ALIGN(16)
-L(top):	ADDSUBC	r28, r7, r6
+L(oop):	ADDSUBC	r28, r7, r6
 	ld	r6, 0(r4)	C load s1 limb
 	ld	r7, 0(r5)	C load s2 limb
 	ADDSUBC	r29, r9, r8
@@ -167,7 +181,7 @@ L(top):	ADDSUBC	r28, r7, r6
 	std	r30, 16(r3)
 	std	r31, 24(r3)
 	addi	r3, r3, 32
-	bdnz	L(top)		C decrement ctr and loop back
+	bdnz	L(oop)		C decrement ctr and loop back
 
 L(end):	ADDSUBC	r28, r7, r6
 	ADDSUBC	r29, r9, r8
diff --git a/gmp/mpn/powerpc64/mode64/aorsmul_1.asm b/gmp/mpn/powerpc64/mode64/aorsmul_1.asm
deleted file mode 100644
index 0c12f9b660..0000000000
--- a/gmp/mpn/powerpc64/mode64/aorsmul_1.asm
+++ /dev/null
@@ -1,225 +0,0 @@
-dnl  PowerPC-64 mpn_addmul_1 and mpn_submul_1.
-
-dnl  Copyright 1999-2001, 2003-2006, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                   mpn_addmul_1    mpn_submul_1
-C                   cycles/limb     cycles/limb
-C POWER3/PPC630		6-18		6-18
-C POWER4/PPC970		 8		 8.3
-C POWER5		 8		 8.25
-C POWER6		16.25		16.75
-C POWER7		 3.77		 4.9
-
-C TODO
-C  * Try to reduce the number of needed live registers
-C  * Add support for _1c entry points
-
-C INPUT PARAMETERS
-define(`rp', `r3')
-define(`up', `r4')
-define(`n',  `r5')
-define(`vl', `r6')
-
-ifdef(`OPERATION_addmul_1',`
-  define(ADDSUBC,	adde)
-  define(ADDSUB,	addc)
-  define(func,		mpn_addmul_1)
-  define(func_nc,	mpn_addmul_1c)	C FIXME: not really supported
-  define(SM,		`')
-')
-ifdef(`OPERATION_submul_1',`
-  define(ADDSUBC,	subfe)
-  define(ADDSUB,	subfc)
-  define(func,		mpn_submul_1)
-  define(func_nc,	mpn_submul_1c)	C FIXME: not really supported
-  define(SM,		`$1')
-')
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-ASM_START()
-PROLOGUE(func)
-	std	r31, -8(r1)
-	rldicl.	r0, n, 0,62	C r0 = n & 3, set cr0
-	std	r30, -16(r1)
-	cmpdi	cr6, r0, 2
-	std	r29, -24(r1)
-	addi	n, n, 3		C compute count...
-	std	r28, -32(r1)
-	srdi	n, n, 2		C ...for ctr
-	std	r27, -40(r1)
-	mtctr	n		C copy count into ctr
-	beq	cr0, L(b00)
-	blt	cr6, L(b01)
-	beq	cr6, L(b10)
-
-L(b11):	ld	r9, 0(up)
-	ld	r28, 0(rp)
-	mulld	r0, r9, r6
-	mulhdu	r12, r9, r6
-	ADDSUB	r0, r0, r28
-	std	r0, 0(rp)
-	addi	rp, rp, 8
-	ld	r9, 8(up)
-	ld	r27, 16(up)
-	addi	up, up, 24
-SM(`	subfe	r11, r11, r11 ')
-	b	L(bot)
-
-	ALIGN(16)
-L(b00):	ld	r9, 0(up)
-	ld	r27, 8(up)
-	ld	r28, 0(rp)
-	ld	r29, 8(rp)
-	mulld	r0, r9, r6
-	mulhdu	r5, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	addc	r7, r7, r5
-	addze	r12, r8
-	ADDSUB	r0, r0, r28
-	std	r0, 0(rp)
-	ADDSUBC	r7, r7, r29
-	std	r7, 8(rp)
-	addi	rp, rp, 16
-	ld	r9, 16(up)
-	ld	r27, 24(up)
-	addi	up, up, 32
-SM(`	subfe	r11, r11, r11 ')
-	b	L(bot)
-
-	ALIGN(16)
-L(b01):	bdnz	L(gt1)
-	ld	r9, 0(up)
-	ld	r11, 0(rp)
-	mulld	r0, r9, r6
-	mulhdu	r8, r9, r6
-	ADDSUB	r0, r0, r11
-	std	r0, 0(rp)
-SM(`	subfe	r11, r11, r11 ')
-SM(`	addic	r11, r11, 1 ')
-	addze	r3, r8
-	blr
-L(gt1):	ld	r9, 0(up)
-	ld	r27, 8(up)
-	mulld	r0, r9, r6
-	mulhdu	r5, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	ld	r9, 16(up)
-	ld	r28, 0(rp)
-	ld	r29, 8(rp)
-	ld	r30, 16(rp)
-	mulld	r11, r9, r6
-	mulhdu	r10, r9, r6
-	addc	r7, r7, r5
-	adde	r11, r11, r8
-	addze	r12, r10
-	ADDSUB	r0, r0, r28
-	std	r0, 0(rp)
-	ADDSUBC	r7, r7, r29
-	std	r7, 8(rp)
-	ADDSUBC	r11, r11, r30
-	std	r11, 16(rp)
-	addi	rp, rp, 24
-	ld	r9, 24(up)
-	ld	r27, 32(up)
-	addi	up, up, 40
-SM(`	subfe	r11, r11, r11 ')
-	b	L(bot)
-
-L(b10):	addic	r0, r0, 0
-	li	r12, 0		C cy_limb = 0
-	ld	r9, 0(up)
-	ld	r27, 8(up)
-	bdz	L(end)
-	addi	up, up, 16
-
-	ALIGN(16)
-L(top):	mulld	r0, r9, r6
-	mulhdu	r5, r9, r6	C 9
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6	C 27
-	ld	r9, 0(up)
-	ld	r28, 0(rp)
-	ld	r27, 8(up)
-	ld	r29, 8(rp)
-	adde	r0, r0, r12	C 0 12
-	adde	r7, r7, r5	C 5 7
-	mulld	r5, r9, r6
-	mulhdu	r10, r9, r6	C 9
-	mulld	r11, r27, r6
-	mulhdu	r12, r27, r6	C 27
-	ld	r9, 16(up)
-	ld	r30, 16(rp)
-	ld	r27, 24(up)
-	ld	r31, 24(rp)
-	adde	r5, r5, r8	C 8 5
-	adde	r11, r11, r10	C 10 11
-	addze	r12, r12	C 12
-	ADDSUB	r0, r0, r28	C 0 28
-	std	r0, 0(rp)	C 0
-	ADDSUBC	r7, r7, r29	C 7 29
-	std	r7, 8(rp)	C 7
-	ADDSUBC	r5, r5, r30	C 5 30
-	std	r5, 16(rp)	C 5
-	ADDSUBC	r11, r11, r31	C 11 31
-	std	r11, 24(rp)	C 11
-	addi	up, up, 32
-SM(`	subfe	r11, r11, r11 ')
-	addi	rp, rp, 32
-L(bot):
-SM(`	addic	r11, r11, 1 ')
-	bdnz	L(top)
-
-L(end):	mulld	r0, r9, r6
-	mulhdu	r5, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	ld	r28, 0(rp)
-	ld	r29, 8(rp)
-	adde	r0, r0, r12
-	adde	r7, r7, r5
-	addze	r8, r8
-	ADDSUB	r0, r0, r28
-	std	r0, 0(rp)
-	ADDSUBC	r7, r7, r29
-	std	r7, 8(rp)
-SM(`	subfe	r11, r11, r11 ')
-SM(`	addic	r11, r11, 1 ')
-	addze	r3, r8
-	ld	r31, -8(r1)
-	ld	r30, -16(r1)
-	ld	r29, -24(r1)
-	ld	r28, -32(r1)
-	ld	r27, -40(r1)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/aorsorrlsh1_n.asm b/gmp/mpn/powerpc64/mode64/aorsorrlsh1_n.asm
deleted file mode 100644
index 2c5400ab52..0000000000
--- a/gmp/mpn/powerpc64/mode64/aorsorrlsh1_n.asm
+++ /dev/null
@@ -1,43 +0,0 @@
-dnl  PowerPC-64 mpn_addlsh1_n, mpn_sublsh1_n, mpn_rsblsh1_n.
-
-dnl  Copyright 2003, 2005, 2009, 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-define(LSH,		1)
-define(RSH,		63)
-
-ifdef(`OPERATION_addlsh1_n',`define(`DO_add')')
-ifdef(`OPERATION_sublsh1_n',`define(`DO_sub')')
-ifdef(`OPERATION_rsblsh1_n',`define(`DO_rsb')')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n mpn_rsblsh1_n)
-
-include_mpn(`powerpc64/mode64/aorsorrlshC_n.asm')
diff --git a/gmp/mpn/powerpc64/mode64/aorsorrlsh2_n.asm b/gmp/mpn/powerpc64/mode64/aorsorrlsh2_n.asm
deleted file mode 100644
index 447791abb0..0000000000
--- a/gmp/mpn/powerpc64/mode64/aorsorrlsh2_n.asm
+++ /dev/null
@@ -1,43 +0,0 @@
-dnl  PowerPC-64 mpn_addlsh2_n, mpn_sublsh2_n, mpn_rsblsh2_n.
-
-dnl  Copyright 2003, 2005, 2009, 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-define(LSH,		2)
-define(RSH,		62)
-
-ifdef(`OPERATION_addlsh2_n',`define(`DO_add')')
-ifdef(`OPERATION_sublsh2_n',`define(`DO_sub')')
-ifdef(`OPERATION_rsblsh2_n',`define(`DO_rsb')')
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n mpn_rsblsh2_n)
-
-include_mpn(`powerpc64/mode64/aorsorrlshC_n.asm')
diff --git a/gmp/mpn/powerpc64/mode64/aorsorrlshC_n.asm b/gmp/mpn/powerpc64/mode64/aorsorrlshC_n.asm
deleted file mode 100644
index 6158f541fc..0000000000
--- a/gmp/mpn/powerpc64/mode64/aorsorrlshC_n.asm
+++ /dev/null
@@ -1,187 +0,0 @@
-dnl  PowerPC-64 mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n.
-
-dnl  Copyright 2003, 2005, 2009, 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-C                  cycles/limb
-C POWER3/PPC630          1.83   (1.5 c/l should be possible)
-C POWER4/PPC970          3      (2.0 c/l should be possible)
-C POWER5                 3
-C POWER6              3.5-47
-C POWER7                 3
-
-C STATUS
-C  * Try combining upx+up, and vpx+vp.
-C  * The worst case 47 c/l for POWER6 happens if the 3rd operand for ldx is
-C    greater than the 2nd operand.  Yes, this addition is non-commutative wrt
-C    performance.
-
-C INPUT PARAMETERS
-define(`rp', `r3')
-define(`up', `r4')
-define(`vp', `r5')
-define(`n',  `r6')
-
-ifdef(`DO_add', `
-  define(`ADDSUBC',	`addc	$1, $2, $3')
-  define(`ADDSUBE',	`adde	$1, $2, $3')
-  define(INITCY,	`addic	$1, r1, 0')
-  define(RETVAL,	`addze	r3, $1')
-  define(`func',	mpn_addlsh`'LSH`'_n)')
-ifdef(`DO_sub', `
-  define(`ADDSUBC',	`subfc	$1, $2, $3')
-  define(`ADDSUBE',	`subfe	$1, $2, $3')
-  define(INITCY,	`addic	$1, r1, -1')
-  define(RETVAL,	`subfze	r3, $1
-			neg	r3, r3')
-  define(`func',	mpn_sublsh`'LSH`'_n)')
-ifdef(`DO_rsb', `
-  define(`ADDSUBC',	`subfc	$1, $3, $2')
-  define(`ADDSUBE',	`subfe	$1, $3, $2')
-  define(INITCY,	`addic	$1, r1, -1')
-  define(RETVAL,	`addme	r3, $1')
-  define(`func',	mpn_rsblsh`'LSH`'_n)')
-
-define(`rpx', `r6')
-define(`upx', `r7')
-define(`vpx', `r12')
-
-define(`s0', `r0')  define(`s1', `r9')
-define(`u0', `r8')
-define(`v0', `r10') define(`v1', `r11')
-
-
-ASM_START()
-PROLOGUE(func)
-	cmpldi	cr0, n, 13
-	bgt	L(big)
-
-	mtctr	n		C copy n in ctr
-	INITCY(	r0)		C clear cy
-
-	ld	v0, 0(vp)	C load v limb
-	ld	u0, 0(up)	C load u limb
-	addi	up, up, -8	C update up
-	addi	rp, rp, -8	C update rp
-	sldi	s1, v0, LSH
-	bdz	L(ex1)		C If done, skip loop
-
-	ALIGN(16)
-L(lo0):	ld	v1, 8(vp)	C load v limb
-	ADDSUBE(s1, s1, u0)	C add limbs with cy, set cy
-	ldu	u0, 16(up)	C load u limb and update up
-	srdi	s0, v0, RSH	C shift down previous v limb
-	std	s1, 8(rp)	C store result limb
-	rldimi	s0, v1, LSH, 0	C left shift v limb and merge with prev v limb
-	bdz	L(ex0)		C decrement ctr and exit if done
-	ldu	v0, 16(vp)	C load v limb and update vp
-	ADDSUBE(s0, s0, u0)	C add limbs with cy, set cy
-	ld	u0, 8(up)	C load u limb
-	srdi	s1, v1, RSH	C shift down previous v limb
-	stdu	s0, 16(rp)	C store result limb and update rp
-	rldimi	s1, v0, LSH, 0	C left shift v limb and merge with prev v limb
-	bdnz	L(lo0)		C decrement ctr and loop back
-
-L(ex1):	ADDSUBE(r7, s1, u0)
-	std	r7, 8(rp)	C store last result limb
-	srdi	r0, v0, RSH
-	RETVAL(	r0)
-	blr
-L(ex0):	ADDSUBE(r7, s0, u0)
-	std	r7, 16(rp)	C store last result limb
-	srdi	r0, v1, RSH
-	RETVAL(	r0)
-	blr
-
-
-L(big):	rldicl.	r0, n, 0,63	C r0 = n & 1, set cr0
-	addi	r6, n, -1	C ...for ctr
-	srdi	r6, r6, 1	C ...for ctr
-	mtctr	r6		C copy count into ctr
-	beq	cr0, L(b0)
-
-L(b1):	ld	v1, 0(vp)
-	ld	u0, 0(up)
-	sldi	s1, v1, LSH
-	srdi	s0, v1, RSH
-	ld	v0, 8(vp)
-	ADDSUBC(s1, s1, u0)	C add limbs without cy, set cy
-	addi	rpx, rp, -16
-	addi	rp, rp, -8
-	sub	upx, up, rp
-	sub	vpx, vp, rp
-	sub	up, up, rpx
-	sub	vp, vp, rpx
-	addi	up, up, 8
-	addi	upx, upx, 16
-	addi	vp, vp, 16
-	addi	vpx, vpx, 24
-	b	L(mid)
-
-L(b0):	ld	v0, 0(vp)
-	ld	u0, 0(up)
-	sldi	s0, v0, LSH
-	srdi	s1, v0, RSH
-	ld	v1, 8(vp)
-	ADDSUBC(s0, s0, u0)	C add limbs without cy, set cy
-	addi	rpx, rp, -8
-	addi	rp, rp, -16
-	sub	upx, up, rpx
-	sub	vpx, vp, rpx
-	sub	up, up, rp
-	sub	vp, vp, rp
-	addi	up, up, 8
-	addi	upx, upx, 16
-	addi	vp, vp, 16
-	addi	vpx, vpx, 24
-
-	ALIGN(32)
-L(top):	ldx	u0, rp, up
-	ldx	v0, rp, vp
-	rldimi	s1, v1, LSH, 0
-	stdu	s0, 16(rp)
-	srdi	s0, v1, RSH
-	ADDSUBE(s1, s1, u0)	C add limbs with cy, set cy
-L(mid):	ldx	u0, rpx, upx
-	ldx	v1, rpx, vpx
-	rldimi	s0, v0, LSH, 0
-	stdu	s1, 16(rpx)
-	srdi	s1, v0, RSH
-	ADDSUBE(s0, s0, u0)	C add limbs with cy, set cy
-	bdnz	L(top)		C decrement CTR and loop back
-
-	ldx	u0, rp, up
-	rldimi	s1, v1, LSH, 0
-	std	s0, 16(rp)
-	srdi	s0, v1, RSH
-	ADDSUBE(s1, s1, u0)	C add limbs with cy, set cy
-	std	s1, 24(rp)
-
-	RETVAL(	s0)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/bdiv_dbm1c.asm b/gmp/mpn/powerpc64/mode64/bdiv_dbm1c.asm
index 45cded9715..8c1e87e1ee 100644
--- a/gmp/mpn/powerpc64/mode64/bdiv_dbm1c.asm
+++ b/gmp/mpn/powerpc64/mode64/bdiv_dbm1c.asm
@@ -1,41 +1,28 @@
 dnl  PPC64 mpn_bdiv_dbm1c.
 
-dnl  Copyright 2008, 2010 Free Software Foundation, Inc.
+dnl  Copyright 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C                 cycles/limb
-C POWER3/PPC630       6-18
-C POWER4/PPC970       8.25
-C POWER5              8.5  fluctuating as function of n % 3
-C POWER6             15
-C POWER7              4.75
+C		cycles/limb
+C POWER3/PPC630:    6-18
+C POWER4/PPC970:    8.5
+C POWER5:           ?
 
 C TODO
 C  * Nothing to do...
@@ -61,7 +48,6 @@ PROLOGUE(mpn_bdiv_dbm1c)
 	blt	cr6, L(b01)
 	beq	cr6, L(b10)
 
-	ALIGN(16)
 L(b11):	mulld	r5, r0, r6
 	mulhdu	r12, r0, r6
 	ld	r0, 8(r4)
@@ -69,14 +55,13 @@ L(b11):	mulld	r5, r0, r6
 	addi	r3, r3, -24
 	b	L(3)
 
-	ALIGN(16)
 L(b00):	mulld	r9, r0, r6
 	mulhdu	r8, r0, r6
+	ld	r0, 8(r4)
 	addi	r4, r4, -16
 	addi	r3, r3, -16
 	b	L(0)
 
-	ALIGN(16)
 L(b01):	mulld	r5, r0, r6
 	mulhdu	r12, r0, r6
 	addi	r3, r3, -8
@@ -85,43 +70,42 @@ L(b01):	mulld	r5, r0, r6
 	addi	r4, r4, -8
 	b	L(1)
 
-	ALIGN(16)
 L(b10):	mulld	r9, r0, r6
 	mulhdu	r8, r0, r6
+	ld	r0, 8(r4)
 	ble	cr7, L(e2)
 
 	ALIGN(16)
-L(top):	subfc	r11, r9, r7
-	ld	r10, 8(r4)
+L(top):	mulld	r5, r0, r6
+	mulhdu	r12, r0, r6
+	subfc	r11, r9, r7
 	ld	r0, 16(r4)
 	subfe	r7, r8, r11
 	std	r11, 0(r3)
-	mulld	r5, r10, r6
-	mulhdu	r12, r10, r6
 L(1):	mulld	r9, r0, r6
 	mulhdu	r8, r0, r6
 	subfc	r11, r5, r7
+	ld	r0, 24(r4)
 	subfe	r7, r12, r11
 	std	r11, 8(r3)
-L(0):	subfc	r11, r9, r7
-	ld	r10, 24(r4)
+L(0):	mulld	r5, r0, r6
+	mulhdu	r12, r0, r6
+	subfc	r11, r9, r7
 	ld	r0, 32(r4)
 	subfe	r7, r8, r11
 	std	r11, 16(r3)
-	mulld	r5, r10, r6
-	mulhdu	r12, r10, r6
 L(3):	mulld	r9, r0, r6
 	mulhdu	r8, r0, r6
 	subfc	r11, r5, r7
+	ld	r0, 40(r4)
 	subfe	r7, r12, r11
 	std	r11, 24(r3)
 	addi	r4, r4, 32
 	addi	r3, r3, 32
 	bdnz	L(top)
 
-L(e2):	ld	r10, 8(r4)
-	mulld	r5, r10, r6
-	mulhdu	r12, r10, r6
+L(e2):	mulld	r5, r0, r6
+	mulhdu	r12, r0, r6
 	subfc	r11, r9, r7
 	subfe	r7, r8, r11
 	std	r11, 0(r3)
diff --git a/gmp/mpn/powerpc64/mode64/cnd_aors_n.asm b/gmp/mpn/powerpc64/mode64/cnd_aors_n.asm
deleted file mode 100644
index 24968c1912..0000000000
--- a/gmp/mpn/powerpc64/mode64/cnd_aors_n.asm
+++ /dev/null
@@ -1,196 +0,0 @@
-dnl  PowerPC-64 mpn_cnd_add_n/mpn_cnd_sub_n.
-
-dnl  Copyright 1999-2001, 2003-2005, 2007, 2011, 2012 Free Software Foundation,
-dnl  Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                   cycles/limb
-C POWER3/PPC630          ?
-C POWER4/PPC970          2.25
-C POWER5                 ?
-C POWER6                 3
-C POWER7                 2
-
-C INPUT PARAMETERS
-define(`cnd',  `r3')
-define(`rp',   `r4')
-define(`up',   `r5')
-define(`vp',   `r6')
-define(`n',    `r7')
-
-ifdef(`OPERATION_cnd_add_n',`
-  define(ADDSUBC,	adde)
-  define(ADDSUB,	addc)
-  define(func,		mpn_cnd_add_n)
-  define(GENRVAL,	`addi	r3, r3, 1')
-  define(SETCBR,	`addic	r0, $1, -1')
-  define(CLRCB,		`addic	r0, r0, 0')
-')
-ifdef(`OPERATION_cnd_sub_n',`
-  define(ADDSUBC,	subfe)
-  define(ADDSUB,	subfc)
-  define(func,		mpn_cnd_sub_n)
-  define(GENRVAL,	`neg	r3, r3')
-  define(SETCBR,	`subfic	r0, $1, 0')
-  define(CLRCB,		`addic	r0, r1, -1')
-')
-
-MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
-
-ASM_START()
-PROLOGUE(func)
-	std	r31, -8(r1)
-	std	r30, -16(r1)
-	std	r29, -24(r1)
-	std	r28, -32(r1)
-	std	r27, -40(r1)
-
-	subfic	cnd, cnd, 0
-	subfe	cnd, cnd, cnd
-
-	rldicl.	r0, n, 0,62	C r0 = n & 3, set cr0
-	cmpdi	cr6, r0, 2
-	addi	n, n, 3	C compute count...
-	srdi	n, n, 2	C ...for ctr
-	mtctr	n		C copy count into ctr
-	beq	cr0, L(b00)
-	blt	cr6, L(b01)
-	beq	cr6, L(b10)
-
-L(b11):	ld	r8, 0(up)	C load s1 limb
-	ld	r9, 0(vp)	C load s2 limb
-	ld	r10, 8(up)	C load s1 limb
-	ld	r11, 8(vp)	C load s2 limb
-	ld	r12, 16(up)	C load s1 limb
-	addi	up, up, 24
-	ld	r0, 16(vp)	C load s2 limb
-	addi	vp, vp, 24
-	and	r9, r9, cnd
-	and	r11, r11, cnd
-	and	r0, r0, cnd
-	ADDSUB	r29, r9, r8
-	ADDSUBC	r30, r11, r10
-	ADDSUBC	r31, r0, r12
-	std	r29, 0(rp)
-	std	r30, 8(rp)
-	std	r31, 16(rp)
-	addi	rp, rp, 24
-	bdnz	L(go)
-	b	L(ret)
-
-L(b01):	ld	r12, 0(up)	C load s1 limb
-	addi	up, up, 8
-	ld	r0, 0(vp)	C load s2 limb
-	addi	vp, vp, 8
-	and	r0, r0, cnd
-	ADDSUB	r31, r0, r12	C add
-	std	r31, 0(rp)
-	addi	rp, rp, 8
-	bdnz	L(go)
-	b	L(ret)
-
-L(b10):	ld	r10, 0(up)	C load s1 limb
-	ld	r11, 0(vp)	C load s2 limb
-	ld	r12, 8(up)	C load s1 limb
-	addi	up, up, 16
-	ld	r0, 8(vp)	C load s2 limb
-	addi	vp, vp, 16
-	and	r11, r11, cnd
-	and	r0, r0, cnd
-	ADDSUB	r30, r11, r10	C add
-	ADDSUBC	r31, r0, r12	C add
-	std	r30, 0(rp)
-	std	r31, 8(rp)
-	addi	rp, rp, 16
-	bdnz	L(go)
-	b	L(ret)
-
-L(b00):	CLRCB			C clear/set cy
-L(go):	ld	r7, 0(up)	C load s1 limb
-	ld	r27, 0(vp)	C load s2 limb
-	ld	r8, 8(up)	C load s1 limb
-	ld	r9, 8(vp)	C load s2 limb
-	ld	r10, 16(up)	C load s1 limb
-	ld	r11, 16(vp)	C load s2 limb
-	ld	r12, 24(up)	C load s1 limb
-	ld	r0, 24(vp)	C load s2 limb
-	and	r27, r27, cnd
-	and	r9, r9, cnd
-	and	r11, r11, cnd
-	and	r0, r0, cnd
-	bdz	L(end)
-
-	addi	up, up, 32
-	addi	vp, vp, 32
-
-L(top):	ADDSUBC	r28, r27, r7
-	ld	r7, 0(up)	C load s1 limb
-	ld	r27, 0(vp)	C load s2 limb
-	ADDSUBC	r29, r9, r8
-	ld	r8, 8(up)	C load s1 limb
-	ld	r9, 8(vp)	C load s2 limb
-	ADDSUBC	r30, r11, r10
-	ld	r10, 16(up)	C load s1 limb
-	ld	r11, 16(vp)	C load s2 limb
-	ADDSUBC	r31, r0, r12
-	ld	r12, 24(up)	C load s1 limb
-	ld	r0, 24(vp)	C load s2 limb
-	std	r28, 0(rp)
-	addi	up, up, 32
-	std	r29, 8(rp)
-	addi	vp, vp, 32
-	std	r30, 16(rp)
-	std	r31, 24(rp)
-	addi	rp, rp, 32
-	and	r27, r27, cnd
-	and	r9, r9, cnd
-	and	r11, r11, cnd
-	and	r0, r0, cnd
-	bdnz	L(top)		C decrement ctr and loop back
-
-L(end):	ADDSUBC	r28, r27, r7
-	ADDSUBC	r29, r9, r8
-	ADDSUBC	r30, r11, r10
-	ADDSUBC	r31, r0, r12
-	std	r28, 0(rp)
-	std	r29, 8(rp)
-	std	r30, 16(rp)
-	std	r31, 24(rp)
-
-L(ret):	ld	r31, -8(r1)
-	ld	r30, -16(r1)
-	ld	r29, -24(r1)
-	ld	r28, -32(r1)
-	ld	r27, -40(r1)
-
-	subfe	r3, r0, r0	C -cy
-	GENRVAL
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/dive_1.asm b/gmp/mpn/powerpc64/mode64/dive_1.asm
index 434dde9145..a4a06da26c 100644
--- a/gmp/mpn/powerpc64/mode64/dive_1.asm
+++ b/gmp/mpn/powerpc64/mode64/dive_1.asm
@@ -1,45 +1,32 @@
 dnl  PowerPC-64 mpn_divexact_1 -- mpn by limb exact division.
 
-dnl  Copyright 2006, 2010 Free Software Foundation, Inc.
+dnl  Copyright 2006 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C                       cycles/limb
-C                       norm    unorm
-C POWER3/PPC630        13-19
-C POWER4/PPC970         16
-C POWER5                16      16
-C POWER6                37      46
-C POWER7                12      12
+C		cycles/limb
+C POWER3/PPC630:    13-19
+C POWER4/PPC970:     16
+C POWER5:	     16
 
 C TODO
-C  * Check if n=1 code is really an improvement.  It probably isn't.
+C  * Check if n=1 code is really an improvement.  It probably isn't.
+C  * Perhaps remove L(norm) code, it is currently unreachable.
 C  * Make more similar to mode1o.asm.
 
 C INPUT PARAMETERS
@@ -53,7 +40,7 @@ ASM_START()
 
 EXTERN(binvert_limb_table)
 
-PROLOGUE(mpn_divexact_1,toc)
+PROLOGUE(mpn_divexact_1)
 	addic.	n, n, -1
 	ld	r12, 0(up)
 	bne	cr0, L(2)
@@ -74,6 +61,7 @@ L(7):
 	mtctr	n
 	LEA(	r5, binvert_limb_table)
 	rldicl	r11, d, 63, 57
+C	cmpdi	cr7, r0, 0
 	lbzx	r0, r5, r11
 	mulld	r9, r0, r0
 	sldi	r0, r0, 1
@@ -87,27 +75,26 @@ L(7):
 	sldi	r0, r0, 1
 	mulld	r9, d, r9
 	subf	r7, r9, r0		C r7 = 1/d mod 2^64
-	bne	cr0, L(norm)
+C	beq	cr7, L(norm)
 	subfic	r8, r10, 64		C set carry as side effect
 	li	r5, 0
-	srd	r11, r12, r10
 
 	ALIGN(16)
 L(loop0):
+	srd	r11, r12, r10
 	ld	r12, 8(up)
-	nop
 	addi	up, up, 8
 	sld	r0, r12, r8
 	or	r11, r11, r0
 	subfe	r9, r5, r11
-	srd	r11, r12, r10
 	mulld	r0, r7, r9
-	mulhdu	r5, r0, d
 	std	r0, 0(rp)
 	addi	rp, rp, 8
+	mulhdu	r5, r0, d
 	bdnz	L(loop0)
 
-	subfe	r0, r5, r11
+	srd	r0, r12, r10
+	subfe	r0, r5, r0
 	mulld	r0, r7, r0
 	std	r0, 0(rp)
 	blr
@@ -115,15 +102,14 @@ L(loop0):
 	ALIGN(16)
 L(norm):
 	mulld	r11, r12, r7
-	mulhdu	r5, r11, d
 	std	r11, 0(rp)
 	ALIGN(16)
 L(loop1):
+	mulhdu	r5, r11, d
 	ld	r9, 8(up)
 	addi	up, up, 8
 	subfe	r5, r5, r9
 	mulld	r11, r7, r5
-	mulhdu	r5, r11, d	C result not used
 	std	r11, 8(rp)
 	addi	rp, rp, 8
 	bdnz	L(loop1)
diff --git a/gmp/mpn/powerpc64/mode64/diveby3.asm b/gmp/mpn/powerpc64/mode64/diveby3.asm
new file mode 100644
index 0000000000..d96f775d71
--- /dev/null
+++ b/gmp/mpn/powerpc64/mode64/diveby3.asm
@@ -0,0 +1,83 @@
+dnl  PowerPC-64 mpn_divexact_by3 -- mpn by 3 exact division
+
+dnl  Copyright 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		cycles/limb
+C POWER3/PPC630:     13
+C POWER4/PPC970:     13
+C POWER5:	     13
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`cy', `r6')
+
+define(`xAAAAAAAB',`r7')
+define(`xAAAAAAAA',  `r9')
+define(`q', `r10')
+define(`ul', `r11')
+define(`one',  `r12')
+
+
+ASM_START()
+PROLOGUE(mpn_divexact_by3c)
+
+	mtctr	r5
+	li	r7, -0x5556		C 0xFFFFFFFFFFFFAAAA
+	ld	ul, 0(up)
+	rldimi	r7, r7, 16, 32		C 0xFFFFFFFFAAAAAAAA
+	rldimi	r7, r7, 32, 63		C 0xAAAAAAAAAAAAAAAB = 1/3
+
+	addi	r9, r7, -1		C 0xAAAAAAAAAAAAAAAA
+	li	one, 1
+
+	subfc	ul, cy, ul		C  C = (cy <= up[0])
+	subfe	cy, r1, r1		C  cy = -(cy > up[0])
+	bdz	L(end)
+
+	ALIGN(16)
+L(top):	mulld	q, ul, xAAAAAAAB
+
+	ld	ul, 8(up)
+	addi	up, up, 8
+	addc	r0, xAAAAAAAA, q	C set C flag if q >= 0x5555...56
+
+	subfe	cy, cy, one		C cy = 1-cy-1+C
+	subfc	r0, q, xAAAAAAAA	C set C flag if q < 0xAAAA...AA
+
+	subfe	ul, cy, ul		C ul = ul-cy-1+C
+	std	q, 0(rp)
+	addi	rp, rp, 8
+
+	subfe	cy, r1, r1
+	bdnz	L(top)
+
+L(end):	mulld	q, ul, xAAAAAAAB
+	addc	r0, xAAAAAAAA, q
+
+	subfe	cy, cy, one
+	subfc	r0, q, xAAAAAAAA
+
+	std	q, 0(rp)
+	subfe	r3, r1, r1
+	subf	r3, r3, cy
+	blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/divrem_1.asm b/gmp/mpn/powerpc64/mode64/divrem_1.asm
index b283877006..895badfe61 100644
--- a/gmp/mpn/powerpc64/mode64/divrem_1.asm
+++ b/gmp/mpn/powerpc64/mode64/divrem_1.asm
@@ -1,42 +1,29 @@
 dnl  PowerPC-64 mpn_divrem_1 -- Divide an mpn number by an unnormalized limb.
 
-dnl  Copyright 2003-2005, 2007, 2008, 2010, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C                           cycles/limb
-C                       norm    unorm   frac
-C POWER3/PPC630         16-34   16-34   ~11   outdated figures
-C POWER4/PPC970          28      28      19
-C POWER5                 29      29     ~19
-C POWER6                 49      59     ~42
-C POWER7                 24.5    23     ~14
+C			    cycles/limb
+C			norm	unorm	frac
+C POWER3/PPC630		16-34	16-34	~11
+C POWER4/PPC970		 29		 19
+C POWER5		 29	 29	~20
 
 C INPUT PARAMETERS
 C qp  = r3
@@ -56,7 +43,7 @@ ASM_START()
 
 EXTERN_FUNC(mpn_invert_limb)
 
-PROLOGUE(mpn_divrem_1,toc)
+PROLOGUE(mpn_divrem_1)
 
 	mfcr	r12
 	add.	r10, r6, r4
@@ -107,6 +94,7 @@ L(71):
 	sld	r31, r31, r27
 	mr	r3, r30
 	CALL(	mpn_invert_limb)
+	nop
 	beq-	cr4, L(110)
 	sldi	r9, r28, 3
 	addic.	r6, r28, -2
@@ -122,23 +110,23 @@ L(71):
 	sldi	r6, r6, 3
 	ALIGN(16)
 L(uloop):
+	addi	r11, r31, 1
 	ldx	r8, r26, r6
-	nop
 	mulld	r0, r31, r3
 	mulhdu	r10, r31, r3
-	addi	r11, r31, 1
-	srd	r9, r8, r5
 	addi	r6, r6, -8
+	srd	r9, r8, r5
 	or	r9, r7, r9
 	addc	r0, r0, r9
 	adde	r10, r10, r11
 	mulld	r31, r10, r30
 	subf	r31, r31, r9
-	subfc	r0, r31, r0	C r <= ql
-	subfe	r0, r0, r0	C r0 = -(r <= ql)
-	and	r9, r30, r0
-	add	r31, r31, r9
-	add	r10, r0, r10	C qh -= (r >= ql)
+	subfc	r0, r0, r31	C r >= ql
+	subfe	r0, r0, r0	C r0 = -(r >= ql)
+	not	r7, r0
+	add	r10, r7, r10	C qh -= (r >= ql)
+	andc	r0, r30, r0
+	add	r31, r31, r0
 	cmpld	cr7, r31, r30
 	bge-	cr7, L(164)
 L(123):
@@ -175,19 +163,19 @@ L(110):
 L(ufloop):
 	addi	r11, r31, 1
 	nop
-	mulld	r0, r3, r31
+	mulld	r7, r3, r31
 	mulhdu	r10, r3, r31
 	add	r10, r10, r11
 	mulld	r31, r9, r10
 ifelse(0,1,`
-	subfc	r0, r0, r31
+	subfc	r0, r7, r31
 	subfe	r0, r0, r0	C r0 = -(r >= ql)
 	not	r7, r0
 	add	r10, r7, r10	C qh -= (r >= ql)
 	andc	r0, r30, r0
 	add	r31, r31, r0
 ',`
-	cmpld	cr7, r31, r0
+	cmpld	cr7, r31, r7
 	blt	cr7, L(29)
 	add	r31, r30, r31
 	addi	r10, r10, -1
@@ -228,11 +216,12 @@ L(162):
 	and	r0, r0, r7
 	subf	r31, r0, r31
 L(8):
+L(10):
 	mr	r3, r30
 	CALL(	mpn_invert_limb)
-	li	r27, 0
+	nop
 	addic.	r6, r28, -1
-	blt-	cr0, L(110)
+	blt-	cr0, L(150)
 	mtctr	r28
 	sldi	r6, r6, 3
 	ALIGN(16)
@@ -240,25 +229,70 @@ L(nloop):
 	addi	r11, r31, 1
 	ldx	r8, r26, r6
 	mulld	r0, r31, r3
-	mulhdu	r10, r31, r3
 	addi	r6, r6, -8
-	addc	r0, r0, r8
+	mulhdu	r10, r31, r3
+	addc	r7, r0, r8
 	adde	r10, r10, r11
 	mulld	r31, r10, r30
 	subf	r31, r31, r8	C r = nl - qh * d
-	subfc	r0, r31, r0	C r <= ql
-	subfe	r0, r0, r0	C r0 = -(r <= ql)
-	and	r9, r30, r0
-	add	r31, r31, r9
-	add	r10, r0, r10	C qh -= (r >= ql)
+	subfc	r0, r7, r31	C r >= ql
+	subfe	r0, r0, r0	C r0 = -(r >= ql)
+	not	r7, r0
+	add	r10, r7, r10	C qh -= (r >= ql)
+	andc	r0, r30, r0
+	add	r31, r31, r0
 	cmpld	cr7, r31, r30
 	bge-	cr7, L(167)
 L(51):
 	std	r10, 0(r29)
 	addi	r29, r29, -8
 	bdnz	L(nloop)
-	b	L(110)
 
+L(150):
+	addic.	r9, r25, -1
+	blt-	cr0, L(152)
+	mtctr	r25
+	neg	r9, r30
+	ALIGN(16)
+L(nfloop):
+	addi	r11, r31, 1
+	nop
+	mulld	r7, r3, r31
+	mulhdu	r10, r3, r31
+	add	r10, r10, r11
+	mulld	r31, r9, r10
+ifelse(0,1,`
+	subfc	r0, r7, r31
+	subfe	r0, r0, r0	C r0 = -(r >= ql)
+	not	r7, r0
+	add	r10, r7, r10	C qh -= (r >= ql)
+	andc	r0, r30, r0
+	add	r31, r31, r0
+',`
+	cmpld	cr7, r31, r7
+	blt	cr7, L(28)
+	add	r31, r30, r31
+	addi	r10, r10, -1
+L(28):
+')
+	std	r10, 0(r29)
+	addi	r29, r29, -8
+	bdnz	L(nfloop)
+L(152):
+	addi	r1, r1, 176
+	mr	r3, r31
+	ld	r0, 16(r1)
+	lwz	r12, 8(r1)
+	mtlr	r0
+	ld	r25, -56(r1)
+	ld	r26, -48(r1)
+	mtcrf	8, r12
+	ld	r27, -40(r1)
+	ld	r28, -32(r1)
+	ld	r29, -24(r1)
+	ld	r30, -16(r1)
+	ld	r31, -8(r1)
+	blr
 L(164):
 	subf	r31, r30, r31
 	addi	r10, r10, 1
diff --git a/gmp/mpn/powerpc64/mode64/divrem_2.asm b/gmp/mpn/powerpc64/mode64/divrem_2.asm
index 73ec23c94d..369b5c1f1d 100644
--- a/gmp/mpn/powerpc64/mode64/divrem_2.asm
+++ b/gmp/mpn/powerpc64/mode64/divrem_2.asm
@@ -3,40 +3,30 @@ dnl  PPC-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
 dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C                       cycles/limb
-C                       norm    frac
+C			cycles/limb
+C			norm	frac
 C POWER3/PPC630
-C POWER4/PPC970         ?       ?
-C POWER5                37      ?
-C POWER6                62      ?
-C POWER6                30.5    ?
+C POWER4/PPC970		39*	39*
+C POWER5		39*	39*
+
+C STATUS
+C  * Performance fluctuates like crazy
 
 C INPUT PARAMETERS
 C qp  = r3
@@ -53,7 +43,7 @@ ASM_START()
 
 EXTERN_FUNC(mpn_invert_limb)
 
-PROLOGUE(mpn_divrem_2,toc)
+PROLOGUE(mpn_divrem_2)
 	mflr	r0
 	std	r23, -72(r1)
 	std	r24, -64(r1)
@@ -107,6 +97,7 @@ L(8):
 	blt	cr0, L(18)
 	mr	r3, r30
 	CALL(	mpn_invert_limb)
+	nop
 	mulld	r10, r3, r30
 	mulhdu	r0, r3, r28
 	addc	r8, r10, r28
@@ -130,12 +121,12 @@ L(loop):
 	mulld	r6, r29, r3
 	addc	r6, r6, r31
 	adde	r8, r8, r29
-	cmpd	cr7, r27, r25
 	mulld	r0, r30, r8
+	subf	r31, r0, r31
 	mulhdu	r11, r28, r8
 	mulld	r10, r28, r8
-	subf	r31, r0, r31
 	li	r7, 0
+	cmpd	cr7, r27, r25
 	blt	cr7, L(60)
 	ld	r7, 0(r26)
 	addi	r26, r26, -8
diff --git a/gmp/mpn/powerpc64/mode64/gcd_1.asm b/gmp/mpn/powerpc64/mode64/gcd_1.asm
deleted file mode 100644
index 8762bbbef5..0000000000
--- a/gmp/mpn/powerpc64/mode64/gcd_1.asm
+++ /dev/null
@@ -1,122 +0,0 @@
-dnl  PowerPC-64 mpn_gcd_1.
-
-dnl  Copyright 2000-2002, 2005, 2009, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		    cycles/bit (approx)
-C POWER3/PPC630		 ?
-C POWER4/PPC970		 8.5
-C POWER5		 ?
-C POWER6		10.1
-C POWER7		 9.4
-C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
-
-C INPUT PARAMETERS
-define(`up',    `r3')
-define(`n',     `r4')
-define(`v0',    `r5')
-
-EXTERN_FUNC(mpn_mod_1)
-EXTERN_FUNC(mpn_modexact_1c_odd)
-
-ASM_START()
-PROLOGUE(mpn_gcd_1,toc)
-	mflr	r0
-	std	r30, -16(r1)
-	std	r31, -8(r1)
-	std	r0, 16(r1)
-	stdu	r1, -128(r1)
-
-	ld	r7, 0(up)		C U low limb
-	or	r0, r5, r7		C x | y
-
-	neg	r6, r0
-	and	r6, r6, r0
-	cntlzd	r31, r6			C common twos
-	subfic	r31, r31, 63
-
-	neg	r6, r5
-	and	r6, r6, r5
-	cntlzd	r8, r6
-	subfic	r8, r8, 63
-	srd	r5, r5, r8
-	mr	r30, r5			C v0 saved
-
-	cmpdi	r4, BMOD_1_TO_MOD_1_THRESHOLD
-	blt	L(bmod)
-	CALL(	mpn_mod_1)
-	b	L(reduced)
-L(bmod):
-	li	r6, 0
-	CALL(	mpn_modexact_1c_odd)
-L(reduced):
-
-define(`mask', `r0')dnl
-define(`a1',   `r4')dnl
-define(`a2',   `r5')dnl
-define(`d1',   `r6')dnl
-define(`d2',   `r7')dnl
-define(`cnt',  `r9')dnl
-
-	neg.	r6, r3
-	and	r6, r6, r3
-	cntlzd	cnt, r6
-	subfic	cnt, cnt, 63
-	li	r12, 63
-	bne	L(mid)
-	b	L(end)
-
-	ALIGN(16)
-L(top):
-	and	a1, r10, mask		C d - a
-	andc	a2, r11,  mask		C a - d
-	and	d1, r3, mask		C a
-	andc	d2, r30, mask		C d
-	or	r3, a1, a2		C new a
-	subf	cnt, cnt, r12
-	or	r30, d1, d2		C new d
-L(mid):	srd	r3, r3, cnt
-	sub.	r10, r30, r3		C r10 = d - a
-	subc	r11, r3, r30		C r11 = a - d
-	neg	r8, r10
-	and	r8, r8, r10
-	subfe	mask, mask, mask
-	cntlzd	cnt, r8
-	bne	L(top)
-
-L(end):	sld	r3, r30, r31
-
-	addi	r1, r1, 128
-	ld	r0, 16(r1)
-	ld	r30, -16(r1)
-	ld	r31, -8(r1)
-	mtlr	r0
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/gmp-mparam.h b/gmp/mpn/powerpc64/mode64/gmp-mparam.h
index f8305f4720..4eb8887724 100644
--- a/gmp/mpn/powerpc64/mode64/gmp-mparam.h
+++ b/gmp/mpn/powerpc64/mode64/gmp-mparam.h
@@ -5,73 +5,62 @@ Copyright 2008, 2009 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
 
 /* 1600MHz PPC970 */
 
 /* Generated by tuneup.c, 2009-01-14, gcc 4.0 */
 
-#define MUL_TOOM22_THRESHOLD             14
-#define MUL_TOOM33_THRESHOLD             93
-#define MUL_TOOM44_THRESHOLD            135
+#define MUL_KARATSUBA_THRESHOLD          14
+#define MUL_TOOM3_THRESHOLD              57
+#define MUL_TOOM44_THRESHOLD            155
 
-#define SQR_BASECASE_THRESHOLD            6
-#define SQR_TOOM2_THRESHOLD              32
-#define SQR_TOOM3_THRESHOLD              74
-#define SQR_TOOM4_THRESHOLD             136
+#define SQR_BASECASE_THRESHOLD            5
+#define SQR_KARATSUBA_THRESHOLD          32
+#define SQR_TOOM3_THRESHOLD              89
+#define SQR_TOOM4_THRESHOLD             154
 
-#define MULLO_BASECASE_THRESHOLD          0  /* always */
-#define MULLO_DC_THRESHOLD               44
-#define MULLO_MUL_N_THRESHOLD           234
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD              40
+#define MULLOW_MUL_N_THRESHOLD          234
 
 #define DIV_SB_PREINV_THRESHOLD           0  /* always */
-#define DIV_DC_THRESHOLD                 33
-#define POWM_THRESHOLD                   89
+#define DIV_DC_THRESHOLD                 32
+#define POWM_THRESHOLD                   93
 
-#define MATRIX22_STRASSEN_THRESHOLD      15
-#define HGCD_THRESHOLD                   93
-#define GCD_DC_THRESHOLD                237
-#define GCDEXT_DC_THRESHOLD             273
+#define MATRIX22_STRASSEN_THRESHOLD      19
+#define HGCD_THRESHOLD                   96
+#define GCD_DC_THRESHOLD                242
+#define GCDEXT_DC_THRESHOLD             353
 #define JACOBI_BASE_METHOD                1
 
 #define MOD_1_NORM_THRESHOLD              0  /* always */
 #define MOD_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1_THRESHOLD                 6
+#define MOD_1_1_THRESHOLD                 7
 #define MOD_1_2_THRESHOLD                 9
-#define MOD_1_4_THRESHOLD                23
+#define MOD_1_4_THRESHOLD                44
 #define USE_PREINV_DIVREM_1               0
-#define USE_PREINV_MOD_1                  0
+#define USE_PREINV_MOD_1                  1
 #define DIVEXACT_1_THRESHOLD              0  /* always (native) */
-#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
 
-#define GET_STR_DC_THRESHOLD             12
-#define GET_STR_PRECOMPUTE_THRESHOLD     24
-#define SET_STR_DC_THRESHOLD            650
-#define SET_STR_PRECOMPUTE_THRESHOLD   1713
+#define GET_STR_DC_THRESHOLD             10
+#define GET_STR_PRECOMPUTE_THRESHOLD     20
+#define SET_STR_DC_THRESHOLD            532
+#define SET_STR_PRECOMPUTE_THRESHOLD   1790
 
 #define MUL_FFT_TABLE  { 336, 672, 1856, 2816, 7168, 20480, 81920, 327680, 0 }
 #define MUL_FFT_MODF_THRESHOLD          304
@@ -80,3 +69,9 @@ see https://www.gnu.org/licenses/.  */
 #define SQR_FFT_TABLE  { 272, 672, 1600, 2816, 7168, 20480, 81920, 327680, 786432, 0 }
 #define SQR_FFT_MODF_THRESHOLD          272
 #define SQR_FFT_THRESHOLD              2688
+
+/* These tables are now obsolete */
+
+#define MUL_FFT_TABLE2 {{1,4}, {209,5}, {513,6}, {1217,7}, {2561,8}, {3329,7}, {3457,8}, {3841,7}, {4097,8}, {6913,9}, {7681,8}, {8961,9}, {9729,8}, {10497,9}, {13825,10}, {15361,9}, {22017,10}, {23553,9}, {26113,11}, {30721,10}, {48129,9}, {50689,10}, {56321,11}, {61441,10}, {81409,11}, {96257,10}, {97281,9}, {98305,10}, {99329,9}, {101889,10}, {106241,12}, {126977,11}, {129025,10}, {146945,11}, {161793,10}, {179969,11}, {194561,10}, {212737,11}, {227329,10}, {228865,12}, {258049,11}, {359937,12}, {389121,11}, {458241,13}, {516097,12}, {520193,11}, {588801,12}, {651265,11}, {720385,12}, {782337,11}, {851457,12}, {913409,11}, {982529,12}, {MP_SIZE_T_MAX,0}}
+
+#define SQR_FFT_TABLE2 {{1,4}, {209,5}, {481,6}, {1089,7}, {3073,8}, {6913,9}, {7681,8}, {8449,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {26113,11}, {30721,10}, {31745,9}, {34305,10}, {56321,11}, {63489,10}, {81409,11}, {96257,9}, {96769,10}, {98049,12}, {126977,11}, {129025,10}, {146945,11}, {161793,10}, {212481,12}, {258049,11}, {267265,10}, {270337,11}, {272385,10}, {274433,11}, {424961,13}, {516097,12}, {520193,11}, {MP_SIZE_T_MAX,0}}
diff --git a/gmp/mpn/powerpc64/mode64/invert_limb.asm b/gmp/mpn/powerpc64/mode64/invert_limb.asm
index dfdba6451e..02a67a3979 100644
--- a/gmp/mpn/powerpc64/mode64/invert_limb.asm
+++ b/gmp/mpn/powerpc64/mode64/invert_limb.asm
@@ -1,88 +1,109 @@
 dnl  PowerPC-64 mpn_invert_limb -- Invert a normalized limb.
 
-dnl  Copyright 2004-2006, 2008, 2010, 2013 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2005, 2006, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C                  cycles/limb (approximate)
-C POWER3/PPC630         80
-C POWER4/PPC970         86
-C POWER5                86
-C POWER6               170
-C POWER7                66
+C		cycles/limb
+C POWER3/PPC630:     ?
+C POWER4/PPC970:     75 (including call+ret)
+
+C TODO:
+C   * Pair multiply instructions.
 
 ASM_START()
-PROLOGUE(mpn_invert_limb,toc)
+PROLOGUE(mpn_invert_limb)
 	LEAL(	r12, approx_tab)
-	srdi	r9, r3, 32
-	rlwinm	r9, r9, 10, 23, 30	C (d >> 55) & 0x1fe
-	srdi	r10, r3, 24		C d >> 24
-	lis	r11, 0x1000
-	rldicl	r8, r3, 0, 63		C d mod 2
-	addi	r10, r10, 1		C d40
-	sldi	r11, r11, 32		C 2^60
-	srdi	r7, r3, 1		C d/2
-	add	r7, r7, r8		C d63 = ceil(d/2)
-	neg	r8, r8			C mask = -(d mod 2)
-	lhzx	r0, r9, r12
-	mullw	r9, r0, r0		C v0*v0
-	sldi	r6, r0, 11		C v0 << 11
-	addi	r0, r6, -1		C (v0 << 11) - 1
-	mulld	r9, r9, r10		C v0*v0*d40
-	srdi	r9, r9, 40		C v0*v0*d40 >> 40
-	subf	r9, r9, r0		C v1 = (v0 << 11) - (v0*v0*d40 >> 40) - 1
-	mulld	r0, r9, r10		C v1*d40
-	sldi	r6, r9, 13		C v1 << 13
-	subf	r0, r0, r11		C 2^60 - v1*d40
-	mulld	r0, r0, r9		C v1 * (2^60 - v1*d40)
-	srdi	r0, r0, 47		C v1 * (2^60 - v1*d40) >> 47
-	add	r0, r0, r6		C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47)
-	mulld	r11, r0, r7		C v2 * d63
-	srdi	r10, r0, 1		C v2 >> 1
-	sldi	r9, r0, 31		C v2 << 31
-	and	r8, r10, r8		C (v2 >> 1) & mask
-	subf	r8, r11, r8		C ((v2 >> 1) & mask) - v2 * d63
-	mulhdu	r0, r8, r0		C p1 = v2 * (((v2 >> 1) & mask) - v2 * d63)
-	srdi	r0, r0, 1		C p1 >> 1
-	add	r0, r0, r9		C v3 = (v2 << 31) + (p1 >> 1)
-	nop
-	mulld	r11, r0, r3
-	mulhdu	r9, r0, r3
-	addc	r10, r11, r3
-	adde	r3, r9, r3
-	subf	r3, r3, r0
+
+	srdi	r11, r3, 32		C r11 = d >> 32
+	rlwinm  r9, r11, 10, 23, 30	C r9 = ((d >> 55) & 0xff) << 1
+	lhzx	r0, r12, r9		C load initial approximation
+	rldic	r10, r0, 6, 42
+	mulld	r8, r10, r10
+	sldi	r9, r10, 17
+	mulld	r0, r8, r11
+	srdi	r0, r0, 31
+	subf	r10, r0, r9
+	mulld	r8, r10, r10
+	sldi	r11, r10, 33
+	mulhdu	r0, r8, r3
+	sldi	r9, r0, 1
+	subf	r10, r9, r11
+	sldi	r11, r10, 2
+	mulhdu	r0, r10, r10
+	mulld	r8, r10, r10
+	mulhdu	r10, r8, r3
+	mulld	r9, r0, r3
+	mulhdu	r0, r0, r3
+	addc	r8, r9, r10
+	addze	r10, r0
+	srdi	r0, r8, 62
+	rldimi	r0, r10, 2, 0
+	sldi	r9, r8, 2
+	subfic	r10, r9, 0
+	subfe	r8, r0, r11
+	mulhdu	r10, r3, r8
+	add	r10, r10, r3
+	mulld	r9, r3, r8
+	subf	r11, r10, r8
+	addi	r0, r10, 1
+	addi	r8, r11, -1
+	and	r0, r3, r0
+	addc	r11, r9, r0
+	addze	r10, r10
+	addc	r0, r11, r3
+	addze	r10, r10
+	subf	r3, r10, r8
 	blr
 EPILOGUE()
 
 DEF_OBJECT(approx_tab)
-forloop(i,256,512-1,dnl
-`	.short	eval(0x7fd00/i)
-')dnl
+	.short	1023,1020,1016,1012,1008,1004,1000,996
+	.short	992,989,985,981,978,974,970,967
+	.short	963,960,956,953,949,946,942,939
+	.short	936,932,929,926,923,919,916,913
+	.short	910,907,903,900,897,894,891,888
+	.short	885,882,879,876,873,870,868,865
+	.short	862,859,856,853,851,848,845,842
+	.short	840,837,834,832,829,826,824,821
+	.short	819,816,814,811,809,806,804,801
+	.short	799,796,794,791,789,787,784,782
+	.short	780,777,775,773,771,768,766,764
+	.short	762,759,757,755,753,751,748,746
+	.short	744,742,740,738,736,734,732,730
+	.short	728,726,724,722,720,718,716,714
+	.short	712,710,708,706,704,702,700,699
+	.short	697,695,693,691,689,688,686,684
+	.short	682,680,679,677,675,673,672,670
+	.short	668,667,665,663,661,660,658,657
+	.short	655,653,652,650,648,647,645,644
+	.short	642,640,639,637,636,634,633,631
+	.short	630,628,627,625,624,622,621,619
+	.short	618,616,615,613,612,611,609,608
+	.short	606,605,604,602,601,599,598,597
+	.short	595,594,593,591,590,589,587,586
+	.short	585,583,582,581,579,578,577,576
+	.short	574,573,572,571,569,568,567,566
+	.short	564,563,562,561,560,558,557,556
+	.short	555,554,553,551,550,549,548,547
+	.short	546,544,543,542,541,540,539,538
+	.short	537,536,534,533,532,531,530,529
+	.short	528,527,526,525,524,523,522,521
+	.short	520,519,518,517,516,515,514,513
 END_OBJECT(approx_tab)
 ASM_END()
diff --git a/gmp/mpn/powerpc64/mode64/mod_1_1.asm b/gmp/mpn/powerpc64/mode64/mod_1_1.asm
deleted file mode 100644
index 873373054f..0000000000
--- a/gmp/mpn/powerpc64/mode64/mod_1_1.asm
+++ /dev/null
@@ -1,164 +0,0 @@
-dnl  PowerPC-64 mpn_mod_1_1p
-
-dnl  Copyright 2010, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                   cycles/limb
-C POWER3/PPC630          ?
-C POWER4/PPC970         17
-C POWER5                16
-C POWER6                30
-C POWER7                10.2
-
-C TODO
-C  * Optimise, in particular the cps function.  This was compiler-generated and
-C    then hand optimised.
-
-C INPUT PARAMETERS
-define(`ap',  `r3')
-define(`n',   `r4')
-define(`d',   `r5')
-define(`cps', `r6')
-
-ASM_START()
-
-EXTERN_FUNC(mpn_invert_limb)
-
-PROLOGUE(mpn_mod_1_1p)
-	sldi	r10, r4, 3
-	addi	r4, r4, -1
-	add	r3, r3, r10
-	ld	r0, 16(r6)		C B1modb
-	ld	r12, 24(r6)		C B2modb
-	ld	r9, -8(r3)
-	ld	r10, -16(r3)
-	mtctr	r4
-	mulhdu	r8, r9, r0
-	mulld	r7, r9, r0
-	addc	r11, r7, r10
-	addze	r9, r8
-	bdz	L(end)
-
-	ALIGN(16)
-L(top):	ld	r4, -24(r3)
-	addi	r3, r3, -8
-	nop
-	mulld	r10, r11, r0
-	mulld	r8, r9, r12
-	mulhdu	r11, r11, r0
-	mulhdu	r9, r9, r12
-	addc	r7, r10, r4
-	addze	r10, r11
-	addc	r11, r8, r7
-	adde	r9, r9, r10
-	bdnz	L(top)
-
-L(end):
-ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
-`	lwz	r0, 8(r6)',
-`	lwz	r0, 12(r6)')
-	ld	r3, 0(r6)
-	cmpdi	cr7, r0, 0
-	beq-	cr7, L(4)
-	subfic	r10, r0, 64
-	sld	r9, r9, r0
-	srd	r10, r11, r10
-	or	r9, r10, r9
-L(4):	subfc	r10, r5, r9
-	subfe	r10, r10, r10
-	nand	r10, r10, r10
-	sld	r11, r11, r0
-	and	r10, r10, r5
-	subf	r9, r10, r9
-	mulhdu	r10, r9, r3
-	mulld	r3, r9, r3
-	addi	r9, r9, 1
-	addc	r8, r3, r11
-	adde	r3, r10, r9
-	mulld	r3, r3, r5
-	subf	r3, r3, r11
-	cmpld	cr7, r8, r3
-	bge	cr7, L(5)		C FIXME: Make branch-less
-	add	r3, r3, r5
-L(5):	cmpld	cr7, r3, r5
-	bge-	cr7, L(10)
-	srd	r3, r3, r0
-	blr
-
-L(10):	subf	r3, r5, r3
-	srd	r3, r3, r0
-	blr
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1_1p_cps,toc)
-	mflr	r0
-	std	r29, -24(r1)
-	std	r30, -16(r1)
-	std	r31, -8(r1)
-	cntlzd	r31, r4
-	std	r0, 16(r1)
-	extsw	r31, r31
-	mr	r29, r3
-	stdu	r1, -144(r1)
-	sld	r30, r4, r31
-	mr	r3, r30
-	CALL(	mpn_invert_limb)
-	cmpdi	cr7, r31, 0
-	neg	r0, r30
-	beq-	cr7, L(13)
-	subfic	r11, r31, 64
-	li	r0, 1
-	neg	r9, r30
-	srd	r11, r3, r11
-	sld	r0, r0, r31
-	or	r0, r11, r0
-	mulld	r0, r0, r9
-L(13):	mulhdu	r9, r0, r3
-	mulld	r11, r0, r3
-	add	r9, r0, r9
-	nor	r9, r9, r9
-	mulld	r9, r9, r30
-	cmpld	cr7, r11, r9
-	bge	cr7, L(14)
-	add	r9, r9, r30
-L(14):	addi	r1, r1, 144
-	srd	r0, r0, r31
-	std	r31, 8(r29)
-	std	r3, 0(r29)
-	std	r0, 16(r29)
-	ld	r0, 16(r1)
-	srd	r9, r9, r31
-	ld	r30, -16(r1)
-	ld	r31, -8(r1)
-	std	r9, 24(r29)
-	ld	r29, -24(r1)
-	mtlr	r0
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/mod_1_4.asm b/gmp/mpn/powerpc64/mode64/mod_1_4.asm
deleted file mode 100644
index 0b7d6bf699..0000000000
--- a/gmp/mpn/powerpc64/mode64/mod_1_4.asm
+++ /dev/null
@@ -1,270 +0,0 @@
-dnl  PowerPC-64 mpn_mod_1s_4p
-
-dnl  Copyright 2010, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                   cycles/limb
-C POWER3/PPC630          ?
-C POWER4/PPC970          9
-C POWER5                 9
-C POWER6                13
-C POWER7                3.5
-
-C TODO
-C  * Optimise, in particular the cps function.  This was compiler-generated and
-C    then hand optimised.
-
-C INPUT PARAMETERS
-define(`ap',  `r3')
-define(`n',   `r4')
-define(`d',   `r5')
-define(`cps', `r6')
-
-ASM_START()
-
-EXTERN_FUNC(mpn_invert_limb)
-
-PROLOGUE(mpn_mod_1s_4p)
-	std	r23, -72(r1)
-	ld	r23, 48(cps)
-	std	r24, -64(r1)
-	std	r25, -56(r1)
-	ld	r24, 32(cps)
-	ld	r25, 24(cps)
-	std	r26, -48(r1)
-	std	r27, -40(r1)
-	ld	r26, 16(cps)
-	std	r28, -32(r1)
-	std	r29, -24(r1)
-	std	r30, -16(r1)
-	std	r31, -8(r1)
-	ld	r30, 40(cps)
-
-	rldicl.	r0, n, 0,62
-	sldi	r31, n, 3
-	add	ap, ap, r31		C make ap point at end of operand
-
-	cmpdi	cr7, r0, 2
-	beq	cr0, L(b00)
-	blt	cr7, L(b01)
-	beq	cr7, L(b10)
-
-L(b11):	ld	r11, -16(ap)
-	ld	r9, -8(ap)
-	ld	r0, -24(ap)
-	mulhdu	r27, r11, r26
-	mulld	r8, r11, r26
-	mulhdu	r11, r9, r25
-	mulld	r9, r9, r25
-	addc	r31, r8, r0
-	addze	r10, r27
-	addc	r0, r9, r31
-	adde	r9, r11, r10
-	addi	ap, ap, -40
-	b	L(6)
-
-	ALIGN(16)
-L(b00):	ld	r11, -24(ap)
-	ld	r10, -16(ap)
-	ld	r9, -8(ap)
-	ld	r0, -32(ap)
-	mulld	r8, r11, r26
-	mulhdu	r7, r10, r25
-	mulhdu	r27, r11, r26
-	mulhdu	r11, r9, r24
-	mulld	r10, r10, r25
-	mulld	r9, r9, r24
-	addc	r31, r8, r0
-	addze	r0, r27
-	addc	r8, r31, r10
-	adde	r10, r0, r7
-	addc	r0, r9, r8
-	adde	r9, r11, r10
-	addi	ap, ap, -48
-	b	L(6)
-
-	ALIGN(16)
-L(b01):	li	r9, 0
-	ld	r0, -8(ap)
-	addi	ap, ap, -24
-	b	L(6)
-
-	ALIGN(16)
-L(b10):	ld	r9, -8(ap)
-	ld	r0, -16(ap)
-	addi	ap, ap, -32
-
-	ALIGN(16)
-L(6):	addi	r10, n, 3
-	srdi	r7, r10, 2
-	mtctr	r7
-	bdz	L(end)
-
-	ALIGN(16)
-L(top):	ld	r31, -16(ap)
-	ld	r10, -8(ap)
-	ld	r11, 8(ap)
-	ld	r12, 0(ap)
-	mulld	r29, r0, r30		C rl * B4modb
-	mulhdu	r0,  r0, r30		C rl * B4modb
-	mulhdu	r27, r10, r26
-	mulld	r10, r10, r26
-	mulhdu	r7, r9, r23		C rh * B5modb
-	mulld	r9, r9, r23		C rh * B5modb
-	mulhdu	r28, r11, r24
-	mulld	r11, r11, r24
-	mulhdu	r4, r12, r25
-	mulld	r12, r12, r25
-	addc	r8, r10, r31
-	addze	r10, r27
-	addi	ap, ap, -32
-	addc	r27, r8, r12
-	adde	r12, r10, r4
-	addc	r11, r27, r11
-	adde	r31, r12, r28
-	addc	r12, r11, r29
-	adde	r4, r31, r0
-	addc	r0, r9, r12
-	adde	r9, r7, r4
-	bdnz	L(top)
-
-L(end):
-ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
-`	lwz	r3, 8(cps)',
-`	lwz	r3, 12(cps)')
-	mulld	r10, r9, r26
-	mulhdu	r9, r9, r26
-	addc	r11, r0, r10
-	addze	r9, r9
-	ld	r10, 0(cps)
-	subfic	r8, r3, 64
-	sld	r9, r9, r3
-	srd	r8, r11, r8
-	sld	r11, r11, r3
-	or	r9, r8, r9
-	mulld	r0, r9, r10
-	mulhdu	r10, r9, r10
-	addi	r9, r9, 1
-	addc	r8, r0, r11
-	adde	r0, r10, r9
-	mulld	r0, r0, d
-	subf	r0, r0, r11
-	cmpld	cr7, r8, r0
-	bge	cr7, L(9)
-	add	r0, r0, d
-L(9):	cmpld	cr7, r0, d
-	bge-	cr7, L(16)
-L(10):	srd	r3, r0, r3
-	ld	r23, -72(r1)
-	ld	r24, -64(r1)
-	ld	r25, -56(r1)
-	ld	r26, -48(r1)
-	ld	r27, -40(r1)
-	ld	r28, -32(r1)
-	ld	r29, -24(r1)
-	ld	r30, -16(r1)
-	ld	r31, -8(r1)
-	blr
-
-L(16):	subf	r0, d, r0
-	b	L(10)
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1s_4p_cps,toc)
-	mflr	r0
-	std	r29, -24(r1)
-	std	r30, -16(r1)
-	mr	r29, r3
-	std	r0, 16(r1)
-	std	r31, -8(r1)
-	stdu	r1, -144(r1)
-	cntlzd	r31, r4
-	sld	r30, r4, r31
-	mr	r3, r30
-	CALL(	mpn_invert_limb)
-	subfic	r9, r31, 64
-	li	r10, 1
-	sld	r10, r10, r31
-	srd	r9, r3, r9
-	neg	r0, r30
-	or	r10, r10, r9
-	mulld	r10, r10, r0
-	mulhdu	r11, r10, r3
-	nor	r11, r11, r11
-	subf	r11, r10, r11
-	mulld	r11, r11, r30
-	mulld	r0, r10, r3
-	cmpld	cr7, r0, r11
-	bge	cr7, L(18)
-	add	r11, r11, r30
-L(18):	mulhdu	r9, r11, r3
-	add	r9, r11, r9
-	nor	r9, r9, r9
-	mulld	r9, r9, r30
-	mulld	r0, r11, r3
-	cmpld	cr7, r0, r9
-	bge	cr7, L(19)
-	add	r9, r9, r30
-L(19):	mulhdu	r0, r9, r3
-	add	r0, r9, r0
-	nor	r0, r0, r0
-	mulld	r0, r0, r30
-	mulld	r8, r9, r3
-	cmpld	cr7, r8, r0
-	bge	cr7, L(20)
-	add	r0, r0, r30
-L(20):	mulhdu	r8, r0, r3
-	add	r8, r0, r8
-	nor	r8, r8, r8
-	mulld	r8, r8, r30
-	mulld	r7, r0, r3
-	cmpld	cr7, r7, r8
-	bge	cr7, L(21)
-	add	r8, r8, r30
-L(21):	srd	r0, r0, r31
-	addi	r1, r1, 144
-	srd	r8, r8, r31
-	srd	r10, r10, r31
-	srd	r11, r11, r31
-	std	r0, 40(r29)
-	std	r31, 8(r29)
-	srd	r9, r9, r31
-	ld	r0, 16(r1)
-	ld	r30, -16(r1)
-	std	r8, 48(r29)
-	std	r3, 0(r29)
-	mtlr	r0
-	ld	r31, -8(r1)
-	std	r10, 16(r29)
-	std	r11, 24(r29)
-	std	r9, 32(r29)
-	ld	r29, -24(r1)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/mod_34lsub1.asm b/gmp/mpn/powerpc64/mode64/mod_34lsub1.asm
index c35e0e37a4..ca46c3933b 100644
--- a/gmp/mpn/powerpc64/mode64/mod_34lsub1.asm
+++ b/gmp/mpn/powerpc64/mode64/mod_34lsub1.asm
@@ -1,41 +1,28 @@
-dnl  PowerPC-64 mpn_mod_34lsub1 -- modulo 2^48-1.
+dnl  PowerPC-64 mpn_mod_34lsub1 -- modulo 2^48-1.
 
 dnl  Copyright 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C                   cycles/limb
-C POWER3/PPC630          1.33
-C POWER4/PPC970          1.5
-C POWER5                 1.32
-C POWER6                 2.35
-C POWER7                 1
+C		cycles/limb
+C POWER3/PPC630:     1.33
+C POWER4/PPC970:     1.5
+C POWER5:	     1.57
 
 C INPUT PARAMETERS
 define(`up',`r3')
diff --git a/gmp/mpn/powerpc64/mode64/mode1o.asm b/gmp/mpn/powerpc64/mode64/mode1o.asm
index 726339a931..95aa2870da 100644
--- a/gmp/mpn/powerpc64/mode64/mode1o.asm
+++ b/gmp/mpn/powerpc64/mode64/mode1o.asm
@@ -3,43 +3,30 @@ dnl  PowerPC-64 mpn_modexact_1_odd -- mpn by limb exact remainder.
 dnl  Copyright 2006 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C                  cycles/limb
-C POWER3/PPC630        13-19
-C POWER4/PPC970         16
-C POWER5                16
-C POWER6                 ?
-C POWER7                12
+C               cycles/limb
+C POWER3/PPC630:    13-19
+C POWER4/PPC970:     16
+C POWER5:            16
 
 C TODO
-C  * Check if n=1 code is really an improvement.  It probably isn't.
-C  * Make more similar to dive_1.asm.
+C  * Check if n=1 code is really an improvement.  It probably isn't.
+C  * Make more similar to dive_1.asm.
 
 C INPUT PARAMETERS
 define(`up', `r3')
@@ -52,7 +39,7 @@ ASM_START()
 
 EXTERN(binvert_limb_table)
 
-PROLOGUE(mpn_modexact_1c_odd,toc)
+PROLOGUE(mpn_modexact_1c_odd)
 	addic.	n, n, -1		C set carry as side effect
 	ld	r8, 0(up)
 	bne	cr0, L(2)
diff --git a/gmp/mpn/powerpc64/mode64/mul_1.asm b/gmp/mpn/powerpc64/mode64/mul_1.asm
index 27a8f8fb4d..8f644d8710 100644
--- a/gmp/mpn/powerpc64/mode64/mul_1.asm
+++ b/gmp/mpn/powerpc64/mode64/mul_1.asm
@@ -1,42 +1,30 @@
 dnl  PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and store
 dnl  the result in a second limb vector.
 
-dnl  Copyright 1999-2001, 2003-2006, 2010 Free Software Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006 Free Software
+dnl  Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C               cycles/limb
-C POWER3/PPC630     6-18
-C POWER4/PPC970     7.25?  not updated for last file revision
-C POWER5            7.25
-C POWER6           14
-C POWER7            2.9
+C		cycles/limb
+C POWER3/PPC630:     6-18
+C POWER4/PPC970:     7.25
+C POWER5:            7.75
 
 C TODO
 C  * Try to reduce the number of needed live registers (at least r5 and r10
@@ -130,18 +118,26 @@ L(b10):	ld	r27, 8(up)
 
 L(top):	mulld	r0, r26, r6
 	mulhdu	r5, r26, r6
+	ld	r26, 0(up)
+	nop
+
 	mulld	r7, r27, r6
 	mulhdu	r8, r27, r6
-	ld	r26, 0(up)
 	ld	r27, 8(up)
+	nop
+
 	adde	r0, r0, r12
 	adde	r7, r7, r5
+
 	mulld	r9, r26, r6
 	mulhdu	r10, r26, r6
+	ld	r26, 16(up)
+	nop
+
 	mulld	r11, r27, r6
 	mulhdu	r12, r27, r6
-	ld	r26, 16(up)
 	ld	r27, 24(up)
+
 	std	r0, 0(rp)
 	adde	r9, r9, r8
 	std	r7, 8(rp)
@@ -155,10 +151,13 @@ L(top):	mulld	r0, r26, r6
 
 L(end):	mulld	r0, r26, r6
 	mulhdu	r5, r26, r6
+
 	mulld	r7, r27, r6
 	mulhdu	r8, r27, r6
+
 	adde	r0, r0, r12
 	adde	r7, r7, r5
+
 	std	r0, 0(rp)
 	std	r7, 8(rp)
 L(ret):	addze	r3, r8
diff --git a/gmp/mpn/powerpc64/mode64/mul_basecase.asm b/gmp/mpn/powerpc64/mode64/mul_basecase.asm
index 18731879e4..cea5417eb2 100644
--- a/gmp/mpn/powerpc64/mode64/mul_basecase.asm
+++ b/gmp/mpn/powerpc64/mode64/mul_basecase.asm
@@ -1,40 +1,30 @@
-dnl  PowerPC-64 mpn_mul_basecase.
+dnl  PowerPC-64 mpn_mul_basecase.
 
-dnl  Copyright 1999-2001, 2003-2006, 2008 Free Software Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2008 Free Software
+dnl  Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C                  cycles/limb
-C POWER3/PPC630         6-18
-C POWER4/PPC970          8
-C POWER5                 8
-C POWER6                24
+C		cycles/limb
+C POWER3/PPC630:    6-18
+C POWER4/PPC970:     8
+C POWER5:            8
+
 
 C INPUT PARAMETERS
 define(`rp', `r3')
diff --git a/gmp/mpn/powerpc64/mode64/p3/gmp-mparam.h b/gmp/mpn/powerpc64/mode64/p3/gmp-mparam.h
deleted file mode 100644
index 61a437b6e6..0000000000
--- a/gmp/mpn/powerpc64/mode64/p3/gmp-mparam.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/* POWER3/PowerPC630 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 2008-2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        18
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     17
-#define USE_PREINV_DIVREM_1                  0
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
-
-#define MUL_TOOM22_THRESHOLD                10
-#define MUL_TOOM33_THRESHOLD                33
-#define MUL_TOOM44_THRESHOLD                46
-#define MUL_TOOM6H_THRESHOLD                77
-#define MUL_TOOM8H_THRESHOLD               139
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      49
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      47
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      49
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      49
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      34
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 14
-#define SQR_TOOM3_THRESHOLD                 45
-#define SQR_TOOM4_THRESHOLD                 64
-#define SQR_TOOM6_THRESHOLD                 85
-#define SQR_TOOM8_THRESHOLD                139
-
-#define MULMID_TOOM42_THRESHOLD             22
-
-#define MULMOD_BNM1_THRESHOLD                8
-#define SQRMOD_BNM1_THRESHOLD               10
-
-#define MUL_FFT_MODF_THRESHOLD             220  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    220, 5}, {      9, 6}, {      5, 5}, {     11, 6}, \
-    {     13, 7}, {      7, 6}, {     15, 7}, {     13, 8}, \
-    {      7, 7}, {     15, 8}, {     13, 9}, {      7, 8}, \
-    {     19, 9}, {     11, 8}, {     23,10}, {      7, 9}, \
-    {     15, 8}, {     33, 9}, {     23,10}, {     15, 9}, \
-    {     35, 8}, {     71,10}, {     23, 9}, {     47,11}, \
-    {     15,10}, {     31, 9}, {     71,10}, {     39, 9}, \
-    {     79,10}, {     55,11}, {     31,10}, {     63, 9}, \
-    {    127,10}, {     71, 9}, {    143, 8}, {    287,10}, \
-    {     79,11}, {     47,10}, {     95, 9}, {    191,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511,10}, {    143, 9}, {    287,11}, {     79,10}, \
-    {    159, 9}, {    319, 8}, {    639,10}, {    175, 9}, \
-    {    351,11}, {     95,10}, {    191, 9}, {    383,11}, \
-    {    111,10}, {    223,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    319, 9}, {    639,11}, \
-    {    175,10}, {    351,12}, {     95,11}, {    191,10}, \
-    {    383, 9}, {    767,11}, {    223,13}, {     63,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    287,10}, \
-    {    575, 9}, {   1151,12}, {    159,11}, {    319,10}, \
-    {    639,11}, {    351,12}, {    191,11}, {    383,10}, \
-    {    767,12}, {    223,11}, {    447,10}, {    895,13}, \
-    {    127,12}, {    255,11}, {    511,12}, {    287,11}, \
-    {    575,10}, {   1151,12}, {    319,11}, {    639,12}, \
-    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
-    {    767,12}, {    415,11}, {    831,10}, {   1663,12}, \
-    {    447,11}, {    895,14}, {  16384,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 120
-#define MUL_FFT_THRESHOLD                 2688
-
-#define SQR_FFT_MODF_THRESHOLD             188  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    188, 5}, {      9, 6}, {      5, 5}, {     11, 6}, \
-    {     13, 7}, {     13, 8}, {      7, 7}, {     16, 8}, \
-    {      9, 7}, {     19, 8}, {     13, 9}, {      7, 8}, \
-    {     19, 9}, {     11, 8}, {     23,10}, {      7, 9}, \
-    {     15, 8}, {     31, 9}, {     19, 8}, {     39, 9}, \
-    {     23,10}, {     15, 9}, {     39,10}, {     23,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79, 8}, {    159,10}, {     47, 9}, {     95, 8}, \
-    {    191,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255,10}, {     71, 9}, {    143, 8}, {    287,10}, \
-    {     79, 9}, {    159,11}, {     47,10}, {     95, 9}, \
-    {    191,12}, {     31,11}, {     63,10}, {    127, 9}, \
-    {    255, 8}, {    511,10}, {    143, 9}, {    287,11}, \
-    {     79,10}, {    159, 9}, {    319, 8}, {    639,10}, \
-    {    175,11}, {     95,10}, {    191, 9}, {    383,11}, \
-    {    111,10}, {    223,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    319, 9}, {    639,11}, \
-    {    175,12}, {     95,11}, {    191,10}, {    383, 9}, \
-    {    767,11}, {    223,13}, {     63,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    287,10}, {    575,12}, \
-    {    159,11}, {    319,10}, {    639,11}, {    351,12}, \
-    {    191,11}, {    383,10}, {    767,12}, {    223,11}, \
-    {    447,10}, {    895,13}, {    127,12}, {    255,11}, \
-    {    511,12}, {    287,11}, {    575,10}, {   1151,12}, \
-    {    319,11}, {    639,12}, {    351,13}, {    191,12}, \
-    {    383,11}, {    767,12}, {    447,11}, {    895,14}, \
-    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 118
-#define SQR_FFT_THRESHOLD                 1728
-
-#define MULLO_BASECASE_THRESHOLD             2
-#define MULLO_DC_THRESHOLD                  27
-#define MULLO_MUL_N_THRESHOLD             2511
-
-#define DC_DIV_QR_THRESHOLD                 23
-#define DC_DIVAPPR_Q_THRESHOLD              87
-#define DC_BDIV_QR_THRESHOLD                27
-#define DC_BDIV_Q_THRESHOLD                 60
-
-#define INV_MULMOD_BNM1_THRESHOLD           27
-#define INV_NEWTON_THRESHOLD                91
-#define INV_APPR_THRESHOLD                  91
-
-#define BINV_NEWTON_THRESHOLD              115
-#define REDC_1_TO_REDC_N_THRESHOLD          31
-
-#define MU_DIV_QR_THRESHOLD                551
-#define MU_DIVAPPR_Q_THRESHOLD             551
-#define MUPI_DIV_QR_THRESHOLD               42
-#define MU_BDIV_QR_THRESHOLD               483
-#define MU_BDIV_Q_THRESHOLD                492
-
-#define POWM_SEC_TABLE  2,23,140,556,713,746
-
-#define MATRIX22_STRASSEN_THRESHOLD          8
-#define HGCD_THRESHOLD                      56
-#define HGCD_APPR_THRESHOLD                 51
-#define HGCD_REDUCE_THRESHOLD              688
-#define GCD_DC_THRESHOLD                   333
-#define GCDEXT_DC_THRESHOLD                126
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                17
-#define GET_STR_PRECOMPUTE_THRESHOLD        28
-#define SET_STR_DC_THRESHOLD               375
-#define SET_STR_PRECOMPUTE_THRESHOLD       812
-
-#define FAC_DSC_THRESHOLD                  351
-#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/gmp/mpn/powerpc64/mode64/p4/gmp-mparam.h b/gmp/mpn/powerpc64/mode64/p4/gmp-mparam.h
deleted file mode 100644
index d909b292bb..0000000000
--- a/gmp/mpn/powerpc64/mode64/p4/gmp-mparam.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/* POWER4/PowerPC970 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 2008-2010, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 1800 MHz PPC970 */
-/* FFT tuning limit = 10000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.0 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        10
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        22
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     16
-#define USE_PREINV_DIVREM_1                  0
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD            1
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           34
-
-#define MUL_TOOM22_THRESHOLD                14
-#define MUL_TOOM33_THRESHOLD                53
-#define MUL_TOOM44_THRESHOLD               136
-#define MUL_TOOM6H_THRESHOLD               197
-#define MUL_TOOM8H_THRESHOLD               296
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      89
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      96
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      79
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 24
-#define SQR_TOOM3_THRESHOLD                 85
-#define SQR_TOOM4_THRESHOLD                142
-#define SQR_TOOM6_THRESHOLD                270
-#define SQR_TOOM8_THRESHOLD                430
-
-#define MULMID_TOOM42_THRESHOLD             32
-
-#define MULMOD_BNM1_THRESHOLD               11
-#define SQRMOD_BNM1_THRESHOLD               15
-
-#define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    380, 5}, {     13, 6}, {      7, 5}, {     17, 6}, \
-    {      9, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     13, 5}, {     28, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     12, 6}, {     25, 7}, {     21, 8}, \
-    {     11, 7}, {     25, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
-    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
-    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     55,11}, \
-    {     15,10}, {     31, 9}, {     71,10}, {     39, 9}, \
-    {     83,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     63, 9}, {    127,10}, {     87,11}, \
-    {     47,10}, {     95, 9}, {    191,10}, {    103,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511,10}, {    135, 9}, {    271,11}, {     79,10}, \
-    {    159, 9}, {    319,10}, {    167, 9}, {    335,11}, \
-    {     95,10}, {    191, 9}, {    383, 8}, {    767,10}, \
-    {    207, 9}, {    415,11}, {    111,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
-    {    543,11}, {    143,10}, {    287, 9}, {    575,10}, \
-    {    303, 9}, {    607,10}, {    319, 9}, {    639,10}, \
-    {    335, 9}, {    671,10}, {    351,12}, {     95,11}, \
-    {    191,10}, {    383, 9}, {    767,11}, {    207,10}, \
-    {    415, 9}, {    831,13}, {     63,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    271,10}, {    543,11}, \
-    {    287,10}, {    575,11}, {    303,10}, {    607,11}, \
-    {    319,10}, {    639,11}, {    335,10}, {    671,11}, \
-    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
-    {    767,11}, {    415,10}, {    831,12}, {    223,10}, \
-    {    895,11}, {    479,13}, {    127,12}, {    255,11}, \
-    {    543,12}, {    287,11}, {    607,12}, {    319,11}, \
-    {    671,12}, {    351,11}, {    703,13}, {    191,12}, \
-    {    383,11}, {    767,12}, {    415,11}, {    895,12}, \
-    {    479,14}, {    127,13}, {    255,12}, {    607,13}, \
-    {    319,12}, {    703,13}, {    383,12}, {    895,14}, \
-    {    255,13}, {    511,12}, {   1023,13}, {    575,12}, \
-    {   1151,13}, {    703,14}, {    383,13}, {    895,15}, \
-    {    255,14}, {    511,13}, {   1023,12}, {   2047,13}, \
-    {   1087,12}, {   2175,13}, {   1151,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 165
-#define MUL_FFT_THRESHOLD                 9088
-
-#define SQR_FFT_MODF_THRESHOLD             308  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    308, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     13, 5}, {     28, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     14, 6}, {     29, 7}, {     21, 8}, \
-    {     11, 7}, {     25, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
-    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
-    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     83,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {     95, 9}, \
-    {    191, 8}, {    383,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    511,10}, {    135, 9}, \
-    {    271, 8}, {    543,11}, {     79,10}, {    159, 9}, \
-    {    319, 8}, {    639,10}, {    175, 9}, {    351,11}, \
-    {     95,10}, {    191, 9}, {    383, 8}, {    767,10}, \
-    {    207, 9}, {    415,11}, {    111,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
-    {    543,10}, {    287, 9}, {    575,10}, {    303,11}, \
-    {    159,10}, {    319, 9}, {    639,11}, {    175,10}, \
-    {    351,12}, {     95,11}, {    191,10}, {    383, 9}, \
-    {    767,11}, {    207,10}, {    415, 9}, {    831,11}, \
-    {    223,13}, {     63,12}, {    127,11}, {    255,10}, \
-    {    511,11}, {    271,10}, {    543,11}, {    287,10}, \
-    {    575,11}, {    303,10}, {    607,12}, {    159,11}, \
-    {    319,10}, {    639,11}, {    351,10}, {    703,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
-    {    831,12}, {    223,10}, {    895,11}, {    479,13}, \
-    {    127,12}, {    255,11}, {    543,12}, {    287,11}, \
-    {    607,12}, {    319,11}, {    639,12}, {    351,11}, \
-    {    703,13}, {    191,12}, {    383,11}, {    767,12}, \
-    {    415,11}, {    895,12}, {    479,14}, {    127,13}, \
-    {    255,12}, {    607,13}, {    319,12}, {    703,13}, \
-    {    383,12}, {    927,14}, {    255,13}, {    511,12}, \
-    {   1023,13}, {    575,12}, {   1151,13}, {    639,12}, \
-    {   1279,13}, {    703,14}, {    383,13}, {    895,12}, \
-    {   1791,15}, {    255,14}, {    511,13}, {   1023,12}, \
-    {   2047,13}, {   1087,12}, {   2175,13}, {   1151,14}, \
-    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 162
-#define SQR_FFT_THRESHOLD                 6272
-
-#define MULLO_BASECASE_THRESHOLD             5
-#define MULLO_DC_THRESHOLD                  44
-#define MULLO_MUL_N_THRESHOLD            18087
-
-#define DC_DIV_QR_THRESHOLD                 42
-#define DC_DIVAPPR_Q_THRESHOLD             167
-#define DC_BDIV_QR_THRESHOLD                46
-#define DC_BDIV_Q_THRESHOLD                110
-
-#define INV_MULMOD_BNM1_THRESHOLD           30
-#define INV_NEWTON_THRESHOLD               181
-#define INV_APPR_THRESHOLD                 173
-
-#define BINV_NEWTON_THRESHOLD              214
-#define REDC_1_TO_REDC_N_THRESHOLD          56
-
-#define MU_DIV_QR_THRESHOLD                998
-#define MU_DIVAPPR_Q_THRESHOLD            1017
-#define MUPI_DIV_QR_THRESHOLD               92
-#define MU_BDIV_QR_THRESHOLD               889
-#define MU_BDIV_Q_THRESHOLD               1017
-
-#define POWM_SEC_TABLE  2,22,87,579,1925
-
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                     109
-#define HGCD_APPR_THRESHOLD                115
-#define HGCD_REDUCE_THRESHOLD             4633
-#define GCD_DC_THRESHOLD                   318
-#define GCDEXT_DC_THRESHOLD                242
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                11
-#define GET_STR_PRECOMPUTE_THRESHOLD        23
-#define SET_STR_DC_THRESHOLD               802
-#define SET_STR_PRECOMPUTE_THRESHOLD      1712
-
-#define FAC_DSC_THRESHOLD                  507
-#define FAC_ODD_THRESHOLD                   25
diff --git a/gmp/mpn/powerpc64/mode64/p5/gmp-mparam.h b/gmp/mpn/powerpc64/mode64/p5/gmp-mparam.h
deleted file mode 100644
index 15b009c357..0000000000
--- a/gmp/mpn/powerpc64/mode64/p5/gmp-mparam.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/* POWER5 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2003, 2009, 2010 Free Software Foundation,
-Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* POWER5 (friggms.hpc.ntnu.no) */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        15
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
-#define USE_PREINV_DIVREM_1                  0
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           40
-
-#define MUL_TOOM22_THRESHOLD                21
-#define MUL_TOOM33_THRESHOLD                24
-#define MUL_TOOM44_THRESHOLD                70
-#define MUL_TOOM6H_THRESHOLD               262
-#define MUL_TOOM8H_THRESHOLD               393
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      49
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     126
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      85
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      94
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      70
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 24
-#define SQR_TOOM3_THRESHOLD                 81
-#define SQR_TOOM4_THRESHOLD                142
-#define SQR_TOOM6_THRESHOLD                189
-#define SQR_TOOM8_THRESHOLD                284
-
-#define MULMID_TOOM42_THRESHOLD             36
-
-#define MULMOD_BNM1_THRESHOLD               12
-#define SQRMOD_BNM1_THRESHOLD               15
-
-#define MUL_FFT_MODF_THRESHOLD             304  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    348, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     10, 5}, {     21, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     12, 6}, {     25, 7}, {     21, 8}, \
-    {     11, 7}, {     25, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
-    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
-    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    135,11}, {     79,10}, {    159, 9}, {    319,11}, \
-    {     95,10}, {    191, 9}, {    383,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    271,11}, \
-    {    143,10}, {    287, 9}, {    575,10}, {    319,12}, \
-    {     95,11}, {    191,10}, {    383,13}, {     63,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
-    {    543,11}, {    287,10}, {    575, 9}, {   1151,11}, \
-    {    319,10}, {    639,11}, {    351,10}, {    703,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,12}, \
-    {    223,11}, {    447,13}, {    127,12}, {    255,11}, \
-    {    511,10}, {   1023,11}, {    543,10}, {   1087,12}, \
-    {    287,11}, {    575,10}, {   1151,12}, {    319,11}, \
-    {    639,12}, {    351,11}, {    703,13}, {    191,12}, \
-    {    383,11}, {    767,12}, {    415,11}, {    831,12}, \
-    {    447,11}, {    895,14}, {    127,13}, {    255,12}, \
-    {    511,11}, {   1023,12}, {    543,11}, {   1087,10}, \
-    {   2175,12}, {    575,11}, {   1151,12}, {    607,13}, \
-    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
-    {   1343,12}, {    703,11}, {   1407,13}, {    383,12}, \
-    {    767,11}, {   1535,12}, {    831,13}, {    447,12}, \
-    {    959,11}, {   1919,14}, {    255,13}, {    511,12}, \
-    {   1087,11}, {   2175,13}, {    575,12}, {   1215,11}, \
-    {   2431,10}, {   4863,13}, {    639,12}, {   1343,13}, \
-    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
-    {   1535,13}, {    831,12}, {   1663,13}, {    959,12}, \
-    {   1919,11}, {   3839,15}, {    255,14}, {    511,13}, \
-    {   1087,12}, {   2175,13}, {   1215,12}, {   2431,11}, \
-    {   4863,14}, {    639,13}, {   1343,12}, {   2687,13}, \
-    {   1407,12}, {   2815,13}, {   1471,12}, {   2943,14}, \
-    {    767,13}, {   1599,12}, {   3199,13}, {   1663,14}, \
-    {    895,13}, {   1919,12}, {   3839,15}, {    511,14}, \
-    {   1023,13}, {   2175,14}, {   1151,13}, {   2431,12}, \
-    {   4863,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
-    {   2943,15}, {    767,14}, {   1535,13}, {   3199,14}, \
-    {   1663,13}, {   3327,14}, {   1919,13}, {   3839,16}, \
-    {    511,15}, {   1023,14}, {   2431,13}, {   4863,15}, \
-    {   1279,14}, {   2943,12}, {  11775,15}, {   1535,14}, \
-    {   3327,15}, {   1791,14}, {  16384,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 208
-#define MUL_FFT_THRESHOLD                 4224
-
-#define SQR_FFT_MODF_THRESHOLD             284  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    272, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
-    {     19, 7}, {     17, 8}, {      9, 7}, {     21, 8}, \
-    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
-    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
-    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
-    {     15,10}, {     31, 9}, {     63,10}, {     47,11}, \
-    {     31,10}, {     71, 9}, {    143,10}, {     79,11}, \
-    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    511, 9}, {    271,10}, \
-    {    143,11}, {     79,10}, {    159, 9}, {    319,10}, \
-    {    175, 9}, {    351,11}, {     95,10}, {    191, 9}, \
-    {    383,10}, {    207, 9}, {    415,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    271,11}, \
-    {    143,10}, {    287, 9}, {    575,11}, {    159,10}, \
-    {    319,11}, {    175,10}, {    351,12}, {     95,11}, \
-    {    191,10}, {    383,11}, {    207,10}, {    415,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    271,10}, {    543,11}, {    287,10}, {    575,12}, \
-    {    159,11}, {    319,10}, {    639,11}, {    351,10}, \
-    {    703,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,12}, {    223,11}, {    447,10}, {    895,11}, \
-    {    479,10}, {    959,12}, {    255,11}, {    511,10}, \
-    {   1023,11}, {    543,12}, {    287,11}, {    575,12}, \
-    {    319,11}, {    639,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,12}, {    447,11}, {    895,12}, {    479,11}, \
-    {    959,13}, {    255,12}, {    511,11}, {   1023,12}, \
-    {    543,11}, {   1087,12}, {    575,13}, {    319,12}, \
-    {    639,11}, {   1279,12}, {    703,11}, {   1407,13}, \
-    {    383,12}, {    831,13}, {    447,12}, {    959,14}, \
-    {    255,13}, {    511,12}, {   1087,13}, {    575,12}, \
-    {   1215,13}, {    639,12}, {   1279,13}, {    703,12}, \
-    {   1407,14}, {    383,13}, {    831,12}, {   1663,13}, \
-    {    959,12}, {   1919,15}, {    255,14}, {    511,13}, \
-    {   1023,12}, {   2047,13}, {   1087,12}, {   2175,13}, \
-    {   1215,14}, {    639,13}, {   1407,12}, {   2815,14}, \
-    {    767,13}, {   1663,14}, {    895,13}, {   1919,15}, \
-    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
-    {   2431,12}, {   4863,14}, {   1407,13}, {   2815,15}, \
-    {    767,14}, {   1663,13}, {   3327,14}, {   1919,13}, \
-    {   3839,16}, {    511,15}, {   1023,14}, {   2431,13}, \
-    {   4863,15}, {   1279,14}, {   2943,13}, {   5887,12}, \
-    {  11775,15}, {   1535,14}, {   3327,15}, {   1791,14}, \
-    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 190
-#define SQR_FFT_THRESHOLD                 3264
-
-#define MULLO_BASECASE_THRESHOLD             6
-#define MULLO_DC_THRESHOLD                  60
-#define MULLO_MUL_N_THRESHOLD             7463
-
-#define DC_DIV_QR_THRESHOLD                 58
-#define DC_DIVAPPR_Q_THRESHOLD             232
-#define DC_BDIV_QR_THRESHOLD                78
-#define DC_BDIV_Q_THRESHOLD                238
-
-#define INV_MULMOD_BNM1_THRESHOLD           92
-#define INV_NEWTON_THRESHOLD               155
-#define INV_APPR_THRESHOLD                 157
-
-#define BINV_NEWTON_THRESHOLD              155
-#define REDC_1_TO_REDC_N_THRESHOLD          61
-
-#define MU_DIV_QR_THRESHOLD                998
-#define MU_DIVAPPR_Q_THRESHOLD             979
-#define MUPI_DIV_QR_THRESHOLD               79
-#define MU_BDIV_QR_THRESHOLD               823
-#define MU_BDIV_Q_THRESHOLD                942
-
-#define MATRIX22_STRASSEN_THRESHOLD         14
-#define HGCD_THRESHOLD                      74
-#define HGCD_APPR_THRESHOLD                155
-#define HGCD_REDUCE_THRESHOLD             2479
-#define GCD_DC_THRESHOLD                   351
-#define GCDEXT_DC_THRESHOLD                288
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        21
-#define SET_STR_DC_THRESHOLD               650
-#define SET_STR_PRECOMPUTE_THRESHOLD      1585
-
-#define FAC_DSC_THRESHOLD                  662
-#define FAC_ODD_THRESHOLD                   28
diff --git a/gmp/mpn/powerpc64/mode64/p6/aorsmul_1.asm b/gmp/mpn/powerpc64/mode64/p6/aorsmul_1.asm
deleted file mode 100644
index 5a85f84f4a..0000000000
--- a/gmp/mpn/powerpc64/mode64/p6/aorsmul_1.asm
+++ /dev/null
@@ -1,183 +0,0 @@
-dnl  PowerPC-64 mpn_addmul_1 and mpn_submul_1 optimised for power6.
-
-dnl  Copyright 1999-2001, 2003-2006, 2008, 2010, 2011 Free Software Foundation,
-dnl  Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C               mpn_addmul_1    mpn_submul_1
-C               cycles/limb     cycles/limb
-C POWER3/PPC630     ?               ?
-C POWER4/PPC970     ?               ?
-C POWER5            ?               ?
-C POWER6           12.25           12.8
-C POWER7            ?               ?
-
-C TODO
-C  * Reduce register usage.
-C  * Schedule function entry code.
-C  * Unroll more.  8-way unrolling would bring us to 10 c/l, 16-way unrolling
-C    would bring us to 9 c/l.
-C  * Handle n = 1 and perhaps n = 2 separately, without saving any registers.
-
-C INPUT PARAMETERS
-define(`rp',  `r3')
-define(`up',  `r4')
-define(`n',   `r5')
-define(`v0',  `r6')
-
-ifdef(`OPERATION_addmul_1',`
-  define(ADDSUBC,	adde)
-  define(ADDSUB,	addc)
-  define(func,		mpn_addmul_1)
-  define(func_nc,	mpn_addmul_1c)	C FIXME: not really supported
-  define(AM,		`$1')
-  define(SM,		`')
-  define(CLRRSC,	`addic	$1, r0, 0')
-')
-ifdef(`OPERATION_submul_1',`
-  define(ADDSUBC,	subfe)
-  define(ADDSUB,	subfc)
-  define(func,		mpn_submul_1)
-  define(func_nc,	mpn_submul_1c)	C FIXME: not really supported
-  define(AM,		`')
-  define(SM,		`$1')
-  define(CLRRSC,	`subfc	$1, r0, r0')
-')
-
-ASM_START()
-PROLOGUE(func)
-	std	r31, -8(r1)
-	std	r30, -16(r1)
-	std	r29, -24(r1)
-	std	r28, -32(r1)
-	std	r27, -40(r1)
-
-	rldicl.	r0, n, 0,62	C r0 = n & 3, set cr0
-	cmpdi	cr6, r0, 2
-	addi	n, n, 3		C compute count...
-	srdi	n, n, 2		C ...for ctr
-	mtctr	n		C copy loop count into ctr
-	beq	cr0, L(b0)
-	blt	cr6, L(b1)
-	beq	cr6, L(b2)
-
-L(b3):	ld	r8, 0(up)
-	ld	r7, 8(up)
-	ld	r27, 16(up)
-	addi	up, up, 16
-	addi	rp, rp, 16
-	mulld	r5,  r8, v0
-	mulhdu	r8,  r8, v0
-	mulld	r9,  r7, v0
-	mulhdu	r7,  r7, v0
-	mulld	r11, r27, v0
-	mulhdu	r27, r27, v0
-	ld	r29, -16(rp)
-	ld	r30, -8(rp)
-	ld	r31, 0(rp)
-	addc	r9, r9, r8
-	adde	r11, r11, r7
-	addze	r12, r27
-	ADDSUB	r5, r5, r29
-	b	L(l3)
-
-L(b2):	ld	r7, 0(up)
-	ld	r27, 8(up)
-	addi	up, up, 8
-	addi	rp, rp, 8
-	mulld	r9,  r7, v0
-	mulhdu	r7,  r7, v0
-	mulld	r11, r27, v0
-	mulhdu	r27, r27, v0
-	ld	r30, -8(rp)
-	ld	r31, 0(rp)
-	addc	r11, r11, r7
-	addze	r12, r27
-	ADDSUB	r9, r9, r30
-	b	L(l2)
-
-L(b1):	ld	r27, 0(up)
-	ld	r31, 0(rp)
-	mulld	r11, r27, v0
-	mulhdu	r12, r27, v0
-	ADDSUB	r11, r11, r31
-	b	L(l1)
-
-L(b0):	addi	up, up, -8
-	addi	rp, rp, -8
-	CLRRSC(	r12)		C clear r12 and clr/set cy
-
-	ALIGN(32)
-L(top):
-SM(`	subfe	r11, r0, r0')	C complement...
-SM(`	addic	r11, r11, 1')	C ...carry flag
-	ld	r10, 8(up)
-	ld	r8, 16(up)
-	ld	r7, 24(up)
-	ld	r27, 32(up)
-	addi	up, up, 32
-	addi	rp, rp, 32
-	mulld	r0,  r10, v0
-	mulhdu	r10, r10, v0
-	mulld	r5,  r8, v0
-	mulhdu	r8,  r8, v0
-	mulld	r9,  r7, v0
-	mulhdu	r7,  r7, v0
-	mulld	r11, r27, v0
-	mulhdu	r27, r27, v0
-	ld	r28, -24(rp)
-	adde	r0, r0, r12
-	ld	r29, -16(rp)
-	adde	r5, r5, r10
-	ld	r30, -8(rp)
-	ld	r31, 0(rp)
-	adde	r9, r9, r8
-	adde	r11, r11, r7
-	addze	r12, r27
-	ADDSUB	r0, r0, r28
-	std	r0, -24(rp)
-	ADDSUBC	r5, r5, r29
-L(l3):	std	r5, -16(rp)
-	ADDSUBC	r9, r9, r30
-L(l2):	std	r9, -8(rp)
-	ADDSUBC	r11, r11, r31
-L(l1):	std	r11, 0(rp)
-	bdnz	L(top)
-
-AM(`	addze	r3, r12')
-SM(`	subfe	r11, r0, r0')		C complement...
-	ld	r31, -8(r1)
-SM(`	subf	r3, r11, r12')
-	ld	r30, -16(r1)
-	ld	r29, -24(r1)
-	ld	r28, -32(r1)
-	ld	r27, -40(r1)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/p6/gmp-mparam.h b/gmp/mpn/powerpc64/mode64/p6/gmp-mparam.h
deleted file mode 100644
index c7e2f894ad..0000000000
--- a/gmp/mpn/powerpc64/mode64/p6/gmp-mparam.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/* POWER6 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2003, 2009-2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 3500 MHz POWER6 (kolga.bibsys.no) */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        12
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      6
-#define USE_PREINV_DIVREM_1                  0
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           21
-
-#define MUL_TOOM22_THRESHOLD                20
-#define MUL_TOOM33_THRESHOLD                50
-#define MUL_TOOM44_THRESHOLD               106
-#define MUL_TOOM6H_THRESHOLD               274
-#define MUL_TOOM8H_THRESHOLD               339
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      62
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      76
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      66
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      88
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 24
-#define SQR_TOOM3_THRESHOLD                 49
-#define SQR_TOOM4_THRESHOLD                130
-#define SQR_TOOM6_THRESHOLD                226
-#define SQR_TOOM8_THRESHOLD                272
-
-#define MULMID_TOOM42_THRESHOLD             36
-
-#define MULMOD_BNM1_THRESHOLD               14
-#define SQRMOD_BNM1_THRESHOLD               14
-
-#define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    340, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     12, 6}, {     25, 7}, {     21, 8}, \
-    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
-    {     21, 9}, {     11, 8}, {     25, 9}, {     15, 8}, \
-    {     33, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
-    {     15,10}, {     31, 9}, {     63,10}, {     47,11}, \
-    {     31,10}, {     71,11}, {     47,12}, {     31,11}, \
-    {     63,10}, {    127, 9}, {    255, 8}, {    511,10}, \
-    {    135, 9}, {    271,11}, {     79, 9}, {    319, 8}, \
-    {    639,10}, {    175,11}, {     95,10}, {    191, 9}, \
-    {    383,10}, {    207,12}, {     63,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543,11}, {    143,10}, \
-    {    287, 9}, {    575,10}, {    303, 9}, {    607,10}, \
-    {    319, 9}, {    639,11}, {    175,12}, {     95,11}, \
-    {    191,10}, {    383,11}, {    207,10}, {    415,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 79
-#define MUL_FFT_THRESHOLD                 3520
-
-#define SQR_FFT_MODF_THRESHOLD             308  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    280, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
-    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
-    {     21, 9}, {     11, 8}, {     25, 9}, {     15, 8}, \
-    {     33, 9}, {     19, 8}, {     39, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     47,11}, {     15,10}, {     31, 9}, \
-    {     63,10}, {     47,11}, {     31,10}, {     71, 9}, \
-    {    143,11}, {     47,12}, {     31,11}, {     63, 9}, \
-    {    255, 8}, {    511, 9}, {    271,10}, {    143,11}, \
-    {     79,10}, {    159, 9}, {    319,10}, {    175, 9}, \
-    {    351,11}, {     95,10}, {    191, 9}, {    383,10}, \
-    {    207,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511, 8}, {   1023,10}, {    271, 9}, {    543,11}, \
-    {    143,10}, {    287, 9}, {    575,11}, {    159,10}, \
-    {    319, 9}, {    639,11}, {    175,10}, {    351,12}, \
-    {     95,11}, {    191,10}, {    383,11}, {    207,10}, \
-    {    415,13}, {   8192,14}, {  16384,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 80
-#define SQR_FFT_THRESHOLD                 2752
-
-#define MULLO_BASECASE_THRESHOLD             5
-#define MULLO_DC_THRESHOLD                  62
-#define MULLO_MUL_N_THRESHOLD             2995
-
-#define DC_DIV_QR_THRESHOLD                 59
-#define DC_DIVAPPR_Q_THRESHOLD             200
-#define DC_BDIV_QR_THRESHOLD                70
-#define DC_BDIV_Q_THRESHOLD                168
-
-#define INV_MULMOD_BNM1_THRESHOLD           53
-#define INV_NEWTON_THRESHOLD               170
-#define INV_APPR_THRESHOLD                 166
-
-#define BINV_NEWTON_THRESHOLD              220
-#define REDC_1_TO_REDC_N_THRESHOLD          67
-
-#define MU_DIV_QR_THRESHOLD                998
-#define MU_DIVAPPR_Q_THRESHOLD             942
-#define MUPI_DIV_QR_THRESHOLD               57
-#define MU_BDIV_QR_THRESHOLD               889
-#define MU_BDIV_Q_THRESHOLD               1078
-
-#define POWM_SEC_TABLE  4,26,216,804,1731
-
-#define MATRIX22_STRASSEN_THRESHOLD         13
-#define HGCD_THRESHOLD                     106
-#define HGCD_APPR_THRESHOLD                109
-#define HGCD_REDUCE_THRESHOLD             2205
-#define GCD_DC_THRESHOLD                   492
-#define GCDEXT_DC_THRESHOLD                327
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                16
-#define GET_STR_PRECOMPUTE_THRESHOLD        28
-#define SET_STR_DC_THRESHOLD               537
-#define SET_STR_PRECOMPUTE_THRESHOLD      1576
-
-#define FAC_DSC_THRESHOLD                  426
-#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/gmp/mpn/powerpc64/mode64/p6/mul_basecase.asm b/gmp/mpn/powerpc64/mode64/p6/mul_basecase.asm
deleted file mode 100644
index 3d32b46c35..0000000000
--- a/gmp/mpn/powerpc64/mode64/p6/mul_basecase.asm
+++ /dev/null
@@ -1,589 +0,0 @@
-dnl  PowerPC-64 mpn_mul_basecase.
-
-dnl  Copyright 1999-2001, 2003-2006, 2008, 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		    cycles/limb
-C POWER3/PPC630		 ?
-C POWER4/PPC970		 ?
-C POWER5		 ?
-C POWER6		12.25
-
-C TODO
-C  * Reduce register usage.  At least 4 register less can be used.
-C  * Unroll more.  8-way unrolling would bring us to 10 c/l, 16-way unrolling
-C    would bring us to 9 c/l.
-C  * The bdz insns for b1 and b2 will never branch,
-C  * Align things better, perhaps by moving things like pointer updates from
-C    before to after loops.
-
-C INPUT PARAMETERS
-define(`rp', `r3')
-define(`up', `r4')
-define(`un', `r5')
-define(`vp', `r6')
-define(`vn', `r7')
-
-define(`v0',	   `r25')
-define(`outer_rp', `r22')
-define(`outer_up', `r23')
-
-ASM_START()
-PROLOGUE(mpn_mul_basecase)
-
-C Special code for un <= 2, for efficiency of these important cases,
-C and since it simplifies the default code.
-	cmpdi	cr0, un, 2
-	bgt	cr0, L(un_gt2)
-	cmpdi	cr6, vn, 1
-	ld	r7, 0(vp)
-	ld	r5, 0(up)
-	mulld	r8, r5, r7	C weight 0
-	mulhdu	r9, r5, r7	C weight 1
-	std	r8, 0(rp)
-	beq	cr0, L(2x)
-	std	r9, 8(rp)
-	blr
-	ALIGN(16)
-L(2x):	ld	r0, 8(up)
-	mulld	r8, r0, r7	C weight 1
-	mulhdu	r10, r0, r7	C weight 2
-	addc	r9, r9, r8
-	addze	r10, r10
-	bne	cr6, L(2x2)
-	std	r9, 8(rp)
-	std	r10, 16(rp)
-	blr
-	ALIGN(16)
-L(2x2):	ld	r6, 8(vp)
-	nop
-	mulld	r8, r5, r6	C weight 1
-	mulhdu	r11, r5, r6	C weight 2
-	mulld	r12, r0, r6	C weight 2
-	mulhdu	r0, r0, r6	C weight 3
-	addc	r9, r9, r8
-	std	r9, 8(rp)
-	adde	r11, r11, r10
-	addze	r0, r0
-	addc	r11, r11, r12
-	addze	r0, r0
-	std	r11, 16(rp)
-	std	r0, 24(rp)
-	blr
-
-L(un_gt2):
-	std	r31, -8(r1)
-	std	r30, -16(r1)
-	std	r29, -24(r1)
-	std	r28, -32(r1)
-	std	r27, -40(r1)
-	std	r26, -48(r1)
-	std	r25, -56(r1)
-	std	r24, -64(r1)
-	std	r23, -72(r1)
-	std	r22, -80(r1)
-	std	r21, -88(r1)
-	std	r20, -96(r1)
-
-	mr	outer_rp, rp
-	mr	outer_up, up
-
-	ld	v0, 0(vp)	C new v limb
-	addi	vp, vp, 8
-	ld	r26, 0(up)
-
-	rldicl.	r0, un, 0,62	C r0 = n & 3, set cr0
-	cmpdi	cr6, r0, 2
-	addi	un, un, 4	C compute count...
-	srdi	un, un, 2	C ...for ctr
-	mtctr	un		C copy inner loop count into ctr
-	beq	cr0, L(b0)
-	blt	cr6, L(b1)
-	beq	cr6, L(b2)
-
-
-	ALIGN(16)
-L(b3):
-	ld	r27, 8(up)
-	ld	r20, 16(up)
-	mulld	r0, r26, v0
-	mulhdu	r31, r26, v0
-	mulld	r24, r27, v0
-	mulhdu	r8, r27, v0
-	mulld	r9, r20, v0
-	mulhdu	r10, r20, v0
-	addc	r24, r24, r31
-	adde	r9, r9, r8
-	addze	r12, r10
-	std	r0, 0(rp)
-	std	r24, 8(rp)
-	std	r9, 16(rp)
-	addi	up, up, 16
-	addi	rp, rp, 16
-	bdz	L(end_m_3)
-
-	ALIGN(32)
-L(lo_m_3):
-	ld	r26, 8(up)
-	ld	r27, 16(up)
-	ld	r20, 24(up)
-	ld	r21, 32(up)
-	mulld	r0, r26, v0
-	mulhdu	r31, r26, v0
-	mulld	r24, r27, v0
-	mulhdu	r8, r27, v0
-	mulld	r9, r20, v0
-	mulhdu	r27, r20, v0
-	mulld	r11, r21, v0
-	mulhdu	r26, r21, v0
-	adde	r0, r0, r12
-	adde	r24, r24, r31
-	std	r0, 8(rp)
-	adde	r9, r9, r8
-	std	r24, 16(rp)
-	adde	r11, r11, r27
-	std	r9, 24(rp)
-	addi	up, up, 32
-	std	r11, 32(rp)
-	addi	rp, rp, 32
-	mr	r12, r26
-	bdnz	L(lo_m_3)
-
-	ALIGN(16)
-L(end_m_3):
-	addze	r12, r12
-	addic.	vn, vn, -1
-	std	r12, 8(rp)
-	beq	L(ret)
-
-	ALIGN(16)
-L(outer_lo_3):
-	mtctr	un		C copy inner loop count into ctr
-	addi	rp, outer_rp, 24
-	addi	up, outer_up, 16
-	addi	outer_rp, outer_rp, 8
-	ld	v0, 0(vp)	C new v limb
-	addi	vp, vp, 8
-	ld	r26, -16(up)
-	ld	r27, -8(up)
-	ld	r20, 0(up)
-	mulld	r0, r26, v0
-	mulhdu	r31, r26, v0
-	mulld	r24, r27, v0
-	mulhdu	r8, r27, v0
-	mulld	r9, r20, v0
-	mulhdu	r10, r20, v0
-	ld	r28, -16(rp)
-	ld	r29, -8(rp)
-	ld	r30, 0(rp)
-	addc	r24, r24, r31
-	adde	r9, r9, r8
-	addze	r12, r10
-	addc	r0, r0, r28
-	std	r0, -16(rp)
-	adde	r24, r24, r29
-	std	r24, -8(rp)
-	adde	r9, r9, r30
-	std	r9, 0(rp)
-	bdz	L(end_3)
-
-	ALIGN(32)		C registers dying
-L(lo_3):
-	ld	r26, 8(up)
-	ld	r27, 16(up)
-	ld	r20, 24(up)	C
-	ld	r21, 32(up)	C
-	addi	up, up, 32	C
-	addi	rp, rp, 32	C
-	mulld	r0, r26, v0	C
-	mulhdu	r10, r26, v0	C 26
-	mulld	r24, r27, v0	C
-	mulhdu	r8, r27, v0	C 27
-	mulld	r9, r20, v0	C
-	mulhdu	r27, r20, v0	C 26
-	mulld	r11, r21, v0	C
-	mulhdu	r26, r21, v0	C 27
-	ld	r28, -24(rp)	C
-	adde	r0, r0, r12	C 0 12
-	ld	r29, -16(rp)	C
-	adde	r24, r24, r10	C 24 10
-	ld	r30, -8(rp)	C
-	ld	r31, 0(rp)	C
-	adde	r9, r9, r8	C 8 9
-	adde	r11, r11, r27	C 27 11
-	addze	r12, r26	C 26
-	addc	r0, r0, r28	C 0 28
-	std	r0, -24(rp)	C 0
-	adde	r24, r24, r29	C 7 29
-	std	r24, -16(rp)	C 7
-	adde	r9, r9, r30	C 9 30
-	std	r9, -8(rp)	C 9
-	adde	r11, r11, r31	C 11 31
-	std	r11, 0(rp)	C 11
-	bdnz	L(lo_3)		C
-
-	ALIGN(16)
-L(end_3):
-	addze	r12, r12
-	addic.	vn, vn, -1
-	std	r12, 8(rp)
-	bne	L(outer_lo_3)
-	b	L(ret)
-
-
-	ALIGN(16)
-L(b1):
-	mulld	r0, r26, v0
-	mulhdu	r12, r26, v0
-	addic	r0, r0, 0
-	std	r0, 0(rp)
-	bdz	L(end_m_1)
-
-	ALIGN(16)
-L(lo_m_1):
-	ld	r26, 8(up)
-	ld	r27, 16(up)
-	ld	r20, 24(up)
-	ld	r21, 32(up)
-	mulld	r0, r26, v0
-	mulhdu	r31, r26, v0
-	mulld	r24, r27, v0
-	mulhdu	r8, r27, v0
-	mulld	r9, r20, v0
-	mulhdu	r27, r20, v0
-	mulld	r11, r21, v0
-	mulhdu	r26, r21, v0
-	adde	r0, r0, r12
-	adde	r24, r24, r31
-	std	r0, 8(rp)
-	adde	r9, r9, r8
-	std	r24, 16(rp)
-	adde	r11, r11, r27
-	std	r9, 24(rp)
-	addi	up, up, 32
-	std	r11, 32(rp)
-	addi	rp, rp, 32
-	mr	r12, r26
-	bdnz	L(lo_m_1)
-
-	ALIGN(16)
-L(end_m_1):
-	addze	r12, r12
-	addic.	vn, vn, -1
-	std	r12, 8(rp)
-	beq	L(ret)
-
-	ALIGN(16)
-L(outer_lo_1):
-	mtctr	un		C copy inner loop count into ctr
-	addi	rp, outer_rp, 8
-	mr	up, outer_up
-	addi	outer_rp, outer_rp, 8
-	ld	v0, 0(vp)	C new v limb
-	addi	vp, vp, 8
-	ld	r26, 0(up)
-	ld	r28, 0(rp)
-	mulld	r0, r26, v0
-	mulhdu	r12, r26, v0
-	addc	r0, r0, r28
-	std	r0, 0(rp)
-	bdz	L(end_1)
-
-	ALIGN(32)		C registers dying
-L(lo_1):
-	ld	r26, 8(up)
-	ld	r27, 16(up)
-	ld	r20, 24(up)	C
-	ld	r21, 32(up)	C
-	addi	up, up, 32	C
-	addi	rp, rp, 32	C
-	mulld	r0, r26, v0	C
-	mulhdu	r10, r26, v0	C 26
-	mulld	r24, r27, v0	C
-	mulhdu	r8, r27, v0	C 27
-	mulld	r9, r20, v0	C
-	mulhdu	r27, r20, v0	C 26
-	mulld	r11, r21, v0	C
-	mulhdu	r26, r21, v0	C 27
-	ld	r28, -24(rp)	C
-	adde	r0, r0, r12	C 0 12
-	ld	r29, -16(rp)	C
-	adde	r24, r24, r10	C 24 10
-	ld	r30, -8(rp)	C
-	ld	r31, 0(rp)	C
-	adde	r9, r9, r8	C 8 9
-	adde	r11, r11, r27	C 27 11
-	addze	r12, r26	C 26
-	addc	r0, r0, r28	C 0 28
-	std	r0, -24(rp)	C 0
-	adde	r24, r24, r29	C 7 29
-	std	r24, -16(rp)	C 7
-	adde	r9, r9, r30	C 9 30
-	std	r9, -8(rp)	C 9
-	adde	r11, r11, r31	C 11 31
-	std	r11, 0(rp)	C 11
-	bdnz	L(lo_1)		C
-
-	ALIGN(16)
-L(end_1):
-	addze	r12, r12
-	addic.	vn, vn, -1
-	std	r12, 8(rp)
-	bne	L(outer_lo_1)
-	b	L(ret)
-
-
-	ALIGN(16)
-L(b0):
-	addi	up, up, -8
-	addi	rp, rp, -8
-	li	r12, 0
-	addic	r12, r12, 0
-	bdz	L(end_m_0)
-
-	ALIGN(16)
-L(lo_m_0):
-	ld	r26, 8(up)
-	ld	r27, 16(up)
-	ld	r20, 24(up)
-	ld	r21, 32(up)
-	mulld	r0, r26, v0
-	mulhdu	r31, r26, v0
-	mulld	r24, r27, v0
-	mulhdu	r8, r27, v0
-	mulld	r9, r20, v0
-	mulhdu	r27, r20, v0
-	mulld	r11, r21, v0
-	mulhdu	r26, r21, v0
-	adde	r0, r0, r12
-	adde	r24, r24, r31
-	std	r0, 8(rp)
-	adde	r9, r9, r8
-	std	r24, 16(rp)
-	adde	r11, r11, r27
-	std	r9, 24(rp)
-	addi	up, up, 32
-	std	r11, 32(rp)
-	addi	rp, rp, 32
-	mr	r12, r26
-	bdnz	L(lo_m_0)
-
-	ALIGN(16)
-L(end_m_0):
-	addze	r12, r12
-	addic.	vn, vn, -1
-	std	r12, 8(rp)
-	beq	L(ret)
-
-	ALIGN(16)
-L(outer_lo_0):
-	mtctr	un		C copy inner loop count into ctr
-	addi	rp, outer_rp, 0
-	addi	up, outer_up, -8
-	addi	outer_rp, outer_rp, 8
-	ld	v0, 0(vp)	C new v limb
-	addi	vp, vp, 8
-	li	r12, 0
-	addic	r12, r12, 0
-	bdz	L(end_0)
-
-	ALIGN(32)		C registers dying
-L(lo_0):
-	ld	r26, 8(up)
-	ld	r27, 16(up)
-	ld	r20, 24(up)	C
-	ld	r21, 32(up)	C
-	addi	up, up, 32	C
-	addi	rp, rp, 32	C
-	mulld	r0, r26, v0	C
-	mulhdu	r10, r26, v0	C 26
-	mulld	r24, r27, v0	C
-	mulhdu	r8, r27, v0	C 27
-	mulld	r9, r20, v0	C
-	mulhdu	r27, r20, v0	C 26
-	mulld	r11, r21, v0	C
-	mulhdu	r26, r21, v0	C 27
-	ld	r28, -24(rp)	C
-	adde	r0, r0, r12	C 0 12
-	ld	r29, -16(rp)	C
-	adde	r24, r24, r10	C 24 10
-	ld	r30, -8(rp)	C
-	ld	r31, 0(rp)	C
-	adde	r9, r9, r8	C 8 9
-	adde	r11, r11, r27	C 27 11
-	addze	r12, r26	C 26
-	addc	r0, r0, r28	C 0 28
-	std	r0, -24(rp)	C 0
-	adde	r24, r24, r29	C 7 29
-	std	r24, -16(rp)	C 7
-	adde	r9, r9, r30	C 9 30
-	std	r9, -8(rp)	C 9
-	adde	r11, r11, r31	C 11 31
-	std	r11, 0(rp)	C 11
-	bdnz	L(lo_0)		C
-
-	ALIGN(16)
-L(end_0):
-	addze	r12, r12
-	addic.	vn, vn, -1
-	std	r12, 8(rp)
-	bne	L(outer_lo_0)
-	b	L(ret)
-
-
-	ALIGN(16)
-L(b2):	ld	r27, 8(up)
-	addi	up, up, 8
-	mulld	r0, r26, v0
-	mulhdu	r10, r26, v0
-	mulld	r24, r27, v0
-	mulhdu	r8, r27, v0
-	addc	r24, r24, r10
-	addze	r12, r8
-	std	r0, 0(rp)
-	std	r24, 8(rp)
-	addi	rp, rp, 8
-	bdz	L(end_m_2)
-
-	ALIGN(16)
-L(lo_m_2):
-	ld	r26, 8(up)
-	ld	r27, 16(up)
-	ld	r20, 24(up)
-	ld	r21, 32(up)
-	mulld	r0, r26, v0
-	mulhdu	r31, r26, v0
-	mulld	r24, r27, v0
-	mulhdu	r8, r27, v0
-	mulld	r9, r20, v0
-	mulhdu	r27, r20, v0
-	mulld	r11, r21, v0
-	mulhdu	r26, r21, v0
-	adde	r0, r0, r12
-	adde	r24, r24, r31
-	std	r0, 8(rp)
-	adde	r9, r9, r8
-	std	r24, 16(rp)
-	adde	r11, r11, r27
-	std	r9, 24(rp)
-	addi	up, up, 32
-	std	r11, 32(rp)
-	addi	rp, rp, 32
-	mr	r12, r26
-	bdnz	L(lo_m_2)
-
-	ALIGN(16)
-L(end_m_2):
-	addze	r12, r12
-	addic.	vn, vn, -1
-	std	r12, 8(rp)
-	beq	L(ret)
-
-	ALIGN(16)
-L(outer_lo_2):
-	mtctr	un		C copy inner loop count into ctr
-	addi	rp, outer_rp, 16
-	addi	up, outer_up, 8
-	addi	outer_rp, outer_rp, 8
-	ld	v0, 0(vp)	C new v limb
-	addi	vp, vp, 8
-	ld	r26, -8(up)
-	ld	r27, 0(up)
-	ld	r28, -8(rp)
-	ld	r29, 0(rp)
-	mulld	r0, r26, v0
-	mulhdu	r10, r26, v0
-	mulld	r24, r27, v0
-	mulhdu	r8, r27, v0
-	addc	r24, r24, r10
-	addze	r12, r8
-	addc	r0, r0, r28
-	std	r0, -8(rp)
-	adde	r24, r24, r29
-	std	r24, 0(rp)
-	bdz	L(end_2)
-
-	ALIGN(16)		C registers dying
-L(lo_2):
-	ld	r26, 8(up)
-	ld	r27, 16(up)
-	ld	r20, 24(up)	C
-	ld	r21, 32(up)	C
-	addi	up, up, 32	C
-	addi	rp, rp, 32	C
-	mulld	r0, r26, v0	C
-	mulhdu	r10, r26, v0	C 26
-	mulld	r24, r27, v0	C
-	mulhdu	r8, r27, v0	C 27
-	mulld	r9, r20, v0	C
-	mulhdu	r27, r20, v0	C 26
-	mulld	r11, r21, v0	C
-	mulhdu	r26, r21, v0	C 27
-	ld	r28, -24(rp)	C
-	adde	r0, r0, r12	C 0 12
-	ld	r29, -16(rp)	C
-	adde	r24, r24, r10	C 24 10
-	ld	r30, -8(rp)	C
-	ld	r31, 0(rp)	C
-	adde	r9, r9, r8	C 8 9
-	adde	r11, r11, r27	C 27 11
-	addze	r12, r26	C 26
-	addc	r0, r0, r28	C 0 28
-	std	r0, -24(rp)	C 0
-	adde	r24, r24, r29	C 7 29
-	std	r24, -16(rp)	C 7
-	adde	r9, r9, r30	C 9 30
-	std	r9, -8(rp)	C 9
-	adde	r11, r11, r31	C 11 31
-	std	r11, 0(rp)	C 11
-	bdnz	L(lo_2)		C
-
-	ALIGN(16)
-L(end_2):
-	addze	r12, r12
-	addic.	vn, vn, -1
-	std	r12, 8(rp)
-	bne	L(outer_lo_2)
-C	b	L(ret)
-
-L(ret):	ld	r31, -8(r1)
-	ld	r30, -16(r1)
-	ld	r29, -24(r1)
-	ld	r28, -32(r1)
-	ld	r27, -40(r1)
-	ld	r26, -48(r1)
-	ld	r25, -56(r1)
-	ld	r24, -64(r1)
-	ld	r23, -72(r1)
-	ld	r22, -80(r1)
-	ld	r21, -88(r1)
-	ld	r20, -96(r1)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/p7/aormul_2.asm b/gmp/mpn/powerpc64/mode64/p7/aormul_2.asm
deleted file mode 100644
index 8731e01a89..0000000000
--- a/gmp/mpn/powerpc64/mode64/p7/aormul_2.asm
+++ /dev/null
@@ -1,135 +0,0 @@
-dnl  PowerPC-64 mpn_mul_2 and mpn_addmul_2.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                    cycles/limb    cycles/limb
-C			mul_2         addmul_2
-C POWER3/PPC630		 ?		 ?
-C POWER4/PPC970		 ?		 ?
-C POWER5		 ?		 ?
-C POWER6		 ?		 ?
-C POWER7-SMT4		 3		 3
-C POWER7-SMT2		 ?		 ?
-C POWER7-SMT1		 ?		 ?
-
-C INPUT PARAMETERS
-define(`rp', `r3')
-define(`up', `r4')
-define(`n',  `r5')
-define(`vp', `r6')
-
-define(`cy0', `r10')
-ifdef(`EXTRA_REGISTER',
-` define(`cy1', EXTRA_REGISTER)',
-` define(`cy1', `r31')')
-
-ifdef(`OPERATION_mul_2',`
-  define(`AM',		`')
-  define(`ADDX',	`addc')
-  define(`func',	`mpn_mul_2')
-')
-ifdef(`OPERATION_addmul_2',`
-  define(`AM',		`$1')
-  define(`ADDX',	`adde')
-  define(`func',	`mpn_addmul_2')
-')
-
-MULFUNC_PROLOGUE(mpn_mul_2 mpn_addmul_2)
-
-ASM_START()
-PROLOGUE(func)
-
-ifdef(`EXTRA_REGISTER',,`
-	std	r31, -8(r1)
-')
-	andi.	r12, n, 1
-	addi	r0, n, 1
-	srdi	r0, r0, 1
-	mtctr	r0
-	ld	r11, 0(vp)		C v0
-	li	cy0, 0
-	ld	r12, 8(vp)		C v1
-	li	cy1, 0
-	ld	r5, 0(up)
-	beq	L(lo0)
-	addi	up, up, -8
-	addi	rp, rp, -8
-	b	L(lo1)
-
-	ALIGN(32)
-L(top):
-AM(`	ld	r0, -8(rp)')
-	ld	r5, 0(up)
-AM(`	addc	r6, r6, r0')
-	ADDX	r7, r7, r8
-	addze	r9, r9
-	addc	r6, r6, cy0
-	adde	cy0, r7, cy1
-	std	r6, -8(rp)
-	addze	cy1, r9
-L(lo0):	mulld	r6, r11, r5		C v0 * u[i]  weight 0
-	mulhdu	r7, r11, r5		C v0 * u[i]  weight 1
-	mulld	r8, r12, r5		C v1 * u[i]  weight 1
-	mulhdu	r9, r12, r5		C v1 * u[i]  weight 2
-AM(`	ld	r0, 0(rp)')
-	ld	r5, 8(up)
-AM(`	addc	r6, r6, r0')
-	ADDX	r7, r7, r8
-	addze	r9, r9
-	addc	r6, r6, cy0
-	adde	cy0, r7, cy1
-	std	r6, 0(rp)
-	addze	cy1, r9
-L(lo1):	mulld	r6, r11, r5		C v0 * u[i]  weight 0
-	mulhdu	r7, r11, r5		C v0 * u[i]  weight 1
-	addi	up, up, 16
-	addi	rp, rp, 16
-	mulld	r8, r12, r5		C v1 * u[i]  weight 1
-	mulhdu	r9, r12, r5		C v1 * u[i]  weight 2
-	bdnz	L(top)
-
-L(end):
-AM(`	ld	r0, -8(rp)')
-AM(`	addc	r6, r6, r0')
-	ADDX	r7, r7, r8
-	addze	r9, r9
-	addc	r6, r6, cy0
-	std	r6, -8(rp)
-	adde	cy0, r7, cy1
-	addze	cy1, r9
-	std	cy0, 0(rp)
-	mr	r3, cy1
-
-ifdef(`EXTRA_REGISTER',,`
-	ld	r31, -8(r1)
-')
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/p7/aors_n.asm b/gmp/mpn/powerpc64/mode64/p7/aors_n.asm
deleted file mode 100644
index 857c701dec..0000000000
--- a/gmp/mpn/powerpc64/mode64/p7/aors_n.asm
+++ /dev/null
@@ -1,128 +0,0 @@
-dnl  PowerPC-64 mpn_add_n, mpn_sub_n optimised for POWER7.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C POWER3/PPC630		 ?
-C POWER4/PPC970		 ?
-C POWER5		 ?
-C POWER6		 ?
-C POWER7		 2.18
-
-C This is a tad bit slower than the cnd_aors_n.asm code, which is of course an
-C anomaly.
-
-ifdef(`OPERATION_add_n',`
-  define(ADDSUBC,	adde)
-  define(ADDSUB,	addc)
-  define(func,		mpn_add_n)
-  define(func_nc,	mpn_add_nc)
-  define(GENRVAL,	`addi	r3, r3, 1')
-  define(SETCBR,	`addic	r0, $1, -1')
-  define(CLRCB,		`addic	r0, r0, 0')
-')
-ifdef(`OPERATION_sub_n',`
-  define(ADDSUBC,	subfe)
-  define(ADDSUB,	subfc)
-  define(func,		mpn_sub_n)
-  define(func_nc,	mpn_sub_nc)
-  define(GENRVAL,	`neg	r3, r3')
-  define(SETCBR,	`subfic	r0, $1, 0')
-  define(CLRCB,		`addic	r0, r1, -1')
-')
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-C INPUT PARAMETERS
-define(`rp',	`r3')
-define(`up',	`r4')
-define(`vp',	`r5')
-define(`n',	`r6')
-
-ASM_START()
-PROLOGUE(func_nc)
-	SETCBR(r7)
-	b	L(ent)
-EPILOGUE()
-
-PROLOGUE(func)
-	CLRCB
-L(ent):
-	andi.	r7, n, 1
-	beq	L(bx0)
-
-L(bx1):	ld	r7, 0(up)
-	ld	r9, 0(vp)
-	ADDSUBC	r11, r9, r7
-	std	r11, 0(rp)
-	cmpldi	cr6, n, 1
-	beq	cr6, L(end)
-	addi	up, up, 8
-	addi	vp, vp, 8
-	addi	rp, rp, 8
-
-L(bx0):	addi	r0, n, 2	C compute branch...
-	srdi	r0, r0, 2	C ...count
-	mtctr	r0
-
-	andi.	r7, n, 2
-	bne	L(mid)
-
-	addi	up, up, 16
-	addi	vp, vp, 16
-	addi	rp, rp, 16
-
-	ALIGN(32)
-L(top):	ld	r6, -16(up)
-	ld	r7, -8(up)
-	ld	r8, -16(vp)
-	ld	r9, -8(vp)
-	ADDSUBC	r10, r8, r6
-	ADDSUBC	r11, r9, r7
-	std	r10, -16(rp)
-	std	r11, -8(rp)
-L(mid):	ld	r6, 0(up)
-	ld	r7, 8(up)
-	ld	r8, 0(vp)
-	ld	r9, 8(vp)
-	ADDSUBC	r10, r8, r6
-	ADDSUBC	r11, r9, r7
-	std	r10, 0(rp)
-	std	r11, 8(rp)
-	addi	up, up, 32
-	addi	vp, vp, 32
-	addi	rp, rp, 32
-	bdnz	L(top)
-
-L(end):	subfe	r3, r0, r0	C -cy
-	GENRVAL
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/p7/aorsorrlsh1_n.asm b/gmp/mpn/powerpc64/mode64/p7/aorsorrlsh1_n.asm
deleted file mode 100644
index ddf5fd84b1..0000000000
--- a/gmp/mpn/powerpc64/mode64/p7/aorsorrlsh1_n.asm
+++ /dev/null
@@ -1,43 +0,0 @@
-dnl  PowerPC-64 mpn_addlsh1_n, mpn_sublsh1_n, mpn_rsblsh1_n.
-
-dnl  Copyright 2003, 2005, 2009, 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-define(LSH,		1)
-define(RSH,		63)
-
-ifdef(`OPERATION_addlsh1_n',`define(`DO_add')')
-ifdef(`OPERATION_sublsh1_n',`define(`DO_sub')')
-ifdef(`OPERATION_rsblsh1_n',`define(`DO_rsb')')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n mpn_rsblsh1_n)
-
-include_mpn(`powerpc64/mode64/p7/aorsorrlshC_n.asm')
diff --git a/gmp/mpn/powerpc64/mode64/p7/aorsorrlsh2_n.asm b/gmp/mpn/powerpc64/mode64/p7/aorsorrlsh2_n.asm
deleted file mode 100644
index 3f9d88d6ca..0000000000
--- a/gmp/mpn/powerpc64/mode64/p7/aorsorrlsh2_n.asm
+++ /dev/null
@@ -1,43 +0,0 @@
-dnl  PowerPC-64 mpn_addlsh2_n, mpn_sublsh2_n, mpn_rsblsh2_n.
-
-dnl  Copyright 2003, 2005, 2009, 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-define(LSH,		2)
-define(RSH,		62)
-
-ifdef(`OPERATION_addlsh2_n',`define(`DO_add')')
-ifdef(`OPERATION_sublsh2_n',`define(`DO_sub')')
-ifdef(`OPERATION_rsblsh2_n',`define(`DO_rsb')')
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n mpn_rsblsh2_n)
-
-include_mpn(`powerpc64/mode64/p7/aorsorrlshC_n.asm')
diff --git a/gmp/mpn/powerpc64/mode64/p7/aorsorrlshC_n.asm b/gmp/mpn/powerpc64/mode64/p7/aorsorrlshC_n.asm
deleted file mode 100644
index 525120262f..0000000000
--- a/gmp/mpn/powerpc64/mode64/p7/aorsorrlshC_n.asm
+++ /dev/null
@@ -1,129 +0,0 @@
-dnl  PowerPC-64 mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n.
-
-dnl  Copyright 2003, 2005, 2009, 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-C                  cycles/limb
-C POWER3/PPC630          ?
-C POWER4/PPC970          ?
-C POWER5                 ?
-C POWER6                 ?
-C POWER7                 2.5
-
-C INPUT PARAMETERS
-define(`rp', `r3')
-define(`up', `r4')
-define(`vp', `r5')
-define(`n',  `r6')
-
-ifdef(`DO_add', `
-  define(`ADDSUBC',	`addc	$1, $2, $3')
-  define(`ADDSUBE',	`adde	$1, $2, $3')
-  define(INITCY,	`addic	$1, r1, 0')
-  define(RETVAL,	`addze	r3, $1')
-  define(`func',	mpn_addlsh`'LSH`'_n)')
-ifdef(`DO_sub', `
-  define(`ADDSUBC',	`subfc	$1, $2, $3')
-  define(`ADDSUBE',	`subfe	$1, $2, $3')
-  define(INITCY,	`addic	$1, r1, -1')
-  define(RETVAL,	`subfze	r3, $1
-			neg	r3, r3')
-  define(`func',	mpn_sublsh`'LSH`'_n)')
-ifdef(`DO_rsb', `
-  define(`ADDSUBC',	`subfc	$1, $3, $2')
-  define(`ADDSUBE',	`subfe	$1, $3, $2')
-  define(INITCY,	`addic	$1, r1, -1')
-  define(RETVAL,	`addme	r3, $1')
-  define(`func',	mpn_rsblsh`'LSH`'_n)')
-
-define(`s0', `r0')  define(`s1', `r9')
-define(`u0', `r6')  define(`u1', `r7')
-define(`v0', `r10') define(`v1', `r11')
-
-
-ASM_START()
-PROLOGUE(func)
-	rldic	r7, n, 3, 59
-	add	up, up, r7
-	add	vp, vp, r7
-	add	rp, rp, r7
-
-ifdef(`DO_add', `
-	addic	r0, n, 3	C set cy flag as side effect
-',`
-	subfc	r0, r0, r0	C set cy flag
-	addi	r0, n, 3
-')
-	srdi	r0, r0, 2
-	mtctr	r0
-
-	andi.	r0, n, 1
-	beq	L(bx0)
-
-L(bx1):	andi.	r0, n, 2
-	li	s0, 0
-	bne	L(lo3)
-	b	L(lo1)
-
-L(bx0):	andi.	r0, n, 2
-	li	s1, 0
-	bne	L(lo2)
-
-	ALIGN(32)
-L(top):	addi	rp, rp, 32
-	ld	v0, 0(vp)
-	addi	vp, vp, 32
-	rldimi	s1, v0, LSH, 0
-	ld	u0, 0(up)
-	addi	up, up, 32
-	srdi	s0, v0, RSH
-	ADDSUBE(s1, s1, u0)
-	std	s1, -32(rp)
-L(lo3):	ld	v1, -24(vp)
-	rldimi	s0, v1, LSH, 0
-	ld	u1, -24(up)
-	srdi	s1, v1, RSH
-	ADDSUBE(s0, s0, u1)
-	std	s0, -24(rp)
-L(lo2):	ld	v0, -16(vp)
-	rldimi	s1, v0, LSH, 0
-	ld	u0, -16(up)
-	srdi	s0, v0, RSH
-	ADDSUBE(s1, s1, u0)
-	std	s1, -16(rp)
-L(lo1):	ld	v1, -8(vp)
-	rldimi	s0, v1, LSH, 0
-	ld	u1, -8(up)
-	srdi	s1, v1, RSH
-	ADDSUBE(s0, s0, u1)
-	std	s0, -8(rp)
-	bdnz	L(top)		C decrement CTR and loop back
-
-	RETVAL(	s1)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/p7/gcd_1.asm b/gmp/mpn/powerpc64/mode64/p7/gcd_1.asm
deleted file mode 100644
index 47cb40bdc5..0000000000
--- a/gmp/mpn/powerpc64/mode64/p7/gcd_1.asm
+++ /dev/null
@@ -1,110 +0,0 @@
-dnl  PowerPC-64 mpn_gcd_1.
-
-dnl  Copyright 2000-2002, 2005, 2009, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		    cycles/bit (approx)
-C POWER3/PPC630		 -
-C POWER4/PPC970		 -
-C POWER5		 -
-C POWER6		 -
-C POWER7		 7.6
-C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
-
-C INPUT PARAMETERS
-define(`up',    `r3')
-define(`n',     `r4')
-define(`v0',    `r5')
-
-EXTERN_FUNC(mpn_mod_1)
-EXTERN_FUNC(mpn_modexact_1c_odd)
-
-ASM_START()
-PROLOGUE(mpn_gcd_1,toc)
-	mflr	r0
-	std	r30, -16(r1)
-	std	r31, -8(r1)
-	std	r0, 16(r1)
-	stdu	r1, -128(r1)
-
-	ld	r7, 0(up)		C U low limb
-	or	r0, r5, r7		C x | y
-
-	neg	r6, r0
-	and	r6, r6, r0
-	cntlzd	r31, r6			C common twos
-	subfic	r31, r31, 63
-
-	neg	r6, r5
-	and	r6, r6, r5
-	cntlzd	r8, r6
-	subfic	r8, r8, 63
-	srd	r5, r5, r8
-	mr	r30, r5			C v0 saved
-
-	cmpdi	r4, BMOD_1_TO_MOD_1_THRESHOLD
-	blt	L(bmod)
-	CALL(	mpn_mod_1)
-	b	L(reduced)
-L(bmod):
-	li	r6, 0
-	CALL(	mpn_modexact_1c_odd)
-L(reduced):
-
-define(`cnt',  `r9')dnl
-
-	neg.	r6, r3
-	and	r6, r6, r3
-	cntlzd	cnt, r6
-	li	r12, 63
-	bne	L(mid)
-	b	L(end)
-
-	ALIGN(16)
-L(top):	isel	r30, r3, r30, 29	C y = min(x,y)
-	isel	r3, r10, r11, 29	C x = |y - x|
-L(mid):	subf	cnt, cnt, r12		C cnt = 63-cnt
-	srd	r3, r3, cnt
-	subf	r10, r3, r30		C r10 = y - x
-	subf	r11, r30, r3		C r11 = x - y
-	cmpld	cr7, r30, r3
-	and	r8, r11, r10		C isolate lsb
-	cntlzd	cnt, r8
-	bne	cr7, L(top)
-
-L(end):	sld	r3, r30, r31
-
-	addi	r1, r1, 128
-	ld	r0, 16(r1)
-	ld	r30, -16(r1)
-	ld	r31, -8(r1)
-	mtlr	r0
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/p7/gmp-mparam.h b/gmp/mpn/powerpc64/mode64/p7/gmp-mparam.h
deleted file mode 100644
index 7e719e8aac..0000000000
--- a/gmp/mpn/powerpc64/mode64/p7/gmp-mparam.h
+++ /dev/null
@@ -1,243 +0,0 @@
-/* POWER7 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2003, 2009-2011, 2013, 2014 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 3700 MHz POWER7/SMT4 (gcc111.fsffrance.org) */
-/* FFT tuning limit = 40000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.8 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        24
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     13
-#define USE_PREINV_DIVREM_1                  0
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD            1
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           28
-
-#define MUL_TOOM22_THRESHOLD                22
-#define MUL_TOOM33_THRESHOLD                72
-#define MUL_TOOM44_THRESHOLD               200
-#define MUL_TOOM6H_THRESHOLD               298
-#define MUL_TOOM8H_THRESHOLD               406
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      69
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     140
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     132
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     138
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     124
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 36
-#define SQR_TOOM3_THRESHOLD                109
-#define SQR_TOOM4_THRESHOLD                196
-#define SQR_TOOM6_THRESHOLD                414
-#define SQR_TOOM8_THRESHOLD                547
-
-#define MULMID_TOOM42_THRESHOLD             58
-
-#define MULMOD_BNM1_THRESHOLD               15
-#define SQRMOD_BNM1_THRESHOLD               20
-
-#define MUL_FFT_MODF_THRESHOLD             412  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    412, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     21, 7}, {     11, 6}, {     23, 7}, {     12, 6}, \
-    {     25, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
-    {     13, 7}, {     28, 8}, {     15, 7}, {     33, 8}, \
-    {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
-    {     21, 9}, {     11, 8}, {     29, 9}, {     15, 8}, \
-    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     31, 8}, \
-    {     63, 9}, {     43,10}, {     23, 9}, {     51,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
-    {    159,11}, {     95,10}, {    191, 9}, {    383,11}, \
-    {    111,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,11}, {    143,10}, {    287, 9}, {    575,11}, \
-    {    159,10}, {    319,12}, {     95,11}, {    191,10}, \
-    {    383, 9}, {    767,13}, {     63,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    271,10}, {    543, 9}, \
-    {   1087,11}, {    287,10}, {    575,11}, {    303,12}, \
-    {    159,11}, {    319,10}, {    639,11}, {    335,10}, \
-    {    671,11}, {    351,10}, {    703, 9}, {   1407,11}, \
-    {    383,10}, {    767,11}, {    415,10}, {    831,12}, \
-    {    223,11}, {    447,10}, {    895,13}, {    127,12}, \
-    {    255,11}, {    511,10}, {   1023,11}, {    543,12}, \
-    {    287,11}, {    575,10}, {   1151,11}, {    607,12}, \
-    {    319,11}, {    639,10}, {   1279,11}, {    671,12}, \
-    {    351,11}, {    703,12}, {    383,11}, {    767,12}, \
-    {    415,11}, {    831,10}, {   1663,12}, {    447,11}, \
-    {    895,12}, {    479,14}, {    127,13}, {    255,12}, \
-    {    511,11}, {   1023,12}, {    543,11}, {   1087,10}, \
-    {   2175,12}, {    575,11}, {   1151,12}, {    607,11}, \
-    {   1215,13}, {    319,12}, {    639,11}, {   1279,12}, \
-    {    671,11}, {   1343,10}, {   2687,12}, {    703,11}, \
-    {   1407,13}, {    383,12}, {    767,11}, {   1535,12}, \
-    {    799,11}, {   1599,12}, {    831,11}, {   1663,13}, \
-    {    447,12}, {    895,11}, {   1791,12}, {    959,11}, \
-    {   1919,14}, {    255,13}, {    511,12}, {   1087,11}, \
-    {   2175,13}, {    575,12}, {   1215,11}, {   2431,13}, \
-    {    639,12}, {   1343,11}, {   2687,13}, {    703,12}, \
-    {   1407,11}, {   2815,14}, {    383,13}, {    767,12}, \
-    {   1599,13}, {    831,12}, {   1663,13}, {    895,12}, \
-    {   1791,13}, {    959,12}, {   1919,11}, {   3839,14}, \
-    {    511,13}, {   1023,12}, {   2047,13}, {   1087,12}, \
-    {   2175,13}, {   1215,12}, {   2431,11}, {   4863,14}, \
-    {    639,13}, {   1279,12}, {   2559,13}, {   1343,12}, \
-    {   2687,13}, {   1407,12}, {   2815,13}, {   1471,12}, \
-    {   2943,14}, {    767,13}, {   1599,12}, {   3199,13}, \
-    {   1663,14}, {    895,13}, {   1791,12}, {   3583,13}, \
-    {   1919,12}, {   3839,15}, {    511,14}, {   1023,13}, \
-    {   2175,14}, {   1151,13}, {   2431,12}, {   4863,14}, \
-    {   1279,13}, {   2687,14}, {   1407,13}, {   2815,15}, \
-    {    767,14}, {   1535,13}, {   3199,14}, {   1663,13}, \
-    {   3455,12}, {   6911,14}, {   1919,13}, {   3839,16}, \
-    {    511,15}, {   1023,14}, {   2175,13}, {   4351,14}, \
-    {   2431,13}, {   4863,15}, {  32768,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 231
-#define MUL_FFT_THRESHOLD                 4288
-
-#define SQR_FFT_MODF_THRESHOLD             368  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    368, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     21, 7}, {     11, 6}, {     23, 7}, {     12, 6}, \
-    {     25, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
-    {     11, 7}, {     25, 8}, {     13, 7}, {     28, 8}, \
-    {     15, 7}, {     32, 8}, {     17, 7}, {     35, 8}, \
-    {     21, 9}, {     11, 8}, {     29, 9}, {     15, 8}, \
-    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     31, 8}, \
-    {     63, 9}, {     39,10}, {     23, 9}, {     51,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63,10}, {    135,11}, {     79,10}, \
-    {    159, 9}, {    319,11}, {     95,10}, {    191, 9}, \
-    {    383,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271,11}, {    143,10}, {    287, 9}, \
-    {    575,10}, {    303,11}, {    159,10}, {    319, 9}, \
-    {    639,12}, {     95,11}, {    191,10}, {    383,11}, \
-    {    207,13}, {     63,12}, {    127,11}, {    255,10}, \
-    {    511,11}, {    271,10}, {    543, 9}, {   1087,11}, \
-    {    287,10}, {    575, 9}, {   1151,11}, {    303,10}, \
-    {    607,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    335,10}, {    671,11}, {    351,10}, {    703,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
-    {    831,12}, {    223,11}, {    447,10}, {    895,11}, \
-    {    479,13}, {    127,12}, {    255,11}, {    543,10}, \
-    {   1087,12}, {    287,11}, {    575,10}, {   1151,11}, \
-    {    607,12}, {    319,11}, {    639,10}, {   1279,11}, \
-    {    671,12}, {    351,11}, {    703,13}, {    191,12}, \
-    {    383,11}, {    767,12}, {    415,11}, {    831,10}, \
-    {   1663,12}, {    447,11}, {    895,12}, {    479,14}, \
-    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
-    {    543,11}, {   1087,12}, {    575,11}, {   1151,12}, \
-    {    607,13}, {    319,12}, {    639,11}, {   1279,12}, \
-    {    703,11}, {   1407,10}, {   2815,13}, {    383,12}, \
-    {    767,11}, {   1535,12}, {    799,11}, {   1599,12}, \
-    {    831,11}, {   1663,13}, {    447,12}, {    895,11}, \
-    {   1791,12}, {    959,11}, {   1919,10}, {   3839,14}, \
-    {    255,13}, {    511,12}, {   1087,13}, {    575,12}, \
-    {   1215,11}, {   2431,13}, {    639,12}, {   1343,11}, \
-    {   2687,13}, {    703,12}, {   1407,14}, {    383,13}, \
-    {    767,12}, {   1599,13}, {    831,12}, {   1663,13}, \
-    {    895,12}, {   1791,13}, {    959,12}, {   1919,14}, \
-    {    511,13}, {   1087,12}, {   2175,13}, {   1151,12}, \
-    {   2303,13}, {   1215,12}, {   2431,14}, {    639,13}, \
-    {   1279,12}, {   2559,13}, {   1343,12}, {   2687,13}, \
-    {   1407,12}, {   2815,13}, {   1471,14}, {    767,13}, \
-    {   1663,12}, {   3327,13}, {   1727,14}, {    895,13}, \
-    {   1791,12}, {   3583,13}, {   1919,15}, {    511,14}, \
-    {   1023,13}, {   2175,14}, {   1151,13}, {   2431,12}, \
-    {   4863,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
-    {   2943,15}, {    767,14}, {   1535,13}, {   3199,14}, \
-    {   1663,13}, {   3455,14}, {   1791,13}, {   3583,14}, \
-    {   1919,13}, {   3839,16}, {    511,15}, {   1023,14}, \
-    {   2175,13}, {   4479,14}, {   2303,13}, {   4607,14}, \
-    {   2431,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 230
-#define SQR_FFT_THRESHOLD                 3264
-
-#define MULLO_BASECASE_THRESHOLD             3
-#define MULLO_DC_THRESHOLD                  34
-#define MULLO_MUL_N_THRESHOLD             9174
-
-#define DC_DIV_QR_THRESHOLD                 33
-#define DC_DIVAPPR_Q_THRESHOLD             126
-#define DC_BDIV_QR_THRESHOLD                63
-#define DC_BDIV_Q_THRESHOLD                152
-
-#define INV_MULMOD_BNM1_THRESHOLD           54
-#define INV_NEWTON_THRESHOLD               155
-#define INV_APPR_THRESHOLD                 125
-
-#define BINV_NEWTON_THRESHOLD              294
-#define REDC_1_TO_REDC_2_THRESHOLD          17
-#define REDC_2_TO_REDC_N_THRESHOLD         115
-
-#define MU_DIV_QR_THRESHOLD               1334
-#define MU_DIVAPPR_Q_THRESHOLD            1334
-#define MUPI_DIV_QR_THRESHOLD               54
-#define MU_BDIV_QR_THRESHOLD              1142
-#define MU_BDIV_Q_THRESHOLD               1470
-
-#define POWM_SEC_TABLE  1,14,62,642,960
-
-#define MATRIX22_STRASSEN_THRESHOLD         14
-#define HGCD_THRESHOLD                     126
-#define HGCD_APPR_THRESHOLD                184
-#define HGCD_REDUCE_THRESHOLD             3014
-#define GCD_DC_THRESHOLD                   440
-#define GCDEXT_DC_THRESHOLD                386
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                11
-#define GET_STR_PRECOMPUTE_THRESHOLD        17
-#define SET_STR_DC_THRESHOLD              1655
-#define SET_STR_PRECOMPUTE_THRESHOLD      3417
-
-#define FAC_DSC_THRESHOLD                 1138
-#define FAC_ODD_THRESHOLD                   27
diff --git a/gmp/mpn/powerpc64/mode64/rsh1add_n.asm b/gmp/mpn/powerpc64/mode64/rsh1add_n.asm
new file mode 100644
index 0000000000..0cd6cf4e8c
--- /dev/null
+++ b/gmp/mpn/powerpc64/mode64/rsh1add_n.asm
@@ -0,0 +1,104 @@
+dnl  PowerPC-64 mpn_rsh1add_n -- rp[] = (up[] + vp[]) >> 1
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		cycles/limb
+C POWER3/PPC630:     2		(1.5 c/l should be possible)
+C POWER4/PPC970:     4		(2.0 c/l should be possible)
+
+C INPUT PARAMETERS
+C rp	r3
+C up	r4
+C vp	r5
+C n	r6
+
+define(`rp',`r3')
+define(`up',`r4')
+define(`vp',`r5')
+
+define(`s0',`r6')
+define(`s1',`r7')
+define(`x',`r0')
+define(`u0',`r8')
+define(`u1',`r9')
+define(`v0',`r10')
+define(`v1',`r11')
+
+
+ASM_START()
+PROLOGUE(mpn_rsh1add_n)
+	mtctr	r6		C copy size to count register
+	addi	rp, rp, -8	C bias rp so the stores below address 8(rp) and 16(rp)
+
+	ld	u1, 0(up)
+	ld	v1, 0(vp)
+	addc	x, v1, u1	C lowest limb sum; starts the carry chain in CA
+	rldicl	r12, x, 0, 63	C return value: least significant bit of the lowest sum
+	srdi	s1, x, 1	C s1 = sum >> 1; its top bit comes from the next limb
+
+	bdz	L(1)		C CTR reached zero (n = 1): only the carry is left to merge
+
+	ld	u0, 8(up)
+	ld	v0, 8(vp)
+
+	bdz	L(end)		C CTR reached zero (n = 2): skip the loop
+
+L(oop):	ldu	u1, 16(up)	C load with update: up advances by 16 bytes
+	ldu	v1, 16(vp)	C likewise for vp
+	adde	x, v0, u0	C add with carry-in from the previous limb
+	srdi	s0, x, 1	C start the next result limb
+	rldimi	s1, x, 63, 0	C low bit of x becomes the top bit of s1
+	std	s1, 8(rp)	C previous result limb is now complete
+
+	bdz	L(exit)		C CTR reached zero: u1/v1 are the last limbs
+
+	ld	u0, 8(up)
+	ld	v0, 8(vp)
+	adde	x, v1, u1	C second half of the 2-way unrolled body
+	srdi	s1, x, 1
+	rldimi	s0, x, 63, 0	C low bit of x becomes the top bit of s0
+	stdu	s0, 16(rp)	C store with update: rp advances by 16 bytes
+
+	bdnz	L(oop)		C loop while CTR is nonzero after the decrement
+
+L(end):	adde	x, v0, u0	C tail when u0/v0 hold the last limbs
+	srdi	s0, x, 1
+	rldimi	s1, x, 63, 0
+	std	s1, 8(rp)
+
+	li	x, 0		C li leaves CA untouched
+	addze	x, x		C x = carry out of the highest limb (0 or 1)
+	rldimi	s0, x, 63, 0	C that carry is the top bit of the highest result limb
+	std	s0, 16(rp)
+	mr	r3, r12		C return the bit saved in r12 above
+	blr
+
+L(exit):	adde	x, v1, u1	C tail when u1/v1 hold the last limbs
+	srdi	s1, x, 1
+	rldimi	s0, x, 63, 0
+	stdu	s0, 16(rp)	C rp advances so the final store below uses 8(rp)
+
+L(1):	li	x, 0		C li leaves CA untouched
+	addze	x, x		C x = carry out of the highest limb (0 or 1)
+	rldimi	s1, x, 63, 0	C that carry is the top bit of the highest result limb
+	std	s1, 8(rp)
+	mr	r3, r12		C return the bit saved in r12 above
+	blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/rsh1aors_n.asm b/gmp/mpn/powerpc64/mode64/rsh1aors_n.asm
deleted file mode 100644
index 7f7734bcef..0000000000
--- a/gmp/mpn/powerpc64/mode64/rsh1aors_n.asm
+++ /dev/null
@@ -1,172 +0,0 @@
-dnl  PowerPC-64 mpn_rsh1add_n, mpn_rsh1sub_n
-
-dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C POWER3/PPC630		 ?
-C POWER4/PPC970		 2.9
-C POWER5		 ?
-C POWER6		 3.5
-C POWER7		 2.25
-
-define(`rp', `r3')
-define(`up', `r4')
-define(`vp', `r5')
-define(`n',  `r6')
-
-ifdef(`OPERATION_rsh1add_n', `
-  define(`ADDSUBC',	`addc')
-  define(`ADDSUBE',	`adde')
-  define(INITCY,	`addic	$1, r1, 0')
-  define(`func',	mpn_rsh1add_n)')
-ifdef(`OPERATION_rsh1sub_n', `
-  define(`ADDSUBC',	`subfc')
-  define(`ADDSUBE',	`subfe')
-  define(INITCY,	`addic	$1, r1, -1')
-  define(`func',	mpn_rsh1sub_n)')
-
-define(`s0', `r9')
-define(`s1', `r7')
-define(`x0', `r0')
-define(`x1', `r12')
-define(`u0', `r8')
-define(`v0', `r10')
-
-
-ASM_START()
-PROLOGUE(func)
-	ld	u0, 0(up)
-	ld	v0, 0(vp)
-
-	cmpdi	cr6, n, 2
-
-	addi	r0, n, 1
-	srdi	r0, r0, 2
-	mtctr	r0			C copy size to count register
-
-	andi.	r0, n, 1
-	bne	cr0, L(bx1)
-
-L(bx0):	ADDSUBC	x1, v0, u0
-	ld	u0, 8(up)
-	ld	v0, 8(vp)
-	ADDSUBE	x0, v0, u0
-	ble	cr6, L(n2)
-	ld	u0, 16(up)
-	ld	v0, 16(vp)
-	srdi	s0, x1, 1
-	rldicl	r11, x1, 0, 63		C return value
-	ADDSUBE	x1, v0, u0
-	andi.	n, n, 2
-	bne	cr0, L(b10)
-L(b00):	addi	rp, rp, -24
-	b	L(lo0)
-L(b10):	addi	up, up, 16
-	addi	vp, vp, 16
-	addi	rp, rp, -8
-	b	L(lo2)
-
-	ALIGN(16)
-L(bx1):	ADDSUBC	x0, v0, u0
-	ble	cr6, L(n1)
-	ld	u0, 8(up)
-	ld	v0, 8(vp)
-	ADDSUBE	x1, v0, u0
-	ld	u0, 16(up)
-	ld	v0, 16(vp)
-	srdi	s1, x0, 1
-	rldicl	r11, x0, 0, 63		C return value
-	ADDSUBE	x0, v0, u0
-	andi.	n, n, 2
-	bne	cr0, L(b11)
-L(b01):	addi	up, up, 8
-	addi	vp, vp, 8
-	addi	rp, rp, -16
-	b	L(lo1)
-L(b11):	addi	up, up, 24
-	addi	vp, vp, 24
-	bdz	L(end)
-
-	ALIGN(32)
-L(top):	ld	u0, 0(up)
-	ld	v0, 0(vp)
-	srdi	s0, x1, 1
-	rldimi	s1, x1, 63, 0
-	std	s1, 0(rp)
-	ADDSUBE	x1, v0, u0
-L(lo2):	ld	u0, 8(up)
-	ld	v0, 8(vp)
-	srdi	s1, x0, 1
-	rldimi	s0, x0, 63, 0
-	std	s0, 8(rp)
-	ADDSUBE	x0, v0, u0
-L(lo1):	ld	u0, 16(up)
-	ld	v0, 16(vp)
-	srdi	s0, x1, 1
-	rldimi	s1, x1, 63, 0
-	std	s1, 16(rp)
-	ADDSUBE	x1, v0, u0
-L(lo0):	ld	u0, 24(up)
-	ld	v0, 24(vp)
-	srdi	s1, x0, 1
-	rldimi	s0, x0, 63, 0
-	std	s0, 24(rp)
-	ADDSUBE	x0, v0, u0
-	addi	up, up, 32
-	addi	vp, vp, 32
-	addi	rp, rp, 32
-	bdnz	L(top)
-
-L(end):	srdi	s0, x1, 1
-	rldimi	s1, x1, 63, 0
-	std	s1, 0(rp)
-L(cj2):	srdi	s1, x0, 1
-	rldimi	s0, x0, 63, 0
-	std	s0, 8(rp)
-L(cj1):	ADDSUBE	x1, x1, x1		C pseudo-depends on x1
-	rldimi	s1, x1, 63, 0
-	std	s1, 16(rp)
-	mr	r3, r11
-	blr
-
-L(n1):	srdi	s1, x0, 1
-	rldicl	r11, x0, 0, 63		C return value
-	ADDSUBE	x1, x1, x1		C pseudo-depends on x1
-	rldimi	s1, x1, 63, 0
-	std	s1, 0(rp)
-	mr	r3, r11
-	blr
-
-L(n2):	addi	rp, rp, -8
-	srdi	s0, x1, 1
-	rldicl	r11, x1, 0, 63		C return value
-	b	L(cj2)
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/rsh1sub_n.asm b/gmp/mpn/powerpc64/mode64/rsh1sub_n.asm
new file mode 100644
index 0000000000..e4c78ff2b5
--- /dev/null
+++ b/gmp/mpn/powerpc64/mode64/rsh1sub_n.asm
@@ -0,0 +1,102 @@
+dnl  PowerPC-64 mpn_rsh1sub_n -- rp[] = (up[] - vp[]) >> 1
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		cycles/limb
+C POWER3/PPC630:     2		(1.5 c/l should be possible)
+C POWER4/PPC970:     4		(2.0 c/l should be possible)
+
+C INPUT PARAMETERS
+C rp	r3
+C up	r4
+C vp	r5
+C n	r6
+
+define(`rp',`r3')
+define(`up',`r4')
+define(`vp',`r5')
+
+define(`s0',`r6')
+define(`s1',`r7')
+define(`x',`r0')
+define(`u0',`r8')
+define(`u1',`r9')
+define(`v0',`r10')
+define(`v1',`r11')
+
+
+ASM_START()
+PROLOGUE(mpn_rsh1sub_n)
+	mtctr	r6		C copy size to count register
+	addi	rp, rp, -8	C bias rp so the stores below address 8(rp) and 16(rp)
+
+	ld	u1, 0(up)
+	ld	v1, 0(vp)
+	subfc	x, v1, u1	C x = u1 - v1; CA = 1 means no borrow
+	rldicl	r12, x, 0, 63	C return value: least significant bit of the lowest difference
+	srdi	s1, x, 1	C s1 = difference >> 1; its top bit comes from the next limb
+
+	bdz	L(1)		C CTR reached zero (n = 1): only the borrow is left to merge
+
+	ld	u0, 8(up)
+	ld	v0, 8(vp)
+
+	bdz	L(end)		C CTR reached zero (n = 2): skip the loop
+
+L(oop):	ldu	u1, 16(up)	C load with update: up advances by 16 bytes
+	ldu	v1, 16(vp)	C likewise for vp
+	subfe	x, v0, u0	C x = u0 - v0 with borrow-in from the previous limb
+	srdi	s0, x, 1	C start the next result limb
+	rldimi	s1, x, 63, 0	C low bit of x becomes the top bit of s1
+	std	s1, 8(rp)	C previous result limb is now complete
+
+	bdz	L(exit)		C CTR reached zero: u1/v1 are the last limbs
+
+	ld	u0, 8(up)
+	ld	v0, 8(vp)
+	subfe	x, v1, u1	C second half of the 2-way unrolled body
+	srdi	s1, x, 1
+	rldimi	s0, x, 63, 0	C low bit of x becomes the top bit of s0
+	stdu	s0, 16(rp)	C store with update: rp advances by 16 bytes
+
+	bdnz	L(oop)		C loop while CTR is nonzero after the decrement
+
+L(end):	subfe	x, v0, u0	C tail when u0/v0 hold the last limbs
+	srdi	s0, x, 1
+	rldimi	s1, x, 63, 0
+	std	s1, 8(rp)
+
+	subfe	x, x, x		C x = CA - 1: zero if no borrow, all ones on borrow
+	rldimi	s0, x, 63, 0	C final borrow is the top bit of the highest result limb
+	std	s0, 16(rp)
+	mr	r3, r12		C return the bit saved in r12 above
+	blr
+
+L(exit):	subfe	x, v1, u1	C tail when u1/v1 hold the last limbs
+	srdi	s1, x, 1
+	rldimi	s0, x, 63, 0
+	stdu	s0, 16(rp)	C rp advances so the final store below uses 8(rp)
+
+L(1):	subfe	x, x, x		C x = CA - 1: zero if no borrow, all ones on borrow
+	rldimi	s1, x, 63, 0	C final borrow is the top bit of the highest result limb
+	std	s1, 8(rp)
+	mr	r3, r12		C return the bit saved in r12 above
+	blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/sqr_basecase.asm b/gmp/mpn/powerpc64/mode64/sqr_basecase.asm
deleted file mode 100644
index e76bb8878d..0000000000
--- a/gmp/mpn/powerpc64/mode64/sqr_basecase.asm
+++ /dev/null
@@ -1,863 +0,0 @@
-dnl  PowerPC-64 mpn_sqr_basecase.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 1999-2001, 2003-2006, 2008, 2010, 2011 Free Software Foundation,
-dnl  Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                  cycles/limb
-C POWER3/PPC630         6-18
-C POWER4/PPC970          8
-C POWER5                 8
-C POWER6                16.25
-C POWER7                 3.77
-
-C NOTES
-C  * This is very crude, cleanup!
-C  * Try to reduce the number of needed live registers.
-C  * Rewrite for POWER6 to use 8 consecutive muls, not 2 groups of 4.  The
-C    cost will be more live registers.
-C  * Rewrite for POWER7 to use addmul_2 building blocks; this will reduce code
-C    size a lot and speed things up perhaps 25%.
-C  * Use computed goto in order to compress the code.
-C  * Implement a larger final corner.
-C  * Schedule callee-saves register saves into other insns.  This could save
-C    about 5 cycles/call.  (We cannot analogously optimise the restores, since
-C    the sqr_diag_addlsh1 loop has no wind-down code as currently written.)
-C  * Should the alternating std/adde sequences be split?  Some pipelines handle
-C    adde poorly, and might sequentialise all these instructions.
-C  * The sqr_diag_addlsh1 loop was written for POWER6 and its preferences for
-C    adjacent integer multiply insns.  Except for the multiply insns, the code
-C    was not carefully optimised for POWER6 or any other CPU.
-C  * Perform cross-jumping in sqr_diag_addlsh1's feed-in code, into the loop.
-
-C INPUT PARAMETERS
-define(`rp', `r3')
-define(`up', `r4')
-define(`n',  `r5')
-
-define(`rp_outer', `r25')
-define(`up_outer', `r21')
-define(`rp_saved', `r22')
-define(`up_saved', `r23')
-define(`n_saved',  `r24')
-
-ASM_START()
-PROLOGUE(mpn_sqr_basecase)
-	cmpdi	cr0, n, 2
-	bge	cr0, L(ge2)
-	ld	r5, 0(up)	C n = 1
-	nop
-	mulld	r8, r5, r5	C weight 0
-	mulhdu	r9, r5, r5	C weight 1
-	std	r8, 0(rp)
-	std	r9, 8(rp)
-	blr
-	ALIGN(16)
-L(ge2):	bgt	cr0, L(gt2)
-	ld	r0, 0(up)	C n = 2
-	nop
-	mulld	r8, r0, r0	C u0 * u0
-	mulhdu	r9, r0, r0	C u0 * u0
-	ld	r6, 8(up)
-	mulld	r10, r6, r6	C u1 * u1
-	mulhdu	r11, r6, r6	C u1 * u1
-	mulld	r4, r6, r0	C u1 * u0
-	mulhdu	r5, r6, r0	C u1 * u0
-	addc	r4, r4, r4
-	adde	r5, r5, r5
-	addze	r11, r11
-	addc	r9, r9, r4
-	adde	r10, r10, r5
-	addze	r11, r11
-	std	r8, 0(rp)
-	std	r9, 8(rp)
-	std	r10, 16(rp)
-	std	r11, 24(rp)
-	blr
-
-	ALIGN(16)
-L(gt2):	std	r31,  -8(r1)
-	std	r30, -16(r1)
-	std	r29, -24(r1)
-	std	r28, -32(r1)
-	std	r27, -40(r1)
-	std	r26, -48(r1)
-	std	r25, -56(r1)
-	std	r24, -64(r1)
-	std	r23, -72(r1)
-	std	r22, -80(r1)
-	std	r21, -88(r1)
-
-	mr	rp_saved, rp
-	mr	up_saved, up
-	mr	n_saved, n
-	mr	rp_outer, rp
-	mr	up_outer, up
-
-	rldicl.	r0, n, 0,62	C r0 = n & 3, set cr0
-	cmpdi	cr6, r0, 2
-	addic	r7, n, 2	C compute count...
-	srdi	r7, r7, 2	C ...for ctr
-	mtctr	r7		C copy count into ctr
-	beq-	cr0, L(b0)
-	blt-	cr6, L(b1)
-	beq-	cr6, L(b2)
-
-L(b3):	ld	r6, 0(up)
-	ld	r9, 8(up)
-	ld	r27, 16(up)
-	addi	up, up, 24
-	li	r12, 0		C carry limb
-	bdz	L(em3)
-
-	ALIGN(16)
-L(tm3):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	ld	r9, 0(up)
-	ld	r27, 8(up)
-	adde	r0, r0, r12
-	adde	r7, r7, r26
-	mulld	r26, r9, r6
-	mulhdu	r10, r9, r6
-	mulld	r11, r27, r6
-	mulhdu	r12, r27, r6
-	ld	r9, 16(up)
-	ld	r27, 24(up)
-	std	r0, 8(rp)
-	adde	r26, r26, r8
-	std	r7, 16(rp)
-	adde	r11, r11, r10
-	std	r26, 24(rp)
-	addi	up, up, 32
-	std	r11, 32(rp)
-	addi	rp, rp, 32
-	bdnz	L(tm3)
-
-L(em3):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	adde	r0, r0, r12
-	adde	r7, r7, r26
-	std	r0, 8(rp)
-	std	r7, 16(rp)
-	addze	r8, r8
-	std	r8, 24(rp)
-	addi	n, n, 2
-	b	L(outer_loop)
-
-L(b0):	ld	r6, 0(up)
-	ld	r27, 8(up)
-	mulld	r7, r27, r6
-	mulhdu	r12, r27, r6
-	std	r7, 8(rp)
-	addi	rp, rp, 8
-	ld	r9, 16(up)
-	ld	r27, 24(up)
-	addi	up, up, 32
-	bdz	L(em0)
-
-	ALIGN(16)
-L(tm0):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	ld	r9, 0(up)
-	ld	r27, 8(up)
-	adde	r0, r0, r12
-	adde	r7, r7, r26
-	mulld	r26, r9, r6
-	mulhdu	r10, r9, r6
-	mulld	r11, r27, r6
-	mulhdu	r12, r27, r6
-	ld	r9, 16(up)
-	ld	r27, 24(up)
-	std	r0, 8(rp)
-	adde	r26, r26, r8
-	std	r7, 16(rp)
-	adde	r11, r11, r10
-	std	r26, 24(rp)
-	addi	up, up, 32
-	std	r11, 32(rp)
-	addi	rp, rp, 32
-	bdnz	L(tm0)
-
-L(em0):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	adde	r0, r0, r12
-	adde	r7, r7, r26
-	std	r0, 8(rp)
-	std	r7, 16(rp)
-	addze	r8, r8
-	std	r8, 24(rp)
-	addi	n, n, 2
-	b	L(outer_loop_ent_2)
-
-L(b1):	ld	r6, 0(up)
-	ld	r9, 8(up)
-	ld	r27, 16(up)
-	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r12, r27, r6
-	addc	r7, r7, r26
-	std	r0, 8(rp)
-	std	r7, 16(rp)
-	addi	rp, rp, 16
-	ld	r9, 24(up)
-	ld	r27, 32(up)
-	addi	up, up, 40
-	bdz	L(em1)
-
-	ALIGN(16)
-L(tm1):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	ld	r9, 0(up)
-	ld	r27, 8(up)
-	adde	r0, r0, r12
-	adde	r7, r7, r26
-	mulld	r26, r9, r6
-	mulhdu	r10, r9, r6
-	mulld	r11, r27, r6
-	mulhdu	r12, r27, r6
-	ld	r9, 16(up)
-	ld	r27, 24(up)
-	std	r0, 8(rp)
-	adde	r26, r26, r8
-	std	r7, 16(rp)
-	adde	r11, r11, r10
-	std	r26, 24(rp)
-	addi	up, up, 32
-	std	r11, 32(rp)
-	addi	rp, rp, 32
-	bdnz	L(tm1)
-
-L(em1):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	adde	r0, r0, r12
-	adde	r7, r7, r26
-	std	r0, 8(rp)
-	std	r7, 16(rp)
-	addze	r8, r8
-	std	r8, 24(rp)
-	addi	n, n, 2
-	b	L(outer_loop_ent_3)
-
-L(b2):	addi	r7, r7, -1	C FIXME
-	mtctr	r7		C FIXME
-	ld	r6, 0(up)
-	ld	r9, 8(up)
-	ld	r27, 16(up)
-	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	ld	r9, 24(up)
-	mulld	r11, r9, r6
-	mulhdu	r10, r9, r6
-	addc	r7, r7, r26
-	adde	r11, r11, r8
-	addze	r12, r10
-	std	r0, 8(rp)
-	std	r7, 16(rp)
-	std	r11, 24(rp)
-	addi	rp, rp, 24
-	ld	r9, 32(up)
-	ld	r27, 40(up)
-	addi	up, up, 48
-	bdz	L(em2)
-
-	ALIGN(16)
-L(tm2):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	ld	r9, 0(up)
-	ld	r27, 8(up)
-	adde	r0, r0, r12
-	adde	r7, r7, r26
-	mulld	r26, r9, r6
-	mulhdu	r10, r9, r6
-	mulld	r11, r27, r6
-	mulhdu	r12, r27, r6
-	ld	r9, 16(up)
-	ld	r27, 24(up)
-	std	r0, 8(rp)
-	adde	r26, r26, r8
-	std	r7, 16(rp)
-	adde	r11, r11, r10
-	std	r26, 24(rp)
-	addi	up, up, 32
-	std	r11, 32(rp)
-	addi	rp, rp, 32
-	bdnz	L(tm2)
-
-L(em2):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	adde	r0, r0, r12
-	adde	r7, r7, r26
-	std	r0, 8(rp)
-	std	r7, 16(rp)
-	addze	r8, r8
-	std	r8, 24(rp)
-	addi	n, n, 2
-	b	L(outer_loop_ent_0)
-
-
-L(outer_loop):
-	addi	n, n, -1
-	addi	up_outer, up_outer, 8
-	addi	rp_outer, rp_outer, 16
-
-	mr	up, up_outer
-	addi	rp, rp_outer, 8
-
-	srdi	r0, n, 2
-	mtctr	r0
-
-	bdz	L(outer_end)
-
-	ld	r6, 0(up)
-	ld	r9, 8(up)
-	ld	r27, 16(up)
-	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	ld	r9, 24(up)
-	ld	r28, 0(rp)
-	ld	r29, 8(rp)
-	ld	r30, 16(rp)
-	mulld	r11, r9, r6
-	mulhdu	r10, r9, r6
-	addc	r7, r7, r26
-	adde	r11, r11, r8
-	addze	r12, r10
-	addc	r0, r0, r28
-	std	r0, 0(rp)
-	adde	r7, r7, r29
-	std	r7, 8(rp)
-	adde	r11, r11, r30
-	std	r11, 16(rp)
-	addi	rp, rp, 24
-	ld	r9, 32(up)
-	ld	r27, 40(up)
-	addi	up, up, 48
-	bdz	L(ea1)
-
-	ALIGN(16)
-L(ta1):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6	C 9
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6	C 27
-	ld	r9, 0(up)
-	ld	r28, 0(rp)
-	ld	r27, 8(up)
-	ld	r29, 8(rp)
-	adde	r0, r0, r12	C 0 12
-	adde	r7, r7, r26	C 5 7
-	mulld	r26, r9, r6
-	mulhdu	r10, r9, r6	C 9
-	mulld	r11, r27, r6
-	mulhdu	r12, r27, r6	C 27
-	ld	r9, 16(up)
-	ld	r30, 16(rp)
-	ld	r27, 24(up)
-	ld	r31, 24(rp)
-	adde	r26, r26, r8	C 8 5
-	adde	r11, r11, r10	C 10 11
-	addze	r12, r12	C 12
-	addc	r0, r0, r28	C 0 28
-	std	r0, 0(rp)	C 0
-	adde	r7, r7, r29	C 7 29
-	std	r7, 8(rp)	C 7
-	adde	r26, r26, r30	C 5 30
-	std	r26, 16(rp)	C 5
-	adde	r11, r11, r31	C 11 31
-	std	r11, 24(rp)	C 11
-	addi	up, up, 32
-	addi	rp, rp, 32
-	bdnz	L(ta1)
-
-L(ea1):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	ld	r28, 0(rp)
-	ld	r29, 8(rp)
-	adde	r0, r0, r12
-	adde	r7, r7, r26
-	addze	r8, r8
-	addc	r0, r0, r28
-	std	r0, 0(rp)
-	adde	r7, r7, r29
-	std	r7, 8(rp)
-	addze	r8, r8
-	std	r8, 16(rp)
-
-L(outer_loop_ent_0):
-	addi	n, n, -1
-	addi	up_outer, up_outer, 8
-	addi	rp_outer, rp_outer, 16
-
-	mr	up, up_outer
-	addi	rp, rp_outer, 8
-
-	srdi	r0, n, 2
-	mtctr	r0
-
-	ld	r6, 0(up)
-	ld	r9, 8(up)
-	ld	r27, 16(up)
-	ld	r28, 0(rp)
-	ld	r29, 8(rp)
-	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	addc	r0, r0, r28
-	adde	r7, r7, r26
-	addze	r12, r8
-	std	r0, 0(rp)
-	adde	r7, r7, r29
-	std	r7, 8(rp)
-	addi	rp, rp, 16
-	ld	r9, 24(up)
-	ld	r27, 32(up)
-	addi	up, up, 40
-	bdz	L(ea0)
-
-	ALIGN(16)
-L(ta0):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6	C 9
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6	C 27
-	ld	r9, 0(up)
-	ld	r28, 0(rp)
-	ld	r27, 8(up)
-	ld	r29, 8(rp)
-	adde	r0, r0, r12	C 0 12
-	adde	r7, r7, r26	C 5 7
-	mulld	r26, r9, r6
-	mulhdu	r10, r9, r6	C 9
-	mulld	r11, r27, r6
-	mulhdu	r12, r27, r6	C 27
-	ld	r9, 16(up)
-	ld	r30, 16(rp)
-	ld	r27, 24(up)
-	ld	r31, 24(rp)
-	adde	r26, r26, r8	C 8 5
-	adde	r11, r11, r10	C 10 11
-	addze	r12, r12	C 12
-	addc	r0, r0, r28	C 0 28
-	std	r0, 0(rp)	C 0
-	adde	r7, r7, r29	C 7 29
-	std	r7, 8(rp)	C 7
-	adde	r26, r26, r30	C 5 30
-	std	r26, 16(rp)	C 5
-	adde	r11, r11, r31	C 11 31
-	std	r11, 24(rp)	C 11
-	addi	up, up, 32
-	addi	rp, rp, 32
-	bdnz	L(ta0)
-
-L(ea0):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	ld	r28, 0(rp)
-	ld	r29, 8(rp)
-	adde	r0, r0, r12
-	adde	r7, r7, r26
-	addze	r8, r8
-	addc	r0, r0, r28
-	std	r0, 0(rp)
-	adde	r7, r7, r29
-	std	r7, 8(rp)
-	addze	r8, r8
-	std	r8, 16(rp)
-
-L(outer_loop_ent_3):
-	addi	n, n, -1
-	addi	up_outer, up_outer, 8
-	addi	rp_outer, rp_outer, 16
-
-	mr	up, up_outer
-	addi	rp, rp_outer, 8
-
-	srdi	r0, n, 2
-	mtctr	r0
-
-	ld	r6, 0(up)
-	ld	r9, 8(up)
-	ld	r28, 0(rp)
-	mulld	r0, r9, r6
-	mulhdu	r12, r9, r6
-	addc	r0, r0, r28
-	std	r0, 0(rp)
-	addi	rp, rp, 8
-	ld	r9, 16(up)
-	ld	r27, 24(up)
-	addi	up, up, 32
-	bdz	L(ea3)
-
-	ALIGN(16)
-L(ta3):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6	C 9
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6	C 27
-	ld	r9, 0(up)
-	ld	r28, 0(rp)
-	ld	r27, 8(up)
-	ld	r29, 8(rp)
-	adde	r0, r0, r12	C 0 12
-	adde	r7, r7, r26	C 5 7
-	mulld	r26, r9, r6
-	mulhdu	r10, r9, r6	C 9
-	mulld	r11, r27, r6
-	mulhdu	r12, r27, r6	C 27
-	ld	r9, 16(up)
-	ld	r30, 16(rp)
-	ld	r27, 24(up)
-	ld	r31, 24(rp)
-	adde	r26, r26, r8	C 8 5
-	adde	r11, r11, r10	C 10 11
-	addze	r12, r12	C 12
-	addc	r0, r0, r28	C 0 28
-	std	r0, 0(rp)	C 0
-	adde	r7, r7, r29	C 7 29
-	std	r7, 8(rp)	C 7
-	adde	r26, r26, r30	C 5 30
-	std	r26, 16(rp)	C 5
-	adde	r11, r11, r31	C 11 31
-	std	r11, 24(rp)	C 11
-	addi	up, up, 32
-	addi	rp, rp, 32
-	bdnz	L(ta3)
-
-L(ea3):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	ld	r28, 0(rp)
-	ld	r29, 8(rp)
-	adde	r0, r0, r12
-	adde	r7, r7, r26
-	addze	r8, r8
-	addc	r0, r0, r28
-	std	r0, 0(rp)
-	adde	r7, r7, r29
-	std	r7, 8(rp)
-	addze	r8, r8
-	std	r8, 16(rp)
-
-
-L(outer_loop_ent_2):
-	addi	n, n, -1
-	addi	up_outer, up_outer, 8
-	addi	rp_outer, rp_outer, 16
-
-	mr	up, up_outer
-	addi	rp, rp_outer, 8
-
-	srdi	r0, n, 2
-	mtctr	r0
-
-	addic	r0, r0, 0
-	li	r12, 0		C cy_limb = 0
-	ld	r6, 0(up)
-	ld	r9, 8(up)
-	ld	r27, 16(up)
-	bdz	L(ea2)
-	addi	up, up, 24
-
-	ALIGN(16)
-L(ta2):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6	C 9
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6	C 27
-	ld	r9, 0(up)
-	ld	r28, 0(rp)
-	ld	r27, 8(up)
-	ld	r29, 8(rp)
-	adde	r0, r0, r12	C 0 12
-	adde	r7, r7, r26	C 5 7
-	mulld	r26, r9, r6
-	mulhdu	r10, r9, r6	C 9
-	mulld	r11, r27, r6
-	mulhdu	r12, r27, r6	C 27
-	ld	r9, 16(up)
-	ld	r30, 16(rp)
-	ld	r27, 24(up)
-	ld	r31, 24(rp)
-	adde	r26, r26, r8	C 8 5
-	adde	r11, r11, r10	C 10 11
-	addze	r12, r12	C 12
-	addc	r0, r0, r28	C 0 28
-	std	r0, 0(rp)	C 0
-	adde	r7, r7, r29	C 7 29
-	std	r7, 8(rp)	C 7
-	adde	r26, r26, r30	C 5 30
-	std	r26, 16(rp)	C 5
-	adde	r11, r11, r31	C 11 31
-	std	r11, 24(rp)	C 11
-	addi	up, up, 32
-	addi	rp, rp, 32
-	bdnz	L(ta2)
-
-L(ea2):	mulld	r0, r9, r6
-	mulhdu	r26, r9, r6
-	mulld	r7, r27, r6
-	mulhdu	r8, r27, r6
-	ld	r28, 0(rp)
-	ld	r29, 8(rp)
-	adde	r0, r0, r12
-	adde	r7, r7, r26
-	addze	r8, r8
-	addc	r0, r0, r28
-	std	r0, 0(rp)
-	adde	r7, r7, r29
-	std	r7, 8(rp)
-	addze	r8, r8
-	std	r8, 16(rp)
-
-	b	L(outer_loop)
-
-L(outer_end):
-	ld	r6, 0(up)
-	ld	r9, 8(up)
-	ld	r11, 0(rp)
-	mulld	r0, r9, r6
-	mulhdu	r8, r9, r6
-	addc	r0, r0, r11
-	std	r0, 0(rp)
-	addze	r8, r8
-	std	r8, 8(rp)
-
-define(`rp',  `rp_saved')
-define(`up',  `r5')
-define(`n',   `r6')
-define(`climb',	`r0')
-
-	addi	r4, rp_saved, 8
-	mr	r5, up_saved
-	mr	r6, n_saved
-
-	rldicl.	r0, n, 0,62		C r0 = n & 3, set cr0
-	cmpdi	cr6, r0, 2
-	addi	n, n, 2			C compute count...
-	srdi	n, n, 2			C ...for ctr
-	mtctr	n			C put loop count into ctr
-	beq	cr0, L(xb0)
-	blt	cr6, L(xb1)
-	beq	cr6, L(xb2)
-
-L(xb3):	ld	r6,   0(up)
-	ld	r7,   8(up)
-	ld	r12, 16(up)
-	addi	up, up, 24
-	mulld	r24, r6, r6
-	mulhdu	r25, r6, r6
-	mulld	r26, r7, r7
-	mulhdu	r27, r7, r7
-	mulld	r28, r12, r12
-	mulhdu	r29, r12, r12
-	ld	r10,  8(rp)
-	ld	r11, 16(rp)
-	ld	r6,  24(rp)
-	ld	r7,  32(rp)
-	addc	r10, r10, r10
-	adde	r11, r11, r11
-	adde	r6, r6, r6
-	adde	r7, r7, r7
-	addze	climb, r29
-	addc	r10, r10, r25
-	adde	r11, r11, r26
-	adde	r6, r6, r27
-	adde	r7, r7, r28
-	std	r24,  0(rp)
-	std	r10,  8(rp)
-	std	r11, 16(rp)
-	std	r6,  24(rp)
-	std	r7,  32(rp)
-	addi	rp, rp, 40
-	bdnz	L(top)
-	b	L(end)
-
-L(xb2):	ld	r6,  0(up)
-	ld	r7,  8(up)
-	addi	up, up, 16
-	mulld	r24, r6, r6
-	mulhdu	r25, r6, r6
-	mulld	r26, r7, r7
-	mulhdu	r27, r7, r7
-	ld	r10,  8(rp)
-	ld	r11, 16(rp)
-	addc	r10, r10, r10
-	adde	r11, r11, r11
-	addze	climb, r27
-	addc	r10, r10, r25
-	adde	r11, r11, r26
-	std	r24,  0(rp)
-	std	r10,  8(rp)
-	std	r11, 16(rp)
-	addi	rp, rp, 24
-	bdnz	L(top)
-	b	L(end)
-
-L(xb0):	ld	r6,   0(up)
-	ld	r7,   8(up)
-	ld	r12, 16(up)
-	ld	r23, 24(up)
-	addi	up, up, 32
-	mulld	r24, r6, r6
-	mulhdu	r25, r6, r6
-	mulld	r26, r7, r7
-	mulhdu	r27, r7, r7
-	mulld	r28, r12, r12
-	mulhdu	r29, r12, r12
-	mulld	r30, r23, r23
-	mulhdu	r31, r23, r23
-	ld	r10,  8(rp)
-	ld	r11, 16(rp)
-	ld	r6,  24(rp)
-	ld	r7,  32(rp)
-	ld	r12, 40(rp)
-	ld	r23, 48(rp)
-	addc	r10, r10, r10
-	adde	r11, r11, r11
-	adde	r6, r6, r6
-	adde	r7, r7, r7
-	adde	r12, r12, r12
-	adde	r23, r23, r23
-	addze	climb, r31
-	std	r24,  0(rp)
-	addc	r10, r10, r25
-	std	r10,  8(rp)
-	adde	r11, r11, r26
-	std	r11, 16(rp)
-	adde	r6, r6, r27
-	std	r6,  24(rp)
-	adde	r7, r7, r28
-	std	r7,  32(rp)
-	adde	r12, r12, r29
-	std	r12, 40(rp)
-	adde	r23, r23, r30
-	std	r23, 48(rp)
-	addi	rp, rp, 56
-	bdnz	L(top)
-	b	L(end)
-
-L(xb1):	ld	r6,  0(up)
-	addi	up, up, 8
-	mulld	r24, r6, r6
-	mulhdu	climb, r6, r6
-	std	r24, 0(rp)
-	addic	rp, rp, 8		C clear carry as side-effect
-
-	ALIGN(32)
-L(top):	ld	r6,   0(up)
-	ld	r7,   8(up)
-	ld	r12, 16(up)
-	ld	r23, 24(up)
-	addi	up, up, 32
-	mulld	r24, r6, r6
-	mulhdu	r25, r6, r6
-	mulld	r26, r7, r7
-	mulhdu	r27, r7, r7
-	mulld	r28, r12, r12
-	mulhdu	r29, r12, r12
-	mulld	r30, r23, r23
-	mulhdu	r31, r23, r23
-	ld	r8,   0(rp)
-	ld	r9,   8(rp)
-	adde	r8, r8, r8
-	adde	r9, r9, r9
-	ld	r10, 16(rp)
-	ld	r11, 24(rp)
-	adde	r10, r10, r10
-	adde	r11, r11, r11
-	ld	r6,  32(rp)
-	ld	r7,  40(rp)
-	adde	r6, r6, r6
-	adde	r7, r7, r7
-	ld	r12, 48(rp)
-	ld	r23, 56(rp)
-	adde	r12, r12, r12
-	adde	r23, r23, r23
-	addze	r31, r31
-	addc	r8, r8, climb
-	std	r8,   0(rp)
-	adde	r9, r9, r24
-	std	r9,   8(rp)
-	adde	r10, r10, r25
-	std	r10, 16(rp)
-	adde	r11, r11, r26
-	std	r11, 24(rp)
-	adde	r6, r6, r27
-	std	r6,  32(rp)
-	adde	r7, r7, r28
-	std	r7,  40(rp)
-	adde	r12, r12, r29
-	std	r12, 48(rp)
-	adde	r23, r23, r30
-	std	r23, 56(rp)
-	mr	climb, r31
-	addi	rp, rp, 64
-	bdnz	L(top)
-
-L(end):	addze	climb, climb
-	std	climb,  0(rp)
-
-	ld	r31,  -8(r1)
-	ld	r30, -16(r1)
-	ld	r29, -24(r1)
-	ld	r28, -32(r1)
-	ld	r27, -40(r1)
-	ld	r26, -48(r1)
-	ld	r25, -56(r1)
-	ld	r24, -64(r1)
-	ld	r23, -72(r1)
-	ld	r22, -80(r1)
-	ld	r21, -88(r1)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/sublsh1_n.asm b/gmp/mpn/powerpc64/mode64/sublsh1_n.asm
new file mode 100644
index 0000000000..69e0dfa5a2
--- /dev/null
+++ b/gmp/mpn/powerpc64/mode64/sublsh1_n.asm
@@ -0,0 +1,83 @@
+dnl  PowerPC-64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		cycles/limb
+C POWER3/PPC630:     2		(1.5 c/l should be possible)
+C POWER4/PPC970:     4		(2.0 c/l should be possible)
+
+C INPUT PARAMETERS
+C rp	r3
+C up	r4
+C vp	r5
+C n	r6
+
+define(`rp',`r3')
+define(`up',`r4')
+define(`vp',`r5')
+
+define(`s0',`r6')
+define(`s1',`r7')
+define(`u0',`r8')
+define(`v0',`r10')
+define(`v1',`r11')
+C r3 returns the final borrow plus the bit shifted out of the last v limb.
+ASM_START()
+PROLOGUE(mpn_sublsh1_n)
+	mtctr	r6		C put n in ctr; r6 is reused as s0 below
+
+	ld	v0, 0(vp)	C load v limb
+	ld	u0, 0(up)	C load u limb
+	addic	up, up, -8	C update up; set cy, i.e. no borrow (carries for up >= 8)
+	addi	rp, rp, -8	C update rp
+	sldi	s1, v0, 1	C s1 = first v limb shifted left by one
+	bdz	L(end)		C If done, skip loop
+
+L(oop):	ld	v1, 8(vp)	C load v limb
+	subfe	s1, s1, u0	C s1 = u0 - s1 - borrow, set cy
+	std	s1, 8(rp)	C store result limb
+	srdi	s0, v0, 63	C shift down previous v limb
+	ldu	u0, 16(up)	C load u limb and update up
+	rldimi	s0, v1, 1, 0	C left shift v limb and merge with prev v limb
+
+	bdz	L(exit)		C decrement ctr and exit if done
+
+	ldu	v0, 16(vp)	C load v limb and update vp
+	subfe	s0, s0, u0	C s0 = u0 - s0 - borrow, set cy
+	stdu	s0, 16(rp)	C store result limb and update rp
+	srdi	s1, v1, 63	C shift down previous v limb
+	ld	u0, 8(up)	C load u limb
+	rldimi	s1, v0, 1, 0	C left shift v limb and merge with prev v limb
+
+	bdnz	L(oop)		C decrement ctr and loop back
+
+L(end):	subfe	r7, s1, u0	C last limb: r7 = u0 - s1 - borrow
+	std	r7, 8(rp)	C store last result limb
+	srdi	r3, v0, 63	C r3 = bit shifted out of the last v limb
+	subfze	r3, r3		C r3 = ~r3 + CA
+	neg	r3, r3		C r3 = shifted-out bit + borrow
+	blr
+L(exit):	subfe	r7, s0, u0	C last limb: r7 = u0 - s0 - borrow
+	std	r7, 16(rp)	C store last result limb
+	srdi	r3, v1, 63	C r3 = bit shifted out of the last v limb
+	subfze	r3, r3		C r3 = ~r3 + CA
+	neg	r3, r3		C r3 = shifted-out bit + borrow
+	blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc64/mode64/submul_1.asm b/gmp/mpn/powerpc64/mode64/submul_1.asm
new file mode 100644
index 0000000000..3c1e8a5c82
--- /dev/null
+++ b/gmp/mpn/powerpc64/mode64/submul_1.asm
@@ -0,0 +1,62 @@
+dnl  PowerPC-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+dnl  the result from a second limb vector.
+
+dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006 Free Software
+dnl  Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		cycles/limb
+C POWER3/PPC630:    6-18
+C POWER4/PPC970:    10
+C POWER5:           10.5
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`vl', `r6')
+define(`cy', `r7')
+C {rp,n} -= {up,n} * vl; cy is the carry-in limb, r3 returns the carry-out limb.
+ASM_START()
+PROLOGUE(mpn_submul_1)
+	li	cy, 0			C cy_limb = 0; no blr, so presumably runs on into mpn_submul_1c
+
+PROLOGUE(mpn_submul_1c)
+	mtctr	n			C loop count = n
+	addic	r0, r0, 0		C adds 0: clears CA, r0 unchanged
+	addi	rp, rp, -8		C bias rp for the 8(rp) load and the stdu
+	ALIGN(16)
+L(top):
+	ld	r0, 0(up)		C r0 = next u limb
+	ld	r10, 8(rp)		C r10 = next r limb
+	mulld	r9, r0, vl		C low half of u limb * vl
+	mulhdu	r5, r0, vl		C high half; r5 (n) is free, n is already in ctr
+	adde	r9, r9, cy		C add carry limb and the borrow held in CA
+	addi	up, up, 8
+	addze	cy, r5			C next carry limb = high half + carry
+	subf	r12, r9, r10		C r12 = r10 - r9
+	not	r0, r10
+	addc	r11, r9, r0		C inverted carry from subf: CA = 1 on borrow (r11 is not read)
+	stdu	r12, 8(rp)		C store result limb, advance rp
+	bdnz	L(top)
+
+	addze	r3, cy			C return carry limb + final borrow
+	blr
+EPILOGUE(mpn_submul_1)
+EPILOGUE(mpn_submul_1c)
diff --git a/gmp/mpn/powerpc64/p6/lshift.asm b/gmp/mpn/powerpc64/p6/lshift.asm
deleted file mode 100644
index 1a200fb346..0000000000
--- a/gmp/mpn/powerpc64/p6/lshift.asm
+++ /dev/null
@@ -1,132 +0,0 @@
-dnl  PowerPC-64 mpn_lshift -- rp[] = up[] << cnt
-
-dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		    cycles/limb
-C POWER3/PPC630		 ?
-C POWER4/PPC970		 ?
-C POWER5		 2.25
-C POWER6		 4
-
-C TODO
-C  * Micro-optimise header code
-C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4236
-C    bytes, 4-way code would become about 50% larger.
-
-C INPUT PARAMETERS
-define(`rp_param',  `r3')
-define(`up',  `r4')
-define(`n',   `r5')
-define(`cnt', `r6')
-
-define(`tnc',`r0')
-define(`retval',`r3')
-define(`rp',  `r7')
-
-ASM_START()
-PROLOGUE(mpn_lshift,toc)
-
-ifdef(`HAVE_ABI_mode32',`
-	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
-')
-	mflr	r12
-	sldi	r8, n, 3
-	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block
-	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
-	add	up, up, r8		C make up point at end of up[]
-	add	r11, r11, r10		C address of L(oN) for N = cnt
-	srdi	r10, n, 1
-	add	rp, rp_param, r8	C make rp point at end of rp[]
-	subfic	tnc, cnt, 64
-	rlwinm.	r8, n, 0,31,31		C extract bit 0
-	mtctr	r10
-	beq	L(evn)
-
-L(odd):	ld	r9, -8(up)
-	cmpdi	cr0, n, 1		C n = 1?
-	beq	L(1)
-	ld	r8, -16(up)
-	addi	r11, r11, -84		C L(o1) - L(e1) - 64
-	mtlr	r11
-	srd	r3, r9, tnc		C retval
-	addi	up, up, 8
-	addi	rp, rp, -8
-	blr				C branch to L(oN)
-
-L(evn):	ld	r8, -8(up)
-	ld	r9, -16(up)
-	addi	r11, r11, -64
-	mtlr	r11
-	srd	r3, r8, tnc		C retval
-	blr				C branch to L(eN)
-
-L(1):	srd	r3, r9, tnc		C retval
-	sld	r8, r9, cnt
-	std	r8, -8(rp)
-	mtlr	r12
-ifdef(`HAVE_ABI_mode32',
-`	mr	r4, r3
-	srdi	r3, r3, 32
-')
-	blr
-
-
-define(SHIFT,`
-L(lo$1):ld	r8, -24(up)
-	std	r11, -8(rp)
-	addi	rp, rp, -16
-L(o$1):	srdi	r10, r8, eval(64-$1)
-	rldimi	r10, r9, $1, 0
-	ld	r9, -32(up)
-	addi	up, up, -16
-	std	r10, 0(rp)
-L(e$1):	srdi	r11, r9, eval(64-$1)
-	rldimi	r11, r8, $1, 0
-	bdnz	L(lo$1)
-	std	r11, -8(rp)
-	sldi	r10, r9, $1
-	b	L(com)
-	nop
-	nop
-')
-
-	ALIGN(64)
-forloop(`i',1,63,`SHIFT(i)')
-
-L(com):	std	r10, -16(rp)
-	mtlr	r12
-ifdef(`HAVE_ABI_mode32',
-`	mr	r4, r3
-	srdi	r3, r3, 32
-')
-	blr
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/powerpc64/p6/lshiftc.asm b/gmp/mpn/powerpc64/p6/lshiftc.asm
deleted file mode 100644
index e4b3caaab8..0000000000
--- a/gmp/mpn/powerpc64/p6/lshiftc.asm
+++ /dev/null
@@ -1,136 +0,0 @@
-dnl  PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt
-
-dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		    cycles/limb
-C POWER3/PPC630		 ?
-C POWER4/PPC970		 ?
-C POWER5		 2.25
-C POWER6		 4
-
-C TODO
-C  * Micro-optimise header code
-C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4236
-C    bytes, 4-way code would become about 50% larger.
-
-C INPUT PARAMETERS
-define(`rp_param',  `r3')
-define(`up',  `r4')
-define(`n',   `r5')
-define(`cnt', `r6')
-
-define(`tnc',`r0')
-define(`retval',`r3')
-define(`rp',  `r7')
-
-ASM_START()
-PROLOGUE(mpn_lshiftc,toc)
-
-ifdef(`HAVE_ABI_mode32',`
-	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
-')
-	mflr	r12
-	sldi	r8, n, 3
-	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block
-	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
-	add	up, up, r8		C make up point at end of up[]
-	add	r11, r11, r10		C address of L(oN) for N = cnt
-	srdi	r10, n, 1
-	add	rp, rp_param, r8	C make rp point at end of rp[]
-	subfic	tnc, cnt, 64
-	rlwinm.	r8, n, 0,31,31		C extract bit 0
-	mtctr	r10
-	beq	L(evn)
-
-L(odd):	ld	r9, -8(up)
-	cmpdi	cr0, n, 1		C n = 1?
-	beq	L(1)
-	ld	r8, -16(up)
-	addi	r11, r11, -88		C L(o1) - L(e1) - 64
-	mtlr	r11
-	srd	r3, r9, tnc		C retval
-	addi	up, up, 8
-	addi	rp, rp, -8
-	blr				C branch to L(oN)
-
-L(evn):	ld	r8, -8(up)
-	ld	r9, -16(up)
-	addi	r11, r11, -64
-	mtlr	r11
-	srd	r3, r8, tnc		C retval
-	blr				C branch to L(eN)
-
-L(1):	srd	r3, r9, tnc		C retval
-	sld	r8, r9, cnt
-	nor	r8, r8, r8
-	std	r8, -8(rp)
-	mtlr	r12
-ifdef(`HAVE_ABI_mode32',
-`	mr	r4, r3
-	srdi	r3, r3, 32
-')
-	blr
-
-
-define(SHIFT,`
-L(lo$1):ld	r8, -24(up)
-	nor	r11, r11, r11
-	std	r11, -8(rp)
-	addi	rp, rp, -16
-L(o$1):	srdi	r10, r8, eval(64-$1)
-	rldimi	r10, r9, $1, 0
-	ld	r9, -32(up)
-	addi	up, up, -16
-	nor	r10, r10, r10
-	std	r10, 0(rp)
-L(e$1):	srdi	r11, r9, eval(64-$1)
-	rldimi	r11, r8, $1, 0
-	bdnz	L(lo$1)
-	sldi	r10, r9, $1
-	b	L(com)
-	nop
-')
-
-	ALIGN(64)
-forloop(`i',1,63,`SHIFT(i)')
-
-L(com):	nor	r11, r11, r11
-	nor	r10, r10, r10
-	std	r11, -8(rp)
-	std	r10, -16(rp)
-	mtlr	r12
-ifdef(`HAVE_ABI_mode32',
-`	mr	r4, r3
-	srdi	r3, r3, 32
-')
-	blr
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/powerpc64/p6/rshift.asm b/gmp/mpn/powerpc64/p6/rshift.asm
deleted file mode 100644
index 9e848c1fc7..0000000000
--- a/gmp/mpn/powerpc64/p6/rshift.asm
+++ /dev/null
@@ -1,131 +0,0 @@
-dnl  PowerPC-64 mpn_rshift -- rp[] = up[] << cnt
-
-dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		    cycles/limb
-C POWER3/PPC630		 ?
-C POWER4/PPC970		 ?
-C POWER5		 2
-C POWER6		 3.5  (mysteriously 3.0 for cnt=1)
-
-C TODO
-C  * Micro-optimise header code
-C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4248
-C    bytes, 4-way code would become about 50% larger.
-
-C INPUT PARAMETERS
-define(`rp_param',  `r3')
-define(`up',  `r4')
-define(`n',   `r5')
-define(`cnt', `r6')
-
-define(`tnc',`r0')
-define(`retval',`r3')
-define(`rp',  `r7')
-
-ASM_START()
-PROLOGUE(mpn_rshift,toc)
-
-ifdef(`HAVE_ABI_mode32',`
-	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
-')
-	mflr	r12
-	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
-	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block
-	add	r11, r11, r10		C address of L(oN) for N = cnt
-	srdi	r10, n, 1
-	mr	rp, rp_param
-	subfic	tnc, cnt, 64
-	rlwinm.	r8, n, 0,31,31		C extract bit 0
-	mtctr	r10
-	beq	L(evn)
-
-L(odd):	ld	r9, 0(up)
-	cmpdi	cr0, n, 1		C n = 1?
-	beq	L(1)
-	ld	r8, 8(up)
-	addi	r11, r11, -84		C L(o1) - L(e1) - 64
-	mtlr	r11
-	sld	r3, r9, tnc		C retval
-	addi	up, up, 8
-	addi	rp, rp, 8
-	blr				C branch to L(oN)
-
-L(evn):	ld	r8, 0(up)
-	ld	r9, 8(up)
-	addi	r11, r11, -64
-	mtlr	r11
-	sld	r3, r8, tnc		C retval
-	addi	up, up, 16
-	blr				C branch to L(eN)
-
-L(1):	sld	r3, r9, tnc		C retval
-	srd	r8, r9, cnt
-	std	r8, 0(rp)
-	mtlr	r12
-ifdef(`HAVE_ABI_mode32',
-`	mr	r4, r3
-	srdi	r3, r3, 32
-')
-	blr
-
-
-define(SHIFT,`
-L(lo$1):ld	r8, 0(up)
-	std	r11, 0(rp)
-	addi	rp, rp, 16
-L(o$1):	srdi	r10, r9, $1
-	rldimi	r10, r8, eval(64-$1), 0
-	ld	r9, 8(up)
-	addi	up, up, 16
-	std	r10, -8(rp)
-L(e$1):	srdi	r11, r8, $1
-	rldimi	r11, r9, eval(64-$1), 0
-	bdnz	L(lo$1)
-	std	r11, 0(rp)
-	srdi	r10, r9, $1
-	b	L(com)
-	nop
-	nop
-')
-
-	ALIGN(64)
-forloop(`i',1,63,`SHIFT(i)')
-
-L(com):	std	r10, 8(rp)
-	mtlr	r12
-ifdef(`HAVE_ABI_mode32',
-`	mr	r4, r3
-	srdi	r3, r3, 32
-')
-	blr
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/powerpc64/p7/copyd.asm b/gmp/mpn/powerpc64/p7/copyd.asm
deleted file mode 100644
index f04ca586e8..0000000000
--- a/gmp/mpn/powerpc64/p7/copyd.asm
+++ /dev/null
@@ -1,128 +0,0 @@
-dnl  PowerPC-64 mpn_copyd.
-
-dnl  Copyright 2004, 2005, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                  cycles/limb
-C POWER3/PPC630          ?
-C POWER4/PPC970          ?
-C POWER5                 ?
-C POWER6                 1.25
-C POWER7                 1.09
-
-C INPUT PARAMETERS
-define(`rp',	`r3')
-define(`up',	`r4')
-define(`n',	`r5')
-
-ASM_START()
-PROLOGUE(mpn_copyd)
-
-ifdef(`HAVE_ABI_mode32',
-`	rldicl	n, n, 0,32')
-
-	sldi	r0, n, 3
-	add	up, up, r0		C point at u[] end
-	add	rp, rp, r0		C point at r[] end
-
-	cmpdi	cr0, n, 4
-	blt	L(sml)
-
-	addi	r10, n, 4
-	srdi	r10, r10, 3
-	mtctr	r10
-
-	andi.	r0, n, 1
-	rlwinm	r11, n, 0,30,30
-	rlwinm	r12, n, 0,29,29
-	cmpdi	cr6, r11, 0
-	cmpdi	cr7, r12, 0
-
-	beq	cr0, L(xx0)
-L(xx1):	ld	r6, -8(up)
-	addi	up, up, -8
-	std	r6, -8(rp)
-	addi	rp, rp, -8
-
-L(xx0):	bne	cr6, L(x10)
-L(x00):	ld	r6, -8(up)
-	ld	r7, -16(up)
-	bne	cr7, L(100)
-L(000):	addi	rp, rp, 32
-	b	L(lo0)
-L(100):	addi	up, up, 32
-	b	L(lo4)
-L(x10):	ld	r8, -8(up)
-	ld	r9, -16(up)
-	bne	cr7, L(110)
-L(010):	addi	up, up, -16
-	addi	rp, rp, 16
-	b	L(lo2)
-L(110):	addi	up, up, 16
-	addi	rp, rp, 48
-	b	L(lo6)
-
-L(sml):	cmpdi	cr0, n, 0
-	beqlr-	cr0
-	mtctr	n
-L(t):	ld	r6, -8(up)
-	addi	up, up, -8
-	std	r6, -8(rp)
-	addi	rp, rp, -8
-	bdnz	L(t)
-	blr
-
-	ALIGN(32)
-L(top):	std	r6, -8(rp)
-	std	r7, -16(rp)
-L(lo2):	ld	r6, -8(up)
-	ld	r7, -16(up)
-	std	r8, -24(rp)
-	std	r9, -32(rp)
-L(lo0):	ld	r8, -24(up)
-	ld	r9, -32(up)
-	std	r6, -40(rp)
-	std	r7, -48(rp)
-L(lo6):	ld	r6, -40(up)
-	ld	r7, -48(up)
-	std	r8, -56(rp)
-	std	r9, -64(rp)
-	addi	rp, rp, -64
-L(lo4):	ld	r8, -56(up)
-	ld	r9, -64(up)
-	addi	up, up, -64
-	bdnz	L(top)
-
-L(end):	std	r6, -8(rp)
-	std	r7, -16(rp)
-	std	r8, -24(rp)
-	std	r9, -32(rp)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/p7/copyi.asm b/gmp/mpn/powerpc64/p7/copyi.asm
deleted file mode 100644
index 854cf9f809..0000000000
--- a/gmp/mpn/powerpc64/p7/copyi.asm
+++ /dev/null
@@ -1,129 +0,0 @@
-dnl  PowerPC-64 mpn_copyi.
-
-dnl  Copyright 2004, 2005, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                  cycles/limb
-C POWER3/PPC630          ?
-C POWER4/PPC970          ?
-C POWER5                 ?
-C POWER6                 1.25
-C POWER7                 1.09
-
-C INPUT PARAMETERS
-define(`rp',	`r3')
-define(`up',	`r4')
-define(`n',	`r5')
-
-C TODO
-C  * Try rolling the two loop leading std to the end, allowing the code to
-C    handle also n = 2.
-C  * Consider using 4 pointers, schedule ptr update early wrt use.
-
-ASM_START()
-PROLOGUE(mpn_copyi)
-
-ifdef(`HAVE_ABI_mode32',
-`	rldicl	n, n, 0,32')
-
-	cmpdi	cr0, n, 4
-	blt	L(sml)
-
-	addi	r10, n, 4
-	srdi	r10, r10, 3
-	mtctr	r10
-
-	andi.	r0, n, 1
-	rlwinm	r11, n, 0,30,30
-	rlwinm	r12, n, 0,29,29
-	cmpdi	cr6, r11, 0
-	cmpdi	cr7, r12, 0
-
-	beq	cr0, L(xx0)
-L(xx1):	ld	r6, 0(up)
-	addi	up, up, 8
-	std	r6, 0(rp)
-	addi	rp, rp, 8
-
-L(xx0):	bne	cr6, L(x10)
-L(x00):	ld	r6, 0(up)
-	ld	r7, 8(up)
-	bne	cr7, L(100)
-L(000):	addi	rp, rp, -32
-	b	L(lo0)
-L(100):	addi	up, up, -32
-	b	L(lo4)
-L(x10):	ld	r8, 0(up)
-	ld	r9, 8(up)
-	bne	cr7, L(110)
-L(010):	addi	up, up, 16
-	addi	rp, rp, -16
-	b	L(lo2)
-L(110):	addi	up, up, -16
-	addi	rp, rp, -48
-	b	L(lo6)
-
-L(sml):	cmpdi	cr0, n, 0
-	beqlr-	cr0
-	mtctr	n
-L(t):	ld	r6, 0(up)
-	addi	up, up, 8
-	std	r6, 0(rp)
-	addi	rp, rp, 8
-	bdnz	L(t)
-	blr
-
-	ALIGN(32)
-L(top):	std	r6, 0(rp)
-	std	r7, 8(rp)
-L(lo2):	ld	r6, 0(up)
-	ld	r7, 8(up)
-	std	r8, 16(rp)
-	std	r9, 24(rp)
-L(lo0):	ld	r8, 16(up)
-	ld	r9, 24(up)
-	std	r6, 32(rp)
-	std	r7, 40(rp)
-L(lo6):	ld	r6, 32(up)
-	ld	r7, 40(up)
-	std	r8, 48(rp)
-	std	r9, 56(rp)
-	addi	rp, rp, 64
-L(lo4):	ld	r8, 48(up)
-	ld	r9, 56(up)
-	addi	up, up, 64
-	bdnz	L(top)
-
-L(end):	std	r6, 0(rp)
-	std	r7, 8(rp)
-	std	r8, 16(rp)
-	std	r9, 24(rp)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/p7/hamdist.asm b/gmp/mpn/powerpc64/p7/hamdist.asm
deleted file mode 100644
index 5af98946f7..0000000000
--- a/gmp/mpn/powerpc64/p7/hamdist.asm
+++ /dev/null
@@ -1,110 +0,0 @@
-dnl  PowerPC-64 mpn_hamdist.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                   cycles/limb
-C POWER3/PPC630          -
-C POWER4/PPC970          -
-C POWER5                 -
-C POWER6                 -
-C POWER7                 2.87
-
-define(`up', r3)
-define(`vp', r4)
-define(`n',  r5)
-
-ASM_START()
-PROLOGUE(mpn_hamdist)
-	std	r30, -16(r1)
-	std	r31, -8(r1)
-
-	addi	r0, n, 1
-ifdef(`HAVE_ABI_mode32',
-`	rldicl	r0, r0, 63,33',	C ...branch count
-`	srdi	r0, r0, 1')	C ...for ctr
-	mtctr	r0
-
-	andi.	r0, n, 1
-
-	li	r0, 0
-	li	r12, 0
-
-	beq	L(evn)
-
-L(odd):	ld	r6, 0(up)
-	addi	up, up, 8
-	ld	r8, 0(vp)
-	addi	vp, vp, 8
-	xor	r10, r6, r8
-	popcntd	r0, r10
-	bdz	L(e1)
-
-L(evn):	ld	r6, 0(up)
-	ld	r8, 0(vp)
-	ld	r7, 8(up)
-	ld	r9, 8(vp)
-	xor	r10, r6, r8
-	addi	up, up, 16
-	addi	vp, vp, 16
-	li	r30, 0
-	li	r31, 0
-	bdz	L(end)
-
-	nop
-	nop
-C	ALIGN(16)
-L(top):	add	r0, r0, r30
-	ld	r6, 0(up)
-	ld	r8, 0(vp)
-	xor	r11, r7, r9
-	popcntd	r30, r10
-	add	r12, r12, r31
-	ld	r7, 8(up)
-	ld	r9, 8(vp)
-	xor	r10, r6, r8
-	popcntd	r31, r11
-	addi	up, up, 16
-	addi	vp, vp, 16
-	bdnz	L(top)
-
-L(end):	add	r0, r0, r30
-	xor	r11, r7, r9
-	popcntd	r30, r10
-	add	r12, r12, r31
-	popcntd	r31, r11
-
-	add	r0, r0, r30
-	add	r12, r12, r31
-L(e1):	add	r3, r0, r12
-	ld	r30, -16(r1)
-	ld	r31, -8(r1)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/p7/popcount.asm b/gmp/mpn/powerpc64/p7/popcount.asm
deleted file mode 100644
index eac72a6493..0000000000
--- a/gmp/mpn/powerpc64/p7/popcount.asm
+++ /dev/null
@@ -1,90 +0,0 @@
-dnl  PowerPC-64 mpn_popcount.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                   cycles/limb
-C POWER3/PPC630          -
-C POWER4/PPC970          -
-C POWER5                 -
-C POWER6                 -
-C POWER7                 2
-
-define(`up', r3)
-define(`n',  r4)
-
-ASM_START()
-PROLOGUE(mpn_popcount)
-	addi	r0, n, 1
-ifdef(`HAVE_ABI_mode32',
-`	rldicl	r0, r0, 63,33',	C ...branch count
-`	srdi	r0, r0, 1')	C ...for ctr
-	mtctr	r0
-
-	andi.	r0, n, 1
-
-	li	r0, 0
-	li	r12, 0
-	beq	L(evn)
-
-L(odd):	ld	r4, 0(up)
-	addi	up, up, 8
-	popcntd	r0, r4
-	bdz	L(e1)
-
-L(evn):	ld	r4, 0(up)
-	ld	r5, 8(up)
-	popcntd	r8, r4
-	popcntd	r9, r5
-	bdz	L(e2)
-
-	ld	r4, 16(up)
-	ld	r5, 24(up)
-	bdz	L(e4)
-	addi	up, up, 32
-
-L(top):	add	r0, r0, r8
-	popcntd	r8, r4
-	ld	r4, 0(up)
-	add	r12, r12, r9
-	popcntd	r9, r5
-	ld	r5, 8(up)
-	addi	up, up, 16
-	bdnz	L(top)
-
-L(e4):	add	r0, r0, r8
-	popcntd	r8, r4
-	add	r12, r12, r9
-	popcntd	r9, r5
-L(e2):	add	r0, r0, r8
-	add	r12, r12, r9
-L(e1):	add	r3, r0, r12
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/rshift.asm b/gmp/mpn/powerpc64/rshift.asm
index 7654a16ae8..e73640d08c 100644
--- a/gmp/mpn/powerpc64/rshift.asm
+++ b/gmp/mpn/powerpc64/rshift.asm
@@ -1,207 +1,107 @@
 dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
 
-dnl  Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 
-include(`../config.m4')
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
-C                   cycles/limb
-C POWER3/PPC630          ?
-C POWER4/PPC970          ?
-C POWER5                 2.25
-C POWER6                 9.75
-C POWER7                 2.15
+include(`../config.m4')
 
-C TODO
-C  * Try to reduce the number of needed live registers
-C  * Micro-optimise header code
-C  * Keep in synch with lshift.asm and lshiftc.asm
+C		cycles/limb
+C POWER3/PPC630:     1.5
+C POWER4/PPC970:     3.0
 
 C INPUT PARAMETERS
-define(`rp',  `r3')
-define(`up',  `r4')
-define(`n',   `r5')
-define(`cnt', `r6')
+define(`rp',`r3')
+define(`up',`r4')
+define(`n',`r5')
+define(`cnt',`r6')
+
+define(`tnc',`r5')
+define(`v0',`r0')
+define(`v1',`r7')
+define(`u0',`r8')
+define(`u1',`r9')
+define(`h0',`r10')
+define(`h1',`r11')
 
-define(`tnc',`r0')
-define(`u0',`r30')
-define(`u1',`r31')
-define(`retval',`r5')
 
 ASM_START()
 PROLOGUE(mpn_rshift)
-	std	r31, -8(r1)
-	std	r30, -16(r1)
-	subfic	tnc, cnt, 64
-C	sldi	r30, n, 3	C byte count corresponding to n
-C	add	rp, rp, r30	C rp = rp + n
-C	add	up, up, r30	C up = up + n
-	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
-	cmpdi	cr6, r30, 2
-	addi	r31, n, 3	C compute count...
-	ld	r10, 0(up)	C load 1st limb for b00...b11
-	sld	retval, r10, tnc
 ifdef(`HAVE_ABI_mode32',
-`	rldicl	r31, r31, 62,34',	C ...branch count
-`	srdi	r31, r31, 2')	C ...for ctr
-	mtctr	r31		C copy count into ctr
-	beq	cr0, L(b00)
-	blt	cr6, L(b01)
-	ld	r11, 8(up)	C load 2nd limb for b10 and b11
-	beq	cr6, L(b10)
-
-	ALIGN(16)
-L(b11):	srd	r8, r10, cnt
-	sld	r9, r11, tnc
-	ld	u1, 16(up)
-	addi	up, up, 24
-	srd	r12, r11, cnt
-	sld	r7, u1, tnc
+`	rldicl	n, n, 0, 32')	C zero extend n
+	mtctr	n		C copy n to count register
 	addi	rp, rp, -16
-	bdnz	L(gt3)
+	subfic	tnc, cnt, 64	C reverse shift count
 
-	or	r11, r8, r9
-	srd	r8, u1, cnt
-	b	L(cj3)
+	ld	u0, 0(up)
+	srd	h0, u0, cnt
+	sld	r12, u0, tnc	C return value
+	bdz	L(1)		C jump for n = 1
 
-	ALIGN(16)
-L(gt3):	ld	u0, 0(up)
-	or	r11, r8, r9
-	srd	r8, u1, cnt
-	sld	r9, u0, tnc
 	ld	u1, 8(up)
-	or	r10, r12, r7
-	b	L(L11)
-
-	ALIGN(32)
-L(b10):	srd	r12, r10, cnt
-	addi	rp, rp, -24
-	sld	r7, r11, tnc
-	bdnz	L(gt2)
-
-	srd	r8, r11, cnt
-	or	r10, r12, r7
-	b	L(cj2)
-
-L(gt2):	ld	u0, 16(up)
-	srd	r8, r11, cnt
-	sld	r9, u0, tnc
-	ld	u1, 24(up)
-	or	r10, r12, r7
-	srd	r12, u0, cnt
-	sld	r7, u1, tnc
-	ld	u0, 32(up)
-	or	r11, r8, r9
-	addi	up, up, 16
-	b	L(L10)
-
-	ALIGN(16)
-L(b00):	ld	u1, 8(up)
-	srd	r12, r10, cnt
-	sld	r7, u1, tnc
-	ld	u0, 16(up)
-	srd	r8, u1, cnt
-	sld	r9, u0, tnc
-	ld	u1, 24(up)
-	or	r10, r12, r7
-	srd	r12, u0, cnt
-	sld	r7, u1, tnc
-	addi	rp, rp, -8
-	bdz	L(cj4)
-
-L(gt4):	addi	up, up, 32
-	ld	u0, 0(up)
-	or	r11, r8, r9
-	b	L(L00)
-
-	ALIGN(16)
-L(b01):	bdnz	L(gt1)
-	srd	r8, r10, cnt
-	std	r8, 0(rp)
-	b	L(ret)
-
-L(gt1):	ld	u0, 8(up)
-	srd	r8, r10, cnt
-	sld	r9, u0, tnc
-	ld	u1, 16(up)
-	srd	r12, u0, cnt
-	sld	r7, u1, tnc
-	ld	u0, 24(up)
-	or	r11, r8, r9
-	srd	r8, u1, cnt
-	sld	r9, u0, tnc
-	ld	u1, 32(up)
-	addi	up, up, 40
-	or	r10, r12, r7
-	bdz	L(end)
-
-	ALIGN(32)
-L(top):	srd	r12, u0, cnt
-	sld	r7, u1, tnc
-	ld	u0, 0(up)
-	std	r11, 0(rp)
-	or	r11, r8, r9
-L(L00):	srd	r8, u1, cnt
-	sld	r9, u0, tnc
+	bdz	L(2)		C jump for n = 2
+
+	ldu	u0, 16(up)
+	bdz	L(end)		C jump for n = 3
+
+L(oop):	sld	v1, u1, tnc
+	srd	h1, u1, cnt
 	ld	u1, 8(up)
-	std	r10, 8(rp)
-	or	r10, r12, r7
-L(L11):	srd	r12, u0, cnt
-	sld	r7, u1, tnc
-	ld	u0, 16(up)
-	std	r11, 16(rp)
-	or	r11, r8, r9
-L(L10):	srd	r8, u1, cnt
-	sld	r9, u0, tnc
-	ld	u1, 24(up)
-	addi	up, up, 32
-	std	r10, 24(rp)
-	addi	rp, rp, 32
-	or	r10, r12, r7
-	bdnz	L(top)
-
-	ALIGN(32)
-L(end):	srd	r12, u0, cnt
-	sld	r7, u1, tnc
-	std	r11, 0(rp)
-L(cj4):	or	r11, r8, r9
-	srd	r8, u1, cnt
-	std	r10, 8(rp)
-L(cj3):	or	r10, r12, r7
-	std	r11, 16(rp)
-L(cj2):	std	r10, 24(rp)
-	std	r8, 32(rp)
-
-L(ret):	ld	r31, -8(r1)
-	ld	r30, -16(r1)
+	or	h0, v1, h0
+	stdu	h0, 16(rp)
+
+	bdz	L(exit)
+
+	sld	v0, u0, tnc
+	srd	h0, u0, cnt
+	ldu	u0, 16(up)
+	or	h1, v0, h1
+	std	h1, 8(rp)
+
+	bdnz	L(oop)
+
+L(end):	sld	v1, u1, tnc
+	srd	h1, u1, cnt
+	or	h0, v1, h0
+	stdu	h0, 16(rp)
+	sld	v0, u0, tnc
+	srd	h0, u0, cnt
+	or	h1, v0, h1
+	std	h1, 8(rp)
+L(1):	std	h0, 16(rp)
+ifdef(`HAVE_ABI_mode32',
+`	srdi	r3, r12, 32
+	mr	r4, r12
+',`	mr	r3, r12
+')
+	blr
+
+L(exit):	sld	v0, u0, tnc
+	srd	h0, u0, cnt
+	or	h1, v0, h1
+	std	h1, 8(rp)
+L(2):	sld	v1, u1, tnc
+	srd	h1, u1, cnt
+	or	h0, v1, h0
+	stdu	h0, 16(rp)
+	std	h1, 8(rp)
 ifdef(`HAVE_ABI_mode32',
-`	srdi	r3, retval, 32
-	mr	r4, retval
-',`	mr	r3, retval')
+`	srdi	r3, r12, 32
+	mr	r4, r12
+',`	mr	r3, r12
+')
 	blr
 EPILOGUE()
diff --git a/gmp/mpn/powerpc64/sec_tabselect.asm b/gmp/mpn/powerpc64/sec_tabselect.asm
deleted file mode 100644
index 085577ca9b..0000000000
--- a/gmp/mpn/powerpc64/sec_tabselect.asm
+++ /dev/null
@@ -1,147 +0,0 @@
-dnl  PowerPC-64 mpn_sec_tabselect.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                   cycles/limb
-C POWER3/PPC630		 1.75
-C POWER4/PPC970		 2.0
-C POWER5		 ?
-C POWER6		 5.0
-C POWER7		 1.75
-
-define(`rp',     `r3')
-define(`tp',     `r4')
-define(`n',      `r5')
-define(`nents',  `r6')
-define(`which',  `r7')
-
-define(`i',      `r8')
-define(`j',      `r9')
-define(`stride', `r12')
-define(`mask',   `r11')
-
-
-ASM_START()
-PROLOGUE(mpn_sec_tabselect)
-	addic.	j, n, -4		C outer loop induction variable
-	std	r31, -8(r1)
-	std	r30, -16(r1)
-	std	r29, -24(r1)
-	std	r28, -32(r1)
-	std	r27, -40(r1)
-	sldi	stride, n, 3
-
-	blt	cr0, L(outer_end)
-L(outer_top):
-	mtctr	nents
-	mr	r10, tp
-	li	r28, 0
-	li	r29, 0
-	li	r30, 0
-	li	r31, 0
-	addic.	j, j, -4		C outer loop induction variable
-	mr	i, which
-
-	ALIGN(16)
-L(top):	addic	i, i, -1		C set carry iff i != 0
-	subfe	mask, mask, mask
-	ld	r0, 0(tp)
-	ld	r27, 8(tp)
-	and	r0, r0, mask
-	and	r27, r27, mask
-	or	r28, r28, r0
-	or	r29, r29, r27
-	ld	r0, 16(tp)
-	ld	r27, 24(tp)
-	and	r0, r0, mask
-	and	r27, r27, mask
-	or	r30, r30, r0
-	or	r31, r31, r27
-	add	tp, tp, stride
-	bdnz	L(top)
-
-	std	r28, 0(rp)
-	std	r29, 8(rp)
-	std	r30, 16(rp)
-	std	r31, 24(rp)
-	addi	tp, r10, 32
-	addi	rp, rp, 32
-	bge	cr0, L(outer_top)
-L(outer_end):
-
-	rldicl.	r0, n, 63, 63
-	beq	cr0, L(b0x)
-L(b1x):	mtctr	nents
-	mr	r10, tp
-	li	r28, 0
-	li	r29, 0
-	mr	i, which
-	ALIGN(16)
-L(tp2):	addic	i, i, -1
-	subfe	mask, mask, mask
-	ld	r0, 0(tp)
-	ld	r27, 8(tp)
-	and	r0, r0, mask
-	and	r27, r27, mask
-	or	r28, r28, r0
-	or	r29, r29, r27
-	add	tp, tp, stride
-	bdnz	L(tp2)
-	std	r28, 0(rp)
-	std	r29, 8(rp)
-	addi	tp, r10, 16
-	addi	rp, rp, 16
-
-L(b0x):	rldicl.	r0, n, 0, 63
-	beq	cr0, L(b00)
-L(b01):	mtctr	nents
-	mr	r10, tp
-	li	r28, 0
-	mr	i, which
-	ALIGN(16)
-L(tp1):	addic	i, i, -1
-	subfe	mask, mask, mask
-	ld	r0, 0(tp)
-	and	r0, r0, mask
-	or	r28, r28, r0
-	add	tp, tp, stride
-	bdnz	L(tp1)
-	std	r28, 0(rp)
-
-L(b00):	ld	r31, -8(r1)
-	ld	r30, -16(r1)
-	ld	r29, -24(r1)
-	ld	r28, -32(r1)
-	ld	r27, -40(r1)
-	blr
-EPILOGUE()
diff --git a/gmp/mpn/powerpc64/sqr_diagonal.asm b/gmp/mpn/powerpc64/sqr_diagonal.asm
new file mode 100644
index 0000000000..07f60e0dd5
--- /dev/null
+++ b/gmp/mpn/powerpc64/sqr_diagonal.asm
@@ -0,0 +1,55 @@
+dnl  PowerPC-64 mpn_sqr_diagonal.
+
+dnl  Copyright 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C		cycles/limb
+C POWER3/PPC630:    18
+C POWER4/PPC970:     8
+
+C INPUT PARAMETERS
+C rp	r3	result pointer, advanced 16 bytes (two limbs) per input limb
+C up	r4	source pointer, advanced 8 bytes (one limb) per iteration
+C n	r5	limb count, moved to ctr; r5 is then reused as scratch
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)	C store the 128-bit square of each up limb at rp
+ifdef(`HAVE_ABI_mode32',
+`	rldicl	r5, r5, 0, 32')		C zero extend n
+	mtctr	r5			C ctr = n
+	ld	r0, 0(r4)		C r0 = first limb
+	bdz	L(end)			C n = 1: skip the loop
+	ALIGN(16)
+
+L(top):	mulld	r5, r0, r0		C low 64 bits of r0 * r0
+	mulhdu	r6, r0, r0		C high 64 bits of r0 * r0 (unsigned)
+	ld	r0, 8(r4)		C fetch the next limb before storing
+	addi	r4, r4, 8
+	std	r5, 0(r3)		C low half first
+	std	r6, 8(r3)		C then high half
+	addi	r3, r3, 16
+	bdnz	L(top)			C loop while limbs remain after r0
+
+L(end):	mulld	r5, r0, r0		C last limb: no further load
+	mulhdu	r6, r0, r0
+	std	r5, 0(r3)
+	std	r6, 8(r3)
+
+	blr
+EPILOGUE()
diff --git a/gmp/mpn/powerpc64/umul.asm b/gmp/mpn/powerpc64/umul.asm
index 7fcc72f18f..516be3d98b 100644
--- a/gmp/mpn/powerpc64/umul.asm
+++ b/gmp/mpn/powerpc64/umul.asm
@@ -1,32 +1,21 @@
-dnl  PowerPC-64 umul_ppmm -- support for longlong.h
+dnl PowerPC-64 umul_ppmm -- support for longlong.h
 
-dnl  Copyright 2000, 2001, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
+dnl Copyright 2000, 2001, 2005 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+dnl General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/powerpc64/vmx/popcount.asm b/gmp/mpn/powerpc64/vmx/popcount.asm
index b95fb88b1a..b9f5896fb7 100644
--- a/gmp/mpn/powerpc64/vmx/popcount.asm
+++ b/gmp/mpn/powerpc64/vmx/popcount.asm
@@ -1,43 +1,36 @@
 dnl  PowerPC-32/VMX and PowerPC-64/VMX mpn_popcount.
 
-dnl  Copyright 2006, 2010 Free Software Foundation, Inc.
+dnl  Copyright 2006 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C                   cycles/limb
-C 7400,7410 (G4):       ?
-C 744x,745x (G4+):      1.125
-C 970 (G5):             2.25
+C 7400,7410 (G4):       2.75
+C 744x,745x (G4+):      2.25
+C 970 (G5):             5.3
+
+C STATUS
+C  * Works for all sizes and alignments.
 
 C TODO
-C  * Rewrite the awkward huge n outer loop code.
+C  * Tune the awkward huge n outer loop code.
 C  * Two lvx, two vperm, and two vxor could make us a similar hamdist.
+C  * For the 970, a combined VMX+intop approach might be best.
 C  * Compress cnsts table in 64-bit mode, only half the values are needed.
 
 define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))
@@ -46,11 +39,26 @@ define(`LIMBS_PER_2VR', eval(32/GMP_LIMB_BYTES))
 
 define(`OPERATION_popcount')
 
-define(`ap',	`r3')
-define(`n',	`r4')
+ifdef(`OPERATION_popcount',`
+  define(`func',`mpn_popcount')
+  define(`up',		`r3')
+  define(`n',		`r4')
+  define(`HAM',		`dnl')
+')
+ifdef(`OPERATION_hamdist',`
+  define(`func',`mpn_hamdist')
+  define(`up',		`r3')
+  define(`vp',		`r4')
+  define(`n',		`r5')
+  define(`HAM',		`$1')
+')
 
-define(`rtab',	`v10')
-define(`cnt4',	`v11')
+define(`x01010101',`v2')
+define(`x00110011',`v7')
+define(`x00001111',`v10')
+define(`cnt1',`v11')
+define(`cnt2',`v12')
+define(`cnt4',`v13')
 
 ifelse(GMP_LIMB_BITS,32,`
 	define(`LIMB32',`	$1')
@@ -66,7 +74,7 @@ define(`LIMBS_PER_CHUNK', 0x1000)
 define(`LIMBS_CHUNK_THRES', 0x1001)
 
 ASM_START()
-PROLOGUE(mpn_popcount,toc)
+PROLOGUE(mpn_popcount)
 	mfspr	r10, 256
 	oris	r0, r10, 0xfffc		C Set VRSAVE bit 0-13
 	mtspr	256, r0
@@ -77,29 +85,30 @@ ifdef(`HAVE_ABI_mode32',
 C Load various constants into vector registers
 	LEAL(	r11, cnsts)
 	li	r12, 16
+	vspltisb cnt1, 1		C 0x0101...01 used as shift count
+	vspltisb cnt2, 2		C 0x0202...02 used as shift count
 	vspltisb cnt4, 4		C 0x0404...04 used as shift count
-
-	li	r7, 160
-	lvx	rtab, 0, r11
+	lvx	x01010101, 0, r11	C 0x5555...55
+	lvx	x00110011, r12, r11	C 0x3333...33
+	vspltisb x00001111, 15		C 0x0f0f...0f
 
 LIMB64(`lis	r0, LIMBS_CHUNK_THRES	')
 LIMB64(`cmpd	cr7, n, r0		')
 
-	lvx	v0, 0, ap
-	addi	r7, r11, 80
-	rlwinm	r6, ap, 2,26,29
+	lvx	v0, 0, up
+	addi	r7, r11, 96
+	rlwinm	r6, up, 2,26,29
 	lvx	v8, r7, r6
 	vand	v0, v0, v8
 
-LIMB32(`rlwinm	r8, ap, 30,30,31	')
-LIMB64(`rlwinm	r8, ap, 29,31,31	')
-	add	n, n, r8		C compensate n for rounded down `ap'
+LIMB32(`rlwinm	r8, up, 30,30,31	')
+LIMB64(`rlwinm	r8, up, 29,31,31	')
+	add	n, n, r8		C compensate n for rounded down `up'
 
 	vxor	v1, v1, v1
 	li	r8, 0			C grand total count
 
-	vxor	v12, v12, v12		C zero total count
-	vxor	v13, v13, v13		C zero total count
+	vxor	v3, v3, v3		C zero total count
 
 	addic.	n, n, -LIMBS_PER_VR
 	ble	L(sum)
@@ -111,61 +120,82 @@ C For 64-bit machines, handle huge n that would overflow vsum4ubs
 LIMB64(`ble	cr7, L(small)		')
 LIMB64(`addis	r9, n, -LIMBS_PER_CHUNK	') C remaining n
 LIMB64(`lis	n, LIMBS_PER_CHUNK	')
-
-	ALIGN(16)
 L(small):
+
+
 LIMB32(`srwi	r7, n, 3	')	C loop count corresponding to n
 LIMB64(`srdi	r7, n, 2	')	C loop count corresponding to n
 	addi	r7, r7, 1
 	mtctr	r7			C copy n to count register
 	b	L(ent)
 
-	ALIGN(16)
-L(top):
-	lvx	v0, 0, ap
-L(ent):	lvx	v1, r12, ap
-	addi	ap, ap, 32
-	vsrb	v8, v0, cnt4
-	vsrb	v9, v1, cnt4
-	vperm	v2, rtab, rtab, v0
-	vperm	v3, rtab, rtab, v8
-	vperm	v4, rtab, rtab, v1
-	vperm	v5, rtab, rtab, v9
-	vaddubm	v6, v2, v3
-	vaddubm	v7, v4, v5
-	vsum4ubs v12, v6, v12
-	vsum4ubs v13, v7, v13
+	ALIGN(8)
+L(top):	lvx	v0, 0, up
+	li	r7, 128			C prefetch distance
+L(ent):	lvx	v1, r12, up
+	addi	up, up, 32
+	vsr	v4, v0, cnt1
+	vsr	v5, v1, cnt1
+	dcbt	up, r7			C prefetch
+	vand	v8, v4, x01010101
+	vand	v9, v5, x01010101
+	vsububm	v0, v0, v8		C 64 2-bit accumulators (0..2)
+	vsububm	v1, v1, v9		C 64 2-bit accumulators (0..2)
+	vsr	v4, v0, cnt2
+	vsr	v5, v1, cnt2
+	vand	v8, v0, x00110011
+	vand	v9, v1, x00110011
+	vand	v4, v4, x00110011
+	vand	v5, v5, x00110011
+	vaddubm	v0, v4, v8		C 32 4-bit accumulators (0..4)
+	vaddubm	v1, v5, v9		C 32 4-bit accumulators (0..4)
+	vaddubm	v8, v0, v1		C 32 4-bit accumulators (0..8)
+	vsr	v9, v8, cnt4
+	vand	v6, v8, x00001111
+	vand	v9, v9, x00001111
+	vaddubm	v6, v9, v6		C 16 8-bit accumulators (0..16)
+	vsum4ubs v3, v6, v3		C sum 4 x 4 bytes into 4 32-bit fields
 	bdnz	L(top)
 
 	andi.	n, n, eval(LIMBS_PER_2VR-1)
 	beq	L(rt)
 
-	lvx	v0, 0, ap
+	lvx	v0, 0, up
 	vxor	v1, v1, v1
 	cmpwi	n, LIMBS_PER_VR
 	ble	L(sum)
 L(lsum):
 	vor	v1, v0, v0
-	lvx	v0, r12, ap
+	lvx	v0, r12, up
 L(sum):
 LIMB32(`rlwinm	r6, n, 4,26,27	')
 LIMB64(`rlwinm	r6, n, 5,26,26	')
-	addi	r7, r11, 16
+	addi	r7, r11, 32
 	lvx	v8, r7, r6
 	vand	v0, v0, v8
-	vsrb	v8, v0, cnt4
-	vsrb	v9, v1, cnt4
-	vperm	v2, rtab, rtab, v0
-	vperm	v3, rtab, rtab, v8
-	vperm	v4, rtab, rtab, v1
-	vperm	v5, rtab, rtab, v9
-	vaddubm	v6, v2, v3
-	vaddubm	v7, v4, v5
-	vsum4ubs v12, v6, v12
-	vsum4ubs v13, v7, v13
-
-	ALIGN(16)
-L(rt):	vadduwm	v3, v12, v13
+
+	vsr	v4, v0, cnt1
+	vsr	v5, v1, cnt1
+	vand	v8, v4, x01010101
+	vand	v9, v5, x01010101
+	vsububm	v0, v0, v8		C 64 2-bit accumulators (0..2)
+	vsububm	v1, v1, v9		C 64 2-bit accumulators (0..2)
+	vsr	v4, v0, cnt2
+	vsr	v5, v1, cnt2
+	vand	v8, v0, x00110011
+	vand	v9, v1, x00110011
+	vand	v4, v4, x00110011
+	vand	v5, v5, x00110011
+	vaddubm	v0, v4, v8		C 32 4-bit accumulators (0..4)
+	vaddubm	v1, v5, v9		C 32 4-bit accumulators (0..4)
+	vaddubm	v8, v0, v1		C 32 4-bit accumulators (0..8)
+	vsr	v9, v8, cnt4
+	vand	v6, v8, x00001111
+	vand	v9, v9, x00001111
+	vaddubm	v6, v9, v6		C 16 8-bit accumulators (0..16)
+	vsum4ubs v3, v6, v3		C sum 4 x 4 bytes into 4 32-bit fields
+
+L(rt):
 	li	r7, -16			C FIXME: does all ppc32 and ppc64 ABIs
 	stvx	v3, r7, r1		C FIXME: ...support storing below sp?
 
@@ -180,8 +210,7 @@ L(rt):	vadduwm	v3, v12, v13
 
 C Handle outer loop for huge n.  We inherit cr7 and r0 from above.
 LIMB64(`ble	cr7, L(ret)
-	vxor	v12, v12, v12		C zero total count
-	vxor	v13, v13, v13		C zero total count
+	vxor	v3, v3, v3		C zero total count
 	mr	n, r9
 	cmpd	cr7, n, r0
 	ble	cr7, L(2)
@@ -192,16 +221,17 @@ L(2):	srdi	r7, n, 2		C loop count corresponding to n
 	b	L(top)
 ')
 
-	ALIGN(16)
 L(ret):	mr	r3, r8
 	mtspr	256, r10
 	blr
 EPILOGUE()
 
 DEF_OBJECT(cnsts,16)
-C Counts for vperm
-	.byte	0x00,0x01,0x01,0x02,0x01,0x02,0x02,0x03
-	.byte	0x01,0x02,0x02,0x03,0x02,0x03,0x03,0x04
+	.byte	0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55
+	.byte	0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55
+
+	.byte	0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33
+	.byte	0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33
 C Masks for high end of number
 	.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
 	.byte	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
diff --git a/gmp/mpn/pyr/add_n.s b/gmp/mpn/pyr/add_n.s
new file mode 100644
index 0000000000..7ac02e6b4d
--- /dev/null
+++ b/gmp/mpn/pyr/add_n.s
@@ -0,0 +1,74 @@
+# Pyramid __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+	.align	2
+.globl	___gmpn_add_n
+___gmpn_add_n:
+	movw	$-1,tr0		# tr0 = carry-save register; -1 is the representation for carry clear
+
+	movw	pr3,tr2		# tr2 = copy of the limb count in pr3
+	andw	$3,tr2		# tr2 = count mod 4 = limbs to handle one at a time in Loop0
+	beq	Lend0		# count is a multiple of 4: go straight to the 4-limb loop
+	subw	tr2,pr3		# pr3 = count left for the 4-limb loop (now a multiple of 4)
+
+Loop0:	rsubw	$0,tr0		# restore carry bit from carry-save register
+
+	movw	(pr1),tr1	# tr1 = limb at (pr1), the first source vector
+	addwc	(pr2),tr1	# tr1 += limb at (pr2), the second source vector, with carry
+	movw	tr1,(pr0)	# store the sum limb at (pr0), the destination vector
+
+	subwb	tr0,tr0		# save carry in tr0; presumably needed because the addw's below alter it -- confirm
+	addw	$4,pr0		# advance destination pointer by one 4-byte limb
+	addw	$4,pr1		# advance first source pointer
+	addw	$4,pr2		# advance second source pointer
+	addw	$-1,tr2		# one fewer single limb to do
+	bne	Loop0		# repeat until tr2 reaches 0
+
+	mtstw	pr3,pr3		# presumably sets the condition codes from pr3 (remaining count) -- confirm
+	beq	Lend		# nothing left for the 4-limb loop
+Lend0:
+Loop:	rsubw	$0,tr0		# restore carry bit from carry-save register
+
+	movw	(pr1),tr1	# limb 0 of this group of four: load,
+	addwc	(pr2),tr1	# add with carry,
+	movw	tr1,(pr0)	# store
+
+	movw	4(pr1),tr1	# limb 1 (byte offset 4)
+	addwc	4(pr2),tr1
+	movw	tr1,4(pr0)
+
+	movw	8(pr1),tr1	# limb 2 (byte offset 8)
+	addwc	8(pr2),tr1
+	movw	tr1,8(pr0)
+
+	movw	12(pr1),tr1	# limb 3 (byte offset 12)
+	addwc	12(pr2),tr1
+	movw	tr1,12(pr0)
+
+	subwb	tr0,tr0		# save carry in tr0 across the pointer/count updates below
+	addw	$16,pr0		# advance destination pointer by four limbs
+	addw	$16,pr1		# advance first source pointer by four limbs
+	addw	$16,pr2		# advance second source pointer by four limbs
+	addw	$-4,pr3		# four fewer limbs remaining
+	bne	Loop		# repeat until pr3 reaches 0
+Lend:
+	mnegw	tr0,pr0		# pr0 = negated carry-save value; presumably the carry-out return value -- confirm
+	ret
diff --git a/gmp/mpn/pyr/addmul_1.s b/gmp/mpn/pyr/addmul_1.s
new file mode 100644
index 0000000000..d40a9e77cf
--- /dev/null
+++ b/gmp/mpn/pyr/addmul_1.s
@@ -0,0 +1,43 @@
+# Pyramid __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+	.align	2
+.globl	___gmpn_addmul_1
+___gmpn_addmul_1:
+	mova	(pr0)[pr2*4],pr0	# pr0 = address of (pr0)[pr2*4]; presumably just past the last destination limb -- confirm mova semantics
+	mova	(pr1)[pr2*4],pr1	# pr1 = address of (pr1)[pr2*4]; same adjustment for the source vector
+	mnegw	pr2,pr2		# pr2 = negated limb count: an index that counts up toward 0
+	movw	$0,tr3		# tr3 = carry limb, initially 0
+
+Loop:	movw	(pr1)[pr2*4],tr1	# tr1 = current source limb
+	uemul	pr3,tr0		# presumably tr0:tr1 = tr1 * pr3 (tr0 high word, tr1 low word) -- confirm against the Pyramid ISA
+	addw	tr3,tr1		# add the carry limb from the previous iteration into the low word
+	movw	$0,tr3		# clear tr3 (assumes movw leaves the carry from the addw intact -- confirm)
+	addwc	tr0,tr3		# tr3 = tr0 plus that carry
+	movw	(pr0)[pr2*0x4],tr0	# tr0 = current destination limb (0x4 is the same scale as the 4 used on the other lines)
+	addw	tr0,tr1		# add the destination limb into the low word
+	addwc	$0,tr3		# fold the carry from that add into the carry limb
+	movw	tr1,(pr0)[pr2*4]	# store the updated destination limb
+	addw	$1,pr2		# step the index toward 0
+	bne	Loop		# repeat until the index reaches 0
+
+	movw	tr3,pr0		# pr0 = final carry limb; presumably the return value -- confirm
+	ret
diff --git a/gmp/mpn/pyr/mul_1.s b/gmp/mpn/pyr/mul_1.s
new file mode 100644
index 0000000000..453727f6d8
--- /dev/null
+++ b/gmp/mpn/pyr/mul_1.s
@@ -0,0 +1,40 @@
+# Pyramid __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+	.align	2
+.globl	___gmpn_mul_1
+___gmpn_mul_1:
+	mova	(pr0)[pr2*4],pr0	# pr0 = address of (pr0)[pr2*4]; presumably just past the last destination limb -- confirm mova semantics
+	mova	(pr1)[pr2*4],pr1	# pr1 = address of (pr1)[pr2*4]; same adjustment for the source vector
+	mnegw	pr2,pr2		# pr2 = negated limb count: an index that counts up toward 0
+	movw	$0,tr3		# tr3 = carry limb, initially 0
+
+Loop:	movw	(pr1)[pr2*4],tr1	# tr1 = current source limb
+	uemul	pr3,tr0		# presumably tr0:tr1 = tr1 * pr3 (tr0 high word, tr1 low word) -- confirm against the Pyramid ISA
+	addw	tr3,tr1		# add the carry limb from the previous iteration into the low word
+	movw	$0,tr3		# clear tr3 (assumes movw leaves the carry from the addw intact -- confirm)
+	addwc	tr0,tr3		# tr3 = tr0 plus that carry: the carry limb for the next iteration
+	movw	tr1,(pr0)[pr2*4]	# store the product limb in the destination vector
+	addw	$1,pr2		# step the index toward 0
+	bne	Loop		# repeat until the index reaches 0
+
+	movw	tr3,pr0		# pr0 = final carry limb; presumably the return value -- confirm
+	ret
diff --git a/gmp/mpn/pyr/sub_n.s b/gmp/mpn/pyr/sub_n.s
new file mode 100644
index 0000000000..11f185a81a
--- /dev/null
+++ b/gmp/mpn/pyr/sub_n.s
@@ -0,0 +1,74 @@
+# Pyramid __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright 1995, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+.text
+	.align	2
+.globl	___gmpn_sub_n
+___gmpn_sub_n:
+	movw	$-1,tr0		# tr0 = carry-save register; -1 is the representation for carry clear
+
+	movw	pr3,tr2		# tr2 = copy of the limb count in pr3
+	andw	$3,tr2		# tr2 = count mod 4 = limbs to handle one at a time in Loop0
+	beq	Lend0		# count is a multiple of 4: go straight to the 4-limb loop
+	subw	tr2,pr3		# pr3 = count left for the 4-limb loop (now a multiple of 4)
+
+Loop0:	rsubw	$0,tr0		# restore carry bit from carry-save register
+
+	movw	(pr1),tr1	# tr1 = limb at (pr1), the first source vector
+	subwb	(pr2),tr1	# tr1 -= limb at (pr2), the second source vector, with borrow
+	movw	tr1,(pr0)	# store the difference limb at (pr0), the destination vector
+
+	subwb	tr0,tr0		# save carry in tr0; presumably needed because the addw's below alter it -- confirm
+	addw	$4,pr0		# advance destination pointer by one 4-byte limb
+	addw	$4,pr1		# advance first source pointer
+	addw	$4,pr2		# advance second source pointer
+	addw	$-1,tr2		# one fewer single limb to do
+	bne	Loop0		# repeat until tr2 reaches 0
+
+	mtstw	pr3,pr3		# presumably sets the condition codes from pr3 (remaining count) -- confirm
+	beq	Lend		# nothing left for the 4-limb loop
+Lend0:
+Loop:	rsubw	$0,tr0		# restore carry bit from carry-save register
+
+	movw	(pr1),tr1	# limb 0 of this group of four: load,
+	subwb	(pr2),tr1	# subtract with borrow,
+	movw	tr1,(pr0)	# store
+
+	movw	4(pr1),tr1	# limb 1 (byte offset 4)
+	subwb	4(pr2),tr1
+	movw	tr1,4(pr0)
+
+	movw	8(pr1),tr1	# limb 2 (byte offset 8)
+	subwb	8(pr2),tr1
+	movw	tr1,8(pr0)
+
+	movw	12(pr1),tr1	# limb 3 (byte offset 12)
+	subwb	12(pr2),tr1
+	movw	tr1,12(pr0)
+
+	subwb	tr0,tr0		# save carry in tr0 across the pointer/count updates below
+	addw	$16,pr0		# advance destination pointer by four limbs
+	addw	$16,pr1		# advance first source pointer by four limbs
+	addw	$16,pr2		# advance second source pointer by four limbs
+	addw	$-4,pr3		# four fewer limbs remaining
+	bne	Loop		# repeat until pr3 reaches 0
+Lend:
+	mnegw	tr0,pr0		# pr0 = negated carry-save value; presumably the borrow-out return value -- confirm
+	ret
diff --git a/gmp/mpn/s390_32/README b/gmp/mpn/s390/README
index 59519ba538..59519ba538 100644
--- a/gmp/mpn/s390_32/README
+++ b/gmp/mpn/s390/README
diff --git a/gmp/mpn/s390_32/addmul_1.asm b/gmp/mpn/s390/addmul_1.asm
index 97189a8e76..71d49bbfca 100644
--- a/gmp/mpn/s390_32/addmul_1.asm
+++ b/gmp/mpn/s390/addmul_1.asm
@@ -4,30 +4,19 @@ dnl  result to a second limb vector.
 dnl  Copyright 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/s390/gmp-mparam.h b/gmp/mpn/s390/gmp-mparam.h
new file mode 100644
index 0000000000..d738846679
--- /dev/null
+++ b/gmp/mpn/s390/gmp-mparam.h
@@ -0,0 +1,54 @@
+/* IBM s370 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+/* BITS_PER_MP_LIMB etc generated by configure */
+
+
+/* Generated by tuneup.c, 2001-12-03, gcc 2.95 */
+
+#define MUL_KARATSUBA_THRESHOLD       18
+#define MUL_TOOM3_THRESHOLD          210
+
+#define SQR_BASECASE_THRESHOLD         8
+#define SQR_KARATSUBA_THRESHOLD       40
+#define SQR_TOOM3_THRESHOLD          250
+
+#define DIV_SB_PREINV_THRESHOLD            0
+#define DIV_DC_THRESHOLD                  63
+#define POWM_THRESHOLD                63
+
+#define GCD_ACCEL_THRESHOLD            3
+
+#define DIVREM_1_NORM_THRESHOLD        0
+#define DIVREM_1_UNNORM_THRESHOLD      5
+#define MOD_1_NORM_THRESHOLD           0
+#define MOD_1_UNNORM_THRESHOLD         4
+#define USE_PREINV_MOD_1               0
+#define DIVREM_2_THRESHOLD             0
+#define DIVEXACT_1_THRESHOLD           0
+#define MODEXACT_1_ODD_THRESHOLD       0
+
+#define MUL_FFT_TABLE  { 432, 992, 1664, 4608, 14336, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD       448
+#define MUL_FFT_THRESHOLD           3840
+
+#define SQR_FFT_TABLE  { 400, 992, 1664, 4608, 10240, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD       416
+#define SQR_FFT_THRESHOLD           3328
diff --git a/gmp/mpn/s390_32/mul_1.asm b/gmp/mpn/s390/mul_1.asm
index e3ad0c59d8..649671b45c 100644
--- a/gmp/mpn/s390_32/mul_1.asm
+++ b/gmp/mpn/s390/mul_1.asm
@@ -4,30 +4,19 @@ dnl  result in a second limb vector.
 dnl  Copyright 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/s390_32/submul_1.asm b/gmp/mpn/s390/submul_1.asm
index da7d849d5d..5301096da2 100644
--- a/gmp/mpn/s390_32/submul_1.asm
+++ b/gmp/mpn/s390/submul_1.asm
@@ -4,30 +4,19 @@ dnl  result from a second limb vector.
 dnl  Copyright 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/s390_32/copyd.asm b/gmp/mpn/s390_32/copyd.asm
deleted file mode 100644
index ff252bc1a6..0000000000
--- a/gmp/mpn/s390_32/copyd.asm
+++ /dev/null
@@ -1,145 +0,0 @@
-dnl  S/390-32 mpn_copyd
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C            cycles/limb
-C            cycles/limb
-C z900		 1.65
-C z990           1.125
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C FIXME:
-C  * Avoid saving/restoring callee-saves registers for n < 3.  This could be
-C    done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
-C    We could then use r3...r10 in main loop.
-
-C INPUT PARAMETERS
-define(`rp_param',	`%r2')
-define(`up_param',	`%r3')
-define(`n',		`%r4')
-
-define(`rp',	`%r8')
-define(`up',	`%r9')
-
-ASM_START()
-PROLOGUE(mpn_copyd)
-	stm	%r6, %r11, 24(%r15)
-
-	lr	%r1, n
-	sll	%r1, 2
-	la	%r10, 8(n)
-	ahi	%r1, -32
-	srl	%r10, 3
-	lhi	%r11, -32
-
-	la	rp, 0(%r1,rp_param)	C FIXME use lay on z990 and later
-	la	up, 0(%r1,up_param)	C FIXME use lay on z990 and later
-
-	lhi	%r7, 7
-	nr	%r7, n			C n mod 8
-	chi	%r7, 2
-	jh	L(b34567)
-	chi	%r7, 1
-	je	L(b1)
-	jh	L(b2)
-
-L(b0):	brct	%r10, L(top)
-	j	L(end)
-
-L(b1):	l	%r0, 28(up)
-	ahi	up, -4
-	st	%r0, 28(rp)
-	ahi	rp, -4
-	brct	%r10, L(top)
-	j	L(end)
-
-L(b2):	lm	%r0, %r1, 24(up)
-	ahi	up, -8
-	stm	%r0, %r1, 24(rp)
-	ahi	rp, -8
-	brct	%r10, L(top)
-	j	L(end)
-
-L(b34567):
-	chi	%r7, 4
-	jl	L(b3)
-	je	L(b4)
-	chi	%r7, 6
-	je	L(b6)
-	jh	L(b7)
-
-L(b5):	lm	%r0, %r4, 12(up)
-	ahi	up, -20
-	stm	%r0, %r4, 12(rp)
-	ahi	rp, -20
-	brct	%r10, L(top)
-	j	L(end)
-
-L(b3):	lm	%r0, %r2, 20(up)
-	ahi	up, -12
-	stm	%r0, %r2, 20(rp)
-	ahi	rp, -12
-	brct	%r10, L(top)
-	j	L(end)
-
-L(b4):	lm	%r0, %r3, 16(up)
-	ahi	up, -16
-	stm	%r0, %r3, 16(rp)
-	ahi	rp, -16
-	brct	%r10, L(top)
-	j	L(end)
-
-L(b6):	lm	%r0, %r5, 8(up)
-	ahi	up, -24
-	stm	%r0, %r5, 8(rp)
-	ahi	rp, -24
-	brct	%r10, L(top)
-	j	L(end)
-
-L(b7):	lm	%r0, %r6, 4(up)
-	ahi	up, -28
-	stm	%r0, %r6, 4(rp)
-	ahi	rp, -28
-	brct	%r10, L(top)
-	j	L(end)
-
-L(top):	lm	%r0, %r7, 0(up)
-	la	up, 0(%r11,up)
-	stm	%r0, %r7, 0(rp)
-	la	rp, 0(%r11,rp)
-	brct	%r10, L(top)
-
-L(end):	lm	%r6, %r11, 24(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_32/copyi.asm b/gmp/mpn/s390_32/copyi.asm
deleted file mode 100644
index 1df32f100e..0000000000
--- a/gmp/mpn/s390_32/copyi.asm
+++ /dev/null
@@ -1,69 +0,0 @@
-dnl  S/390-32 mpn_copyi
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 0.75
-C z990           0.375
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C NOTE
-C  * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-
-ASM_START()
-PROLOGUE(mpn_copyi)
-	ltr	%r4, %r4
-	sll	%r4, 2
-	je	L(rtn)
-	ahi	%r4, -1
-	lr	%r5, %r4
-	srl	%r5, 8
-	ltr	%r5, %r5		C < 256 bytes to copy?
-	je	L(1)
-
-L(top):	mvc	0(256, rp), 0(up)
-	la	rp, 256(rp)
-	la	up, 256(up)
-	brct	%r5, L(top)
-
-L(1):	bras	%r5, L(2)		C make r5 point to mvc insn
-	mvc	0(1, rp), 0(up)
-L(2):	ex	%r4, 0(%r5)		C execute mvc with length ((n-1) mod 256)+1
-L(rtn):	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_32/esame/addmul_1.asm b/gmp/mpn/s390_32/esame/addmul_1.asm
deleted file mode 100644
index 4375b74ae0..0000000000
--- a/gmp/mpn/s390_32/esame/addmul_1.asm
+++ /dev/null
@@ -1,72 +0,0 @@
-dnl  S/390-32 mpn_addmul_1 for systems with MLR instruction
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		18.5
-C z990		10
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-define(`v0',	`%r5')
-
-define(`z',	`%r9')
-
-ASM_START()
-PROLOGUE(mpn_addmul_1)
-	stm	%r9, %r12, 36(%r15)
-	lhi	%r12, 0			C zero index register
-	ahi	%r12, 0			C clear carry fla
-	lhi	%r11, 0			C clear carry limb
-	lhi	z, 0			C clear carry limb
-
-L(top):	l	%r1, 0(%r12,up)
-	l	%r10, 0(%r12,rp)
-	mlr	%r0, v0
-	alcr	%r1, %r10
-	alcr	%r0, z
-	alr	%r1, %r11
-	lr	%r11, %r0
-	st	%r1, 0(%r12,rp)
-	la	%r12, 4(%r12)
-	brct	n, L(top)
-
-	lhi	%r2, 0
-	alcr	%r2, %r11
-
-	lm	%r9, %r12, 36(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_32/esame/aors_n.asm b/gmp/mpn/s390_32/esame/aors_n.asm
deleted file mode 100644
index 98b0dbc7b0..0000000000
--- a/gmp/mpn/s390_32/esame/aors_n.asm
+++ /dev/null
@@ -1,137 +0,0 @@
-dnl  S/390-32 mpn_add_n and mpn_sub_n.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 ?
-C z990	      2.75-3		(fast for even n, slow for odd n)
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C TODO
-C  * Optimise for small n
-C  * Use r0 and save/restore one less register
-C  * Using logops_n's v1 inner loop operand order make the loop about 20%
-C    faster, at the expense of highly alignment-dependent performance.
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`vp',	`%r4')
-define(`n',	`%r5')
-
-ifdef(`OPERATION_add_n', `
-  define(ADSB,		al)
-  define(ADSBCR,	alcr)
-  define(ADSBC,		alc)
-  define(RETVAL,`dnl
-	lhi	%r2, 0
-	alcr	%r2, %r2')
-  define(func,		mpn_add_n)
-  define(func_nc,	mpn_add_nc)')
-ifdef(`OPERATION_sub_n', `
-  define(ADSB,		sl)
-  define(ADSBCR,	slbr)
-  define(ADSBC,		slb)
-  define(RETVAL,`dnl
-	slbr	%r2, %r2
-	lcr	%r2, %r2')
-  define(func,		mpn_sub_n)
-  define(func_nc,	mpn_sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
-
-ASM_START()
-PROLOGUE(func)
-	stm	%r6, %r8, 24(%r15)
-
-	ahi	n, 3
-	lhi	%r7, 3
-	lr	%r1, n
-	srl	%r1, 2
-	nr	%r7, n			C n mod 4
-	je	L(b1)
-	chi	%r7, 2
-	jl	L(b2)
-	jne	L(b0)
-
-L(b3):	lm	%r5, %r7, 0(up)
-	la	up, 12(up)
-	ADSB	%r5, 0(vp)
-	ADSBC	%r6, 4(vp)
-	ADSBC	%r7, 8(vp)
-	la	vp, 12(vp)
-	stm	%r5, %r7, 0(rp)
-	la	rp, 12(rp)
-	brct	%r1, L(top)
-	j	L(end)
-
-L(b0):	lm	%r5, %r8, 0(up)		C This redundant insns is no mistake,
-	la	up, 16(up)		C it is needed to make main loop run
-	ADSB	%r5, 0(vp)		C fast for n = 0 (mod 4).
-	ADSBC	%r6, 4(vp)
-	j	L(m0)
-
-L(b1):	l	%r5, 0(up)
-	la	up, 4(up)
-	ADSB	%r5, 0(vp)
-	la	vp, 4(vp)
-	st	%r5, 0(rp)
-	la	rp, 4(rp)
-	brct	%r1, L(top)
-	j	L(end)
-
-L(b2):	lm	%r5, %r6, 0(up)
-	la	up, 8(up)
-	ADSB	%r5, 0(vp)
-	ADSBC	%r6, 4(vp)
-	la	vp, 8(vp)
-	stm	%r5, %r6, 0(rp)
-	la	rp, 8(rp)
-	brct	%r1, L(top)
-	j	L(end)
-
-L(top):	lm	%r5, %r8, 0(up)
-	la	up, 16(up)
-	ADSBC	%r5, 0(vp)
-	ADSBC	%r6, 4(vp)
-L(m0):	ADSBC	%r7, 8(vp)
-	ADSBC	%r8, 12(vp)
-	la	vp, 16(vp)
-	stm	%r5, %r8, 0(rp)
-	la	rp, 16(rp)
-	brct	%r1, L(top)
-
-L(end):	RETVAL
-	lm	%r6, %r8, 24(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_32/esame/aorslsh1_n.asm b/gmp/mpn/s390_32/esame/aorslsh1_n.asm
deleted file mode 100644
index f2b222b121..0000000000
--- a/gmp/mpn/s390_32/esame/aorslsh1_n.asm
+++ /dev/null
@@ -1,173 +0,0 @@
-dnl  S/390-32 mpn_addlsh1_n
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 9.25
-C z990		 5
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C TODO
-C  * Optimise for small n
-C  * Compute RETVAL for sublsh1_n less stupidly
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`vp',	`%r4')
-define(`n',	`%r5')
-
-ifdef(`OPERATION_addlsh1_n',`
-  define(ADDSUBC,       alr)
-  define(ADDSUBE,       alcr)
-  define(INITCY,        `lhi	%r13, -1')
-  define(RETVAL,        `alr	%r1, %r13
-			lhi	%r2, 2
-			alr	%r2, %r1')
-  define(func, mpn_addlsh1_n)
-')
-ifdef(`OPERATION_sublsh1_n',`
-  define(ADDSUBC,       slr)
-  define(ADDSUBE,       slbr)
-  define(INITCY,        `lhi	%r13, 0')
-  define(RETVAL,        `slr	%r1, %r13
-			lhi	%r2, 1
-			alr	%r2, %r1')
-  define(func, mpn_sublsh1_n)
-')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
-
-ASM_START()
-PROLOGUE(func)
-	stm	%r6, %r13, 24(%r15)
-
-	la	%r0, 3(n)
-	lhi	%r7, 3
-	srl	%r0, 2
-	nr	%r7, n			C n mod 4
-	je	L(b0)
-	chi	%r7, 2
-	jl	L(b1)
-	je	L(b2)
-
-L(b3):	lm	%r5, %r7, 0(up)
-	la	up, 12(up)
-	lm	%r9, %r11, 0(vp)
-	la	vp, 12(vp)
-
-	alr	%r9, %r9
-	alcr	%r10, %r10
-	alcr	%r11, %r11
-	slbr	%r1, %r1
-
-	ADDSUBC	%r5, %r9
-	ADDSUBE	%r6, %r10
-	ADDSUBE	%r7, %r11
-	slbr	%r13, %r13
-
-	stm	%r5, %r7, 0(rp)
-	la	rp, 12(rp)
-	brct	%r0, L(top)
-	j	L(end)
-
-L(b0):	lhi	%r1, -1
-	INITCY
-	j	L(top)
-
-L(b1):	l	%r5, 0(up)
-	la	up, 4(up)
-	l	%r9, 0(vp)
-	la	vp, 4(vp)
-
-	alr	%r9, %r9
-	slbr	%r1, %r1
-	ADDSUBC	%r5, %r9
-	slbr	%r13, %r13
-
-	st	%r5, 0(rp)
-	la	rp, 4(rp)
-	brct	%r0, L(top)
-	j	L(end)
-
-L(b2):	lm	%r5, %r6, 0(up)
-	la	up, 8(up)
-	lm	%r9, %r10, 0(vp)
-	la	vp, 8(vp)
-
-	alr	%r9, %r9
-	alcr	%r10, %r10
-	slbr	%r1, %r1
-
-	ADDSUBC	%r5, %r9
-	ADDSUBE	%r6, %r10
-	slbr	%r13, %r13
-
-	stm	%r5, %r6, 0(rp)
-	la	rp, 8(rp)
-	brct	%r0, L(top)
-	j	L(end)
-
-L(top):	lm	%r9, %r12, 0(vp)
-	la	vp, 16(vp)
-
-	ahi	%r1, 1			C restore carry
-
-	alcr	%r9, %r9
-	alcr	%r10, %r10
-	alcr	%r11, %r11
-	alcr	%r12, %r12
-
-	slbr	%r1, %r1		C save carry
-
-	lm	%r5, %r8, 0(up)
-	la	up, 16(up)
-
-	ahi	%r13, 1			C restore carry
-
-	ADDSUBE	%r5, %r9
-	ADDSUBE	%r6, %r10
-	ADDSUBE	%r7, %r11
-	ADDSUBE	%r8, %r12
-
-	slbr	%r13, %r13
-
-	stm	%r5, %r8, 0(rp)
-	la	rp, 16(rp)
-	brct	%r0, L(top)
-
-L(end):
-	RETVAL
-	lm	%r6, %r13, 24(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_32/esame/bdiv_dbm1c.asm b/gmp/mpn/s390_32/esame/bdiv_dbm1c.asm
deleted file mode 100644
index 568a2a44e8..0000000000
--- a/gmp/mpn/s390_32/esame/bdiv_dbm1c.asm
+++ /dev/null
@@ -1,65 +0,0 @@
-dnl  S/390-32 mpn_bdiv_dbm1c for systems with MLR instruction.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		14
-C z990		10
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C INPUT PARAMETERS
-define(`qp',	  `%r2')
-define(`up',	  `%r3')
-define(`n',	  `%r4')
-define(`bd',	  `%r5')
-define(`cy',	  `%r6')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_bdiv_dbm1c)
-	stm	%r6, %r7, 24(%r15)
-	lhi	%r7, 0			C zero index register
-
-L(top):	l	%r1, 0(%r7,up)
-	mlr	%r0, bd
-	slr	%r6, %r1
-	st	%r6, 0(%r7,qp)
-	slbr	%r6, %r0
-	la	%r7, 4(%r7)
-	brct	n, L(top)
-
-	lr	%r2, %r6
-	lm	%r6, %r7, 24(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_32/esame/gmp-mparam.h b/gmp/mpn/s390_32/esame/gmp-mparam.h
deleted file mode 100644
index a805fa1492..0000000000
--- a/gmp/mpn/s390_32/esame/gmp-mparam.h
+++ /dev/null
@@ -1,207 +0,0 @@
-/* S/390-32 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2008-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 4400 MHz IBM z10 running in 32-bit mode */
-/* FFT tuning limit = 15000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.7 */
-
-#define DIVREM_1_NORM_THRESHOLD              3
-#define DIVREM_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define MOD_1_1P_METHOD                      1
-#define MOD_1_NORM_THRESHOLD                 4
-#define MOD_1_UNNORM_THRESHOLD               8
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         16
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          8
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        38
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     30
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              3
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           56
-
-#define MUL_TOOM22_THRESHOLD                 8
-#define MUL_TOOM33_THRESHOLD                59
-#define MUL_TOOM44_THRESHOLD                88
-#define MUL_TOOM6H_THRESHOLD               125
-#define MUL_TOOM8H_THRESHOLD               169
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      57
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      55
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      57
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      56
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      82
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 14
-#define SQR_TOOM3_THRESHOLD                 90
-#define SQR_TOOM4_THRESHOLD                144
-#define SQR_TOOM6_THRESHOLD                196
-#define SQR_TOOM8_THRESHOLD                309
-
-#define MULMID_TOOM42_THRESHOLD             24
-
-#define MULMOD_BNM1_THRESHOLD               12
-#define SQRMOD_BNM1_THRESHOLD               10
-
-#define MUL_FFT_MODF_THRESHOLD             252  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    252, 5}, {      9, 6}, {      5, 5}, {     13, 6}, \
-    {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
-    {     15, 7}, {      9, 6}, {     19, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 8}, {      7, 7}, {     15, 6}, \
-    {     31, 7}, {     19, 8}, {     11, 7}, {     23, 9}, \
-    {      7, 8}, {     15, 7}, {     31, 8}, {     19, 7}, \
-    {     39, 8}, {     27, 9}, {     15, 8}, {     39, 9}, \
-    {     23,10}, {     15, 9}, {     31, 8}, {     63, 9}, \
-    {     39, 8}, {     79, 9}, {     47,10}, {     31, 9}, \
-    {     79,10}, {     47,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255,10}, {     79, 8}, {    319, 9}, \
-    {    175, 8}, {    351, 7}, {    703, 6}, {   1407,10}, \
-    {     95, 9}, {    191, 8}, {    383, 9}, {    207, 8}, \
-    {    415, 7}, {    831, 9}, {    223,11}, {     63,10}, \
-    {    127, 9}, {    255,10}, {    143, 9}, {    287, 8}, \
-    {    575, 9}, {    319,10}, {    175, 9}, {    351, 8}, \
-    {    703, 7}, {   1407,11}, {     95,10}, {    191, 9}, \
-    {    383,10}, {    207, 9}, {    415, 8}, {    831,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    575,11}, \
-    {    159,10}, {    351, 9}, {    703, 8}, {   1407,11}, \
-    {    191,10}, {    415, 9}, {    831,11}, {    223,10}, \
-    {    447, 9}, {    895, 8}, {   1791,10}, {    479, 9}, \
-    {    959,12}, {    127,11}, {    287,10}, {    575,11}, \
-    {    351,10}, {    703, 9}, {   1407,12}, {    191,11}, \
-    {    415,10}, {    831,11}, {    447,10}, {    895, 9}, \
-    {   1791,11}, {    479,10}, {    959,13}, {    127,12}, \
-    {    255,11}, {    575,12}, {    319,11}, {    703,10}, \
-    {   1407,12}, {    383,11}, {    831,12}, {    447,11}, \
-    {    895,10}, {   1791,11}, {    959,10}, {   1919, 9}, \
-    {   3839,13}, {    255,12}, {    575,11}, {   1215,10}, \
-    {   2431,12}, {    703,11}, {   1407,13}, {    383,12}, \
-    {    895,11}, {   1791,12}, {    959,11}, {   1919,10}, \
-    {   3839,14}, {    255,13}, {    511,12}, {   1215,11}, \
-    {   2431,13}, {    639,12}, {   1471,13}, {    895,12}, \
-    {   1919,11}, {   3839,10}, {   7679,14}, {    511,13}, \
-    {   1023,12}, {   2047,13}, {   1151,12}, {   2431,13}, \
-    {   1407,14}, {    767,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 149
-#define MUL_FFT_THRESHOLD                 2240
-
-#define SQR_FFT_MODF_THRESHOLD             244  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    244, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
-    {      7, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
-    {     11, 5}, {     23, 6}, {     13, 7}, {      7, 6}, \
-    {     16, 7}, {      9, 6}, {     19, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
-    {     11, 7}, {     23, 9}, {      7, 8}, {     15, 7}, \
-    {     31, 8}, {     19, 7}, {     39, 8}, {     23, 9}, \
-    {     15, 8}, {     39, 9}, {     23,10}, {     15, 9}, \
-    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
-    {     47,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
-    {     71, 8}, {    143, 7}, {    287,10}, {     47,11}, \
-    {     31,10}, {     63, 9}, {    127, 8}, {    255, 9}, \
-    {    143, 8}, {    287,10}, {     79, 9}, {    159, 8}, \
-    {    319, 9}, {    175, 8}, {    351, 7}, {    703, 6}, \
-    {   1407,10}, {     95, 9}, {    191, 8}, {    383,11}, \
-    {     63,10}, {    127, 9}, {    255,10}, {    143, 9}, \
-    {    287, 8}, {    575,10}, {    159, 9}, {    319,10}, \
-    {    175, 9}, {    351, 8}, {    703, 7}, {   1407,11}, \
-    {     95,10}, {    191, 9}, {    383,10}, {    207,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
-    {    287, 9}, {    575,11}, {    159,10}, {    351, 9}, \
-    {    703, 8}, {   1407,11}, {    191,10}, {    415,11}, \
-    {    223,10}, {    447, 9}, {    895, 8}, {   1791,10}, \
-    {    479, 9}, {    959,12}, {    127,11}, {    255,10}, \
-    {    511,11}, {    287,10}, {    575,11}, {    319,10}, \
-    {    639,11}, {    351,10}, {    703, 9}, {   1407,12}, \
-    {    191,11}, {    415,10}, {    831,11}, {    447,10}, \
-    {    895, 9}, {   1791,11}, {    479,13}, {    127,12}, \
-    {    255,11}, {    575,12}, {    319,11}, {    703,10}, \
-    {   1407,12}, {    383,11}, {    831,12}, {    447,11}, \
-    {    895,10}, {   1791,11}, {    959,10}, {   1919,13}, \
-    {    255,12}, {    511,11}, {   1023,12}, {    575,11}, \
-    {   1215,10}, {   2431,12}, {    703,11}, {   1407,13}, \
-    {    383,12}, {    895,11}, {   1791,12}, {    959,11}, \
-    {   1919,14}, {    255,13}, {    511,12}, {   1215,11}, \
-    {   2431,13}, {    639,12}, {   1471,11}, {   2943,10}, \
-    {   5887,13}, {    895,12}, {   1919,11}, {   3839,10}, \
-    {   7679,14}, {    511,13}, {   1023,12}, {   2047,13}, \
-    {   1151,12}, {   2431,13}, {   1407,12}, {   2943,11}, \
-    {   5887,14}, {    767,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 161
-#define SQR_FFT_THRESHOLD                 1728
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  33
-#define MULLO_MUL_N_THRESHOLD             4392
-
-#define DC_DIV_QR_THRESHOLD                 43
-#define DC_DIVAPPR_Q_THRESHOLD             150
-#define DC_BDIV_QR_THRESHOLD                38
-#define DC_BDIV_Q_THRESHOLD                107
-
-#define INV_MULMOD_BNM1_THRESHOLD           14
-#define INV_NEWTON_THRESHOLD               165
-#define INV_APPR_THRESHOLD                 149
-
-#define BINV_NEWTON_THRESHOLD              147
-#define REDC_1_TO_REDC_N_THRESHOLD          43
-
-#define MU_DIV_QR_THRESHOLD                777
-#define MU_DIVAPPR_Q_THRESHOLD             942
-#define MUPI_DIV_QR_THRESHOLD               69
-#define MU_BDIV_QR_THRESHOLD               654
-#define MU_BDIV_Q_THRESHOLD                777
-
-#define POWM_SEC_TABLE  3,32,126,692,1486
-
-#define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                     103
-#define HGCD_APPR_THRESHOLD                144
-#define HGCD_REDUCE_THRESHOLD             1437
-#define GCD_DC_THRESHOLD                   275
-#define GCDEXT_DC_THRESHOLD                206
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                 9
-#define GET_STR_PRECOMPUTE_THRESHOLD        20
-#define SET_STR_DC_THRESHOLD               532
-#define SET_STR_PRECOMPUTE_THRESHOLD       999
-
-#define FAC_DSC_THRESHOLD                  156
-#define FAC_ODD_THRESHOLD                   24
diff --git a/gmp/mpn/s390_32/esame/mul_1.asm b/gmp/mpn/s390_32/esame/mul_1.asm
deleted file mode 100644
index 04be963651..0000000000
--- a/gmp/mpn/s390_32/esame/mul_1.asm
+++ /dev/null
@@ -1,66 +0,0 @@
-dnl  S/390-32 mpn_mul_1 for systems with MLR instruction
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		14
-C z990		 9
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-define(`v0',	`%r5')
-
-ASM_START()
-PROLOGUE(mpn_mul_1)
-	stm	%r11, %r12, 44(%r15)
-	lhi	%r12, 0			C zero index register
-	ahi	%r12, 0			C clear carry flag
-	lhi	%r11, 0			C clear carry limb
-
-L(top):	l	%r1, 0(%r12,up)
-	mlr	%r0, v0
-	alcr	%r1, %r11
-	lr	%r11, %r0		C copy high part to carry limb
-	st	%r1, 0(%r12,rp)
-	la	%r12, 4(%r12)
-	brct	n, L(top)
-
-	lhi	%r2, 0
-	alcr	%r2, %r11
-
-	lm	%r11, %r12, 44(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_32/esame/mul_basecase.asm b/gmp/mpn/s390_32/esame/mul_basecase.asm
deleted file mode 100644
index 2c8138d8d2..0000000000
--- a/gmp/mpn/s390_32/esame/mul_basecase.asm
+++ /dev/null
@@ -1,130 +0,0 @@
-dnl  S/390-32/esame mpn_mul_basecase.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 ?
-C z990		 ?
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C TODO
-C  * Perhaps add special case for un <= 2.
-C  * Replace loops by faster code.  The mul_1 and addmul_1 loops could be sped
-C    up by about 10%.
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`un',	`%r4')
-define(`vp',	`%r5')
-define(`vn',	`%r6')
-
-define(`zero',	`%r8')
-
-ASM_START()
-PROLOGUE(mpn_mul_basecase)
-	chi	un, 2
-	jhe	L(ge2)
-
-C un = vn = 1
-	l	%r1, 0(vp)
-	ml	%r0, 0(up)
-	st	%r1, 0(rp)
-	st	%r0, 4(rp)
-	br	%r14
-
-L(ge2):	C jne	L(gen)
-
-
-L(gen):
-C mul_1 =======================================================================
-
-	stm	%r6, %r12, 24(%r15)
-	lhi	zero, 0
-	ahi	un, -1
-
-	l	%r7, 0(vp)
-	l	%r11, 0(up)
-	lhi	%r12, 4			C init index register
-	mlr	%r10, %r7
-	lr	%r9, un
-	st	%r11, 0(rp)
-	cr	%r15, %r15		C clear carry flag
-
-L(tm):	l	%r1, 0(%r12,up)
-	mlr	%r0, %r7
-	alcr	%r1, %r10
-	lr	%r10, %r0		C copy high part to carry limb
-	st	%r1, 0(%r12,rp)
-	la	%r12, 4(%r12)
-	brct	%r9, L(tm)
-
-	alcr	%r0, zero
-	st	%r0, 0(%r12,rp)
-
-C addmul_1 loop ===============================================================
-
-	ahi	vn, -1
-	je	L(outer_end)
-L(outer_loop):
-
-	la	rp, 4(rp)		C rp += 1
-	la	vp, 4(vp)		C up += 1
-	l	%r7, 0(vp)
-	l	%r11, 0(up)
-	lhi	%r12, 4			C init index register
-	mlr	%r10, %r7
-	lr	%r9, un
-	al	%r11, 0(rp)
-	st	%r11, 0(rp)
-
-L(tam):	l	%r1, 0(%r12,up)
-	l	%r11, 0(%r12,rp)
-	mlr	%r0, %r7
-	alcr	%r1, %r11
-	alcr	%r0, zero
-	alr	%r1, %r10
-	lr	%r10, %r0
-	st	%r1, 0(%r12,rp)
-	la	%r12, 4(%r12)
-	brct	%r9, L(tam)
-
-	alcr	%r0, zero
-	st	%r0, 0(%r12,rp)
-
-	brct	vn, L(outer_loop)
-L(outer_end):
-
-	lm	%r6, %r12, 24(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_32/esame/sqr_basecase.asm b/gmp/mpn/s390_32/esame/sqr_basecase.asm
deleted file mode 100644
index dcc13112bf..0000000000
--- a/gmp/mpn/s390_32/esame/sqr_basecase.asm
+++ /dev/null
@@ -1,203 +0,0 @@
-dnl  S/390-32 mpn_sqr_basecase.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 ?
-C z990		23
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C TODO
-C  * Clean up.
-C  * Stop iterating addmul_1 loop at latest for n = 2, implement longer tail.
-C    This will ask for basecase handling of n = 3.
-C  * Update counters and pointers more straightforwardly, possibly lowering
-C    register usage.
-C  * Should we use this allocation-free style for more sqr_basecase asm
-C    implementations?  The only disadvantage is that it requires R != U.
-C  * Replace loops by faster code.  The mul_1 and addmul_1 loops could be sped
-C    up by about 10%.  The sqr_diag_addlsh1 loop could probably be sped up even
-C    more.
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-
-define(`zero',	`%r8')
-define(`rp_saved',	`%r9')
-define(`up_saved',	`%r13')
-define(`n_saved',	`%r14')
-
-ASM_START()
-PROLOGUE(mpn_sqr_basecase)
-	ahi	n, -2
-	jhe	L(ge2)
-
-C n = 1
-	l	%r5, 0(up)
-	mlr	%r4, %r5
-	st	%r5, 0(rp)
-	st	%r4, 4(rp)
-	br	%r14
-
-L(ge2):	jne	L(gen)
-
-C n = 2
-	stm	%r6, %r8, 24(%r15)
-	lhi	zero, 0
-
-	l	%r5, 0(up)
-	mlr	%r4, %r5		C u0 * u0
-	l	%r1, 4(up)
-	mlr	%r0, %r1		C u1 * u1
-	st	%r5, 0(rp)
-
-	l	%r7, 0(up)
-	ml	%r6, 4(up)		C u0 * u1
-	alr	%r7, %r7
-	alcr	%r6, %r6
-	alcr	%r0, zero
-
-	alr	%r4, %r7
-	alcr	%r1, %r6
-	alcr	%r0, zero
-	st	%r4, 4(rp)
-	st	%r1, 8(rp)
-	st	%r0, 12(rp)
-
-	lm	%r6, %r8, 24(%r15)
-	br	%r14
-
-L(gen):
-C mul_1 =======================================================================
-
-	stm	%r6, %r14, 24(%r15)
-	lhi	zero, 0
-	lr	up_saved, up
-	lr	rp_saved, rp
-	lr	n_saved, n
-
-	l	%r6, 0(up)
-	l	%r11, 4(up)
-	lhi	%r12, 8		C init index register
-	mlr	%r10, %r6
-	lr	%r5, n
-	st	%r11, 4(rp)
-	cr	%r15, %r15		C clear carry flag
-
-L(tm):	l	%r1, 0(%r12,up)
-	mlr	%r0, %r6
-	alcr	%r1, %r10
-	lr	%r10, %r0		C copy high part to carry limb
-	st	%r1, 0(%r12,rp)
-	la	%r12, 4(%r12)
-	brct	%r5, L(tm)
-
-	alcr	%r0, zero
-	st	%r0, 0(%r12,rp)
-
-C addmul_1 loop ===============================================================
-
-	ahi	n, -1
-	je	L(outer_end)
-L(outer_loop):
-
-	la	rp, 8(rp)		C rp += 2
-	la	up, 4(up)		C up += 1
-	l	%r6, 0(up)
-	l	%r11, 4(up)
-	lhi	%r12, 8		C init index register
-	mlr	%r10, %r6
-	lr	%r5, n
-	al	%r11, 4(rp)
-	st	%r11, 4(rp)
-
-L(tam):	l	%r1, 0(%r12,up)
-	l	%r7, 0(%r12,rp)
-	mlr	%r0, %r6
-	alcr	%r1, %r7
-	alcr	%r0, zero
-	alr	%r1, %r10
-	lr	%r10, %r0
-	st	%r1, 0(%r12,rp)
-	la	%r12, 4(%r12)
-	brct	%r5, L(tam)
-
-	alcr	%r0, zero
-	st	%r0, 0(%r12,rp)
-
-	brct	n, L(outer_loop)
-L(outer_end):
-
-	l	%r6, 4(up)
-	l	%r1, 8(up)
-	lr	%r7, %r0		C Same as: l %r7, 12(,rp)
-	mlr	%r0, %r6
-	alr	%r1, %r7
-	alcr	%r0, zero
-	st	%r1, 12(rp)
-	st	%r0, 16(rp)
-
-C sqr_dia_addlsh1 ============================================================
-
-define(`up', `up_saved')
-define(`rp', `rp_saved')
-	la	n, 1(n_saved)
-
-	l	%r1, 0(up)
-	mlr	%r0, %r1
-	st	%r1, 0(rp)
-C	clr	%r15, %r15		C clear carry (already clear per above)
-
-L(top):	l	%r11, 4(up)
-	la	up, 4(up)
-	l	%r6, 4(rp)
-	l	%r7, 8(rp)
-	mlr	%r10, %r11
-	alcr	%r6, %r6
-	alcr	%r7, %r7
-	alcr	%r10, zero		C propagate carry to high product limb
-	alr	%r6, %r0
-	alcr	%r7, %r11
-	stm	%r6, %r7, 4(rp)
-	la	rp, 8(rp)
-	lr	%r0, %r10		C copy carry limb
-	brct	n, L(top)
-
-	alcr	%r0, zero
-	st	%r0, 4(rp)
-
-	lm	%r6, %r14, 24(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_32/esame/submul_1.asm b/gmp/mpn/s390_32/esame/submul_1.asm
deleted file mode 100644
index a71e57e230..0000000000
--- a/gmp/mpn/s390_32/esame/submul_1.asm
+++ /dev/null
@@ -1,70 +0,0 @@
-dnl  S/390-32 mpn_submul_1 for systems with MLR instruction.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		20
-C z990		11
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-define(`v0',	`%r5')
-
-ASM_START()
-PROLOGUE(mpn_submul_1)
-	stm	%r9, %r12, 36(%r15)
-	lhi	%r12, 0
-	slr	%r11, %r11
-
-L(top):	l	%r1, 0(%r12, up)
-	l	%r10, 0(%r12, rp)
-	mlr	%r0, v0
-	slbr	%r10, %r1
-	slbr	%r9, %r9
-	slr	%r0, %r9		C conditional incr
-	slr	%r10, %r11
-	lr	%r11, %r0
-	st	%r10, 0(%r12, rp)
-	la	%r12, 4(%r12)
-	brct	%r4,  L(top)
-
-	lr	%r2, %r11
-	slbr	%r9, %r9
-	slr	%r2, %r9
-
-	lm	%r9, %r12, 36(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_32/gmp-mparam.h b/gmp/mpn/s390_32/gmp-mparam.h
deleted file mode 100644
index 1aca74a818..0000000000
--- a/gmp/mpn/s390_32/gmp-mparam.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/* S/390-32 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 770 MHz IBM z900 running in 32-bit mode, using just traditional insns */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            5
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               5
-#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD         15
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        30
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
-
-#define MUL_TOOM22_THRESHOLD                19
-#define MUL_TOOM33_THRESHOLD               114
-#define MUL_TOOM44_THRESHOLD               166
-#define MUL_TOOM6H_THRESHOLD               226
-#define MUL_TOOM8H_THRESHOLD               333
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     106
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     122
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     113
-
-#define SQR_BASECASE_THRESHOLD               7
-#define SQR_TOOM2_THRESHOLD                 40
-#define SQR_TOOM3_THRESHOLD                126
-#define SQR_TOOM4_THRESHOLD                192
-#define SQR_TOOM6_THRESHOLD                246
-#define SQR_TOOM8_THRESHOLD                357
-
-#define MULMID_TOOM42_THRESHOLD             28
-
-#define MULMOD_BNM1_THRESHOLD               12
-#define SQRMOD_BNM1_THRESHOLD               18
-
-#define MUL_FFT_MODF_THRESHOLD             244  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    244, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
-    {      8, 5}, {     17, 6}, {     13, 7}, {      7, 6}, \
-    {     16, 7}, {      9, 6}, {     19, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 8}, {      7, 7}, {     19, 8}, \
-    {     11, 7}, {     25, 9}, {      7, 8}, {     15, 7}, \
-    {     33, 8}, {     19, 7}, {     39, 8}, {     23, 7}, \
-    {     47, 8}, {     27, 9}, {     15, 8}, {     39, 9}, \
-    {     23, 8}, {     47,10}, {     15, 9}, {     31, 8}, \
-    {     63, 9}, {     39, 8}, {     79, 9}, {     47,10}, \
-    {     31, 9}, {     63, 8}, {    127, 9}, {     71, 8}, \
-    {    143, 9}, {     79,10}, {     47,11}, {   2048,12}, \
-    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 48
-#define MUL_FFT_THRESHOLD                 2688
-
-#define SQR_FFT_MODF_THRESHOLD             216  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    216, 5}, {      7, 4}, {     15, 5}, {     17, 6}, \
-    {     13, 7}, {      7, 6}, {     17, 7}, {      9, 6}, \
-    {     20, 7}, {     11, 6}, {     23, 7}, {     13, 8}, \
-    {      7, 7}, {     19, 8}, {     11, 7}, {     25, 9}, \
-    {      7, 8}, {     15, 7}, {     33, 8}, {     19, 7}, \
-    {     39, 8}, {     23, 9}, {     15, 8}, {     39, 9}, \
-    {     23, 8}, {     47,10}, {     15, 9}, {     31, 8}, \
-    {     63, 9}, {     39, 8}, {     79, 9}, {     47,10}, \
-    {     31, 9}, {     63, 8}, {    127, 9}, {     71, 8}, \
-    {    143, 9}, {     79,10}, {     47,11}, {   2048,12}, \
-    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 44
-#define SQR_FFT_THRESHOLD                 1856
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  61
-#define MULLO_MUL_N_THRESHOLD             5240
-
-#define DC_DIV_QR_THRESHOLD                 70
-#define DC_DIVAPPR_Q_THRESHOLD             234
-#define DC_BDIV_QR_THRESHOLD                59
-#define DC_BDIV_Q_THRESHOLD                137
-
-#define INV_MULMOD_BNM1_THRESHOLD           36
-#define INV_NEWTON_THRESHOLD               327
-#define INV_APPR_THRESHOLD                 268
-
-#define BINV_NEWTON_THRESHOLD              324
-#define REDC_1_TO_REDC_N_THRESHOLD          63
-
-#define MU_DIV_QR_THRESHOLD               1099
-#define MU_DIVAPPR_Q_THRESHOLD            1360
-#define MUPI_DIV_QR_THRESHOLD              138
-#define MU_BDIV_QR_THRESHOLD               889
-#define MU_BDIV_Q_THRESHOLD               1234
-
-#define MATRIX22_STRASSEN_THRESHOLD         18
-#define HGCD_THRESHOLD                     167
-#define GCD_DC_THRESHOLD                   518
-#define GCDEXT_DC_THRESHOLD                378
-#define JACOBI_BASE_METHOD                   2
-
-#define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        25
-#define SET_STR_DC_THRESHOLD               577
-#define SET_STR_PRECOMPUTE_THRESHOLD      1217
diff --git a/gmp/mpn/s390_32/logops_n.asm b/gmp/mpn/s390_32/logops_n.asm
deleted file mode 100644
index 1f2cd2a8f6..0000000000
--- a/gmp/mpn/s390_32/logops_n.asm
+++ /dev/null
@@ -1,295 +0,0 @@
-dnl  S/390-32 logops.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb     variant 1           variant 2       variant 3
-C	        rp!=up  rp=up
-C z900		 ?	 ?		 ?		 ?
-C z990		 2.5	 1		 2.75		 2.75
-C z9		 ?			 ?		 ?
-C z10		 ?			 ?		 ?
-C z196		 ?			 ?		 ?
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`vp',	`%r4')
-define(`nn',	`%r5')
-
-ifdef(`OPERATION_and_n',`
-  define(`func',`mpn_and_n')
-  define(`VARIANT_1')
-  define(`LOGOPC',`nc')
-  define(`LOGOP',`n')')
-ifdef(`OPERATION_andn_n',`
-  define(`func',`mpn_andn_n')
-  define(`VARIANT_2')
-  define(`LOGOP',`n')')
-ifdef(`OPERATION_nand_n',`
-  define(`func',`mpn_nand_n')
-  define(`VARIANT_3')
-  define(`LOGOP',`n')')
-ifdef(`OPERATION_ior_n',`
-  define(`func',`mpn_ior_n')
-  define(`VARIANT_1')
-  define(`LOGOPC',`oc')
-  define(`LOGOP',`o')')
-ifdef(`OPERATION_iorn_n',`
-  define(`func',`mpn_iorn_n')
-  define(`VARIANT_2')
-  define(`LOGOP',`o')')
-ifdef(`OPERATION_nior_n',`
-  define(`func',`mpn_nior_n')
-  define(`VARIANT_3')
-  define(`LOGOP',`o')')
-ifdef(`OPERATION_xor_n',`
-  define(`func',`mpn_xor_n')
-  define(`VARIANT_1')
-  define(`LOGOPC',`xc')
-  define(`LOGOP',`x')')
-ifdef(`OPERATION_xnor_n',`
-  define(`func',`mpn_xnor_n')
-  define(`VARIANT_2')
-  define(`LOGOP',`x')')
-
-MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
-
-ASM_START()
-PROLOGUE(func)
-ifdef(`VARIANT_1',`
-	cr	rp, up			C variant 1: rp[] = up[] OP vp[]; is rp == up?
-	jne	L(normal)
-
-	sll	nn, 2			C in-place path: limb count to byte count
-	ahi	nn, -1			C nn = byte count minus 1
-	lr	%r1, nn
-	srl	%r1, 8			C r1 = (bytes-1)/256, chunks before the tail
-	ltr	%r1, %r1		C < 256 bytes to process?
-	je	L(1)
-
-L(tp):	LOGOPC	0(256, rp), 0(vp)	C combine 256 bytes of rp[] with vp[] in storage
-	la	rp, 256(rp)
-	la	vp, 256(vp)
-	brct	%r1, L(tp)
-
-L(1):	bras	%r1, L(2)		C make r1 point to LOGOPC insn
-	LOGOPC	0(1, rp), 0(vp)
-L(2):	ex	nn, 0(%r1)		C execute LOGOPC with length ((bytes-1) mod 256)+1
-L(rtn):	br	%r14
-
-
-L(normal):
-	stm	%r6, %r8, 12(%r15)	C save r6-r8, restored before return
-	ahi	nn, 3			C bias count so the shift below rounds up
-	lhi	%r7, 3
-	lr	%r0, nn
-	srl	%r0, 2			C r0 = (n+3)/4, loop count for brct
-	nr	%r7, nn			C (n+3) mod 4, nn was biased by 3 above
-	je	L(b1)			C n mod 4 == 1
-	chi	%r7, 2
-	jl	L(b2)			C n mod 4 == 2
-	jne	L(top)			C n mod 4 == 0, else fall into b3
-
-L(b3):	lm	%r5, %r7, 0(up)		C feed-in, 3 limbs
-	la	up, 12(up)
-	LOGOP	%r5, 0(vp)
-	LOGOP	%r6, 4(vp)
-	LOGOP	%r7, 8(vp)
-	stm	%r5, %r7, 0(rp)
-	la	rp, 12(rp)
-	la	vp, 12(vp)
-	j	L(mid)
-
-L(b1):	l	%r5, 0(up)		C feed-in, 1 limb
-	la	up, 4(up)
-	LOGOP	%r5, 0(vp)
-	st	%r5, 0(rp)
-	la	rp, 4(rp)
-	la	vp, 4(vp)
-	j	L(mid)
-
-L(b2):	lm	%r5, %r6, 0(up)		C feed-in, 2 limbs
-	la	up, 8(up)
-	LOGOP	%r5, 0(vp)
-	LOGOP	%r6, 4(vp)
-	stm	%r5, %r6, 0(rp)
-	la	rp, 8(rp)
-	la	vp, 8(vp)
-	j	L(mid)
-
-L(top):	lm	%r5, %r8, 0(up)		C main loop, 4 limbs per pass
-	la	up, 16(up)
-	LOGOP	%r5, 0(vp)
-	LOGOP	%r6, 4(vp)
-	LOGOP	%r7, 8(vp)
-	LOGOP	%r8, 12(vp)
-	stm	%r5, %r8, 0(rp)
-	la	rp, 16(rp)
-	la	vp, 16(vp)
-L(mid):	brct	%r0, L(top)		C count r0 down, loop while nonzero
-
-	lm	%r6, %r8, 12(%r15)	C restore r6-r8
-	br	%r14
-')
-
-ifdef(`VARIANT_2',`
-	stm	%r6, %r8, 12(%r15)	C variant 2: rp[] = up[] OP ~vp[]; save r6-r8
-	lhi	%r1, -1			C r1 = all ones, xor mask used to complement
-
-	ahi	nn, 3			C bias count so the shift below rounds up
-	lhi	%r7, 3
-	lr	%r0, nn
-	srl	%r0, 2			C r0 = (n+3)/4, loop count for brct
-	nr	%r7, nn			C (n+3) mod 4, nn was biased by 3 above
-	je	L(b1)			C n mod 4 == 1
-	chi	%r7, 2
-	jl	L(b2)			C n mod 4 == 2
-	jne	L(top)			C n mod 4 == 0, else fall into b3
-
-L(b3):	lm	%r5, %r7, 0(vp)		C feed-in, 3 limbs
-	la	vp, 12(vp)
-	xr	%r5, %r1		C complement the vp limbs first
-	xr	%r6, %r1
-	xr	%r7, %r1
-	LOGOP	%r5, 0(up)
-	LOGOP	%r6, 4(up)
-	LOGOP	%r7, 8(up)
-	stm	%r5, %r7, 0(rp)
-	la	rp, 12(rp)
-	la	up, 12(up)
-	j	L(mid)
-
-L(b1):	l	%r5, 0(vp)		C feed-in, 1 limb
-	la	vp, 4(vp)
-	xr	%r5, %r1
-	LOGOP	%r5, 0(up)
-	st	%r5, 0(rp)
-	la	rp, 4(rp)
-	la	up, 4(up)
-	j	L(mid)
-
-L(b2):	lm	%r5, %r6, 0(vp)		C feed-in, 2 limbs
-	la	vp, 8(vp)
-	xr	%r5, %r1
-	xr	%r6, %r1
-	LOGOP	%r5, 0(up)
-	LOGOP	%r6, 4(up)
-	stm	%r5, %r6, 0(rp)
-	la	rp, 8(rp)
-	la	up, 8(up)
-	j	L(mid)
-
-L(top):	lm	%r5, %r8, 0(vp)		C main loop, 4 limbs per pass
-	la	vp, 16(vp)
-	xr	%r5, %r1
-	xr	%r6, %r1
-	xr	%r7, %r1
-	xr	%r8, %r1
-	LOGOP	%r5, 0(up)
-	LOGOP	%r6, 4(up)
-	LOGOP	%r7, 8(up)
-	LOGOP	%r8, 12(up)
-	la	up, 16(up)
-	stm	%r5, %r8, 0(rp)
-	la	rp, 16(rp)
-L(mid):	brct	%r0, L(top)		C count r0 down, loop while nonzero
-
-	lm	%r6, %r8, 12(%r15)	C restore r6-r8
-	br	%r14
-')
-
-ifdef(`VARIANT_3',`
-	stm	%r6, %r8, 12(%r15)	C variant 3: rp[] = ~(up[] OP vp[]); save r6-r8
-	lhi	%r1, -1			C r1 = all ones, xor mask used to complement
-
-	ahi	nn, 3			C bias count so the shift below rounds up
-	lhi	%r7, 3
-	lr	%r0, nn
-	srl	%r0, 2			C r0 = (n+3)/4, loop count for brct
-	nr	%r7, nn			C (n+3) mod 4, nn was biased by 3 above
-	je	L(b1)			C n mod 4 == 1
-	chi	%r7, 2
-	jl	L(b2)			C n mod 4 == 2
-	jne	L(top)			C n mod 4 == 0, else fall into b3
-
-L(b3):	lm	%r5, %r7, 0(vp)		C feed-in, 3 limbs
-	la	vp, 12(vp)
-	LOGOP	%r5, 0(up)
-	LOGOP	%r6, 4(up)
-	xr	%r5, %r1		C complement the combined result
-	xr	%r6, %r1
-	LOGOP	%r7, 8(up)
-	xr	%r7, %r1
-	stm	%r5, %r7, 0(rp)
-	la	rp, 12(rp)
-	la	up, 12(up)
-	j	L(mid)
-
-L(b1):	l	%r5, 0(vp)		C feed-in, 1 limb
-	la	vp, 4(vp)
-	LOGOP	%r5, 0(up)
-	xr	%r5, %r1
-	st	%r5, 0(rp)
-	la	rp, 4(rp)
-	la	up, 4(up)
-	j	L(mid)
-
-L(b2):	lm	%r5, %r6, 0(vp)		C feed-in, 2 limbs
-	la	vp, 8(vp)
-	LOGOP	%r5, 0(up)
-	LOGOP	%r6, 4(up)
-	xr	%r5, %r1
-	xr	%r6, %r1
-	stm	%r5, %r6, 0(rp)
-	la	rp, 8(rp)
-	la	up, 8(up)
-	j	L(mid)
-
-L(top):	lm	%r5, %r8, 0(vp)		C main loop, 4 limbs per pass
-	la	vp, 16(vp)
-	LOGOP	%r5, 0(up)
-	LOGOP	%r6, 4(up)
-	xr	%r5, %r1
-	xr	%r6, %r1
-	LOGOP	%r7, 8(up)
-	LOGOP	%r8, 12(up)
-	xr	%r7, %r1
-	xr	%r8, %r1
-	stm	%r5, %r8, 0(rp)
-	la	up, 16(up)
-	la	rp, 16(rp)
-L(mid):	brct	%r0, L(top)		C count r0 down, loop while nonzero
-
-	lm	%r6, %r8, 12(%r15)	C restore r6-r8
-	br	%r14
-')
-
-EPILOGUE()
diff --git a/gmp/mpn/s390_32/lshift.asm b/gmp/mpn/s390_32/lshift.asm
deleted file mode 100644
index da7d76e844..0000000000
--- a/gmp/mpn/s390_32/lshift.asm
+++ /dev/null
@@ -1,144 +0,0 @@
-dnl  S/390-32 mpn_lshift.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 6
-C z990	         3
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C TODO
-C  *
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-define(`cnt',	`%r5')
-
-ASM_START()
-PROLOGUE(mpn_lshift)
-	lr	%r1, n
-	sll	%r1, 2			C r1 = 4*n, operand size in bytes
-	stm	%r6, %r12, 24(%r15)	C save r6-r12, restored before return
-	la	up, 0(%r1,up)		C put up near end of U
-	la	rp, 0(%r1,rp)		C put rp near end of R
-	ahi	up, -20			C now 16(up) is the most significant limb
-	ahi	rp, -16			C now 12(rp) is the most significant result limb
-	lhi	%r8, 32
-	sr	%r8, cnt		C r8 = 32 - cnt
-	l	%r12, 16(up)
-	srl	%r12, 0(%r8)		C return value, bits shifted out of the top limb
-	lhi	%r7, 3
-	nr	%r7, n			C n mod 4
-	srl	n, 2			C n = n/4; je below still tests the nr result
-	je	L(b0)
-	chi	%r7, 2
-	jl	L(b1)
-	je	L(b2)
-
-L(b3):	l	%r10, 16(up)		C n mod 4 == 3: two limbs here, one at L(end)
-	l	%r11, 12(up)
-	l	%r9,   8(up)
-	ahi	up, -8
-	lr	%r8, %r11
-	sldl	%r10, 0(cnt)		C shift register pair r10:r11 as one 64-bit value
-	sldl	%r8,  0(cnt)
-	st	%r10, 12(rp)
-	st	%r8,   8(rp)
-	ahi	rp, -8
-	ltr	n, n			C any full 4-limb blocks?
-	je	L(end)
-	j	L(top)
-
-L(b2):	l	%r10, 16(up)		C n mod 4 == 2: one limb here, one at L(end)
-	l	%r11, 12(up)
-	ahi	up, -4
-	sldl	%r10, 0(cnt)
-	st	%r10, 12(rp)
-	ahi	rp, -4
-	ltr	n, n
-	je	L(end)
-	j	L(top)
-
-L(b1):	ltr	n, n			C n mod 4 == 1: only the limb done at L(end)
-	je	L(end)
-	j	L(top)
-
-L(b0):	l	%r10,16(up)		C n mod 4 == 0: three limbs here, one at L(end)
-	l	%r8, 12(up)
-	l	%r6,  8(up)
-	l	%r0,  4(up)
-	ahi	up, -12
-	lr	%r11, %r8
-	lr	%r9,  %r6
-	lr	%r7,  %r0
-	sldl	%r10,0(cnt)
-	sldl	%r8, 0(cnt)
-	sldl	%r6, 0(cnt)
-	st	%r10, 12(rp)
-	st	%r8,   8(rp)
-	st	%r6,   4(rp)
-	ahi	rp, -12
-	ahi	n, -1			C one block of 4 is covered by this feed-in
-	je	L(end)
-
-	ALIGN(8)
-L(top):	l	%r10, 16(up)		C main loop, 4 limbs per pass, high to low
-	l	%r8,  12(up)
-	l	%r6,   8(up)
-	l	%r0,   4(up)
-	l	%r1,   0(up)
-	lr	%r11, %r8
-	lr	%r9,  %r6
-	lr	%r7,  %r0
-	ahi	up, -16
-	sldl	%r10, 0(cnt)
-	sldl	%r8,  0(cnt)
-	sldl	%r6,  0(cnt)
-	sldl	%r0,  0(cnt)
-	st	%r10, 12(rp)
-	st	%r8,   8(rp)
-	st	%r6,   4(rp)
-	st	%r0,   0(rp)
-	ahi	rp, -16
-	brct	n, L(top)
-
-L(end):	l	%r10, 16(up)		C least significant limb
-	sll	%r10, 0(cnt)		C single shift, no lower limb to pair with
-	st	%r10, 12(rp)
-
-	lr	%r2, %r12		C return the bits saved in r12
-	lm	%r6, %r12, 24(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_32/lshiftc.asm b/gmp/mpn/s390_32/lshiftc.asm
deleted file mode 100644
index f601673249..0000000000
--- a/gmp/mpn/s390_32/lshiftc.asm
+++ /dev/null
@@ -1,156 +0,0 @@
-dnl  S/390-32 mpn_lshiftc.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 7
-C z990	         3.375
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C TODO
-C  *
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-define(`cnt',	`%r5')
-
-ASM_START()
-PROLOGUE(mpn_lshiftc)
-	lr	%r1, n
-	sll	%r1, 2			C r1 = 4*n, operand size in bytes
-	stm	%r6, %r13, 24(%r15)	C save r6-r13, restored before return
-	la	up, 0(%r1,up)		C put up near end of U
-	la	rp, 0(%r1,rp)		C put rp near end of R
-	ahi	up, -20			C now 16(up) is the most significant limb
-	ahi	rp, -16			C now 12(rp) is the most significant result limb
-	lhi	%r8, 32
-	sr	%r8, cnt		C r8 = 32 - cnt
-	l	%r12, 16(up)
-	srl	%r12, 0(%r8)		C return value, bits shifted out (not complemented)
-	lhi	%r13, -1		C r13 = all ones, xor mask to complement results
-	lhi	%r7, 3
-	nr	%r7, n			C n mod 4
-	srl	n, 2			C n = n/4; je below still tests the nr result
-	je	L(b0)
-	chi	%r7, 2
-	jl	L(b1)
-	je	L(b2)
-
-L(b3):	l	%r10, 16(up)		C n mod 4 == 3: two limbs here, one at L(end)
-	l	%r11, 12(up)
-	l	%r9,   8(up)
-	ahi	up, -8
-	lr	%r8, %r11
-	sldl	%r10, 0(cnt)		C shift register pair r10:r11 as one 64-bit value
-	sldl	%r8,  0(cnt)
-	xr	%r10, %r13		C complement shifted limbs before storing
-	xr	%r8, %r13
-	st	%r10, 12(rp)
-	st	%r8,   8(rp)
-	ahi	rp, -8
-	ltr	n, n			C any full 4-limb blocks?
-	je	L(end)
-	j	L(top)
-
-L(b2):	l	%r10, 16(up)		C n mod 4 == 2: one limb here, one at L(end)
-	l	%r11, 12(up)
-	ahi	up, -4
-	sldl	%r10, 0(cnt)
-	xr	%r10, %r13
-	st	%r10, 12(rp)
-	ahi	rp, -4
-	ltr	n, n
-	je	L(end)
-	j	L(top)
-
-L(b1):	ltr	n, n			C n mod 4 == 1: only the limb done at L(end)
-	je	L(end)
-	j	L(top)
-
-L(b0):	l	%r10,16(up)		C n mod 4 == 0: three limbs here, one at L(end)
-	l	%r8, 12(up)
-	l	%r6,  8(up)
-	l	%r0,  4(up)
-	ahi	up, -12
-	lr	%r11, %r8
-	lr	%r9,  %r6
-	lr	%r7,  %r0
-	sldl	%r10,0(cnt)
-	sldl	%r8, 0(cnt)
-	sldl	%r6, 0(cnt)
-	xr	%r10, %r13
-	xr	%r8, %r13
-	xr	%r6, %r13
-	st	%r10, 12(rp)
-	st	%r8,   8(rp)
-	st	%r6,   4(rp)
-	ahi	rp, -12
-	ahi	n, -1			C one block of 4 is covered by this feed-in
-	je	L(end)
-
-	ALIGN(8)
-L(top):	l	%r10, 16(up)		C main loop, 4 limbs per pass, high to low
-	l	%r8,  12(up)
-	l	%r6,   8(up)
-	l	%r0,   4(up)
-	l	%r1,   0(up)
-	lr	%r11, %r8
-	lr	%r9,  %r6
-	lr	%r7,  %r0
-	ahi	up, -16
-	sldl	%r10, 0(cnt)
-	sldl	%r8,  0(cnt)
-	sldl	%r6,  0(cnt)
-	sldl	%r0,  0(cnt)
-	xr	%r10, %r13
-	xr	%r8, %r13
-	xr	%r6, %r13
-	xr	%r0, %r13
-	st	%r10, 12(rp)
-	st	%r8,   8(rp)
-	st	%r6,   4(rp)
-	st	%r0,   0(rp)
-	ahi	rp, -16
-	brct	n, L(top)
-
-L(end):	l	%r10, 16(up)		C least significant limb
-	sll	%r10, 0(cnt)		C single shift, no lower limb to pair with
-	xr	%r10, %r13		C complement, so the shifted-in low bits become ones
-	st	%r10, 12(rp)
-
-	lr	%r2, %r12		C return the bits saved in r12
-	lm	%r6, %r13, 24(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_32/rshift.asm b/gmp/mpn/s390_32/rshift.asm
deleted file mode 100644
index 5f2cf37ca0..0000000000
--- a/gmp/mpn/s390_32/rshift.asm
+++ /dev/null
@@ -1,138 +0,0 @@
-dnl  S/390-32 mpn_rshift.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 6
-C z990	         3
-C z9		 ?
-C z10		 ?
-C z196		 ?
-
-C TODO
-C  *
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-define(`cnt',	`%r5')
-
-ASM_START()
-PROLOGUE(mpn_rshift)
-	stm	%r6, %r12, 24(%r15)	C save r6-r12, restored before return
-	lhi	%r8, 32
-	sr	%r8, cnt		C r8 = 32 - cnt
-	l	%r12, 0(up)
-	sll	%r12, 0(%r8)		C return value, bits shifted out of the low limb
-	lhi	%r7, 3
-	nr	%r7, n			C n mod 4
-	srl	n, 2			C n = n/4; je below still tests the nr result
-	je	L(b0)
-	chi	%r7, 2
-	jl	L(b1)
-	je	L(b2)
-
-L(b3):	l	%r11, 0(up)		C n mod 4 == 3: two limbs here, one at L(end)
-	l	%r10, 4(up)
-	l	%r8,  8(up)
-	ahi	up, 8
-	lr	%r9, %r10
-	srdl	%r10, 0(cnt)		C shift register pair r10:r11 as one 64-bit value
-	srdl	%r8,  0(cnt)
-	st	%r11, 0(rp)
-	st	%r9,  4(rp)
-	ahi	rp, 8
-	ltr	n, n			C any full 4-limb blocks?
-	je	L(end)
-	j	L(top)
-
-L(b2):	l	%r11, 0(up)		C n mod 4 == 2: one limb here, one at L(end)
-	l	%r10, 4(up)
-	ahi	up, 4
-	srdl	%r10, 0(cnt)
-	st	%r11, 0(rp)
-	ahi	rp, 4
-	ltr	n, n
-	je	L(end)
-	j	L(top)
-
-L(b1):	ltr	n, n			C n mod 4 == 1: only the limb done at L(end)
-	je	L(end)
-	j	L(top)
-
-L(b0):	l	%r11, 0(up)		C n mod 4 == 0: three limbs here, one at L(end)
-	l	%r9,  4(up)
-	l	%r7,  8(up)
-	l	%r1, 12(up)
-	ahi	up, 12
-	lr	%r10, %r9
-	lr	%r8,  %r7
-	lr	%r6,  %r1
-	srdl	%r10, 0(cnt)
-	srdl	%r8,  0(cnt)
-	srdl	%r6,  0(cnt)
-	st	%r11, 0(rp)
-	st	%r9,  4(rp)
-	st	%r7,  8(rp)
-	ahi	rp, 12
-	ahi	n, -1			C one block of 4 is covered by this feed-in
-	je	L(end)
-
-	ALIGN(8)
-L(top):	l	%r11, 0(up)		C main loop, 4 limbs per pass, low to high
-	l	%r9,  4(up)
-	l	%r7,  8(up)
-	l	%r1, 12(up)
-	l	%r0, 16(up)
-	lr	%r10, %r9
-	lr	%r8,  %r7
-	lr	%r6,  %r1
-	ahi	up, 16
-	srdl	%r10, 0(cnt)
-	srdl	%r8,  0(cnt)
-	srdl	%r6,  0(cnt)
-	srdl	%r0,  0(cnt)
-	st	%r11, 0(rp)
-	st	%r9,  4(rp)
-	st	%r7,  8(rp)
-	st	%r1, 12(rp)
-	ahi	rp, 16
-	brct	n, L(top)
-
-L(end):	l	%r11, 0(up)		C most significant limb
-	srl	%r11, 0(cnt)		C single shift, no higher limb to pair with
-	st	%r11, 0(rp)
-
-	lr	%r2, %r12		C return the bits saved in r12
-	lm	%r6, %r12, 24(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/README b/gmp/mpn/s390_64/README
deleted file mode 100644
index 8f482a9cd2..0000000000
--- a/gmp/mpn/s390_64/README
+++ /dev/null
@@ -1,88 +0,0 @@
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
-
-
-
-There are 5 generations of 64-bit s390 processors, z900, z990, z9,
-z10, and z196.  The current GMP code was optimised for the two oldest,
-z900 and z990.
-
-
-mpn_copyi
-
-This code makes use of a loop around MVC.  It almost surely runs very
-close to optimally.  A small improvement could be done by using one
-MVC for size 256 bytes, now we use two (we use an extra MVC when
-copying any multiple of 256 bytes).
-
-
-mpn_copyd
-
-We have tried several feed-in variants here, branch tree, jump table
-and computed goto.  The fastest (on z990) turned out to be computed
-goto.
-
-An approach not tried is EX of LMG and STMG, modifying the register set
-on-the-fly.  Using that trick, we could completely avoid using
-separate feed-in paths.
-
-
-mpn_lshift, mpn_rshift
-
-The current code runs at pipeline decode bandwidth on z990.
-
-
-mpn_add_n, mpn_sub_n
-
-The current code is 4-way unrolled.  It should be unrolled more, at
-least 8x, in order to reach 2.5 c/l.
-
-
-mpn_mul_1, mpn_addmul_1, mpn_submul_1
-
-The current code is very naive, but due to the non-pipelined nature of
-MLGR on z900 and z990, more sophisticated code would not gain much.
-
-On z10 one would need to cluster at least 4 MLGR together, in order to
-reduce stalling.
-
-On z196, one surely wants to use unrolling and pipelining, to perhaps
-reach around 12 c/l.  A major issue here and on z10 is ALCGR's 3 cycle
-stalling.
-
-
-mpn_mul_2, mpn_addmul_2
-
-At least for older machines (z900, z990) with very slow MLGR, we
-should use Karatsuba's algorithm on 2-limb units, making mul_2 and
-addmul_2 the main multiplication primitives.  The newer machines might
-benefit less from this approach, perhaps in particular z10, where MLGR
-clustering is more important.
-
-With Karatsuba, one could hope for around 16 cycles per accumulated
-128 cross product, on z990.
diff --git a/gmp/mpn/s390_64/addmul_1.asm b/gmp/mpn/s390_64/addmul_1.asm
deleted file mode 100644
index 84cca12361..0000000000
--- a/gmp/mpn/s390_64/addmul_1.asm
+++ /dev/null
@@ -1,72 +0,0 @@
-dnl  S/390-64 mpn_addmul_1
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		34
-C z990		23
-C z9		 ?
-C z10		28
-C z196		 ?
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-define(`v0',	`%r5')
-
-define(`z',	`%r9')
-
-ASM_START()
-PROLOGUE(mpn_addmul_1)
-	stmg	%r9, %r12, 72(%r15)	C save r9-r12, restored before return
-	lghi	%r12, 0			C zero index register
-	aghi	%r12, 0			C clear carry flag
-	lghi	%r11, 0			C clear carry limb
-	lghi	z, 0			C keep register zero
-
-L(top):	lg	%r1, 0(%r12,up)		C r1 = up[i]
-	lg	%r10, 0(%r12,rp)	C r10 = rp[i]
-	mlgr	%r0, v0			C r0:r1 = up[i] * v0, high half in r0
-	alcgr	%r1, %r10		C low += rp[i] + carry left by previous pass
-	alcgr	%r0, z			C fold that carry into the high half
-	algr	%r1, %r11		C low += carry limb; this carry goes to next pass
-	lgr	%r11, %r0		C high half becomes the next carry limb
-	stg	%r1, 0(%r12,rp)		C rp[i] = low
-	la	%r12, 8(%r12)		C advance byte index by one limb
-	brctg	n, L(top)
-
-	lghi	%r2, 0
-	alcgr	%r2, %r11		C return carry limb plus the pending carry
-
-	lmg	%r9, %r12, 72(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/aorrlsh1_n.asm b/gmp/mpn/s390_64/aorrlsh1_n.asm
deleted file mode 100644
index 697259efef..0000000000
--- a/gmp/mpn/s390_64/aorrlsh1_n.asm
+++ /dev/null
@@ -1,168 +0,0 @@
-dnl  S/390-64 mpn_addlsh1_n and mpn_rsblsh1_n.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 9
-C z990		 4.75
-C z9		 ?
-C z10		11
-C z196		 ?
-
-C TODO
-C  * Optimise for small n, avoid 'la' like in aors_n.asm.
-C  * Tune to reach 3.5 c/l.  For addlsh1, we could let the main alcgr propagate
-C    carry to the lsh1 alcgr.
-C  * Compute RETVAL for rsblsh1_n less stupidly.
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`vp',	`%r4')
-define(`n',	`%r5')
-
-ifdef(`OPERATION_addlsh1_n',`
-  define(ADSB,		alg)
-  define(ADSBC,		alcg)
-  define(INITCY,	`lghi	%r9, -1')
-  define(RETVAL,	`la	%r2, 2(%r1,%r9)')
-  define(func, mpn_addlsh1_n)
-')
-ifdef(`OPERATION_rsblsh1_n',`
-  define(ADSB,		slg)
-  define(ADSBC,		slbg)
-  define(INITCY,	`lghi	%r9, 0')
-  define(RETVAL,`dnl
-	algr	%r1, %r9
-	lghi	%r2, 1
-	algr	%r2, %r1')
-  define(func, mpn_rsblsh1_n)
-')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
-
-ASM_START()
-PROLOGUE(func)
-	stmg	%r6, %r9, 48(%r15)	C save r6-r9, restored before return
-
-	aghi	n, 3			C bias count so the shift below rounds up
-	lghi	%r7, 3
-	srlg	%r0, n, 2		C r0 = (n+3)/4, loop count for brctg
-	ngr	%r7, n			C (n+3) mod 4, n was biased by 3 above
-	je	L(b1)			C n mod 4 == 1
-	cghi	%r7, 2
-	jl	L(b2)			C n mod 4 == 2
-	jne	L(b0)			C n mod 4 == 0, else fall into b3
-
-L(b3):	lmg	%r5, %r7, 0(vp)		C feed-in, 3 limbs
-	la	vp, 24(vp)
-
-	algr	%r5, %r5		C double the vp limbs, carry chained upward
-	alcgr	%r6, %r6
-	alcgr	%r7, %r7
-	slbgr	%r1, %r1		C save carry of the doubling in r1
-
-	ADSB	%r5, 0(up)		C combine doubled vp limbs with up limbs
-	ADSBC	%r6, 8(up)
-	ADSBC	%r7, 16(up)
-	la	up, 24(up)
-	slbgr	%r9, %r9		C save carry/borrow of the ADSB chain in r9
-
-	stmg	%r5, %r7, 0(rp)
-	la	rp, 24(rp)
-	brctg	%r0, L(top)
-	j	L(end)
-
-L(b0):	lghi	%r1, -1			C saved-carry state meaning no carry pending
-	INITCY
-	j	L(top)
-
-L(b1):	lg	%r5, 0(vp)		C feed-in, 1 limb
-	la	vp, 8(vp)
-
-	algr	%r5, %r5
-	slbgr	%r1, %r1		C save carry of the doubling in r1
-	ADSB	%r5, 0(up)
-	la	up, 8(up)
-	slbgr	%r9, %r9		C save carry/borrow of ADSB in r9
-
-	stg	%r5, 0(rp)
-	la	rp, 8(rp)
-	brctg	%r0, L(top)
-	j	L(end)
-
-L(b2):	lmg	%r5, %r6, 0(vp)		C feed-in, 2 limbs
-	la	vp, 16(vp)
-
-	algr	%r5, %r5
-	alcgr	%r6, %r6
-	slbgr	%r1, %r1		C save carry of the doubling in r1
-
-	ADSB	%r5, 0(up)
-	ADSBC	%r6, 8(up)
-	la	up, 16(up)
-	slbgr	%r9, %r9		C save carry/borrow of the ADSB chain in r9
-
-	stmg	%r5, %r6, 0(rp)
-	la	rp, 16(rp)
-	brctg	%r0, L(top)
-	j	L(end)
-
-L(top):	lmg	%r5, %r8, 0(vp)		C main loop, 4 limbs per pass
-	la	vp, 32(vp)
-
-	aghi	%r1, 1			C restore carry
-
-	alcgr	%r5, %r5
-	alcgr	%r6, %r6
-	alcgr	%r7, %r7
-	alcgr	%r8, %r8
-
-	slbgr	%r1, %r1		C save carry
-
-	aghi	%r9, 1			C restore carry
-
-	ADSBC	%r5, 0(up)
-	ADSBC	%r6, 8(up)
-	ADSBC	%r7, 16(up)
-	ADSBC	%r8, 24(up)
-	la	up, 32(up)
-
-	slbgr	%r9, %r9		C save carry
-
-	stmg	%r5, %r8, 0(rp)
-	la	rp, 32(rp)
-	brctg	%r0, L(top)
-
-L(end):	RETVAL
-	lmg	%r6, %r9, 48(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/aors_n.asm b/gmp/mpn/s390_64/aors_n.asm
deleted file mode 100644
index a3c3ca791c..0000000000
--- a/gmp/mpn/s390_64/aors_n.asm
+++ /dev/null
@@ -1,136 +0,0 @@
-dnl  S/390-64 mpn_add_n and mpn_sub_n.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 5.5
-C z990		 3
-C z9		 ?
-C z10		 6
-C z196		 ?
-
-C TODO
-C  * Optimise for small n
-C  * Use r0 and save/restore one less register
-C  * Using logops_n's v1 inner loop operand order makes the loop about 20%
-C    faster, at the expense of highly alignment-dependent performance.
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`vp',	`%r4')
-define(`n',	`%r5')
-
-ifdef(`OPERATION_add_n', `
-  define(ADSB,		alg)
-  define(ADSBCR,	alcgr)
-  define(ADSBC,		alcg)
-  define(RETVAL,`dnl
-	lghi	%r2, 0
-	alcgr	%r2, %r2')
-  define(func,		mpn_add_n)
-  define(func_nc,	mpn_add_nc)')
-ifdef(`OPERATION_sub_n', `
-  define(ADSB,		slg)
-  define(ADSBCR,	slbgr)
-  define(ADSBC,		slbg)
-  define(RETVAL,`dnl
-	slbgr	%r2, %r2
-	lcgr	%r2, %r2')
-  define(func,		mpn_sub_n)
-  define(func_nc,	mpn_sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
-
-ASM_START()
-PROLOGUE(func)
-	stmg	%r6, %r8, 48(%r15)	C save r6-r8, restored before return
-
-	aghi	n, 3			C bias count so the shift below rounds up
-	lghi	%r7, 3
-	srlg	%r1, n, 2		C r1 = (n+3)/4, loop count for brctg
-	ngr	%r7, n			C (n+3) mod 4, n was biased by 3 above
-	je	L(b1)			C n mod 4 == 1
-	cghi	%r7, 2
-	jl	L(b2)			C n mod 4 == 2
-	jne	L(b0)			C n mod 4 == 0, else fall into b3
-
-L(b3):	lmg	%r5, %r7, 0(up)		C feed-in, 3 limbs
-	la	up, 24(up)
-	ADSB	%r5, 0(vp)		C first limb starts the carry/borrow chain
-	ADSBC	%r6, 8(vp)
-	ADSBC	%r7, 16(vp)
-	la	vp, 24(vp)
-	stmg	%r5, %r7, 0(rp)
-	la	rp, 24(rp)
-	brctg	%r1, L(top)
-	j	L(end)
-
-L(b0):	lmg	%r5, %r8, 0(up)		C These redundant insns are no mistake,
-	la	up, 32(up)		C they are needed to make the main loop
-	ADSB	%r5, 0(vp)		C run fast for n = 0 (mod 4).
-	ADSBC	%r6, 8(vp)
-	j	L(m0)			C join the main loop after its first two limbs
-
-L(b1):	lg	%r5, 0(up)		C feed-in, 1 limb
-	la	up, 8(up)
-	ADSB	%r5, 0(vp)
-	la	vp, 8(vp)
-	stg	%r5, 0(rp)
-	la	rp, 8(rp)
-	brctg	%r1, L(top)
-	j	L(end)
-
-L(b2):	lmg	%r5, %r6, 0(up)		C feed-in, 2 limbs
-	la	up, 16(up)
-	ADSB	%r5, 0(vp)
-	ADSBC	%r6, 8(vp)
-	la	vp, 16(vp)
-	stmg	%r5, %r6, 0(rp)
-	la	rp, 16(rp)
-	brctg	%r1, L(top)
-	j	L(end)
-
-L(top):	lmg	%r5, %r8, 0(up)		C main loop, 4 limbs per pass
-	la	up, 32(up)
-	ADSBC	%r5, 0(vp)		C continues the chain from the previous pass
-	ADSBC	%r6, 8(vp)
-L(m0):	ADSBC	%r7, 16(vp)
-	ADSBC	%r8, 24(vp)
-	la	vp, 32(vp)
-	stmg	%r5, %r8, 0(rp)
-	la	rp, 32(rp)
-	brctg	%r1, L(top)
-
-L(end):	RETVAL
-	lmg	%r6, %r8, 48(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/bdiv_dbm1c.asm b/gmp/mpn/s390_64/bdiv_dbm1c.asm
deleted file mode 100644
index 35e900a279..0000000000
--- a/gmp/mpn/s390_64/bdiv_dbm1c.asm
+++ /dev/null
@@ -1,65 +0,0 @@
-dnl  S/390-64 mpn_bdiv_dbm1c
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		29
-C z990		22
-C z9		 ?
-C z10		19
-C z196		 ?
-
-C INPUT PARAMETERS
-define(`qp',	  `%r2')
-define(`up',	  `%r3')
-define(`n',	  `%r4')
-define(`bd',	  `%r5')
-define(`cy',	  `%r6')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_bdiv_dbm1c)
-	stmg	%r6, %r7, 48(%r15)	C save r6-r7; r6 is also the incoming cy
-	lghi	%r7, 0			C zero index register
-
-L(top):	lg	%r1, 0(%r7,up)		C r1 = up[i]
-	mlgr	%r0, bd			C r0:r1 = up[i] * bd, high half in r0
-	slgr	%r6, %r1		C cy -= low half of product
-	stg	%r6, 0(%r7,qp)		C qp[i] = cy
-	la	%r7, 8(%r7)		C advance byte index by one limb
-	slbgr	%r6, %r0		C cy -= high half, minus borrow from slgr
-	brctg	n, L(top)
-
-	lgr	%r2, %r6		C return the final cy
-	lmg	%r6, %r7, 48(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/copyd.asm b/gmp/mpn/s390_64/copyd.asm
deleted file mode 100644
index 8631e19f00..0000000000
--- a/gmp/mpn/s390_64/copyd.asm
+++ /dev/null
@@ -1,144 +0,0 @@
-dnl  S/390-64 mpn_copyd
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 2.67
-C z990           1.5
-C z9		 ?
-C z10		 1.8
-C z196		 ?
-
-C FIXME:
-C  * Avoid saving/restoring callee-saves registers for n < 3.  This could be
-C    done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
-C    We could then use r3...r10 in main loop.
-C  * Could we use some EX trick, modifying lmg/stmg, for the feed-in code?
-
-C INPUT PARAMETERS
-define(`rp_param',	`%r2')
-define(`up_param',	`%r3')
-define(`n',		`%r4')
-
-define(`rp',	`%r8')
-define(`up',	`%r9')
-
-ASM_START()
-PROLOGUE(mpn_copyd)
-	stmg	%r6, %r11, 48(%r15)	C save r6-r11, restored before return
-
-	sllg	%r1, n, 3		C r1 = 8*n, operand size in bytes
-	la	%r10, 8(n)		C r10 = n + 8
-	aghi	%r1, -64		C bias so offsets 0..56 reach the top 8 limbs
-	srlg	%r10, %r10, 3		C r10 = n/8 + 1, count for brctg
-	lghi	%r11, -64		C pointer step per main loop pass
-
-	la	rp, 0(%r1,rp_param)	C FIXME use lay on z990 and later
-	la	up, 0(%r1,up_param)	C FIXME use lay on z990 and later
-
-	lghi	%r7, 7
-	ngr	%r7, n			C n mod 8
-	cghi	%r7, 2
-	jh	L(b34567)
-	cghi	%r7, 1
-	je	L(b1)
-	jh	L(b2)
-
-L(b0):	brctg	%r10, L(top)		C n mod 8 == 0: no feed-in copy
-	j	L(end)
-
-L(b1):	lg	%r0, 56(up)		C n mod 8 == 1: copy the top limb
-	aghi	up, -8
-	stg	%r0, 56(rp)
-	aghi	rp, -8
-	brctg	%r10, L(top)
-	j	L(end)
-
-L(b2):	lmg	%r0, %r1, 48(up)	C n mod 8 == 2: copy the top 2 limbs
-	aghi	up, -16
-	stmg	%r0, %r1, 48(rp)
-	aghi	rp, -16
-	brctg	%r10, L(top)
-	j	L(end)
-
-L(b34567):
-	cghi	%r7, 4
-	jl	L(b3)
-	je	L(b4)
-	cghi	%r7, 6
-	je	L(b6)
-	jh	L(b7)
-
-L(b5):	lmg	%r0, %r4, 24(up)	C n mod 8 == 5: copy the top 5 limbs
-	aghi	up, -40
-	stmg	%r0, %r4, 24(rp)
-	aghi	rp, -40
-	brctg	%r10, L(top)
-	j	L(end)
-
-L(b3):	lmg	%r0, %r2, 40(up)	C n mod 8 == 3: copy the top 3 limbs
-	aghi	up, -24
-	stmg	%r0, %r2, 40(rp)
-	aghi	rp, -24
-	brctg	%r10, L(top)
-	j	L(end)
-
-L(b4):	lmg	%r0, %r3, 32(up)	C n mod 8 == 4: copy the top 4 limbs
-	aghi	up, -32
-	stmg	%r0, %r3, 32(rp)
-	aghi	rp, -32
-	brctg	%r10, L(top)
-	j	L(end)
-
-L(b6):	lmg	%r0, %r5, 16(up)	C n mod 8 == 6: copy the top 6 limbs
-	aghi	up, -48
-	stmg	%r0, %r5, 16(rp)
-	aghi	rp, -48
-	brctg	%r10, L(top)
-	j	L(end)
-
-L(b7):	lmg	%r0, %r6, 8(up)		C n mod 8 == 7: copy the top 7 limbs
-	aghi	up, -56
-	stmg	%r0, %r6, 8(rp)
-	aghi	rp, -56
-	brctg	%r10, L(top)
-	j	L(end)
-
-L(top):	lmg	%r0, %r7, 0(up)		C main loop, 8 limbs per pass, moving downward
-	la	up, 0(%r11,up)		C up -= 64
-	stmg	%r0, %r7, 0(rp)
-	la	rp, 0(%r11,rp)		C rp -= 64
-	brctg	%r10, L(top)
-
-L(end):	lmg	%r6, %r11, 48(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/copyi.asm b/gmp/mpn/s390_64/copyi.asm
deleted file mode 100644
index bfb88814ea..0000000000
--- a/gmp/mpn/s390_64/copyi.asm
+++ /dev/null
@@ -1,68 +0,0 @@
-dnl  S/390-64 mpn_copyi
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 1.25
-C z990           0.75
-C z9		 ?
-C z10		 1
-C z196		 ?
-
-C NOTE
-C  * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-
-ASM_START()
-PROLOGUE(mpn_copyi)
-	ltgr	%r4, %r4
-	sllg	%r4, %r4, 3
-	je	L(rtn)
-	aghi	%r4, -1
-	srlg	%r5, %r4, 8
-	ltgr	%r5, %r5		C < 256 bytes to copy?
-	je	L(1)
-
-L(top):	mvc	0(256, rp), 0(up)
-	la	rp, 256(rp)
-	la	up, 256(up)
-	brctg	%r5, L(top)
-
-L(1):	bras	%r5, L(2)		C make r5 point to mvc insn
-	mvc	0(1, rp), 0(up)
-L(2):	ex	%r4, 0(%r5)		C execute mvc with length ((n-1) mod 256)+1
-L(rtn):	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/gmp-mparam.h b/gmp/mpn/s390_64/gmp-mparam.h
deleted file mode 100644
index dacd9966a4..0000000000
--- a/gmp/mpn/s390_64/gmp-mparam.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/* S/390-64 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 1200 MHz z990 */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          9
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        62
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     17
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           98
-
-#define MUL_TOOM22_THRESHOLD                10
-#define MUL_TOOM33_THRESHOLD                41
-#define MUL_TOOM44_THRESHOLD               105
-#define MUL_TOOM6H_THRESHOLD               149
-#define MUL_TOOM8H_THRESHOLD               212
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      69
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      72
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      64
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      55
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 16
-#define SQR_TOOM3_THRESHOLD                 57
-#define SQR_TOOM4_THRESHOLD                153
-#define SQR_TOOM6_THRESHOLD                204
-#define SQR_TOOM8_THRESHOLD                309
-
-#define MULMID_TOOM42_THRESHOLD             20
-
-#define MULMOD_BNM1_THRESHOLD               10
-#define SQRMOD_BNM1_THRESHOLD               11
-
-#define MUL_FFT_MODF_THRESHOLD             220  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    220, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
-    {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
-    {     15, 7}, {      8, 6}, {     17, 7}, {      9, 6}, \
-    {     19, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
-    {      9, 7}, {     19, 8}, {     13, 9}, {      7, 8}, \
-    {     19, 9}, {     11, 8}, {     23,10}, {      7, 9}, \
-    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
-    {     23,10}, {     15, 9}, {     39,10}, {     23,11}, \
-    {     15,10}, {     31, 9}, {     63,10}, {     39, 9}, \
-    {     79,10}, {     47,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255,10}, {     71, 9}, {    143, 8}, \
-    {    287,10}, {     79,11}, {     47,12}, {     31,11}, \
-    {     63,10}, {    127, 9}, {    255, 8}, {    511,10}, \
-    {    143, 9}, {    287,11}, {     79,10}, {    159, 9}, \
-    {    319,10}, {    175, 9}, {    351, 8}, {    703, 7}, \
-    {   1407,11}, {     95,10}, {    191, 9}, {    383,10}, \
-    {    207,11}, {    111,10}, {    223,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,11}, {    143,10}, \
-    {    287, 9}, {    575, 8}, {   1151,10}, {    319,11}, \
-    {    175,10}, {    351, 9}, {    703,12}, {     95,11}, \
-    {    191,10}, {    383, 9}, {    767,11}, {    207,10}, \
-    {    415,11}, {    223,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 97
-#define MUL_FFT_THRESHOLD                 1728
-
-#define SQR_FFT_MODF_THRESHOLD             212  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    212, 5}, {      7, 4}, {     15, 5}, {     13, 6}, \
-    {      7, 5}, {     15, 6}, {     15, 7}, {      8, 6}, \
-    {     17, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
-    {      9, 7}, {     19, 8}, {     11, 7}, {     23, 8}, \
-    {     13, 9}, {      7, 8}, {     19, 9}, {     11, 8}, \
-    {     25,10}, {      7, 9}, {     15, 8}, {     31, 9}, \
-    {     19, 8}, {     39, 9}, {     23,10}, {     15, 9}, \
-    {     39,10}, {     23,11}, {     15,10}, {     31, 9}, \
-    {     63,10}, {     47,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255,10}, {     71, 9}, {    143, 8}, \
-    {    287,10}, {     79,11}, {     47,12}, {     31,11}, \
-    {     63,10}, {    127, 9}, {    255, 8}, {    511,10}, \
-    {    143, 9}, {    287,11}, {     79,10}, {    159, 9}, \
-    {    319, 8}, {    639,10}, {    175, 9}, {    351, 8}, \
-    {    703,10}, {    191, 9}, {    383, 8}, {    767,10}, \
-    {    207, 9}, {    415,11}, {    111,10}, {    223,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,11}, \
-    {    143,10}, {    287, 9}, {    575, 8}, {   1151,11}, \
-    {    159,10}, {    319,11}, {    175,10}, {    351, 9}, \
-    {    703,11}, {    191,10}, {    383,11}, {    207,10}, \
-    {    415,11}, {    223,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 93
-#define SQR_FFT_THRESHOLD                 1600
-
-#define MULLO_BASECASE_THRESHOLD             2
-#define MULLO_DC_THRESHOLD                  33
-#define MULLO_MUL_N_THRESHOLD             3176
-
-#define DC_DIV_QR_THRESHOLD                 28
-#define DC_DIVAPPR_Q_THRESHOLD             107
-#define DC_BDIV_QR_THRESHOLD                31
-#define DC_BDIV_Q_THRESHOLD                 78
-
-#define INV_MULMOD_BNM1_THRESHOLD           43
-#define INV_NEWTON_THRESHOLD               129
-#define INV_APPR_THRESHOLD                 117
-
-#define BINV_NEWTON_THRESHOLD              149
-#define REDC_1_TO_REDC_N_THRESHOLD          38
-
-#define MU_DIV_QR_THRESHOLD                748
-#define MU_DIVAPPR_Q_THRESHOLD             748
-#define MUPI_DIV_QR_THRESHOLD               65
-#define MU_BDIV_QR_THRESHOLD               562
-#define MU_BDIV_Q_THRESHOLD                734
-
-#define POWM_SEC_TABLE  4,23,274,961,2783
-
-#define MATRIX22_STRASSEN_THRESHOLD         11
-#define HGCD_THRESHOLD                      79
-#define HGCD_APPR_THRESHOLD                 70
-#define HGCD_REDUCE_THRESHOLD             1094
-#define GCD_DC_THRESHOLD                   183
-#define GCDEXT_DC_THRESHOLD                148
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                30
-#define GET_STR_PRECOMPUTE_THRESHOLD        41
-#define SET_STR_DC_THRESHOLD               402
-#define SET_STR_PRECOMPUTE_THRESHOLD      1104
-
-#define FAC_DSC_THRESHOLD                  842
-#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/gmp/mpn/s390_64/invert_limb.asm b/gmp/mpn/s390_64/invert_limb.asm
deleted file mode 100644
index edcebddf1c..0000000000
--- a/gmp/mpn/s390_64/invert_limb.asm
+++ /dev/null
@@ -1,94 +0,0 @@
-dnl  S/390-64 mpn_invert_limb
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2011, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900	       142
-C z990          86
-C z9		 ?
-C z10	       120
-C z196		 ?
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_invert_limb)
-	stg	%r9, 72(%r15)
-	srlg	%r9, %r2, 55
-	agr	%r9, %r9
-	larl	%r4, approx_tab-512
-	srlg	%r3, %r2, 24
-	aghi	%r3, 1
-	lghi	%r5, 1
-	llgh	%r4, 0(%r9, %r4)
-	sllg	%r9, %r4, 11
-	msgr	%r4, %r4
-	msgr	%r4, %r3
-	srlg	%r4, %r4, 40
-	aghi	%r9, -1
-	sgr	%r9, %r4
-	sllg	%r0, %r9, 60
-	sllg	%r1, %r9, 13
-	msgr	%r9, %r9
-	msgr	%r9, %r3
-	sgr	%r0, %r9
-	ngr	%r5, %r2
-	srlg	%r4, %r2, 1
-	srlg	%r3, %r0, 47
-	agr	%r3, %r1
-	agr	%r4, %r5
-	msgr	%r4, %r3
-	srlg	%r1, %r3, 1
-	lcgr	%r5, %r5
-	ngr	%r1, %r5
-	sgr	%r1, %r4
-	mlgr	%r0, %r3
-	srlg	%r9, %r0, 1
-	sllg	%r4, %r3, 31
-	agr	%r4, %r9
-	lgr	%r1, %r4
-	mlgr	%r0, %r2
-	algr	%r1, %r2
-	alcgr	%r0, %r2
-	lgr	%r2, %r4
-	sgr	%r2, %r0
-	lg	%r9, 72(%r15)
-	br	%r14
-EPILOGUE()
-	RODATA
-	ALIGN(2)
-approx_tab:
-forloop(i,256,512-1,dnl
-`	.word	eval(0x7fd00/i)
-')dnl
-ASM_END()
diff --git a/gmp/mpn/s390_64/logops_n.asm b/gmp/mpn/s390_64/logops_n.asm
deleted file mode 100644
index 914cfb6a41..0000000000
--- a/gmp/mpn/s390_64/logops_n.asm
+++ /dev/null
@@ -1,291 +0,0 @@
-dnl  S/390-64 logops.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb     variant 1           variant 2       variant 3
-C	        rp!=up  rp=up
-C z900		 4.5	 2.25		 5.5		 5.5
-C z990		 2.75	 2		 3.25		 3.25
-C z9		 ?			 ?		 ?
-C z10		 3.25			 3.75		 3.75
-C z196		 ?			 ?		 ?
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`vp',	`%r4')
-define(`n',	`%r5')
-
-ifdef(`OPERATION_and_n',`
-  define(`func',`mpn_and_n')
-  define(`VARIANT_1')
-  define(`LOGOPC',`nc')
-  define(`LOGOP',`ng')')
-ifdef(`OPERATION_andn_n',`
-  define(`func',`mpn_andn_n')
-  define(`VARIANT_2')
-  define(`LOGOP',`ng')')
-ifdef(`OPERATION_nand_n',`
-  define(`func',`mpn_nand_n')
-  define(`VARIANT_3')
-  define(`LOGOP',`ng')')
-ifdef(`OPERATION_ior_n',`
-  define(`func',`mpn_ior_n')
-  define(`VARIANT_1')
-  define(`LOGOPC',`oc')
-  define(`LOGOP',`og')')
-ifdef(`OPERATION_iorn_n',`
-  define(`func',`mpn_iorn_n')
-  define(`VARIANT_2')
-  define(`LOGOP',`og')')
-ifdef(`OPERATION_nior_n',`
-  define(`func',`mpn_nior_n')
-  define(`VARIANT_3')
-  define(`LOGOP',`og')')
-ifdef(`OPERATION_xor_n',`
-  define(`func',`mpn_xor_n')
-  define(`VARIANT_1')
-  define(`LOGOPC',`xc')
-  define(`LOGOP',`xg')')
-ifdef(`OPERATION_xnor_n',`
-  define(`func',`mpn_xnor_n')
-  define(`VARIANT_2')
-  define(`LOGOP',`xg')')
-
-MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
-
-ASM_START()
-PROLOGUE(func)
-ifdef(`VARIANT_1',`
-	cgr	rp, up
-	jne	L(normal)
-
-	sllg	n, n, 3
-	aghi	n, -1
-	srlg	%r1, n, 8
-	ltgr	%r1, %r1		C < 256 bytes to copy?
-	je	L(1)
-
-L(tp):	LOGOPC	0(256, rp), 0(vp)
-	la	rp, 256(rp)
-	la	vp, 256(vp)
-	brctg	%r1, L(tp)
-
-L(1):	bras	%r1, L(2)		C make r1 point to mvc insn
-	LOGOPC	0(1, rp), 0(vp)
-L(2):	ex	n, 0(%r1)		C execute mvc with length ((n-1) mod 256)+1
-L(rtn):	br	%r14
-
-
-L(normal):
-	stmg	%r6, %r8, 48(%r15)
-	aghi	n, 3
-	lghi	%r7, 3
-	srlg	%r0, n, 2
-	ngr	%r7, n			C n mod 4
-	je	L(b1)
-	cghi	%r7, 2
-	jl	L(b2)
-	jne	L(top)
-
-L(b3):	lmg	%r5, %r7, 0(up)
-	la	up, 24(up)
-	LOGOP	%r5, 0(vp)
-	LOGOP	%r6, 8(vp)
-	LOGOP	%r7, 16(vp)
-	stmg	%r5, %r7, 0(rp)
-	la	rp, 24(rp)
-	la	vp, 24(vp)
-	j	L(mid)
-
-L(b1):	lg	%r5, 0(up)
-	la	up, 8(up)
-	LOGOP	%r5, 0(vp)
-	stg	%r5, 0(rp)
-	la	rp, 8(rp)
-	la	vp, 8(vp)
-	j	L(mid)
-
-L(b2):	lmg	%r5, %r6, 0(up)
-	la	up, 16(up)
-	LOGOP	%r5, 0(vp)
-	LOGOP	%r6, 8(vp)
-	stmg	%r5, %r6, 0(rp)
-	la	rp, 16(rp)
-	la	vp, 16(vp)
-	j	L(mid)
-
-L(top):	lmg	%r5, %r8, 0(up)
-	la	up, 32(up)
-	LOGOP	%r5, 0(vp)
-	LOGOP	%r6, 8(vp)
-	LOGOP	%r7, 16(vp)
-	LOGOP	%r8, 24(vp)
-	stmg	%r5, %r8, 0(rp)
-	la	rp, 32(rp)
-	la	vp, 32(vp)
-L(mid):	brctg	%r0, L(top)
-
-	lmg	%r6, %r8, 48(%r15)
-	br	%r14
-')
-
-ifdef(`VARIANT_2',`
-	stmg	%r6, %r8, 48(%r15)
-	lghi	%r1, -1
-
-	aghi	n, 3
-	lghi	%r7, 3
-	srlg	%r0, n, 2
-	ngr	%r7, n			C n mod 4
-	je	L(b1)
-	cghi	%r7, 2
-	jl	L(b2)
-	jne	L(top)
-
-L(b3):	lmg	%r5, %r7, 0(vp)
-	la	vp, 24(vp)
-	xgr	%r5, %r1
-	xgr	%r6, %r1
-	xgr	%r7, %r1
-	LOGOP	%r5, 0(up)
-	LOGOP	%r6, 8(up)
-	LOGOP	%r7, 16(up)
-	stmg	%r5, %r7, 0(rp)
-	la	rp, 24(rp)
-	la	up, 24(up)
-	j	L(mid)
-
-L(b1):	lg	%r5, 0(vp)
-	la	vp, 8(vp)
-	xgr	%r5, %r1
-	LOGOP	%r5, 0(up)
-	stg	%r5, 0(rp)
-	la	rp, 8(rp)
-	la	up, 8(up)
-	j	L(mid)
-
-L(b2):	lmg	%r5, %r6, 0(vp)
-	la	vp, 16(vp)
-	xgr	%r5, %r1
-	xgr	%r6, %r1
-	LOGOP	%r5, 0(up)
-	LOGOP	%r6, 8(up)
-	stmg	%r5, %r6, 0(rp)
-	la	rp, 16(rp)
-	la	up, 16(up)
-	j	L(mid)
-
-L(top):	lmg	%r5, %r8, 0(vp)
-	la	vp, 32(vp)
-	xgr	%r5, %r1
-	xgr	%r6, %r1
-	xgr	%r7, %r1
-	xgr	%r8, %r1
-	LOGOP	%r5, 0(up)
-	LOGOP	%r6, 8(up)
-	LOGOP	%r7, 16(up)
-	LOGOP	%r8, 24(up)
-	la	up, 32(up)
-	stmg	%r5, %r8, 0(rp)
-	la	rp, 32(rp)
-L(mid):	brctg	%r0, L(top)
-
-	lmg	%r6, %r8, 48(%r15)
-	br	%r14
-')
-
-ifdef(`VARIANT_3',`
-	stmg	%r6, %r8, 48(%r15)
-	lghi	%r1, -1
-
-	aghi	n, 3
-	lghi	%r7, 3
-	srlg	%r0, n, 2
-	ngr	%r7, n			C n mod 4
-	je	L(b1)
-	cghi	%r7, 2
-	jl	L(b2)
-	jne	L(top)
-
-L(b3):	lmg	%r5, %r7, 0(vp)
-	la	vp, 24(vp)
-	LOGOP	%r5, 0(up)
-	LOGOP	%r6, 8(up)
-	xgr	%r5, %r1
-	xgr	%r6, %r1
-	LOGOP	%r7, 16(up)
-	xgr	%r7, %r1
-	stmg	%r5, %r7, 0(rp)
-	la	rp, 24(rp)
-	la	up, 24(up)
-	j	L(mid)
-
-L(b1):	lg	%r5, 0(vp)
-	la	vp, 8(vp)
-	LOGOP	%r5, 0(up)
-	xgr	%r5, %r1
-	stg	%r5, 0(rp)
-	la	rp, 8(rp)
-	la	up, 8(up)
-	j	L(mid)
-
-L(b2):	lmg	%r5, %r6, 0(vp)
-	la	vp, 16(vp)
-	LOGOP	%r5, 0(up)
-	LOGOP	%r6, 8(up)
-	xgr	%r5, %r1
-	xgr	%r6, %r1
-	stmg	%r5, %r6, 0(rp)
-	la	rp, 16(rp)
-	la	up, 16(up)
-	j	L(mid)
-
-L(top):	lmg	%r5, %r8, 0(vp)
-	la	vp, 32(vp)
-	LOGOP	%r5, 0(up)
-	LOGOP	%r6, 8(up)
-	xgr	%r5, %r1
-	xgr	%r6, %r1
-	LOGOP	%r7, 16(up)
-	LOGOP	%r8, 24(up)
-	xgr	%r7, %r1
-	xgr	%r8, %r1
-	stmg	%r5, %r8, 0(rp)
-	la	up, 32(up)
-	la	rp, 32(rp)
-L(mid):	brctg	%r0, L(top)
-
-	lmg	%r6, %r8, 48(%r15)
-	br	%r14
-')
-
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/lshift.asm b/gmp/mpn/s390_64/lshift.asm
deleted file mode 100644
index 4dae035a62..0000000000
--- a/gmp/mpn/s390_64/lshift.asm
+++ /dev/null
@@ -1,196 +0,0 @@
-dnl  S/390-64 mpn_lshift.
-
-dnl  Copyright 2011, 2012, 2014 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 7
-C z990           3
-C z9		 ?
-C z10		 6
-C z196		 ?
-
-C NOTES
-C  * This uses discrete loads and stores in a software pipeline.  Using lmg and
-C    stmg is not faster.
-C  * One could assume more pipelining could approach 2.5 c/l, but we have not
-C    found any 8-way loop that runs better than the current 4-way loop.
-C  * Consider using the same feed-in code for 1 <= n <= 3 as for n mod 4,
-C    similarly to the x86_64 sqr_basecase feed-in.
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-define(`cnt',	`%r5')
-
-define(`tnc',	`%r6')
-
-ASM_START()
-PROLOGUE(mpn_lshift)
-	cghi	n, 3
-	jh	L(gt1)
-
-	stmg	%r6, %r7, 48(%r15)
-	larl	%r1, L(tab)-4
-	lcgr	tnc, cnt
-	sllg	n, n, 2
-	b	0(n,%r1)
-L(tab):	j	L(n1)
-	j	L(n2)
-	j	L(n3)
-
-L(n1):	lg	%r1, 0(up)
-	sllg	%r0, %r1, 0(cnt)
-	stg	%r0, 0(rp)
-	srlg	%r2, %r1, 0(tnc)
-	lg	%r6, 48(%r15)		C restoring r7 not needed
-	br	%r14
-
-L(n2):	lg	%r1, 8(up)
-	srlg	%r4, %r1, 0(tnc)
-	sllg	%r0, %r1, 0(cnt)
-	j	L(cj)
-
-L(n3):	lg	%r1, 16(up)
-	srlg	%r4, %r1, 0(tnc)
-	sllg	%r0, %r1, 0(cnt)
-	lg	%r1, 8(up)
-	srlg	%r7, %r1, 0(tnc)
-	ogr	%r7, %r0
-	sllg	%r0, %r1, 0(cnt)
-	stg	%r7, 16(rp)
-L(cj):	lg	%r1, 0(up)
-	srlg	%r7, %r1, 0(tnc)
-	ogr	%r7, %r0
-	sllg	%r0, %r1, 0(cnt)
-	stg	%r7, 8(rp)
-	stg	%r0, 0(rp)
-	lgr	%r2, %r4
-	lmg	%r6, %r7, 48(%r15)
-	br	%r14
-
-L(gt1):	stmg	%r6, %r13, 48(%r15)
-	lcgr	tnc, cnt		C tnc = -cnt
-
-	sllg	%r1, n, 3
-	srlg	%r0, n, 2		C loop count
-
-	agr	up, %r1			C point up at end of U
-	agr	rp, %r1			C point rp at end of R
-	aghi	up, -56
-	aghi	rp, -40
-
-	lghi	%r7, 3
-	ngr	%r7, n
-	je	L(b0)
-	cghi	%r7, 2
-	jl	L(b1)
-	je	L(b2)
-
-L(b3):	lg	%r7, 48(up)
-	srlg	%r9, %r7, 0(tnc)
-	sllg	%r11, %r7, 0(cnt)
-	lg	%r8, 40(up)
-	lg	%r7, 32(up)
-	srlg	%r4, %r8, 0(tnc)
-	sllg	%r13, %r8, 0(cnt)
-	ogr	%r11, %r4
-	la	rp, 16(rp)
-	j	L(lm3)
-
-L(b2):	lg	%r8, 48(up)
-	lg	%r7, 40(up)
-	srlg	%r9, %r8, 0(tnc)
-	sllg	%r13, %r8, 0(cnt)
-	la	rp, 24(rp)
-	la	up, 8(up)
-	j	L(lm2)
-
-L(b1):	lg	%r7, 48(up)
-	srlg	%r9, %r7, 0(tnc)
-	sllg	%r11, %r7, 0(cnt)
-	lg	%r8, 40(up)
-	lg	%r7, 32(up)
-	srlg	%r4, %r8, 0(tnc)
-	sllg	%r10, %r8, 0(cnt)
-	ogr	%r11, %r4
-	la	rp, 32(rp)
-	la	up, 16(up)
-	j	L(lm1)
-
-L(b0):	lg	%r8, 48(up)
-	lg	%r7, 40(up)
-	srlg	%r9, %r8, 0(tnc)
-	sllg	%r10, %r8, 0(cnt)
-	la	rp, 40(rp)
-	la	up, 24(up)
-	j	L(lm0)
-
-	ALIGN(8)
-L(top):	srlg	%r4, %r8, 0(tnc)
-	sllg	%r13, %r8, 0(cnt)
-	ogr	%r11, %r4
-	stg	%r10, 24(rp)
-L(lm3):	stg	%r11, 16(rp)
-L(lm2):	srlg	%r12, %r7, 0(tnc)
-	sllg	%r11, %r7, 0(cnt)
-	lg	%r8, 24(up)
-	lg	%r7, 16(up)
-	ogr	%r13, %r12
-	srlg	%r4, %r8, 0(tnc)
-	sllg	%r10, %r8, 0(cnt)
-	ogr	%r11, %r4
-	stg	%r13, 8(rp)
-L(lm1):	stg	%r11, 0(rp)
-L(lm0):	srlg	%r12, %r7, 0(tnc)
-	aghi	rp, -32
-	sllg	%r11, %r7, 0(cnt)
-	lg	%r8, 8(up)
-	lg	%r7, 0(up)
-	aghi	up, -32
-	ogr	%r10, %r12
-	brctg	%r0, L(top)
-
-L(end):	srlg	%r4, %r8, 0(tnc)
-	sllg	%r13, %r8, 0(cnt)
-	ogr	%r11, %r4
-	stg	%r10, 24(rp)
-	stg	%r11, 16(rp)
-	srlg	%r12, %r7, 0(tnc)
-	sllg	%r11, %r7, 0(cnt)
-	ogr	%r13, %r12
-	stg	%r13, 8(rp)
-	stg	%r11, 0(rp)
-	lgr	%r2, %r9
-
-	lmg	%r6, %r13, 48(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/lshiftc.asm b/gmp/mpn/s390_64/lshiftc.asm
deleted file mode 100644
index 92552d529a..0000000000
--- a/gmp/mpn/s390_64/lshiftc.asm
+++ /dev/null
@@ -1,207 +0,0 @@
-dnl  S/390-64 mpn_lshiftc.
-
-dnl  Copyright 2011, 2014 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 9
-C z990           3.5
-C z9		 ?
-C z10		 7
-C z196		 ?
-
-C NOTES
-C  * See notes in lshift.asm.
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-define(`cnt',	`%r5')
-
-define(`tnc',	`%r6')
-
-ASM_START()
-PROLOGUE(mpn_lshiftc)
-	cghi	n, 3
-	jh	L(gt1)
-
-	stmg	%r6, %r8, 48(%r15)
-	larl	%r1, L(tab)-4
-	lcgr	tnc, cnt
-	sllg	n, n, 2
-	lghi	%r8, -1
-	b	0(n,%r1)
-L(tab):	j	L(n1)
-	j	L(n2)
-	j	L(n3)
-
-L(n1):	lg	%r1, 0(up)
-	sllg	%r0, %r1, 0(cnt)
-	xgr	%r0, %r8
-	stg	%r0, 0(rp)
-	srlg	%r2, %r1, 0(tnc)
-	lmg	%r6, %r8, 48(%r15)
-	br	%r14
-
-L(n2):	lg	%r1, 8(up)
-	srlg	%r4, %r1, 0(tnc)
-	sllg	%r0, %r1, 0(cnt)
-	j	L(cj)
-
-L(n3):	lg	%r1, 16(up)
-	srlg	%r4, %r1, 0(tnc)
-	sllg	%r0, %r1, 0(cnt)
-	lg	%r1, 8(up)
-	srlg	%r7, %r1, 0(tnc)
-	ogr	%r7, %r0
-	sllg	%r0, %r1, 0(cnt)
-	xgr	%r7, %r8
-	stg	%r7, 16(rp)
-L(cj):	lg	%r1, 0(up)
-	srlg	%r7, %r1, 0(tnc)
-	ogr	%r7, %r0
-	sllg	%r0, %r1, 0(cnt)
-	xgr	%r7, %r8
-	xgr	%r0, %r8
-	stg	%r7, 8(rp)
-	stg	%r0, 0(rp)
-	lgr	%r2, %r4
-	lmg	%r6, %r8, 48(%r15)
-	br	%r14
-
-L(gt1):	stmg	%r6, %r14, 48(%r15)
-	lcgr	tnc, cnt		C tnc = -cnt
-
-	sllg	%r1, n, 3
-	srlg	%r0, n, 2		C loop count
-
-	agr	up, %r1			C point up at end of U
-	agr	rp, %r1			C point rp at end of R
-	aghi	up, -56
-	aghi	rp, -40
-
-	lghi	%r7, 3
-	lghi	%r14, -1
-	ngr	%r7, n
-	je	L(b0)
-	cghi	%r7, 2
-	jl	L(b1)
-	je	L(b2)
-
-L(b3):	lg	%r7, 48(up)
-	srlg	%r9, %r7, 0(tnc)
-	sllg	%r11, %r7, 0(cnt)
-	lg	%r8, 40(up)
-	lg	%r7, 32(up)
-	srlg	%r4, %r8, 0(tnc)
-	sllg	%r13, %r8, 0(cnt)
-	ogr	%r11, %r4
-	la	rp, 16(rp)
-	xgr	%r11, %r14
-	j	L(lm3)
-
-L(b2):	lg	%r8, 48(up)
-	lg	%r7, 40(up)
-	srlg	%r9, %r8, 0(tnc)
-	sllg	%r13, %r8, 0(cnt)
-	la	rp, 24(rp)
-	la	up, 8(up)
-	j	L(lm2)
-
-L(b1):	lg	%r7, 48(up)
-	srlg	%r9, %r7, 0(tnc)
-	sllg	%r11, %r7, 0(cnt)
-	lg	%r8, 40(up)
-	lg	%r7, 32(up)
-	srlg	%r4, %r8, 0(tnc)
-	sllg	%r10, %r8, 0(cnt)
-	ogr	%r11, %r4
-	la	rp, 32(rp)
-	la	up, 16(up)
-	xgr	%r11, %r14
-	j	L(lm1)
-
-L(b0):	lg	%r8, 48(up)
-	lg	%r7, 40(up)
-	srlg	%r9, %r8, 0(tnc)
-	sllg	%r10, %r8, 0(cnt)
-	la	rp, 40(rp)
-	la	up, 24(up)
-	j	L(lm0)
-
-	ALIGN(8)
-L(top):	srlg	%r4, %r8, 0(tnc)
-	sllg	%r13, %r8, 0(cnt)
-	ogr	%r11, %r4
-	xgr	%r10, %r14
-	xgr	%r11, %r14
-	stg	%r10, 24(rp)
-L(lm3):	stg	%r11, 16(rp)
-L(lm2):	srlg	%r12, %r7, 0(tnc)
-	sllg	%r11, %r7, 0(cnt)
-	lg	%r8, 24(up)
-	lg	%r7, 16(up)
-	ogr	%r13, %r12
-	srlg	%r4, %r8, 0(tnc)
-	sllg	%r10, %r8, 0(cnt)
-	ogr	%r11, %r4
-	xgr	%r13, %r14
-	xgr	%r11, %r14
-	stg	%r13, 8(rp)
-L(lm1):	stg	%r11, 0(rp)
-L(lm0):	srlg	%r12, %r7, 0(tnc)
-	aghi	rp, -32
-	sllg	%r11, %r7, 0(cnt)
-	lg	%r8, 8(up)
-	lg	%r7, 0(up)
-	aghi	up, -32
-	ogr	%r10, %r12
-	brctg	%r0, L(top)
-
-L(end):	srlg	%r4, %r8, 0(tnc)
-	sllg	%r13, %r8, 0(cnt)
-	ogr	%r11, %r4
-	xgr	%r10, %r14
-	xgr	%r11, %r14
-	stg	%r10, 24(rp)
-	stg	%r11, 16(rp)
-	srlg	%r12, %r7, 0(tnc)
-	sllg	%r11, %r7, 0(cnt)
-	ogr	%r13, %r12
-	xgr	%r13, %r14
-	xgr	%r11, %r14
-	stg	%r13, 8(rp)
-	stg	%r11, 0(rp)
-	lgr	%r2, %r9
-
-	lmg	%r6, %r14, 48(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/mod_34lsub1.asm b/gmp/mpn/s390_64/mod_34lsub1.asm
deleted file mode 100644
index fd40011a8c..0000000000
--- a/gmp/mpn/s390_64/mod_34lsub1.asm
+++ /dev/null
@@ -1,109 +0,0 @@
-dnl  S/390-64 mpn_mod_34lsub1
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 5.8
-C z990           2
-C z9		 ?
-C z10		 4.5
-C z196		 ?
-
-C TODO
-C  * Optimise summation code, see x86_64.
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`n',	`%r3')
-
-ASM_START()
-PROLOGUE(mpn_mod_34lsub1)
-	stmg	%r7, %r12, 56(%r15)
-	lghi	%r11, 0
-	lghi	%r12, 0
-	lghi	%r0, 0
-	lghi	%r8, 0
-	lghi	%r9, 0
-	lghi	%r10, 0
-	lghi	%r7, 0
-	aghi	%r3, -3
-	jl	.L3
-
-L(top):	alg	%r0, 0(%r2)
-	alcg	%r12, 8(%r2)
-	alcg	%r11, 16(%r2)
-	alcgr	%r8, %r7
-	la	%r2, 24(%r2)
-	aghi	%r3, -3
-	jnl	L(top)
-
-	lgr	%r7, %r8
-	srlg	%r1, %r11, 16
-	nihh	%r7, 0			C 0xffffffffffff
-	agr	%r7, %r1
-	srlg	%r8, %r8, 48
-	agr	%r7, %r8
-	sllg	%r11, %r11, 32
-	nihh	%r11, 0
-	agr	%r7, %r11
-.L3:
-	cghi	%r3, -3
-	je	.L6
-	alg	%r0, 0(%r2)
-	alcgr	%r10, %r10
-	cghi	%r3, -2
-	je	.L6
-	alg	%r12, 8(%r2)
-	alcgr	%r9, %r9
-.L6:
-	srlg	%r1, %r0, 48
-	nihh	%r0, 0			C 0xffffffffffff
-	agr	%r0, %r1
-	agr	%r0, %r7
-	srlg	%r1, %r12, 32
-	agr	%r0, %r1
-	srlg	%r1, %r10, 32
-	agr	%r0, %r1
-	llgfr	%r12, %r12
-	srlg	%r1, %r9, 16
-	sllg	%r12, %r12, 16
-	llgfr	%r10, %r10
-	agr	%r0, %r1
-	llill	%r2, 65535
-	agr	%r0, %r12
-	sllg	%r10, %r10, 16
-	ngr	%r2, %r9
-	agr	%r0, %r10
-	sllg	%r2, %r2, 32
-	agr	%r2, %r0
-	lmg	%r7, %r12, 56(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/mul_1.asm b/gmp/mpn/s390_64/mul_1.asm
deleted file mode 100644
index a8f6da9a0f..0000000000
--- a/gmp/mpn/s390_64/mul_1.asm
+++ /dev/null
@@ -1,66 +0,0 @@
-dnl  S/390-64 mpn_mul_1
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		29
-C z990		22
-C z9		 ?
-C z10		20
-C z196		 ?
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-define(`v0',	`%r5')
-
-ASM_START()
-PROLOGUE(mpn_mul_1)
-	stmg	%r11, %r12, 88(%r15)
-	lghi	%r12, 0			C zero index register
-	aghi	%r12, 0			C clear carry flag
-	lghi	%r11, 0			C clear carry limb
-
-L(top):	lg	%r1, 0(%r12,up)
-	mlgr	%r0, v0
-	alcgr	%r1, %r11
-	lgr	%r11, %r0		C copy high part to carry limb
-	stg	%r1, 0(%r12,rp)
-	la	%r12, 8(%r12)
-	brctg	n, L(top)
-
-	lghi	%r2, 0
-	alcgr	%r2, %r11
-
-	lmg	%r11, %r12, 88(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/mul_basecase.asm b/gmp/mpn/s390_64/mul_basecase.asm
deleted file mode 100644
index 7d14ea98d2..0000000000
--- a/gmp/mpn/s390_64/mul_basecase.asm
+++ /dev/null
@@ -1,130 +0,0 @@
-dnl  S/390-64 mpn_mul_basecase.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 ?
-C z990		23
-C z9		 ?
-C z10		28
-C z196		 ?
-
-C TODO
-C  * Perhaps add special case for un <= 2.
-C  * Replace loops by faster code.  The mul_1 and addmul_1 loops could be sped
-C    up by about 10%.
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`un',	`%r4')
-define(`vp',	`%r5')
-define(`vn',	`%r6')
-
-define(`zero',	`%r8')
-
-ASM_START()
-PROLOGUE(mpn_mul_basecase)
-	cghi	un, 2
-	jhe	L(ge2)
-
-C un = vn = 1
-	lg	%r1, 0(vp)
-	mlg	%r0, 0(up)
-	stg	%r1, 0(rp)
-	stg	%r0, 8(rp)
-	br	%r14
-
-L(ge2):	C jne	L(gen)
-
-
-L(gen):
-C mul_1 =======================================================================
-
-	stmg	%r6, %r12, 48(%r15)
-	lghi	zero, 0
-	aghi	un, -1
-
-	lg	%r7, 0(vp)
-	lg	%r11, 0(up)
-	lghi	%r12, 8			C init index register
-	mlgr	%r10, %r7
-	lgr	%r9, un
-	stg	%r11, 0(rp)
-	cr	%r15, %r15		C clear carry flag
-
-L(tm):	lg	%r1, 0(%r12,up)
-	mlgr	%r0, %r7
-	alcgr	%r1, %r10
-	lgr	%r10, %r0		C copy high part to carry limb
-	stg	%r1, 0(%r12,rp)
-	la	%r12, 8(%r12)
-	brctg	%r9, L(tm)
-
-	alcgr	%r0, zero
-	stg	%r0, 0(%r12,rp)
-
-C addmul_1 loop ===============================================================
-
-	aghi	vn, -1
-	je	L(outer_end)
-L(outer_loop):
-
-	la	rp, 8(rp)		C rp += 1
-	la	vp, 8(vp)		C up += 1
-	lg	%r7, 0(vp)
-	lg	%r11, 0(up)
-	lghi	%r12, 8			C init index register
-	mlgr	%r10, %r7
-	lgr	%r9, un
-	alg	%r11, 0(rp)
-	stg	%r11, 0(rp)
-
-L(tam):	lg	%r1, 0(%r12,up)
-	lg	%r11, 0(%r12,rp)
-	mlgr	%r0, %r7
-	alcgr	%r1, %r11
-	alcgr	%r0, zero
-	algr	%r1, %r10
-	lgr	%r10, %r0
-	stg	%r1, 0(%r12,rp)
-	la	%r12, 8(%r12)
-	brctg	%r9, L(tam)
-
-	alcgr	%r0, zero
-	stg	%r0, 0(%r12,rp)
-
-	brctg	vn, L(outer_loop)
-L(outer_end):
-
-	lmg	%r6, %r12, 48(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/rshift.asm b/gmp/mpn/s390_64/rshift.asm
deleted file mode 100644
index e870971650..0000000000
--- a/gmp/mpn/s390_64/rshift.asm
+++ /dev/null
@@ -1,195 +0,0 @@
-dnl  S/390-64 mpn_rshift.
-
-dnl  Copyright 2011, 2014 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 7
-C z990           3
-C z9		 ?
-C z10		 6
-C z196		 ?
-
-C NOTES
-C  * See notes in lshift.asm.
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-define(`cnt',	`%r5')
-
-define(`tnc',	`%r6')
-
-ASM_START()
-PROLOGUE(mpn_rshift)
-	cghi	n, 3
-	jh	L(gt1)
-
-	stmg	%r6, %r7, 48(%r15)
-	larl	%r1, L(tab)-4
-	lcgr	tnc, cnt
-	sllg	n, n, 2
-	b	0(n,%r1)
-L(tab):	j	L(n1)
-	j	L(n2)
-	j	L(n3)
-
-L(n1):	lg	%r1, 0(up)
-	srlg	%r0, %r1, 0(cnt)
-	stg	%r0, 0(rp)
-	sllg	%r2, %r1, 0(tnc)
-	lg	%r6, 48(%r15)		C restoring r7 not needed
-	br	%r14
-
-L(n2):	lg	%r1, 0(up)
-	sllg	%r4, %r1, 0(tnc)
-	srlg	%r0, %r1, 0(cnt)
-	lg	%r1, 8(up)
-	sllg	%r7, %r1, 0(tnc)
-	ogr	%r7, %r0
-	srlg	%r0, %r1, 0(cnt)
-	stg	%r7, 0(rp)
-	stg	%r0, 8(rp)
-	lgr	%r2, %r4
-	lmg	%r6, %r7, 48(%r15)
-	br	%r14
-
-
-L(n3):	lg	%r1, 0(up)
-	sllg	%r4, %r1, 0(tnc)
-	srlg	%r0, %r1, 0(cnt)
-	lg	%r1, 8(up)
-	sllg	%r7, %r1, 0(tnc)
-	ogr	%r7, %r0
-	srlg	%r0, %r1, 0(cnt)
-	stg	%r7, 0(rp)
-	lg	%r1, 16(up)
-	sllg	%r7, %r1, 0(tnc)
-	ogr	%r7, %r0
-	srlg	%r0, %r1, 0(cnt)
-	stg	%r7, 8(rp)
-	stg	%r0, 16(rp)
-	lgr	%r2, %r4
-	lmg	%r6, %r7, 48(%r15)
-	br	%r14
-
-L(gt1):	stmg	%r6, %r13, 48(%r15)
-	lcgr	tnc, cnt		C tnc = -cnt
-
-	sllg	%r1, n, 3
-	srlg	%r0, n, 2		C loop count
-
-	lghi	%r7, 3
-	ngr	%r7, n
-	je	L(b0)
-	cghi	%r7, 2
-	jl	L(b1)
-	je	L(b2)
-
-L(b3):	aghi	rp, -8
-	lg	%r7, 0(up)
-	sllg	%r9, %r7, 0(tnc)
-	srlg	%r11, %r7, 0(cnt)
-	lg	%r8, 8(up)
-	lg	%r7, 16(up)
-	sllg	%r4, %r8, 0(tnc)
-	srlg	%r13, %r8, 0(cnt)
-	ogr	%r11, %r4
-	la	up, 24(up)
-	j	L(lm3)
-
-L(b2):	aghi	rp, -16
-	lg	%r8, 0(up)
-	lg	%r7, 8(up)
-	sllg	%r9, %r8, 0(tnc)
-	srlg	%r13, %r8, 0(cnt)
-	la	up, 16(up)
-	j	L(lm2)
-
-L(b1):	aghi	rp, -24
-	lg	%r7, 0(up)
-	sllg	%r9, %r7, 0(tnc)
-	srlg	%r11, %r7, 0(cnt)
-	lg	%r8, 8(up)
-	lg	%r7, 16(up)
-	sllg	%r4, %r8, 0(tnc)
-	srlg	%r10, %r8, 0(cnt)
-	ogr	%r11, %r4
-	la	up, 8(up)
-	j	L(lm1)
-
-L(b0):	aghi	rp, -32
-	lg	%r8, 0(up)
-	lg	%r7, 8(up)
-	sllg	%r9, %r8, 0(tnc)
-	srlg	%r10, %r8, 0(cnt)
-	j	L(lm0)
-
-	ALIGN(8)
-L(top):	sllg	%r4, %r8, 0(tnc)
-	srlg	%r13, %r8, 0(cnt)
-	ogr	%r11, %r4
-	stg	%r10, 0(rp)
-L(lm3):	stg	%r11, 8(rp)
-L(lm2):	sllg	%r12, %r7, 0(tnc)
-	srlg	%r11, %r7, 0(cnt)
-	lg	%r8, 0(up)
-	lg	%r7, 8(up)
-	ogr	%r13, %r12
-	sllg	%r4, %r8, 0(tnc)
-	srlg	%r10, %r8, 0(cnt)
-	ogr	%r11, %r4
-	stg	%r13, 16(rp)
-L(lm1):	stg	%r11, 24(rp)
-L(lm0):	sllg	%r12, %r7, 0(tnc)
-	aghi	rp, 32
-	srlg	%r11, %r7, 0(cnt)
-	lg	%r8, 16(up)
-	lg	%r7, 24(up)
-	aghi	up, 32
-	ogr	%r10, %r12
-	brctg	%r0, L(top)
-
-L(end):	sllg	%r4, %r8, 0(tnc)
-	srlg	%r13, %r8, 0(cnt)
-	ogr	%r11, %r4
-	stg	%r10, 0(rp)
-	stg	%r11, 8(rp)
-	sllg	%r12, %r7, 0(tnc)
-	srlg	%r11, %r7, 0(cnt)
-	ogr	%r13, %r12
-	stg	%r13, 16(rp)
-	stg	%r11, 24(rp)
-	lgr	%r2, %r9
-
-	lmg	%r6, %r13, 48(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/sqr_basecase.asm b/gmp/mpn/s390_64/sqr_basecase.asm
deleted file mode 100644
index bf31bd5546..0000000000
--- a/gmp/mpn/s390_64/sqr_basecase.asm
+++ /dev/null
@@ -1,203 +0,0 @@
-dnl  S/390-64 mpn_sqr_basecase.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		 ?
-C z990		23
-C z9		 ?
-C z10		28
-C z196		 ?
-
-C TODO
-C  * Clean up.
-C  * Stop iterating addmul_1 loop at latest for n = 2, implement longer tail.
-C    This will ask for basecase handling of n = 3.
-C  * Update counters and pointers more straightforwardly, possibly lowering
-C    register usage.
-C  * Should we use this allocation-free style for more sqr_basecase asm
-C    implementations?  The only disadvantage is that it requires R != U.
-C  * Replace loops by faster code.  The mul_1 and addmul_1 loops could be sped
-C    up by about 10%.  The sqr_diag_addlsh1 loop could probably be sped up even
-C    more.
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-
-define(`zero',	`%r8')
-define(`rp_saved',	`%r9')
-define(`up_saved',	`%r13')
-define(`n_saved',	`%r14')
-
-ASM_START()
-PROLOGUE(mpn_sqr_basecase)
-	aghi	n, -2
-	jhe	L(ge2)
-
-C n = 1
-	lg	%r5, 0(up)
-	mlgr	%r4, %r5
-	stg	%r5, 0(rp)
-	stg	%r4, 8(rp)
-	br	%r14
-
-L(ge2):	jne	L(gen)
-
-C n = 2
-	stmg	%r6, %r8, 48(%r15)
-	lghi	zero, 0
-
-	lg	%r5, 0(up)
-	mlgr	%r4, %r5		C u0 * u0
-	lg	%r1, 8(up)
-	mlgr	%r0, %r1		C u1 * u1
-	stg	%r5, 0(rp)
-
-	lg	%r7, 0(up)
-	mlg	%r6, 8(up)		C u0 * u1
-	algr	%r7, %r7
-	alcgr	%r6, %r6
-	alcgr	%r0, zero
-
-	algr	%r4, %r7
-	alcgr	%r1, %r6
-	alcgr	%r0, zero
-	stg	%r4, 8(rp)
-	stg	%r1, 16(rp)
-	stg	%r0, 24(rp)
-
-	lmg	%r6, %r8, 48(%r15)
-	br	%r14
-
-L(gen):
-C mul_1 =======================================================================
-
-	stmg	%r6, %r14, 48(%r15)
-	lghi	zero, 0
-	lgr	up_saved, up
-	lgr	rp_saved, rp
-	lgr	n_saved, n
-
-	lg	%r6, 0(up)
-	lg	%r11, 8(up)
-	lghi	%r12, 16		C init index register
-	mlgr	%r10, %r6
-	lgr	%r5, n
-	stg	%r11, 8(rp)
-	cr	%r15, %r15		C clear carry flag
-
-L(tm):	lg	%r1, 0(%r12,up)
-	mlgr	%r0, %r6
-	alcgr	%r1, %r10
-	lgr	%r10, %r0		C copy high part to carry limb
-	stg	%r1, 0(%r12,rp)
-	la	%r12, 8(%r12)
-	brctg	%r5, L(tm)
-
-	alcgr	%r0, zero
-	stg	%r0, 0(%r12,rp)
-
-C addmul_1 loop ===============================================================
-
-	aghi	n, -1
-	je	L(outer_end)
-L(outer_loop):
-
-	la	rp, 16(rp)		C rp += 2
-	la	up, 8(up)		C up += 1
-	lg	%r6, 0(up)
-	lg	%r11, 8(up)
-	lghi	%r12, 16		C init index register
-	mlgr	%r10, %r6
-	lgr	%r5, n
-	alg	%r11, 8(rp)
-	stg	%r11, 8(rp)
-
-L(tam):	lg	%r1, 0(%r12,up)
-	lg	%r7, 0(%r12,rp)
-	mlgr	%r0, %r6
-	alcgr	%r1, %r7
-	alcgr	%r0, zero
-	algr	%r1, %r10
-	lgr	%r10, %r0
-	stg	%r1, 0(%r12,rp)
-	la	%r12, 8(%r12)
-	brctg	%r5, L(tam)
-
-	alcgr	%r0, zero
-	stg	%r0, 0(%r12,rp)
-
-	brctg	n, L(outer_loop)
-L(outer_end):
-
-	lg	%r6, 8(up)
-	lg	%r1, 16(up)
-	lgr	%r7, %r0		C Same as: lg %r7, 24(,rp)
-	mlgr	%r0, %r6
-	algr	%r1, %r7
-	alcgr	%r0, zero
-	stg	%r1, 24(rp)
-	stg	%r0, 32(rp)
-
-C sqr_diag_addlsh1 ============================================================
-
-define(`up', `up_saved')
-define(`rp', `rp_saved')
-	la	n, 1(n_saved)
-
-	lg	%r1, 0(up)
-	mlgr	%r0, %r1
-	stg	%r1, 0(rp)
-C	clr	%r15, %r15		C clear carry (already clear per above)
-
-L(top):	lg	%r11, 8(up)
-	la	up, 8(up)
-	lg	%r6, 8(rp)
-	lg	%r7, 16(rp)
-	mlgr	%r10, %r11
-	alcgr	%r6, %r6
-	alcgr	%r7, %r7
-	alcgr	%r10, zero		C propagate carry to high product limb
-	algr	%r6, %r0
-	alcgr	%r7, %r11
-	stmg	%r6, %r7, 8(rp)
-	la	rp, 16(rp)
-	lgr	%r0, %r10		C copy carry limb
-	brctg	n, L(top)
-
-	alcgr	%r0, zero
-	stg	%r0, 8(rp)
-
-	lmg	%r6, %r14, 48(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/sublsh1_n.asm b/gmp/mpn/s390_64/sublsh1_n.asm
deleted file mode 100644
index 50f127acef..0000000000
--- a/gmp/mpn/s390_64/sublsh1_n.asm
+++ /dev/null
@@ -1,169 +0,0 @@
-dnl  S/390-64 mpn_sublsh1_n
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		10
-C z990		 5
-C z9		 ?
-C z10		12
-C z196		 ?
-
-C TODO
-C  * Optimise for small n
-C  * Compute RETVAL for sublsh1_n less stupidly
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`vp',	`%r4')
-define(`n',	`%r5')
-
-ifdef(`OPERATION_addlsh1_n',`
-  define(ADSBR,		algr)
-  define(ADSBCR,	alcgr)
-  define(INITCY,	`lghi	%r13, -1')
-  define(RETVAL,	`la	%r2, 2(%r1,%r13)')
-  define(func, mpn_addlsh1_n)
-')
-ifdef(`OPERATION_sublsh1_n',`
-  define(ADSBR,		slgr)
-  define(ADSBCR,	slbgr)
-  define(INITCY,	`lghi	%r13, 0')
-  define(RETVAL,`dnl
-	slgr	%r1, %r13
-	lghi	%r2, 1
-	algr	%r2, %r1')
-  define(func, mpn_sublsh1_n)
-')
-
-ASM_START()
-PROLOGUE(mpn_sublsh1_n)
-	stmg	%r6, %r13, 48(%r15)
-
-	aghi	n, 3
-	lghi	%r7, 3
-	srlg	%r0, n, 2
-	ngr	%r7, n			C n mod 4
-	je	L(b1)
-	cghi	%r7, 2
-	jl	L(b2)
-	jne	L(b0)
-
-L(b3):	lmg	%r5, %r7, 0(up)
-	la	up, 24(up)
-	lmg	%r9, %r11, 0(vp)
-	la	vp, 24(vp)
-
-	algr	%r9, %r9
-	alcgr	%r10, %r10
-	alcgr	%r11, %r11
-	slbgr	%r1, %r1
-
-	ADSBR	%r5, %r9
-	ADSBCR	%r6, %r10
-	ADSBCR	%r7, %r11
-	slbgr	%r13, %r13
-
-	stmg	%r5, %r7, 0(rp)
-	la	rp, 24(rp)
-	brctg	%r0, L(top)
-	j	L(end)
-
-L(b0):	lghi	%r1, -1
-	INITCY
-	j	L(top)
-
-L(b1):	lg	%r5, 0(up)
-	la	up, 8(up)
-	lg	%r9, 0(vp)
-	la	vp, 8(vp)
-
-	algr	%r9, %r9
-	slbgr	%r1, %r1
-	ADSBR	%r5, %r9
-	slbgr	%r13, %r13
-
-	stg	%r5, 0(rp)
-	la	rp, 8(rp)
-	brctg	%r0, L(top)
-	j	L(end)
-
-L(b2):	lmg	%r5, %r6, 0(up)
-	la	up, 16(up)
-	lmg	%r9, %r10, 0(vp)
-	la	vp, 16(vp)
-
-	algr	%r9, %r9
-	alcgr	%r10, %r10
-	slbgr	%r1, %r1
-
-	ADSBR	%r5, %r9
-	ADSBCR	%r6, %r10
-	slbgr	%r13, %r13
-
-	stmg	%r5, %r6, 0(rp)
-	la	rp, 16(rp)
-	brctg	%r0, L(top)
-	j	L(end)
-
-L(top):	lmg	%r9, %r12, 0(vp)
-	la	vp, 32(vp)
-
-	aghi	%r1, 1			C restore carry
-
-	alcgr	%r9, %r9
-	alcgr	%r10, %r10
-	alcgr	%r11, %r11
-	alcgr	%r12, %r12
-
-	slbgr	%r1, %r1		C save carry
-
-	lmg	%r5, %r8, 0(up)
-	la	up, 32(up)
-
-	aghi	%r13, 1			C restore carry
-
-	ADSBCR	%r5, %r9
-	ADSBCR	%r6, %r10
-	ADSBCR	%r7, %r11
-	ADSBCR	%r8, %r12
-
-	slbgr	%r13, %r13		C save carry
-
-	stmg	%r5, %r8, 0(rp)
-	la	rp, 32(rp)
-	brctg	%r0, L(top)
-
-L(end):	RETVAL
-	lmg	%r6, %r13, 48(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/submul_1.asm b/gmp/mpn/s390_64/submul_1.asm
deleted file mode 100644
index 91c4b06631..0000000000
--- a/gmp/mpn/s390_64/submul_1.asm
+++ /dev/null
@@ -1,70 +0,0 @@
-dnl  S/390-64 mpn_submul_1
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C            cycles/limb
-C z900		35
-C z990		24
-C z9		 ?
-C z10		28
-C z196		 ?
-
-C INPUT PARAMETERS
-define(`rp',	`%r2')
-define(`up',	`%r3')
-define(`n',	`%r4')
-define(`v0',	`%r5')
-
-ASM_START()
-PROLOGUE(mpn_submul_1)
-	stmg	%r9, %r12, 72(%r15)
-	lghi	%r12, 0
-	slgr	%r11, %r11
-
-L(top):	lg	%r1, 0(%r12, up)
-	lg	%r10, 0(%r12, rp)
-	mlgr	%r0, v0
-	slbgr	%r10, %r1
-	slbgr	%r9, %r9
-	slgr	%r0, %r9		C conditional incr
-	slgr	%r10, %r11
-	lgr	%r11, %r0
-	stg	%r10, 0(%r12, rp)
-	la	%r12, 8(%r12)
-	brctg	%r4,  L(top)
-
-	lgr	%r2, %r11
-	slbgr	%r9, %r9
-	slgr	%r2, %r9
-
-	lmg	%r9, %r12, 72(%r15)
-	br	%r14
-EPILOGUE()
diff --git a/gmp/mpn/s390_64/z10/gmp-mparam.h b/gmp/mpn/s390_64/z10/gmp-mparam.h
deleted file mode 100644
index c034f9b3b6..0000000000
--- a/gmp/mpn/s390_64/z10/gmp-mparam.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/* S/390-64 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 4400 MHz IBM z10 */
-/* FFT tuning limit = 15000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.7 */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            4
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          8
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        14
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        23
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     29
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              2
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           56
-
-#define MUL_TOOM22_THRESHOLD                 8
-#define MUL_TOOM33_THRESHOLD                65
-#define MUL_TOOM44_THRESHOLD                88
-#define MUL_TOOM6H_THRESHOLD               125
-#define MUL_TOOM8H_THRESHOLD               163
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      58
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      61
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      57
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      62
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      82
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 12
-#define SQR_TOOM3_THRESHOLD                 89
-#define SQR_TOOM4_THRESHOLD                130
-#define SQR_TOOM6_THRESHOLD                189
-#define SQR_TOOM8_THRESHOLD                260
-
-#define MULMID_TOOM42_THRESHOLD             24
-
-#define MULMOD_BNM1_THRESHOLD                9
-#define SQRMOD_BNM1_THRESHOLD                9
-
-#define MUL_FFT_MODF_THRESHOLD             220  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    220, 5}, {      7, 4}, {     15, 5}, {      9, 6}, \
-    {      5, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
-    {     11, 7}, {      6, 6}, {     13, 7}, {      7, 6}, \
-    {     15, 7}, {     13, 8}, {      7, 7}, {     16, 8}, \
-    {      9, 7}, {     19, 8}, {     11, 7}, {     23, 8}, \
-    {     13, 9}, {      7, 8}, {     15, 7}, {     31, 8}, \
-    {     19, 9}, {     11, 8}, {     23,10}, {      7, 9}, \
-    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
-    {     27,10}, {     15, 9}, {     39,10}, {     23,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     83,10}, {     47,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255, 7}, {    511,10}, {     71, 9}, \
-    {    143, 8}, {    287, 7}, {    575,10}, {     79,11}, \
-    {     47,12}, {     31,11}, {     63,10}, {    127, 9}, \
-    {    255, 8}, {    511,10}, {    143, 9}, {    287, 8}, \
-    {    575,11}, {     79,10}, {    159, 9}, {    319, 8}, \
-    {    639,10}, {    175, 9}, {    351, 8}, {    703, 7}, \
-    {   1407, 6}, {   2815,10}, {    191, 9}, {    383, 8}, \
-    {    767, 9}, {    415,11}, {    111,10}, {    223, 9}, \
-    {    447, 8}, {    895,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
-    {    575, 8}, {   1151,10}, {    319, 9}, {    639,11}, \
-    {    175, 9}, {    703, 8}, {   1407, 7}, {   2815,11}, \
-    {    191,10}, {    415, 9}, {    831,11}, {    223,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    287,10}, {    575, 9}, {   1151,12}, {    159,11}, \
-    {    319,10}, {    639,11}, {    351,10}, {    703, 9}, \
-    {   1407, 8}, {   2815,12}, {    191,11}, {    383,10}, \
-    {    767,11}, {    415,10}, {    831,12}, {    223,10}, \
-    {    895, 9}, {   1791,11}, {    479,13}, {    127,12}, \
-    {    255,11}, {    511,10}, {   1023,12}, {    287,11}, \
-    {    575,10}, {   1151,12}, {    319,11}, {    639,12}, \
-    {    351,11}, {    703,10}, {   1407, 9}, {   2815,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,10}, {   1663,12}, {    447,11}, {    895,10}, \
-    {   1791, 9}, {   3583,12}, {    479,14}, {    127,13}, \
-    {    255,12}, {    511,11}, {   1023,12}, {    575,11}, \
-    {   1151,13}, {    319,12}, {    703,11}, {   1407,10}, \
-    {   2815,13}, {    383,12}, {    767,11}, {   1535,12}, \
-    {    831,11}, {   1663,13}, {    447,12}, {    895,11}, \
-    {   1791,10}, {   3583,14}, {    255,13}, {    511,12}, \
-    {   1023,13}, {    575,12}, {   1151,13}, {    639,12}, \
-    {   1279,13}, {    703,12}, {   1407,11}, {   2815,14}, \
-    {    383,13}, {    767,12}, {   1535,13}, {    831,12}, \
-    {   1663,13}, {    895,12}, {   1791,11}, {   3583,15}, \
-    {    255,14}, {    511,13}, {   1151,14}, {    639,13}, \
-    {   1279,12}, {   2559,13}, {   1407,12}, {   2815,13}, \
-    {   1471,14}, {    767,13}, {   1663,14}, {    895,13}, \
-    {   1791,12}, {   3583,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 205
-#define MUL_FFT_THRESHOLD                 1728
-
-#define SQR_FFT_MODF_THRESHOLD             212  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    212, 5}, {      7, 4}, {     15, 5}, {     11, 6}, \
-    {      6, 5}, {     13, 6}, {      7, 5}, {     15, 6}, \
-    {     13, 7}, {      7, 6}, {     15, 7}, {     13, 8}, \
-    {      7, 7}, {     16, 8}, {      9, 7}, {     19, 8}, \
-    {     11, 7}, {     23, 8}, {     13, 9}, {      7, 8}, \
-    {     19, 9}, {     11, 8}, {     23,10}, {      7, 9}, \
-    {     15, 8}, {     31, 9}, {     19, 8}, {     39, 9}, \
-    {     23,10}, {     15, 9}, {     39,10}, {     23,11}, \
-    {     15,10}, {     31, 9}, {     63,10}, {     39, 9}, \
-    {     79,10}, {     47,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255,10}, {     71, 9}, {    143, 8}, \
-    {    287, 7}, {    575,10}, {     79,11}, {     47,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511,10}, {    143, 9}, {    287, 8}, {    575,11}, \
-    {     79,10}, {    159, 9}, {    319, 8}, {    639,10}, \
-    {    175, 9}, {    351, 8}, {    703,10}, {    191, 9}, \
-    {    383, 8}, {    767,10}, {    207,11}, {    111,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,11}, \
-    {    143,10}, {    287, 9}, {    575, 8}, {   1151,11}, \
-    {    159,10}, {    319, 9}, {    639,11}, {    175,10}, \
-    {    351, 9}, {    703,12}, {     95,11}, {    191,10}, \
-    {    383, 9}, {    767,11}, {    207,10}, {    415,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    287,10}, {    575, 9}, {   1151,12}, {    159,11}, \
-    {    319,10}, {    639,11}, {    351,10}, {    703, 9}, \
-    {   1407,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,12}, {    223,11}, {    447,10}, {    895, 9}, \
-    {   1791,13}, {    127,12}, {    255,11}, {    511,12}, \
-    {    287,11}, {    575,10}, {   1151,12}, {    319,11}, \
-    {    639,12}, {    351,11}, {    703,10}, {   1407,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,10}, {   1663,12}, {    447,11}, {    895,10}, \
-    {   1791, 9}, {   3583,12}, {    479,11}, {    959,10}, \
-    {   1919,14}, {    127,13}, {    255,12}, {    511,11}, \
-    {   1023,12}, {    575,11}, {   1151,13}, {    319,12}, \
-    {    639,11}, {   1279,12}, {    703,11}, {   1407,10}, \
-    {   2815,13}, {    383,12}, {    767,11}, {   1535,12}, \
-    {    831,11}, {   1663,13}, {    447,12}, {    895,11}, \
-    {   1791,12}, {    959,11}, {   1919,14}, {    255,13}, \
-    {    511,12}, {   1023,13}, {    575,12}, {   1151,13}, \
-    {    639,12}, {   1279,13}, {    703,12}, {   1407,11}, \
-    {   2815,14}, {    383,13}, {    767,12}, {   1535,13}, \
-    {    831,12}, {   1663,13}, {    895,12}, {   1791,11}, \
-    {   3583,13}, {    959,12}, {   1919,15}, {    255,14}, \
-    {    511,13}, {   1023,12}, {   2047,13}, {   1151,14}, \
-    {    639,13}, {   1279,12}, {   2559,13}, {   1407,12}, \
-    {   2815,14}, {    767,13}, {   1663,14}, {    895,13}, \
-    {   1791,12}, {   3583,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 201
-#define SQR_FFT_THRESHOLD                 1728
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  34
-#define MULLO_MUL_N_THRESHOLD             3176
-
-#define DC_DIV_QR_THRESHOLD                 39
-#define DC_DIVAPPR_Q_THRESHOLD             151
-#define DC_BDIV_QR_THRESHOLD                44
-#define DC_BDIV_Q_THRESHOLD                107
-
-#define INV_MULMOD_BNM1_THRESHOLD           14
-#define INV_NEWTON_THRESHOLD               163
-#define INV_APPR_THRESHOLD                 154
-
-#define BINV_NEWTON_THRESHOLD              171
-#define REDC_1_TO_REDC_N_THRESHOLD          46
-
-#define MU_DIV_QR_THRESHOLD                792
-#define MU_DIVAPPR_Q_THRESHOLD             807
-#define MUPI_DIV_QR_THRESHOLD               81
-#define MU_BDIV_QR_THRESHOLD               654
-#define MU_BDIV_Q_THRESHOLD                792
-
-#define POWM_SEC_TABLE  3,19,194,946,2424
-
-#define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                     117
-#define HGCD_APPR_THRESHOLD                145
-#define HGCD_REDUCE_THRESHOLD             1329
-#define GCD_DC_THRESHOLD                   318
-#define GCDEXT_DC_THRESHOLD                265
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                17
-#define GET_STR_PRECOMPUTE_THRESHOLD        35
-#define SET_STR_DC_THRESHOLD              1015
-#define SET_STR_PRECOMPUTE_THRESHOLD      2047
-
-#define FAC_DSC_THRESHOLD                  330
-#define FAC_ODD_THRESHOLD                   23
diff --git a/gmp/mpn/sh/add_n.asm b/gmp/mpn/sh/add_n.asm
deleted file mode 100644
index 79d17d0129..0000000000
--- a/gmp/mpn/sh/add_n.asm
+++ /dev/null
@@ -1,59 +0,0 @@
-dnl  SH mpn_add_n -- Add two limb vectors of the same length > 0 and store sum
-dnl  in a third limb vector.
-
-dnl  Copyright 1995, 1997, 2000, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C INPUT PARAMETERS
-C rp		r4
-C up		r5
-C vp		r6
-C n		r7
-
-changecom(blah)			C disable # to make all C comments below work
-
-ASM_START()
-PROLOGUE(mpn_add_n)
-	mov	#0,r3		C clear cy save reg
-
-L(top):	mov.l	@r5+,r1
-	mov.l	@r6+,r2
-	shlr	r3		C restore cy
-	addc	r2,r1
-	movt	r3		C save cy
-	mov.l	r1,@r4
-	dt	r7
-	bf.s	L(top)
-	 add	#4,r4
-
-	rts
-	mov	r3,r0		C return carry-out from most significant limb
-EPILOGUE()
diff --git a/gmp/mpn/sh/add_n.s b/gmp/mpn/sh/add_n.s
new file mode 100644
index 0000000000..914fb4fc70
--- /dev/null
+++ b/gmp/mpn/sh/add_n.s
@@ -0,0 +1,45 @@
+! SH __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright 1995, 1997, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr	r4
+! s1_ptr	r5
+! s2_ptr	r6
+! size		r7
+
+	.text
+	.align 2
+	.global	___gmpn_add_n
+___gmpn_add_n:
+	mov	#0,r3		! clear cy save reg
+
+Loop:	mov.l	@r5+,r1		! r1 = next s1 limb, s1_ptr advances
+	mov.l	@r6+,r2		! r2 = next s2 limb, s2_ptr advances
+	shlr	r3		! restore cy
+	addc	r2,r1		! r1 += r2 + T, carry-out left in T
+	movt	r3		! save cy
+	mov.l	r1,@r4		! store sum limb at res_ptr
+	dt	r7		! size -= 1, T = (size == 0); this is why cy lives in r3
+	bf.s	Loop		! loop while size != 0
+	 add	#4,r4		! delay slot: advance res_ptr to the next limb
+
+	rts
+	mov	r3,r0		! rts delay slot: return carry-out from most significant limb
diff --git a/gmp/mpn/sh/sh2/addmul_1.asm b/gmp/mpn/sh/sh2/addmul_1.asm
deleted file mode 100644
index c914b29541..0000000000
--- a/gmp/mpn/sh/sh2/addmul_1.asm
+++ /dev/null
@@ -1,65 +0,0 @@
-dnl  SH2 mpn_addmul_1 -- Multiply a limb vector with a limb and add the result
-dnl  to a second limb vector.
-
-dnl  Copyright 1995, 2000, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C INPUT PARAMETERS
-C res_ptr	r4
-C s1_ptr	r5
-C size		r6
-C s2_limb	r7
-
-changecom(blah)			C disable # to make all C comments below work
-
-ASM_START()
-PROLOGUE(mpn_addmul_1)
-	mov	#0,r2		C cy_limb = 0
-	mov	#0,r0		C Keep r0 = 0 for entire loop
-	clrt
-
-L(top):	mov.l	@r5+,r3
-	dmulu.l	r3,r7
-	sts	macl,r1
-	addc	r2,r1		C lo_prod += old cy_limb
-	sts	mach,r2		C new cy_limb = hi_prod
-	mov.l	@r4,r3
-	addc	r0,r2		C cy_limb += T, T = 0
-	addc	r3,r1
-	addc	r0,r2		C cy_limb += T, T = 0
-	dt	r6
-	mov.l	r1,@r4
-	bf.s	L(top)
-	add	#4,r4
-
-	rts
-	mov	r2,r0
-EPILOGUE()
diff --git a/gmp/mpn/sh/sh2/addmul_1.s b/gmp/mpn/sh/sh2/addmul_1.s
new file mode 100644
index 0000000000..df22deaf5c
--- /dev/null
+++ b/gmp/mpn/sh/sh2/addmul_1.s
@@ -0,0 +1,51 @@
+! SH2 __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+! the result to a second limb vector.
+
+! Copyright 1995, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr	r4
+! s1_ptr	r5
+! size		r6
+! s2_limb	r7
+
+	.text
+	.align 1
+	.global	___gmpn_addmul_1
+___gmpn_addmul_1:
+	mov	#0,r2		! cy_limb = 0
+	mov	#0,r0		! Keep r0 = 0 for entire loop
+	clrt			! T = 0 so the first addc adds no stray carry
+
+Loop:	mov.l	@r5+,r3		! r3 = next s1 limb, s1_ptr advances
+	dmulu.l	r3,r7		! mach:macl = r3 * s2_limb (unsigned)
+	sts	macl,r1		! r1 = lo_prod
+	addc	r2,r1		! lo_prod += old cy_limb
+	sts	mach,r2		! new cy_limb = hi_prod
+	mov.l	@r4,r3		! r3 = current limb at res_ptr
+	addc	r0,r2		! cy_limb += T, T = 0
+	addc	r3,r1		! lo_prod += result limb, carry-out left in T
+	addc	r0,r2		! cy_limb += T, T = 0
+	dt	r6		! size -= 1, T = (size == 0)
+	mov.l	r1,@r4		! store updated result limb
+	bf.s	Loop		! taken only when T = 0, so the next addc starts clean
+	add	#4,r4		! delay slot: advance res_ptr to the next limb
+
+	rts
+	mov	r2,r0		! rts delay slot: return final cy_limb
diff --git a/gmp/mpn/sh/sh2/mul_1.asm b/gmp/mpn/sh/sh2/mul_1.asm
deleted file mode 100644
index 83548a6953..0000000000
--- a/gmp/mpn/sh/sh2/mul_1.asm
+++ /dev/null
@@ -1,62 +0,0 @@
-dnl  SH2 mpn_mul_1 -- Multiply a limb vector with a limb and store the result
-dnl  in a second limb vector.
-
-dnl  Copyright 1995, 2000, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C INPUT PARAMETERS
-C res_ptr	r4
-C s1_ptr	r5
-C size		r6
-C s2_limb	r7
-
-changecom(blah)			C disable # to make all C comments below work
-
-ASM_START()
-PROLOGUE(mpn_mul_1)
-	mov	#0,r2		C cy_limb = 0
-	mov	#0,r0		C Keep r0 = 0 for entire loop
-	clrt
-
-L(top):	mov.l	@r5+,r3
-	dmulu.l	r3,r7
-	sts	macl,r1
-	addc	r2,r1
-	sts	mach,r2
-	addc	r0,r2		C propagate carry to cy_limb (dt clobbers T)
-	dt	r6
-	mov.l	r1,@r4
-	bf.s	L(top)
-	add	#4,r4
-
-	rts
-	mov	r2,r0
-EPILOGUE()
diff --git a/gmp/mpn/sh/sh2/mul_1.s b/gmp/mpn/sh/sh2/mul_1.s
new file mode 100644
index 0000000000..aa41bf2421
--- /dev/null
+++ b/gmp/mpn/sh/sh2/mul_1.s
@@ -0,0 +1,48 @@
+! SH2 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright 1995, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr	r4
+! s1_ptr	r5
+! size		r6
+! s2_limb	r7
+
+	.text
+	.align 1
+	.global	___gmpn_mul_1
+___gmpn_mul_1:
+	mov	#0,r2		! cy_limb = 0
+	mov	#0,r0		! Keep r0 = 0 for entire loop
+	clrt			! T = 0 so the first addc adds no stray carry
+
+Loop:	mov.l	@r5+,r3		! r3 = next s1 limb, s1_ptr advances
+	dmulu.l	r3,r7		! mach:macl = r3 * s2_limb (unsigned)
+	sts	macl,r1		! r1 = lo_prod
+	addc	r2,r1		! lo_prod += old cy_limb, carry-out left in T
+	sts	mach,r2		! new cy_limb = hi_prod
+	addc	r0,r2		! propagate carry to cy_limb (dt clobbers T)
+	dt	r6		! size -= 1, T = (size == 0)
+	mov.l	r1,@r4		! store product limb at res_ptr
+	bf.s	Loop		! taken only when T = 0, so the next addc starts clean
+	add	#4,r4		! delay slot: advance res_ptr to the next limb
+
+	rts
+	mov	r2,r0		! rts delay slot: return final cy_limb
diff --git a/gmp/mpn/sh/sh2/submul_1.asm b/gmp/mpn/sh/sh2/submul_1.asm
deleted file mode 100644
index bef2abd9b2..0000000000
--- a/gmp/mpn/sh/sh2/submul_1.asm
+++ /dev/null
@@ -1,65 +0,0 @@
-dnl  SH2 mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
-dnl  result from a second limb vector.
-
-dnl  Copyright 1995, 2000, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C INPUT PARAMETERS
-C res_ptr	r4
-C s1_ptr	r5
-C size		r6
-C s2_limb	r7
-
-changecom(blah)			C disable # to make all C comments below work
-
-ASM_START()
-PROLOGUE(mpn_submul_1)
-	mov	#0,r2		C cy_limb = 0
-	mov	#0,r0		C Keep r0 = 0 for entire loop
-	clrt
-
-L(top):	mov.l	@r5+,r3
-	dmulu.l	r3,r7
-	sts	macl,r1
-	addc	r2,r1		C lo_prod += old cy_limb
-	sts	mach,r2		C new cy_limb = hi_prod
-	mov.l	@r4,r3
-	addc	r0,r2		C cy_limb += T, T = 0
-	subc	r1,r3
-	addc	r0,r2		C cy_limb += T, T = 0
-	dt	r6
-	mov.l	r3,@r4
-	bf.s	L(top)
-	add	#4,r4
-
-	rts
-	mov	r2,r0
-EPILOGUE()
diff --git a/gmp/mpn/sh/sh2/submul_1.s b/gmp/mpn/sh/sh2/submul_1.s
new file mode 100644
index 0000000000..a1149c54fc
--- /dev/null
+++ b/gmp/mpn/sh/sh2/submul_1.s
@@ -0,0 +1,51 @@
+! SH2 __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+! the result from a second limb vector.
+
+! Copyright 1995, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr	r4
+! s1_ptr	r5
+! size		r6
+! s2_limb	r7
+
+	.text
+	.align 1
+	.global	___gmpn_submul_1
+___gmpn_submul_1:
+	mov	#0,r2		! cy_limb = 0
+	mov	#0,r0		! Keep r0 = 0 for entire loop
+	clrt			! T = 0 so the first addc adds no stray carry
+
+Loop:	mov.l	@r5+,r3		! r3 = next s1 limb, s1_ptr advances
+	dmulu.l	r3,r7		! mach:macl = r3 * s2_limb (unsigned)
+	sts	macl,r1		! r1 = lo_prod
+	addc	r2,r1		! lo_prod += old cy_limb
+	sts	mach,r2		! new cy_limb = hi_prod
+	mov.l	@r4,r3		! r3 = current limb at res_ptr
+	addc	r0,r2		! cy_limb += T, T = 0
+	subc	r1,r3		! r3 = result limb - lo_prod (subc Rm,Rn: Rn -= Rm + T), T = borrow
+	addc	r0,r2		! cy_limb += T, T = 0
+	dt	r6		! size -= 1, T = (size == 0)
+	mov.l	r3,@r4		! store the difference (r3), not the product limb
+	bf.s	Loop		! taken only when T = 0, so the next addc starts clean
+	add	#4,r4		! delay slot: advance res_ptr to the next limb
+
+	rts
+	mov	r2,r0		! rts delay slot: return final cy_limb (borrow-out limb)
diff --git a/gmp/mpn/sh/sub_n.asm b/gmp/mpn/sh/sub_n.asm
deleted file mode 100644
index 465bc806fa..0000000000
--- a/gmp/mpn/sh/sub_n.asm
+++ /dev/null
@@ -1,59 +0,0 @@
-dnl  SH mpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
-dnl  difference in a third limb vector.
-
-dnl  Copyright 1995, 1997, 2000, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C INPUT PARAMETERS
-C rp		r4
-C up		r5
-C vp		r6
-C n		r7
-
-changecom(blah)			C disable # to make all C comments below work
-
-ASM_START()
-PROLOGUE(mpn_sub_n)
-	mov	#0,r3		C clear cy save reg
-
-L(top):	mov.l	@r5+,r1
-	mov.l	@r6+,r2
-	shlr	r3		C restore cy
-	subc	r2,r1
-	movt	r3		C save cy
-	mov.l	r1,@r4
-	dt	r7
-	bf.s	L(top)
-	 add	#4,r4
-
-	rts
-	mov	r3,r0		C return carry-out from most significant limb
-EPILOGUE()
diff --git a/gmp/mpn/sh/sub_n.s b/gmp/mpn/sh/sub_n.s
new file mode 100644
index 0000000000..b06e09a727
--- /dev/null
+++ b/gmp/mpn/sh/sub_n.s
@@ -0,0 +1,45 @@
+! SH __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
+! difference in a third limb vector.
+
+! Copyright 1995, 1997, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr	r4
+! s1_ptr	r5
+! s2_ptr	r6
+! size		r7
+
+	.text
+	.align 2
+	.global	___gmpn_sub_n
+___gmpn_sub_n:
+	mov	#0,r3		! clear cy save reg
+
+Loop:	mov.l	@r5+,r1		! r1 = next s1 limb, s1_ptr advances
+	mov.l	@r6+,r2		! r2 = next s2 limb, s2_ptr advances
+	shlr	r3		! restore cy
+	subc	r2,r1		! r1 = r1 - r2 - T, borrow-out left in T
+	movt	r3		! save cy
+	mov.l	r1,@r4		! store difference limb at res_ptr
+	dt	r7		! size -= 1, T = (size == 0); this is why cy lives in r3
+	bf.s	Loop		! loop while size != 0
+	 add	#4,r4		! delay slot: advance res_ptr to the next limb
+
+	rts
+	mov	r3,r0		! rts delay slot: return borrow-out from most significant limb
diff --git a/gmp/mpn/sparc32/README b/gmp/mpn/sparc32/README
index f2dd1160f7..825a1ace8e 100644
--- a/gmp/mpn/sparc32/README
+++ b/gmp/mpn/sparc32/README
@@ -3,28 +3,17 @@ Copyright 1996, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/sparc32/add_n.asm b/gmp/mpn/sparc32/add_n.asm
index 8549195d92..7c8a9c41e8 100644
--- a/gmp/mpn/sparc32/add_n.asm
+++ b/gmp/mpn/sparc32/add_n.asm
@@ -4,30 +4,19 @@ dnl  sum in a third limb vector.
 dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/addmul_1.asm b/gmp/mpn/sparc32/addmul_1.asm
index 92d5d78d51..d73529e304 100644
--- a/gmp/mpn/sparc32/addmul_1.asm
+++ b/gmp/mpn/sparc32/addmul_1.asm
@@ -1,33 +1,22 @@
 dnl  SPARC mpn_addmul_1 -- Multiply a limb vector with a limb and add the
 dnl  result to a second limb vector.
 
-dnl  Copyright 1992-1994, 2000 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/gmp-mparam.h b/gmp/mpn/sparc32/gmp-mparam.h
index a3bc612543..3bc6cd6db4 100644
--- a/gmp/mpn/sparc32/gmp-mparam.h
+++ b/gmp/mpn/sparc32/gmp-mparam.h
@@ -1,40 +1,30 @@
 /* SPARC v7 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* Generated by tuneup.c, 2002-03-13, gcc 2.95, Weitek 8701 */
 
-#define MUL_TOOM22_THRESHOLD              8
-#define MUL_TOOM33_THRESHOLD            466
+#define MUL_KARATSUBA_THRESHOLD           8
+#define MUL_TOOM3_THRESHOLD             466
 
 #define SQR_BASECASE_THRESHOLD            4
-#define SQR_TOOM2_THRESHOLD              16
+#define SQR_KARATSUBA_THRESHOLD          16
 #define SQR_TOOM3_THRESHOLD             258
 
 #define DIV_SB_PREINV_THRESHOLD           4
diff --git a/gmp/mpn/sparc32/lshift.asm b/gmp/mpn/sparc32/lshift.asm
index 8321343d6b..00004f87a7 100644
--- a/gmp/mpn/sparc32/lshift.asm
+++ b/gmp/mpn/sparc32/lshift.asm
@@ -3,30 +3,19 @@ dnl  SPARC mpn_lshift -- Shift a number left.
 dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/mul_1.asm b/gmp/mpn/sparc32/mul_1.asm
index 42b4168be1..147db11157 100644
--- a/gmp/mpn/sparc32/mul_1.asm
+++ b/gmp/mpn/sparc32/mul_1.asm
@@ -1,33 +1,22 @@
 dnl  SPARC mpn_mul_1 -- Multiply a limb vector with a limb and store
 dnl  the result in a second limb vector.
 
-dnl  Copyright 1992-1994, 2000 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/rshift.asm b/gmp/mpn/sparc32/rshift.asm
index e1554766fe..26db4419d8 100644
--- a/gmp/mpn/sparc32/rshift.asm
+++ b/gmp/mpn/sparc32/rshift.asm
@@ -3,30 +3,19 @@ dnl  SPARC mpn_rshift -- Shift a number right.
 dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/sparc-defs.m4 b/gmp/mpn/sparc32/sparc-defs.m4
index 5a0d425204..10a4a86a63 100644
--- a/gmp/mpn/sparc32/sparc-defs.m4
+++ b/gmp/mpn/sparc32/sparc-defs.m4
@@ -3,36 +3,25 @@ divert(-1)
 dnl  m4 macros for SPARC assembler (32 and 64 bit).
 
 
-dnl  Copyright 2002, 2011, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
+dnl  Copyright 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
-changecom(;)	dnl cannot use default # since that's used in REGISTER decls
+changecom(!)
 
 
 dnl  Usage: REGISTER(reg,attr)
@@ -47,33 +36,4 @@ m4_assert_defined(`HAVE_REGISTER')
 `.register `$1',`$2'')')
 
 
-C Testing mechanism for running newer code on older processors
-ifdef(`FAKE_T3',`
-  include_mpn(`sparc64/ultrasparct3/missing.m4')
-',`
-  define(`addxccc',	``addxccc'	$1, $2, $3')
-  define(`addxc',	``addxc'	$1, $2, $3')
-  define(`umulxhi',	``umulxhi'	$1, $2, $3')
-  define(`lzcnt',	``lzd'	$1, $2')
-')
-
-dnl  Usage: LEA64(symbol,reg,pic_reg)
-dnl
-dnl  Use whatever 64-bit code sequence is appropriate to load "symbol" into
-dnl  register "reg", potentially using register "pic_reg" to perform the
-dnl  calculations.
-
-define(LEA64,
-m4_assert_numargs(3)
-m4_assert_defined(`HAVE_GOTDATA')
-`ifdef(`PIC',`
-	rd	%pc, %`$2'
-	sethi	%hi(_GLOBAL_OFFSET_TABLE_+4), %`$3'
-	add	%`$3', %lo(_GLOBAL_OFFSET_TABLE_+8), %`$3'
-	add	%`$2', %`$3', %`$3'
-	sethi	%hi(`$1'), %`$2'
-	or	%`$2', %lo(`$1'), %`$2'
-	ldx	[%`$3' + %`$2'], %`$2'',`
-	setx	`$1', %`$3', %`$2'')')
-
 divert
diff --git a/gmp/mpn/sparc32/sub_n.asm b/gmp/mpn/sparc32/sub_n.asm
index 24a576d82b..4fc759dcbf 100644
--- a/gmp/mpn/sparc32/sub_n.asm
+++ b/gmp/mpn/sparc32/sub_n.asm
@@ -4,30 +4,19 @@ dnl  store difference in a third limb vector.
 dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/submul_1.asm b/gmp/mpn/sparc32/submul_1.asm
index 73f9377006..9cde45fc3b 100644
--- a/gmp/mpn/sparc32/submul_1.asm
+++ b/gmp/mpn/sparc32/submul_1.asm
@@ -1,33 +1,22 @@
 dnl  SPARC mpn_submul_1 -- Multiply a limb vector with a limb and subtract
 dnl  the result from a second limb vector.
 
-dnl  Copyright 1992-1994, 2000 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/udiv.asm b/gmp/mpn/sparc32/udiv.asm
index 23ab3de1db..fc520f67e2 100644
--- a/gmp/mpn/sparc32/udiv.asm
+++ b/gmp/mpn/sparc32/udiv.asm
@@ -4,30 +4,19 @@ dnl  This is for v7 CPUs with a floating-point unit.
 dnl  Copyright 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/udiv_nfp.asm b/gmp/mpn/sparc32/udiv_nfp.asm
index ebbb820639..a7513ead6c 100644
--- a/gmp/mpn/sparc32/udiv_nfp.asm
+++ b/gmp/mpn/sparc32/udiv_nfp.asm
@@ -4,30 +4,19 @@ dnl  This is for v7 CPUs without a floating-point unit.
 dnl  Copyright 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/ultrasparct1/add_n.asm b/gmp/mpn/sparc32/ultrasparct1/add_n.asm
deleted file mode 100644
index c781596dad..0000000000
--- a/gmp/mpn/sparc32/ultrasparct1/add_n.asm
+++ /dev/null
@@ -1,70 +0,0 @@
-dnl  SPARC T1 32-bit mpn_add_n.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C INPUT PARAMETERS
-define(`rp',  %o0)
-define(`ap',  %o1)
-define(`bp',  %o2)
-define(`n',   %o3)
-define(`cy',  %o4)
-
-define(`i',   %o3)
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc)
-
-ASM_START()
-PROLOGUE(mpn_add_nc)
-	b	L(ent)
-	srl	cy, 0, cy	C strip any bogus high bits
-EPILOGUE()
-
-PROLOGUE(mpn_add_n)
-	mov	0, cy
-L(ent):	srl	n, 0, n		C strip any bogus high bits
-	sll	n, 2, n
-	add	ap, n, ap
-	add	bp, n, bp
-	add	rp, n, rp
-	neg	n, i
-
-L(top):	lduw	[ap+i], %g1
-	lduw	[bp+i], %g2
-	add	%g1, %g2, %g3
-	add	%g3, cy, %g3
-	stw	%g3, [rp+i]
-	add	i, 4, i
-	brnz	i, L(top)
-	srlx	%g3, 32, cy
-
-	retl
-	mov	cy, %o0		C return value
-EPILOGUE()
diff --git a/gmp/mpn/sparc32/ultrasparct1/addmul_1.asm b/gmp/mpn/sparc32/ultrasparct1/addmul_1.asm
deleted file mode 100644
index 89da186457..0000000000
--- a/gmp/mpn/sparc32/ultrasparct1/addmul_1.asm
+++ /dev/null
@@ -1,90 +0,0 @@
-dnl  SPARC T1 32-bit mpn_addmul_1.
-
-dnl  Contributed to the GNU project by David Miller.
-
-dnl  Copyright 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T1:       24
-C UltraSPARC T2:       19
-C UltraSPARC T3:       19
-C UltraSPARC T4:       5
-
-C INPUT PARAMETERS
-define(`rp',	`%i0')
-define(`up',	`%i1')
-define(`n',	`%i2')
-define(`v0',	`%i3')
-
-ASM_START()
-PROLOGUE(mpn_addmul_1)
-	save	%sp, -96, %sp
-	srl	n, 0, %o4
-	srl	v0, 0, %g1
-	subcc	%o4, 1, %o4
-	be	L(final_one)
-	 clr	%o5
-
-L(top):	lduw	[up+0], %l0
-	lduw	[rp+0], %l2
-	lduw	[up+4], %l1
-	lduw	[rp+4], %l3
-	mulx	%l0, %g1, %g3
-	add	up, 8, up
-	mulx	%l1, %g1, %o3
-	sub	%o4, 2, %o4
-	add	rp, 8, rp
-	add	%l2, %g3, %g3
-	add	%o5, %g3, %g3
-	stw	%g3, [rp-8]
-	srlx	%g3, 32, %o5
-	add	%l3, %o3, %o3
-	add	%o5, %o3, %o3
-	stw	%o3, [rp-4]
-	brgz	%o4, L(top)
-	 srlx	%o3, 32, %o5
-
-	brlz,pt	%o4, L(done)
-	 nop
-
-L(final_one):
-	lduw	[up+0], %l0
-	lduw	[rp+0], %l2
-	mulx	%l0, %g1, %g3
-	add	%l2, %g3, %g3
-	add	%o5, %g3, %g3
-	stw	%g3, [rp+0]
-	srlx	%g3, 32, %o5
-
-L(done):
-	ret
-	 restore %o5, 0, %o0
-EPILOGUE()
diff --git a/gmp/mpn/sparc32/ultrasparct1/gmp-mparam.h b/gmp/mpn/sparc32/ultrasparct1/gmp-mparam.h
deleted file mode 100644
index 6f9d5a44ca..0000000000
--- a/gmp/mpn/sparc32/ultrasparct1/gmp-mparam.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/* UltraSPARC T 32-bit gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            3
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          9
-#define MOD_1U_TO_MOD_1_1_THRESHOLD         10
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        21
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     22
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD           35
-
-#define MUL_TOOM22_THRESHOLD                14
-#define MUL_TOOM33_THRESHOLD                98
-#define MUL_TOOM44_THRESHOLD               166
-#define MUL_TOOM6H_THRESHOLD               226
-#define MUL_TOOM8H_THRESHOLD               333
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     139
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      97
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      98
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     120
-
-#define SQR_BASECASE_THRESHOLD               6
-#define SQR_TOOM2_THRESHOLD                 34
-#define SQR_TOOM3_THRESHOLD                110
-#define SQR_TOOM4_THRESHOLD                178
-#define SQR_TOOM6_THRESHOLD                240
-#define SQR_TOOM8_THRESHOLD                333
-
-#define MULMID_TOOM42_THRESHOLD             22
-
-#define MULMOD_BNM1_THRESHOLD                9
-#define SQRMOD_BNM1_THRESHOLD               13
-
-#define MUL_FFT_MODF_THRESHOLD             280  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    280, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
-    {      9, 5}, {     19, 6}, {     13, 7}, {      7, 6}, \
-    {     17, 7}, {      9, 6}, {     20, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 8}, {      7, 7}, {     21, 8}, \
-    {     11, 7}, {     25, 9}, {      7, 8}, {     15, 7}, \
-    {     33, 8}, {     19, 7}, {     41, 8}, {     23, 7}, \
-    {     49, 8}, {     27, 9}, {     15, 8}, {     31, 7}, \
-    {     63, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
-    {     15, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47,10}, {     31, 9}, {     79,10}, \
-    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255,10}, {     79, 9}, {    159, 8}, {    319,10}, \
-    {     95, 9}, {    191, 8}, {    383,11}, {     63,10}, \
-    {    127, 9}, {    255,10}, {    143, 9}, {    287,10}, \
-    {    159, 9}, {    319,10}, {    175,11}, {     95,10}, \
-    {    191, 9}, {    383,12}, {   4096,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 66
-#define MUL_FFT_THRESHOLD                 3712
-
-#define SQR_FFT_MODF_THRESHOLD             240  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    240, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
-    {     13, 7}, {      7, 6}, {     17, 7}, {      9, 6}, \
-    {     20, 7}, {     11, 6}, {     23, 7}, {     13, 8}, \
-    {      7, 7}, {     19, 8}, {     11, 7}, {     25, 9}, \
-    {      7, 8}, {     15, 7}, {     33, 8}, {     19, 7}, \
-    {     39, 8}, {     23, 7}, {     47, 8}, {     27, 9}, \
-    {     15, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
-    {     15, 9}, {     31, 8}, {     63, 9}, {     39, 8}, \
-    {     79, 9}, {     47,10}, {     31, 9}, {     63, 8}, \
-    {    127, 9}, {     71, 8}, {    143, 9}, {     79,10}, \
-    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255, 9}, {    143,10}, {     79, 9}, {    159, 8}, \
-    {    319, 9}, {    175,10}, {     95, 9}, {    191, 8}, \
-    {    383, 9}, {    207,11}, {     63,10}, {    127, 9}, \
-    {    255,10}, {    143, 9}, {    287,10}, {    159, 9}, \
-    {    319,10}, {    175,11}, {     95,10}, {    191, 9}, \
-    {    383,10}, {    207,12}, {   4096,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 70
-#define SQR_FFT_THRESHOLD                 2624
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  51
-#define MULLO_MUL_N_THRESHOLD             6633
-
-#define DC_DIV_QR_THRESHOLD                 51
-#define DC_DIVAPPR_Q_THRESHOLD             202
-#define DC_BDIV_QR_THRESHOLD                47
-#define DC_BDIV_Q_THRESHOLD                124
-
-#define INV_MULMOD_BNM1_THRESHOLD           26
-#define INV_NEWTON_THRESHOLD               266
-#define INV_APPR_THRESHOLD                 222
-
-#define BINV_NEWTON_THRESHOLD              296
-#define REDC_1_TO_REDC_N_THRESHOLD          59
-
-#define MU_DIV_QR_THRESHOLD               1334
-#define MU_DIVAPPR_Q_THRESHOLD            1499
-#define MUPI_DIV_QR_THRESHOLD              116
-#define MU_BDIV_QR_THRESHOLD              1057
-#define MU_BDIV_Q_THRESHOLD               1334
-
-#define POWM_SEC_TABLE  6,35,213,724,2618
-
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                      84
-#define HGCD_APPR_THRESHOLD                101
-#define HGCD_REDUCE_THRESHOLD             1437
-#define GCD_DC_THRESHOLD                   372
-#define GCDEXT_DC_THRESHOLD                253
-#define JACOBI_BASE_METHOD                   2
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        27
-#define SET_STR_DC_THRESHOLD               399
-#define SET_STR_PRECOMPUTE_THRESHOLD       885
-
-#define FAC_DSC_THRESHOLD                  179
-#define FAC_ODD_THRESHOLD                   29
diff --git a/gmp/mpn/sparc32/ultrasparct1/mul_1.asm b/gmp/mpn/sparc32/ultrasparct1/mul_1.asm
deleted file mode 100644
index 0239cd28cd..0000000000
--- a/gmp/mpn/sparc32/ultrasparct1/mul_1.asm
+++ /dev/null
@@ -1,83 +0,0 @@
-dnl  SPARC T1 32-bit mpn_mul_1.
-
-dnl  Contributed to the GNU project by David Miller.
-
-dnl  Copyright 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T1:       20
-C UltraSPARC T2:       18
-C UltraSPARC T3:       18
-C UltraSPARC T4:       4
-
-C INPUT PARAMETERS
-define(`rp',	`%o0')
-define(`up',	`%o1')
-define(`n',	`%o2')
-define(`v0',	`%o3')
-
-ASM_START()
-PROLOGUE(mpn_mul_1)
-	srl	n, 0, n
-	srl	v0, 0, v0
-	subcc	n, 1, n
-	be	L(final_one)
-	 clr	%o5
-
-L(top):	lduw	[up+0], %g1
-	lduw	[up+4], %g2
-	mulx	%g1, v0, %g3
-	add	up, 8, up
-	mulx	%g2, v0, %o4
-	sub	n, 2, n
-	add	rp, 8, rp
-	add	%o5, %g3, %g3
-	stw	%g3, [rp-8]
-	srlx	%g3, 32, %o5
-	add	%o5, %o4, %o4
-	stw	%o4, [rp-4]
-	brgz	n, L(top)
-	 srlx	%o4, 32, %o5
-
-	brlz,pt	n, L(done)
-	 nop
-
-L(final_one):
-	lduw	[up+0], %g1
-	mulx	%g1, v0, %g3
-	add	%o5, %g3, %g3
-	stw	%g3, [rp+0]
-	srlx	%g3, 32, %o5
-
-L(done):
-	retl
-	 mov	%o5, %o0
-EPILOGUE()
diff --git a/gmp/mpn/sparc32/ultrasparct1/sqr_diagonal.asm b/gmp/mpn/sparc32/ultrasparct1/sqr_diagonal.asm
deleted file mode 100644
index 3b906ef202..0000000000
--- a/gmp/mpn/sparc32/ultrasparct1/sqr_diagonal.asm
+++ /dev/null
@@ -1,55 +0,0 @@
-dnl  SPARC T1 32-bit mpn_sqr_diagonal.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C INPUT PARAMETERS
-define(`rp',	`%o0')
-define(`up',	`%o1')
-define(`n',	`%o2')
-
-ASM_START()
-PROLOGUE(mpn_sqr_diagonal)
-	deccc	n			C n--
-	nop
-
-L(top):	lduw	[up+0], %g1
-	add	up, 4, up		C up++
-	mulx	%g1, %g1, %g3
-	stw	%g3, [rp+0]
-	srlx	%g3, 32, %g4
-	stw	%g4, [rp+4]
-	add	rp, 8, rp		C rp += 2
-	bnz	%icc, L(top)
-	deccc	n			C n--
-
-	retl
-	nop
-EPILOGUE()
diff --git a/gmp/mpn/sparc32/ultrasparct1/sub_n.asm b/gmp/mpn/sparc32/ultrasparct1/sub_n.asm
deleted file mode 100644
index 946bc3ff8e..0000000000
--- a/gmp/mpn/sparc32/ultrasparct1/sub_n.asm
+++ /dev/null
@@ -1,70 +0,0 @@
-dnl  SPARC T1 32-bit mpn_sub_n.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C INPUT PARAMETERS
-define(`rp',  %o0)
-define(`ap',  %o1)
-define(`bp',  %o2)
-define(`n',   %o3)
-define(`cy',  %o4)
-
-define(`i',   %o3)
-
-MULFUNC_PROLOGUE(mpn_sub_n mpn_sub_nc)
-
-ASM_START()
-PROLOGUE(mpn_sub_nc)
-	b	L(ent)
-	srl	cy, 0, cy	C strip any bogus high bits
-EPILOGUE()
-
-PROLOGUE(mpn_sub_n)
-	mov	0, cy
-L(ent):	srl	n, 0, n		C strip any bogus high bits
-	sll	n, 2, n
-	add	ap, n, ap
-	add	bp, n, bp
-	add	rp, n, rp
-	neg	n, i
-
-L(top):	lduw	[ap+i], %g1
-	lduw	[bp+i], %g2
-	sub	%g1, %g2, %g3
-	sub	%g3, cy, %g3
-	stw	%g3, [rp+i]
-	add	i, 4, i
-	brnz	i, L(top)
-	srlx	%g3, 63, cy
-
-	retl
-	mov	cy, %o0		C return value
-EPILOGUE()
diff --git a/gmp/mpn/sparc32/ultrasparct1/submul_1.asm b/gmp/mpn/sparc32/ultrasparct1/submul_1.asm
deleted file mode 100644
index 89200709c4..0000000000
--- a/gmp/mpn/sparc32/ultrasparct1/submul_1.asm
+++ /dev/null
@@ -1,91 +0,0 @@
-dnl  SPARC T1 32-bit mpn_submul_1.
-
-dnl  Contributed to the GNU project by David Miller.
-
-dnl  Copyright 2010, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T1:       24
-C UltraSPARC T2:       19
-C UltraSPARC T3:       19
-C UltraSPARC T4:       5
-
-C INPUT PARAMETERS
-define(`rp',	`%i0')
-define(`up',	`%i1')
-define(`n',	`%i2')
-define(`v0',	`%i3')
-
-ASM_START()
-PROLOGUE(mpn_submul_1)
-	save	%sp, -96, %sp
-	srl	n, 0, %o4
-	srl	v0, 0, %g1
-	subcc	%o4, 1, %o4
-	be	L(final_one)
-	 subcc	%g0, 0, %o5
-
-L(top):	lduw	[up+0], %l0
-	lduw	[rp+0], %l2
-	lduw	[up+4], %l1
-	lduw	[rp+4], %l3
-	mulx	%l0, %g1, %g3
-	add	up, 8, up
-	mulx	%l1, %g1, %o3
-	sub	%o4, 2, %o4
-	add	rp, 8, rp
-	addx	%o5, %g3, %g3
-	srlx	%g3, 32, %o5
-	subcc	%l2, %g3, %g3
-	stw	%g3, [rp-8]
-	addx	%o5, %o3, %o3
-	srlx	%o3, 32, %o5
-	subcc	%l3, %o3, %o3
-	brgz	%o4, L(top)
-	 stw	%o3, [rp-4]
-
-	brlz,pt	%o4, L(done)
-	 nop
-
-L(final_one):
-	lduw	[up+0], %l0
-	lduw	[rp+0], %l2
-	mulx	%l0, %g1, %g3
-	addx	%o5, %g3, %g3
-	srlx	%g3, 32, %o5
-	subcc	%l2, %g3, %g3
-	stw	%g3, [rp+0]
-
-L(done):
-	addx	%o5, 0, %o5
-	ret
-	 restore %o5, 0, %o0
-EPILOGUE()
diff --git a/gmp/mpn/sparc32/umul.asm b/gmp/mpn/sparc32/umul.asm
index 3a20b95cb5..80c82122d9 100644
--- a/gmp/mpn/sparc32/umul.asm
+++ b/gmp/mpn/sparc32/umul.asm
@@ -3,30 +3,19 @@ dnl  SPARC mpn_umul_ppmm -- support for longlong.h for non-gcc.
 dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/v8/addmul_1.asm b/gmp/mpn/sparc32/v8/addmul_1.asm
index 0052092784..6e5e78865b 100644
--- a/gmp/mpn/sparc32/v8/addmul_1.asm
+++ b/gmp/mpn/sparc32/v8/addmul_1.asm
@@ -1,33 +1,22 @@
 dnl  SPARC v8 mpn_addmul_1 -- Multiply a limb vector with a limb and
 dnl  add the result to a second limb vector.
 
-dnl  Copyright 1992-1995, 2000 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/v8/gmp-mparam.h b/gmp/mpn/sparc32/v8/gmp-mparam.h
index e57897b439..f042c19e5e 100644
--- a/gmp/mpn/sparc32/v8/gmp-mparam.h
+++ b/gmp/mpn/sparc32/v8/gmp-mparam.h
@@ -1,44 +1,34 @@
 /* SPARC v8 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2004 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
 /* Generated by tuneup.c, 2004-02-07, gcc 2.95 */
 
-#define MUL_TOOM22_THRESHOLD             10
-#define MUL_TOOM33_THRESHOLD             65
+#define MUL_KARATSUBA_THRESHOLD          10
+#define MUL_TOOM3_THRESHOLD              65
 
 #define SQR_BASECASE_THRESHOLD            4
-#define SQR_TOOM2_THRESHOLD              18
+#define SQR_KARATSUBA_THRESHOLD          18
 #define SQR_TOOM3_THRESHOLD              65
 
 #define DIV_SB_PREINV_THRESHOLD           5
diff --git a/gmp/mpn/sparc32/v8/mul_1.asm b/gmp/mpn/sparc32/v8/mul_1.asm
index e26c853aed..d428debf15 100644
--- a/gmp/mpn/sparc32/v8/mul_1.asm
+++ b/gmp/mpn/sparc32/v8/mul_1.asm
@@ -4,30 +4,19 @@ dnl  store the product in a second limb vector.
 dnl  Copyright 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/v8/submul_1.asm b/gmp/mpn/sparc32/v8/submul_1.asm
index 187314ecef..4dde012808 100644
--- a/gmp/mpn/sparc32/v8/submul_1.asm
+++ b/gmp/mpn/sparc32/v8/submul_1.asm
@@ -1,33 +1,22 @@
 dnl  SPARC v8 mpn_submul_1 -- Multiply a limb vector with a limb and
 dnl  subtract the result from a second limb vector.
 
-dnl  Copyright 1992-1994, 2000 Free Software Foundation, Inc.
+dnl  Copyright 1992, 1993, 1994, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/v8/supersparc/gmp-mparam.h b/gmp/mpn/sparc32/v8/supersparc/gmp-mparam.h
index 1ac9239e3c..feb90ef408 100644
--- a/gmp/mpn/sparc32/v8/supersparc/gmp-mparam.h
+++ b/gmp/mpn/sparc32/v8/supersparc/gmp-mparam.h
@@ -1,44 +1,34 @@
 /* SuperSPARC gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2004 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
 /* Generated by tuneup.c, 2004-02-10, gcc 3.3 */
 
-#define MUL_TOOM22_THRESHOLD             14
-#define MUL_TOOM33_THRESHOLD             81
+#define MUL_KARATSUBA_THRESHOLD          14
+#define MUL_TOOM3_THRESHOLD              81
 
 #define SQR_BASECASE_THRESHOLD            5
-#define SQR_TOOM2_THRESHOLD              28
+#define SQR_KARATSUBA_THRESHOLD          28
 #define SQR_TOOM3_THRESHOLD              86
 
 #define DIV_SB_PREINV_THRESHOLD           0  /* always */
diff --git a/gmp/mpn/sparc32/v8/supersparc/udiv.asm b/gmp/mpn/sparc32/v8/supersparc/udiv.asm
index 12f66ce6a2..2ce3b8f15c 100644
--- a/gmp/mpn/sparc32/v8/supersparc/udiv.asm
+++ b/gmp/mpn/sparc32/v8/supersparc/udiv.asm
@@ -5,30 +5,19 @@ dnl  udiv instruction.
 dnl  Copyright 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/v8/udiv.asm b/gmp/mpn/sparc32/v8/udiv.asm
index 12f66ce6a2..2ce3b8f15c 100644
--- a/gmp/mpn/sparc32/v8/udiv.asm
+++ b/gmp/mpn/sparc32/v8/udiv.asm
@@ -5,30 +5,19 @@ dnl  udiv instruction.
 dnl  Copyright 1993, 1994, 1996, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/v8/umul.asm b/gmp/mpn/sparc32/v8/umul.asm
index 1a2e84b1f6..569a4e8dd3 100644
--- a/gmp/mpn/sparc32/v8/umul.asm
+++ b/gmp/mpn/sparc32/v8/umul.asm
@@ -3,30 +3,19 @@ dnl  SPARC v8 mpn_umul_ppmm -- support for longlong.h for non-gcc.
 dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/v9/add_n.asm b/gmp/mpn/sparc32/v9/add_n.asm
index 7bd5974fd3..a21cf10d55 100644
--- a/gmp/mpn/sparc32/v9/add_n.asm
+++ b/gmp/mpn/sparc32/v9/add_n.asm
@@ -4,30 +4,19 @@ dnl  sum in a third limb vector.
 dnl  Copyright 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/v9/addmul_1.asm b/gmp/mpn/sparc32/v9/addmul_1.asm
index 2adf7a8a2f..18b9a72d1a 100644
--- a/gmp/mpn/sparc32/v9/addmul_1.asm
+++ b/gmp/mpn/sparc32/v9/addmul_1.asm
@@ -4,30 +4,19 @@ dnl  the result to a second limb vector.
 dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/sparc32/v9/gmp-mparam.h b/gmp/mpn/sparc32/v9/gmp-mparam.h
index f909e2cf18..5d06398192 100644
--- a/gmp/mpn/sparc32/v9/gmp-mparam.h
+++ b/gmp/mpn/sparc32/v9/gmp-mparam.h
@@ -1,204 +1,73 @@
 /* SPARC v9 32-bit gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2002, 2004, 2009-2011, 2014 Free Software
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2009 Free Software
 Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 1593 MHz ultrasparc3 running Solaris 10 (swift.nada.kth.se) */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-16, gcc 3.4 */
-
-#define DIVREM_1_NORM_THRESHOLD              3
-#define DIVREM_1_UNNORM_THRESHOLD            4
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 3
-#define MOD_1_UNNORM_THRESHOLD               4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         13
-#define MOD_1U_TO_MOD_1_1_THRESHOLD         12
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        22
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     32
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              4
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
-
-#define MUL_TOOM22_THRESHOLD                28
-#define MUL_TOOM33_THRESHOLD                43
-#define MUL_TOOM44_THRESHOLD               126
-#define MUL_TOOM6H_THRESHOLD               161
-#define MUL_TOOM8H_THRESHOLD               208
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      80
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      85
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      55
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      72
-
-#define SQR_BASECASE_THRESHOLD               4
-#define SQR_TOOM2_THRESHOLD                 64
-#define SQR_TOOM3_THRESHOLD                 85
-#define SQR_TOOM4_THRESHOLD                152
-#define SQR_TOOM6_THRESHOLD                185
-#define SQR_TOOM8_THRESHOLD                324
-
-#define MULMID_TOOM42_THRESHOLD             64
-
-#define MULMOD_BNM1_THRESHOLD               12
-#define SQRMOD_BNM1_THRESHOLD               16
-
-#define MUL_FFT_MODF_THRESHOLD             288  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    288, 5}, {      9, 4}, {     19, 5}, {     11, 6}, \
-    {      6, 5}, {     14, 6}, {      8, 5}, {     17, 6}, \
-    {      9, 5}, {     20, 6}, {     13, 7}, {      7, 6}, \
-    {     16, 7}, {      9, 6}, {     19, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 8}, {      7, 7}, {     15, 6}, \
-    {     31, 7}, {     19, 8}, {     11, 7}, {     23, 9}, \
-    {      7, 8}, {     15, 7}, {     31, 8}, {     19, 7}, \
-    {     39, 8}, {     27, 9}, {     15, 8}, {     31, 7}, \
-    {     63, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
-    {     15, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47,10}, {     31, 9}, {     71, 8}, \
-    {    143, 9}, {     79,10}, {     47, 9}, {     95,11}, \
-    {     31,10}, {     63, 9}, {    135, 8}, {    271, 9}, \
-    {    143, 8}, {    287,10}, {     79, 9}, {    175,10}, \
-    {     95, 9}, {    191, 8}, {    383,10}, {    111,11}, \
-    {     63,10}, {    143, 9}, {    287, 8}, {    575,10}, \
-    {    175,11}, {     95,10}, {    191, 9}, {    415, 8}, \
-    {    831,12}, {     63,11}, {    127,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    351, 9}, {    703,11}, \
-    {    191,10}, {    415, 9}, {    831,11}, {    223,10}, \
-    {    447, 9}, {    895, 8}, {   1791,12}, {    127,11}, \
-    {    287,10}, {    607, 9}, {   1215, 8}, {   2431,11}, \
-    {    319, 9}, {   1279,11}, {    351,12}, {    191,11}, \
-    {    415,10}, {    831,11}, {    447,10}, {    895, 9}, \
-    {   1791,11}, {    479,13}, {    127,12}, {    255,11}, \
-    {    575,10}, {   1151,11}, {    607,12}, {    319,11}, \
-    {    703,12}, {    383,11}, {    831,12}, {    447,11}, \
-    {    895,10}, {   1791,11}, {    959,13}, {    255,12}, \
-    {    575,11}, {   1215,10}, {   2431,12}, {    703,13}, \
-    {    383,12}, {    959,14}, {    255,13}, {    511,12}, \
-    {   1087,11}, {   2175,12}, {   1215,11}, {   2431,13}, \
-    {    639,12}, {   1407,11}, {   2943,13}, {    895,12}, \
-    {   1919,14}, {    511,13}, {   1151,12}, {   2431,13}, \
-    {   1407,14}, {    767,13}, {   1791,15}, {    511,14}, \
-    {   1023,13}, {   2431,14}, {   1279,13}, {   2943,12}, \
-    {   5887,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 143
-#define MUL_FFT_THRESHOLD                 2240
-
-#define SQR_FFT_MODF_THRESHOLD             244  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    244, 5}, {      8, 4}, {     17, 5}, {     17, 6}, \
-    {      9, 5}, {     19, 6}, {     17, 7}, {      9, 6}, \
-    {     20, 7}, {     11, 6}, {     23, 7}, {     13, 8}, \
-    {      7, 7}, {     19, 8}, {     11, 7}, {     25, 9}, \
-    {      7, 8}, {     15, 7}, {     33, 8}, {     19, 7}, \
-    {     39, 8}, {     23, 9}, {     15, 8}, {     39, 9}, \
-    {     23,10}, {     15, 9}, {     31, 8}, {     63, 9}, \
-    {     47,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
-    {     71, 8}, {    143, 7}, {    287, 9}, {     79,10}, \
-    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255, 9}, {    143, 8}, {    287,10}, {     79, 9}, \
-    {    159, 8}, {    319, 9}, {    175, 8}, {    351, 7}, \
-    {    703,10}, {     95, 9}, {    191, 8}, {    383, 9}, \
-    {    207, 8}, {    415, 9}, {    223,11}, {     63,10}, \
-    {    127, 9}, {    271,10}, {    143, 9}, {    287, 8}, \
-    {    575,10}, {    159, 9}, {    319,10}, {    175, 9}, \
-    {    351, 8}, {    703,11}, {     95,10}, {    191, 9}, \
-    {    383,10}, {    207, 9}, {    415, 8}, {    831,10}, \
-    {    223,12}, {     63,11}, {    127,10}, {    271, 9}, \
-    {    543,10}, {    287, 9}, {    575,11}, {    159,10}, \
-    {    319, 9}, {    639,10}, {    351, 9}, {    703, 8}, \
-    {   1407,11}, {    191,10}, {    415, 9}, {    831,11}, \
-    {    223,10}, {    447, 9}, {    895,10}, {    479,12}, \
-    {    127,11}, {    255,10}, {    543,11}, {    287,10}, \
-    {    575,11}, {    319,10}, {    639,11}, {    351,10}, \
-    {    703,12}, {    191,11}, {    415,10}, {    831,11}, \
-    {    447,10}, {    895, 9}, {   1791,13}, {    127,12}, \
-    {    255,11}, {    575,12}, {    319,11}, {    703,10}, \
-    {   1407,12}, {    383,11}, {    831,12}, {    447,11}, \
-    {    959,10}, {   1919, 9}, {   3839,13}, {    255,12}, \
-    {    575,11}, {   1151,12}, {    703,11}, {   1407,13}, \
-    {    383,12}, {    959,14}, {    255,13}, {    511,12}, \
-    {   1215,11}, {   2431,13}, {    639,12}, {   1407,13}, \
-    {    767,12}, {   1599,13}, {    895,12}, {   1919,14}, \
-    {    511,13}, {   1151,12}, {   2431,13}, {   1407,12}, \
-    {   2815,14}, {    767,13}, {   1535,12}, {   3071,13}, \
-    {   1919,15}, {    511,14}, {   1023,13}, {   2431,14}, \
-    {   1279,13}, {   2943,12}, {   5887,14}, {  16384,15}, \
-    {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 153
-#define SQR_FFT_THRESHOLD                 2112
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                 144
-#define MULLO_MUL_N_THRESHOLD             4292
-
-#define DC_DIV_QR_THRESHOLD                 74
-#define DC_DIVAPPR_Q_THRESHOLD             406
-#define DC_BDIV_QR_THRESHOLD                63
-#define DC_BDIV_Q_THRESHOLD                363
-
-#define INV_MULMOD_BNM1_THRESHOLD          108
-#define INV_NEWTON_THRESHOLD               351
-#define INV_APPR_THRESHOLD                 303
-
-#define BINV_NEWTON_THRESHOLD              354
-#define REDC_1_TO_REDC_N_THRESHOLD          61
-
-#define MU_DIV_QR_THRESHOLD                998
-#define MU_DIVAPPR_Q_THRESHOLD            1099
-#define MUPI_DIV_QR_THRESHOLD              118
-#define MU_BDIV_QR_THRESHOLD               807
-#define MU_BDIV_Q_THRESHOLD                979
-
-#define POWM_SEC_TABLE  3,22,127,624,779,2351
-
-#define MATRIX22_STRASSEN_THRESHOLD          7
-#define HGCD_THRESHOLD                      90
-#define HGCD_APPR_THRESHOLD                123
-#define HGCD_REDUCE_THRESHOLD             1494
-#define GCD_DC_THRESHOLD                   283
-#define GCDEXT_DC_THRESHOLD                192
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        27
-#define SET_STR_DC_THRESHOLD               290
-#define SET_STR_PRECOMPUTE_THRESHOLD       634
-
-#define FAC_DSC_THRESHOLD                  156
-#define FAC_ODD_THRESHOLD                   25
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2009-02-12, gcc 2.95 */
+
+#define MUL_KARATSUBA_THRESHOLD          28
+#define MUL_TOOM3_THRESHOLD              97
+#define MUL_TOOM44_THRESHOLD            136
+
+#define SQR_BASECASE_THRESHOLD            8
+#define SQR_KARATSUBA_THRESHOLD          60
+#define SQR_TOOM3_THRESHOLD             138
+#define SQR_TOOM4_THRESHOLD             278
+
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD             111
+#define MULLOW_MUL_N_THRESHOLD          434
+
+#define DIV_SB_PREINV_THRESHOLD           7
+#define DIV_DC_THRESHOLD                122
+#define POWM_THRESHOLD                  154
+
+#define MATRIX22_STRASSEN_THRESHOLD      12
+#define HGCD_THRESHOLD                  155
+#define GCD_DC_THRESHOLD                614
+#define GCDEXT_DC_THRESHOLD             438
+#define JACOBI_BASE_METHOD                2
+
+#define DIVREM_1_NORM_THRESHOLD           5
+#define DIVREM_1_UNNORM_THRESHOLD        14
+#define MOD_1_NORM_THRESHOLD              4
+#define MOD_1_UNNORM_THRESHOLD            5
+#define MOD_1_1_THRESHOLD                 7
+#define MOD_1_2_THRESHOLD                 8
+#define MOD_1_4_THRESHOLD                16
+#define USE_PREINV_DIVREM_1               1
+#define USE_PREINV_MOD_1                  1
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD      MP_SIZE_T_MAX  /* never */
+
+#define GET_STR_DC_THRESHOLD             12
+#define GET_STR_PRECOMPUTE_THRESHOLD     19
+#define SET_STR_DC_THRESHOLD            802
+#define SET_STR_PRECOMPUTE_THRESHOLD   1647
+
+#define MUL_FFT_TABLE  { 304, 736, 1152, 3584, 10240, 24576, 98304, 393216, 0 }
+#define MUL_FFT_MODF_THRESHOLD          264
+#define MUL_FFT_THRESHOLD              2304
+
+#define SQR_FFT_TABLE  { 336, 800, 1408, 3584, 10240, 24576, 98304, 393216, 0 }
+#define SQR_FFT_MODF_THRESHOLD          248
+#define SQR_FFT_THRESHOLD              2304
diff --git a/gmp/mpn/sparc32/v9/mul_1.asm b/gmp/mpn/sparc32/v9/mul_1.asm
index 40aeffad4f..881f46fb62 100644
--- a/gmp/mpn/sparc32/v9/mul_1.asm
+++ b/gmp/mpn/sparc32/v9/mul_1.asm
@@ -4,30 +4,19 @@ dnl  the result in a second limb vector.
 dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/sparc32/v9/sqr_diagonal.asm b/gmp/mpn/sparc32/v9/sqr_diagonal.asm
index e024279849..e4a78c5de7 100644
--- a/gmp/mpn/sparc32/v9/sqr_diagonal.asm
+++ b/gmp/mpn/sparc32/v9/sqr_diagonal.asm
@@ -3,30 +3,19 @@ dnl  SPARC v9 32-bit mpn_sqr_diagonal.
 dnl  Copyright 2001, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/v9/sub_n.asm b/gmp/mpn/sparc32/v9/sub_n.asm
index 636c73bf35..cea474326c 100644
--- a/gmp/mpn/sparc32/v9/sub_n.asm
+++ b/gmp/mpn/sparc32/v9/sub_n.asm
@@ -4,30 +4,19 @@ dnl  store difference in a third limb vector.
 dnl  Copyright 2001 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc32/v9/submul_1.asm b/gmp/mpn/sparc32/v9/submul_1.asm
index 92d0ce7db9..e5823b1e4b 100644
--- a/gmp/mpn/sparc32/v9/submul_1.asm
+++ b/gmp/mpn/sparc32/v9/submul_1.asm
@@ -4,30 +4,19 @@ dnl  subtract the result from a second limb vector.
 dnl  Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/sparc32/v9/udiv.asm b/gmp/mpn/sparc32/v9/udiv.asm
index 61dde97a66..0957b8c225 100644
--- a/gmp/mpn/sparc32/v9/udiv.asm
+++ b/gmp/mpn/sparc32/v9/udiv.asm
@@ -3,30 +3,19 @@ dnl  SPARC v9 32-bit mpn_udiv_qrnnd - division support for longlong.h.
 dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/mpn/sparc64/README b/gmp/mpn/sparc64/README
index e2c051a02b..19072996de 100644
--- a/gmp/mpn/sparc64/README
+++ b/gmp/mpn/sparc64/README
@@ -1,30 +1,19 @@
-Copyright 1997, 1999-2002 Free Software Foundation, Inc.
+Copyright 1997, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
@@ -76,7 +65,7 @@ Integer conditional move instructions cannot dual-issue with other integer
 instructions.  No conditional move can issue 1-5 cycles after a load.  (This
 might have been fixed for UltraSPARC-3.)
 
-The UltraSPARC-3 pipeline is very simular to the one of UltraSPARC-1/2 , but is
+The UltraSPARC-3 pipeline is very similar to the one of UltraSPARC-1/2 , but is
 somewhat slower.  Branches execute slower, and there may be other new stalls.
 But integer multiply doesn't stall the entire CPU and also has a much lower
 latency.  But it's still not pipelined, and thus useless for our needs.
diff --git a/gmp/mpn/sparc64/ultrasparc1234/add_n.asm b/gmp/mpn/sparc64/add_n.asm
index 92374d2552..c3e5b46ddf 100644
--- a/gmp/mpn/sparc64/ultrasparc1234/add_n.asm
+++ b/gmp/mpn/sparc64/add_n.asm
@@ -1,33 +1,22 @@
 dnl  SPARC v9 mpn_add_n -- Add two limb vectors of the same length > 0 and
 dnl  store sum in a third limb vector.
 
-dnl  Copyright 2001-2003, 2011 Free Software Foundation, Inc.
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -43,19 +32,19 @@ C recurrency, and the UltraSPARC 1 and 2 the IE units are 100% saturated.
 C Therefore, it seems futile to try to optimize this any further...
 
 C INPUT PARAMETERS
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`vp', `%i2')
-define(`n',  `%i3')
-
-define(`u0', `%l0')
-define(`u1', `%l2')
-define(`u2', `%l4')
-define(`u3', `%l6')
-define(`v0', `%l1')
-define(`v1', `%l3')
-define(`v2', `%l5')
-define(`v3', `%l7')
+define(`rp',`%i0')
+define(`up',`%i1')
+define(`vp',`%i2')
+define(`n',`%i3')
+
+define(`u0',`%l0')
+define(`u1',`%l2')
+define(`u2',`%l4')
+define(`u3',`%l6')
+define(`v0',`%l1')
+define(`v1',`%l3')
+define(`v2',`%l5')
+define(`v3',`%l7')
 
 define(`cy',`%i4')
 
@@ -65,24 +54,14 @@ define(`fmnop',`fmuld %f0,%f0,%f4')	dnl  A quasi nop running in the FM pipe
 ASM_START()
 	REGISTER(%g2,#scratch)
 	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_add_nc)
-	save	%sp,-160,%sp
-
-	fitod	%f0,%f0		C make sure f0 contains small, quiet number
-	subcc	n,4,%g0
-	bl,pn	%xcc,.Loop0
-	nop
-	b,a	L(com)
-EPILOGUE()
-
 PROLOGUE(mpn_add_n)
 	save	%sp,-160,%sp
 
 	fitod	%f0,%f0		C make sure f0 contains small, quiet number
 	subcc	n,4,%g0
-	bl,pn	%xcc,.Loop0
+	bl,pn	%icc,.Loop0
 	mov	0,cy
-L(com):
+
 	ldx	[up+0],u0
 	ldx	[vp+0],v0
 	add	up,32,up
@@ -95,15 +74,15 @@ L(com):
 	ldx	[vp-8],v3
 	subcc	n,8,n
 	add	u0,v0,%g1	C main add
-	add	%g1,cy,%g5	C carry add
+	add	%g1,cy,%g4	C carry add
 	or	u0,v0,%g2
-	bl,pn	%xcc,.Lend4567
+	bl,pn	%icc,.Lend4567
 	fanop
 	b,a	.Loop
 
 	.align	16
 C START MAIN LOOP
-.Loop:	andn	%g2,%g5,%g2
+.Loop:	andn	%g2,%g4,%g2
 	and	u0,v0,%g3
 	ldx	[up+0],u0
 	fanop
@@ -115,15 +94,15 @@ C --
 C --
 	srlx	%g2,63,cy
 	add	u1,v1,%g1
-	stx	%g5,[rp+0]
+	stx	%g4,[rp+0]
 	fanop
 C --
-	add	%g1,cy,%g5
+	add	%g1,cy,%g4
 	or	u1,v1,%g2
 	fmnop
 	fanop
 C --
-	andn	%g2,%g5,%g2
+	andn	%g2,%g4,%g2
 	and	u1,v1,%g3
 	ldx	[up-24],u1
 	fanop
@@ -135,15 +114,15 @@ C --
 C --
 	srlx	%g2,63,cy
 	add	u2,v2,%g1
-	stx	%g5,[rp+8]
+	stx	%g4,[rp+8]
 	fanop
 C --
-	add	%g1,cy,%g5
+	add	%g1,cy,%g4
 	or	u2,v2,%g2
 	fmnop
 	fanop
 C --
-	andn	%g2,%g5,%g2
+	andn	%g2,%g4,%g2
 	and	u2,v2,%g3
 	ldx	[up-16],u2
 	fanop
@@ -155,15 +134,15 @@ C --
 C --
 	srlx	%g2,63,cy
 	add	u3,v3,%g1
-	stx	%g5,[rp-16]
+	stx	%g4,[rp-16]
 	fanop
 C --
-	add	%g1,cy,%g5
+	add	%g1,cy,%g4
 	or	u3,v3,%g2
 	fmnop
 	fanop
 C --
-	andn	%g2,%g5,%g2
+	andn	%g2,%g4,%g2
 	and	u3,v3,%g3
 	ldx	[up-8],u3
 	fanop
@@ -175,48 +154,48 @@ C --
 C --
 	srlx	%g2,63,cy
 	add	u0,v0,%g1
-	stx	%g5,[rp-8]
+	stx	%g4,[rp-8]
 	fanop
 C --
-	add	%g1,cy,%g5
+	add	%g1,cy,%g4
 	or	u0,v0,%g2
-	bge,pt	%xcc,.Loop
+	bge,pt	%icc,.Loop
 	fanop
 C END MAIN LOOP
 .Lend4567:
-	andn	%g2,%g5,%g2
+	andn	%g2,%g4,%g2
 	and	u0,v0,%g3
 	or	%g3,%g2,%g2
 	srlx	%g2,63,cy
 	add	u1,v1,%g1
-	stx	%g5,[rp+0]
-	add	%g1,cy,%g5
+	stx	%g4,[rp+0]
+	add	%g1,cy,%g4
 	or	u1,v1,%g2
-	andn	%g2,%g5,%g2
+	andn	%g2,%g4,%g2
 	and	u1,v1,%g3
 	or	%g3,%g2,%g2
 	srlx	%g2,63,cy
 	add	u2,v2,%g1
-	stx	%g5,[rp+8]
-	add	%g1,cy,%g5
+	stx	%g4,[rp+8]
+	add	%g1,cy,%g4
 	or	u2,v2,%g2
-	andn	%g2,%g5,%g2
+	andn	%g2,%g4,%g2
 	and	u2,v2,%g3
 	or	%g3,%g2,%g2
 	add	rp,32,rp
 	srlx	%g2,63,cy
 	add	u3,v3,%g1
-	stx	%g5,[rp-16]
-	add	%g1,cy,%g5
+	stx	%g4,[rp-16]
+	add	%g1,cy,%g4
 	or	u3,v3,%g2
-	andn	%g2,%g5,%g2
+	andn	%g2,%g4,%g2
 	and	u3,v3,%g3
 	or	%g3,%g2,%g2
 	srlx	%g2,63,cy
-	stx	%g5,[rp-8]
+	stx	%g4,[rp-8]
 
 	addcc	n,4,n
-	bz,pn	%xcc,.Lret
+	bz,pn	%icc,.Lret
 	fanop
 
 .Loop0:	ldx	[up],u0
@@ -227,15 +206,15 @@ C END MAIN LOOP
 	subcc	n,1,n
 	add	u0,v0,%g1
 	or	u0,v0,%g2
-	add	%g1,cy,%g5
+	add	%g1,cy,%g4
 	and	u0,v0,%g3
-	andn	%g2,%g5,%g2
-	stx	%g5,[rp-8]
+	andn	%g2,%g4,%g2
+	stx	%g4,[rp-8]
 	or	%g3,%g2,%g2
-	bnz,pt	%xcc,.Loop0
+	bnz,pt	%icc,.Loop0
 	srlx	%g2,63,cy
 
 .Lret:	mov	cy,%i0
 	ret
 	restore
-EPILOGUE()
+EPILOGUE(mpn_add_n)
diff --git a/gmp/mpn/sparc64/ultrasparc1234/addmul_1.asm b/gmp/mpn/sparc64/addmul_1.asm
index 48a94146ff..bd83c6562c 100644
--- a/gmp/mpn/sparc64/ultrasparc1234/addmul_1.asm
+++ b/gmp/mpn/sparc64/addmul_1.asm
@@ -1,33 +1,23 @@
 dnl  SPARC v9 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and add
 dnl  the result to a second limb vector.
 
-dnl  Copyright 1998, 2000-2004 Free Software Foundation, Inc.
+dnl  Copyright 1998, 2000, 2001, 2002, 2003, 2004 Free Software Foundation,
+dnl  Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -148,7 +138,7 @@ C The software pipeline is very deep, requiring 4 feed-in stages.
 	fmuld	u32, v00, r32
 	fmuld	u00, v48, p48
 	addcc	%i2, 8, %i2
-	bnz,pt	%xcc, .L_two_or_more
+	bnz,pt	%icc, .L_two_or_more
 	fmuld	u32, v16, r48
 
 .L_one:
@@ -226,7 +216,7 @@ C The software pipeline is very deep, requiring 4 feed-in stages.
 	faddd	p16, r80, a16
 	fmuld	u00, v48, p48
 	addcc	%i2, 8, %i2
-	bnz,pt	%xcc, .L_three_or_more
+	bnz,pt	%icc, .L_three_or_more
 	fmuld	u32, v16, r48
 
 .L_two:
@@ -308,7 +298,7 @@ C The software pipeline is very deep, requiring 4 feed-in stages.
 	faddd	p16, r80, a16
 	fmuld	u00, v48, p48
 	addcc	%i2, 8, %i2
-	bnz,pt	%xcc, .L_four_or_more
+	bnz,pt	%icc, .L_four_or_more
 	fmuld	u32, v16, r48
 
 .L_three:
@@ -396,7 +386,7 @@ C The software pipeline is very deep, requiring 4 feed-in stages.
 	fmuld	u00, v48, p48
 	add	cy, %g5, %o4		C x = prev(i00) + cy
 	addcc	%i2, 8, %i2
-	bnz,pt	%xcc, .Loop
+	bnz,pt	%icc, .Loop
 	fmuld	u32, v16, r48
 
 .L_four:
@@ -473,7 +463,7 @@ C 12
 C 13
 	add	cy, %g5, %o4		C x = prev(i00) + cy
 	addcc	%i2, 8, %i2
-	bnz,pt	%xcc, .Loop
+	bnz,pt	%icc, .Loop
 	fmuld	u32, v16, r48
 C END MAIN LOOP
 
diff --git a/gmp/mpn/sparc64/ultrasparc1234/addmul_2.asm b/gmp/mpn/sparc64/addmul_2.asm
index 37674d7423..65efb5159a 100644
--- a/gmp/mpn/sparc64/ultrasparc1234/addmul_2.asm
+++ b/gmp/mpn/sparc64/addmul_2.asm
@@ -4,30 +4,19 @@ dnl  number and add the result to a n limb vector.
 dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/sparc64/copyd.asm b/gmp/mpn/sparc64/copyd.asm
index ab105d39c7..8a73dba8f0 100644
--- a/gmp/mpn/sparc64/copyd.asm
+++ b/gmp/mpn/sparc64/copyd.asm
@@ -1,41 +1,27 @@
 dnl  SPARC v9 mpn_copyd -- Copy a limb vector, decrementing.
 
-dnl  Copyright 1999-2003 Free Software Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC 1&2:	 2
-C UltraSPARC 3:		 2.5
-C UltraSPARC T1:	17
-C UltraSPARC T3:	 6
-C UltraSPARC T4/T5:	 2
+C UltraSPARC 1&2:     2
+C UltraSPARC 3:	      2.5
 
 C INPUT PARAMETERS
 C rptr	%o0
@@ -50,7 +36,7 @@ PROLOGUE(mpn_copyd)
 	add	%g1,%o0,%o0
 	add	%g1,%o1,%o1
 	addcc	%o2,-8,%o2
-	bl,pt	%xcc,L(end01234567)
+	bl,pt	%icc,L(end01234567)
 	nop
 L(loop1):
 	ldx	[%o1-8],%g1
@@ -71,18 +57,18 @@ L(loop1):
 	stx	%o4,[%o0-56]
 	stx	%o5,[%o0-64]
 	addcc	%o2,-8,%o2
-	bge,pt	%xcc,L(loop1)
+	bge,pt	%icc,L(loop1)
 	add	%o0,-64,%o0
 L(end01234567):
 	addcc	%o2,8,%o2
-	bz,pn	%xcc,L(end)
+	bz,pn	%icc,L(end)
 	nop
 L(loop2):
 	ldx	[%o1-8],%g1
 	add	%o1,-8,%o1
 	addcc	%o2,-1,%o2
 	stx	%g1,[%o0-8]
-	bg,pt	%xcc,L(loop2)
+	bg,pt	%icc,L(loop2)
 	add	%o0,-8,%o0
 L(end):	retl
 	nop
diff --git a/gmp/mpn/sparc64/copyi.asm b/gmp/mpn/sparc64/copyi.asm
index 45663dc2a3..3158357c0b 100644
--- a/gmp/mpn/sparc64/copyi.asm
+++ b/gmp/mpn/sparc64/copyi.asm
@@ -1,41 +1,27 @@
 dnl  SPARC v9 mpn_copyi -- Copy a limb vector, incrementing.
 
-dnl  Copyright 1999-2003 Free Software Foundation, Inc.
+dnl  Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C		   cycles/limb
-C UltraSPARC 1&2:	 2
-C UltraSPARC 3:		 2.5
-C UltraSPARC T1:	17
-C UltraSPARC T3:	 6
-C UltraSPARC T4/T5:	 2
+C UltraSPARC 1&2:     2
+C UltraSPARC 3:	      2.5
 
 C INPUT PARAMETERS
 C rptr	%o0
@@ -47,7 +33,7 @@ ASM_START()
 	REGISTER(%g3,#scratch)
 PROLOGUE(mpn_copyi)
 	addcc	%o2,-8,%o2
-	bl,pt	%xcc,L(end01234567)
+	bl,pt	%icc,L(end01234567)
 	nop
 L(loop1):
 	ldx	[%o1+0],%g1
@@ -68,18 +54,18 @@ L(loop1):
 	stx	%o4,[%o0+48]
 	stx	%o5,[%o0+56]
 	addcc	%o2,-8,%o2
-	bge,pt	%xcc,L(loop1)
+	bge,pt	%icc,L(loop1)
 	add	%o0,64,%o0
 L(end01234567):
 	addcc	%o2,8,%o2
-	bz,pn	%xcc,L(end)
+	bz,pn	%icc,L(end)
 	nop
 L(loop2):
 	ldx	[%o1+0],%g1
 	add	%o1,8,%o1
 	addcc	%o2,-1,%o2
 	stx	%g1,[%o0+0]
-	bg,pt	%xcc,L(loop2)
+	bg,pt	%icc,L(loop2)
 	add	%o0,8,%o0
 L(end):	retl
 	nop
diff --git a/gmp/mpn/sparc64/dive_1.c b/gmp/mpn/sparc64/dive_1.c
index c3fbf01b14..6f3d7c447c 100644
--- a/gmp/mpn/sparc64/dive_1.c
+++ b/gmp/mpn/sparc64/dive_1.c
@@ -9,28 +9,17 @@ Copyright 2000, 2001, 2003 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/mpn/sparc64/divrem_1.c b/gmp/mpn/sparc64/divrem_1.c
index 531494a94f..06de9a6040 100644
--- a/gmp/mpn/sparc64/divrem_1.c
+++ b/gmp/mpn/sparc64/divrem_1.c
@@ -1,33 +1,22 @@
 /* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.
 
-Copyright 1991, 1993, 1994, 1996, 1998-2001, 2003 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2001, 2003 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/mpn/sparc64/gcd_1.asm b/gmp/mpn/sparc64/gcd_1.asm
deleted file mode 100644
index e4d8de6a28..0000000000
--- a/gmp/mpn/sparc64/gcd_1.asm
+++ /dev/null
@@ -1,135 +0,0 @@
-dnl  SPARC64 mpn_gcd_1.
-
-dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for SPARC by Torbjörn
-dnl  Granlund.
-
-dnl  Copyright 2000-2002, 2005, 2009, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C		  cycles/bit (approx)
-C UltraSPARC 1&2:	 5.1
-C UltraSPARC 3:		 5.0
-C UltraSPARC T1:	11.4
-C UltraSPARC T3:	10
-C UltraSPARC T4:	 6
-C Numbers measured with: speed -CD -s32-64 -t32 mpn_gcd_1
-
-C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
-
-deflit(MAXSHIFT, 7)
-deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
-
-	RODATA
-	TYPE(ctz_table,object)
-ctz_table:
-	.byte	MAXSHIFT
-forloop(i,1,MASK,
-`	.byte	m4_count_trailing_zeros(i)
-')
-	SIZE(ctz_table,.-ctz_table)
-
-C Threshold of when to call bmod when U is one limb.  Should be about
-C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
-define(`BMOD_THRES_LOG2', 14)
-
-C INPUT PARAMETERS
-define(`up',    `%i0')
-define(`n',     `%i1')
-define(`v0',    `%i2')
-
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_gcd_1)
-	save	%sp, -192, %sp
-	ldx	[up+0], %g1		C U low limb
-	mov	-1, %i4
-	or	v0, %g1, %g2		C x | y
-
-L(twos):
-	inc	%i4
-	andcc	%g2, 1, %g0
-	bz,a	%xcc, L(twos)
-	 srlx	%g2, 1, %g2
-
-L(divide_strip_y):
-	andcc	v0, 1, %g0
-	bz,a	%xcc, L(divide_strip_y)
-	 srlx	v0, 1, v0
-
-	cmp	n, 1			C if n > 1 we need
-	bnz	%xcc, L(bmod)		C to call bmod_1
-	 nop
-
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
-	srlx	%g1, BMOD_THRES_LOG2, %g2
-	cmp	%g2, v0
-	bleu	%xcc, L(noreduce)
-	 mov	%g1, %o0
-
-L(bmod):
-	mov	up, %o0
-	mov	n, %o1
-	mov	v0, %o2
-	call	mpn_modexact_1c_odd
-	 mov	0, %o3
-
-L(noreduce):
-
-	LEA64(ctz_table, i5, g4)
-
-	cmp	%o0, 0
-	bnz	%xcc, L(mid)
-	 and	%o0, MASK, %g3		C
-
-	return	%i7+8
-	 sllx	%o2, %o4, %o0		C CAUTION: v0 alias for o2
-
-	ALIGN(16)
-L(top):	movcc	%xcc, %l4, v0		C v = min(u,v)
-	movcc	%xcc, %l2, %o0		C u = |v - u]
-L(mid):	ldub	[%i5+%g3], %g5		C
-	brz,a,pn %g3, L(shift_alot)	C
-	 srlx	%o0, MAXSHIFT, %o0
-	srlx	%o0, %g5, %l4		C new u, odd
-	subcc	v0, %l4, %l2		C v - u, set flags for branch and movcc
-	sub	%l4, v0, %o0		C u - v
-	bnz,pt	%xcc, L(top)		C
-	 and	%l2, MASK, %g3		C extract low MAXSHIFT bits from (v-u)
-
-	return	%i7+8
-	 sllx	%o2, %o4, %o0		C CAUTION: v0 alias for o2
-
-L(shift_alot):
-	b	L(mid)
-	 and	%o0, MASK, %g3		C
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/gmp-mparam.h b/gmp/mpn/sparc64/gmp-mparam.h
index 5ac2c461c5..abf523951d 100644
--- a/gmp/mpn/sparc64/gmp-mparam.h
+++ b/gmp/mpn/sparc64/gmp-mparam.h
@@ -1,139 +1,80 @@
 /* Sparc64 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2002, 2004, 2006, 2008-2010 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2006, 2008, 2009
+Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 500 MHz ultrasparc2 running GNU/Linux */
-
-#define DIVREM_1_NORM_THRESHOLD              3
-#define DIVREM_1_UNNORM_THRESHOLD            4
-#define MOD_1_NORM_THRESHOLD                 3
-#define MOD_1_UNNORM_THRESHOLD               3
-#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD         22
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        27
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
-#define USE_PREINV_DIVREM_1                  1
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
-
-#define MUL_TOOM22_THRESHOLD                30
-#define MUL_TOOM33_THRESHOLD               187
-#define MUL_TOOM44_THRESHOLD               278
-#define MUL_TOOM6H_THRESHOLD               278
-#define MUL_TOOM8H_THRESHOLD               357
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     201
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     199
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     154
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     107
-
-#define SQR_BASECASE_THRESHOLD              13
-#define SQR_TOOM2_THRESHOLD                 69
-#define SQR_TOOM3_THRESHOLD                116
-#define SQR_TOOM4_THRESHOLD                336
-#define SQR_TOOM6_THRESHOLD                336
-#define SQR_TOOM8_THRESHOLD                454
-
-#define MULMOD_BNM1_THRESHOLD               17
-#define SQRMOD_BNM1_THRESHOLD               23
-
-#define MUL_FFT_MODF_THRESHOLD             248  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    248, 5}, {      9, 4}, {     19, 6}, {      5, 5}, \
-    {     15, 6}, {      8, 5}, {     17, 6}, {     21, 7}, \
-    {     19, 8}, {     11, 7}, {     25, 8}, {     15, 7}, \
-    {     31, 8}, {     27, 9}, {     15, 8}, {     33, 9}, \
-    {     19, 8}, {     39, 9}, {     27,10}, {     15, 9}, \
-    {     39,10}, {     23, 9}, {     47,11}, {     15,10}, \
-    {     31, 9}, {     67,10}, {     39, 9}, {     79,10}, \
-    {     47,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255,10}, {     71, 9}, {    143, 8}, {    287,10}, \
-    {     79,11}, {     47,12}, {   4096,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 50
-#define MUL_FFT_THRESHOLD                 1984
-
-#define SQR_FFT_MODF_THRESHOLD             236  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    236, 5}, {      8, 4}, {     17, 5}, {     19, 6}, \
-    {     10, 5}, {     21, 6}, {     19, 7}, {     10, 6}, \
-    {     21, 7}, {     21, 8}, {     21, 9}, {     11, 8}, \
-    {     23, 9}, {     19, 8}, {     43, 9}, {     23,10}, \
-    {     15, 9}, {     43,10}, {     23,11}, {     15,10}, \
-    {     31, 9}, {     63,10}, {     47, 8}, {    191,11}, \
-    {     31,10}, {     63, 8}, {    255, 7}, {    511, 9}, \
-    {    135, 8}, {    271,10}, {     71, 9}, {    143, 8}, \
-    {    287, 7}, {    575,11}, {     47, 9}, {    191, 8}, \
-    {    383,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 49
-#define SQR_FFT_THRESHOLD                 1120
-
-#define MULLO_BASECASE_THRESHOLD            16
-#define MULLO_DC_THRESHOLD                  41
-#define MULLO_MUL_N_THRESHOLD             3791
-
-#define DC_DIV_QR_THRESHOLD                 27
-#define DC_DIVAPPR_Q_THRESHOLD             100
-#define DC_BDIV_QR_THRESHOLD                47
-#define DC_BDIV_Q_THRESHOLD                174
-
-#define INV_MULMOD_BNM1_THRESHOLD           58
-#define INV_NEWTON_THRESHOLD                13
-#define INV_APPR_THRESHOLD                   9
-
-#define BINV_NEWTON_THRESHOLD              187
-#define REDC_1_TO_REDC_2_THRESHOLD          10
-#define REDC_2_TO_REDC_N_THRESHOLD         115
-
-#define MU_DIV_QR_THRESHOLD                680
-#define MU_DIVAPPR_Q_THRESHOLD             618
-#define MUPI_DIV_QR_THRESHOLD                0  /* always */
-#define MU_BDIV_QR_THRESHOLD               748
-#define MU_BDIV_Q_THRESHOLD                889
-
-#define MATRIX22_STRASSEN_THRESHOLD         13
-#define HGCD_THRESHOLD                      53
-#define GCD_DC_THRESHOLD                   283
-#define GCDEXT_DC_THRESHOLD                186
-#define JACOBI_BASE_METHOD                   2
-
-#define GET_STR_DC_THRESHOLD                13
-#define GET_STR_PRECOMPUTE_THRESHOLD        16
-#define SET_STR_DC_THRESHOLD               390
-#define SET_STR_PRECOMPUTE_THRESHOLD      1665
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+
+/* Tell the toom3 multiply implementation to call low-level mpn
+   functions instead of open-coding operations in C.  */
+#ifndef USE_MORE_MPN
+#define USE_MORE_MPN 1
+#endif
+
+/* Generated by tuneup.c, 2009-01-15, gcc 3.4 */
+
+#define MUL_KARATSUBA_THRESHOLD          33
+#define MUL_TOOM3_THRESHOLD             189
+#define MUL_TOOM44_THRESHOLD            256
+
+#define SQR_BASECASE_THRESHOLD            9
+#define SQR_KARATSUBA_THRESHOLD          70
+#define SQR_TOOM3_THRESHOLD             226
+#define SQR_TOOM4_THRESHOLD             345
+
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD              46
+#define MULLOW_MUL_N_THRESHOLD          143
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 34
+#define POWM_THRESHOLD                  116
+
+#define MATRIX22_STRASSEN_THRESHOLD      18
+#define HGCD_THRESHOLD                   51
+#define GCD_DC_THRESHOLD                293
+#define GCDEXT_DC_THRESHOLD             198
+#define JACOBI_BASE_METHOD                3
+
+#define DIVREM_1_NORM_THRESHOLD           3
+#define DIVREM_1_UNNORM_THRESHOLD         3
+#define MOD_1_NORM_THRESHOLD              3
+#define MOD_1_UNNORM_THRESHOLD            3
+#define MOD_1_1_THRESHOLD                12
+#define MOD_1_2_THRESHOLD                13
+#define MOD_1_4_THRESHOLD                16
+#define USE_PREINV_DIVREM_1               1
+#define USE_PREINV_MOD_1                  1
+#define DIVREM_2_THRESHOLD                6
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             13
+#define GET_STR_PRECOMPUTE_THRESHOLD     21
+#define SET_STR_DC_THRESHOLD            638
+#define SET_STR_PRECOMPUTE_THRESHOLD   1889
+
+#define MUL_FFT_TABLE  { 304, 608, 1600, 2816, 7168, 20480, 81920, 196608, 786432, 0 }
+#define MUL_FFT_MODF_THRESHOLD          216
+#define MUL_FFT_THRESHOLD              1664
+
+#define SQR_FFT_TABLE  { 336, 736, 1600, 2816, 7168, 20480, 49152, 196608, 786432, 0 }
+#define SQR_FFT_MODF_THRESHOLD          216
+#define SQR_FFT_THRESHOLD              1312
+
diff --git a/gmp/mpn/sparc64/lshift.asm b/gmp/mpn/sparc64/lshift.asm
index 90bbb454f0..b3bbd9dd99 100644
--- a/gmp/mpn/sparc64/lshift.asm
+++ b/gmp/mpn/sparc64/lshift.asm
@@ -1,140 +1,152 @@
 dnl  SPARC v9 mpn_lshift
 
-dnl  Contributed to the GNU project by David Miller.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
+dnl  Copyright 1996, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
 
 include(`../config.m4')
 
-C		    cycles/limb
-C UltraSPARC 1&2:	 2
-C UltraSPARC 3:		 2.5
-C UltraSPARC T1:	17.5
-C UltraSPARC T3:	 8
-C UltraSPARC T4:	 3
+C		   cycles/limb
+C UltraSPARC 1&2:     2
+C UltraSPARC 3:	      3.25
 
 C INPUT PARAMETERS
-define(`rp',     `%i0')
-define(`up',     `%i1')
-define(`n',      `%i2')
-define(`cnt',    `%i3')
-
-define(`tcnt',   `%i4')
-define(`retval', `%i5')
-define(`u0',     `%l0')
-define(`u1',     `%l1')
-define(`r0',     `%l6')
-define(`r1',     `%l7')
-define(`u0_off', `%o0')
-define(`u1_off', `%o1')
-define(`r0_off', `%o2')
-define(`r1_off', `%o3')
+define(`rp',`%i0')
+define(`up',`%i1')
+define(`n',`%i2')
+define(`cnt',`%i3')
+
+define(`u0',`%l0')
+define(`u1',`%l2')
+define(`u2',`%l4')
+define(`u3',`%l6')
+
+define(`tnc',`%i4')
+
+define(`fanop',`fitod %f0,%f2')		dnl  A quasi nop running in the FA pipe
+define(`fmnop',`fmuld %f0,%f0,%f4')	dnl  A quasi nop running in the FM pipe
 
 ASM_START()
 	REGISTER(%g2,#scratch)
 	REGISTER(%g3,#scratch)
 PROLOGUE(mpn_lshift)
-	save	%sp, -176, %sp
-
-	sllx	n, 3, n
-	sub	%g0, cnt, tcnt
-
-	sub	up, 8, u1_off
-	add	rp, (5 * 8), r1_off
-
-	ldx	[n + u1_off], u1	C WAS: up - 8
-	add	u1_off, (3 * 8), u1_off
-
-	sub	r1_off, 8, r0_off
-	sub	u1_off, 8, u0_off
-
-	subcc	n, (3 * 8), n
-	srlx	u1, tcnt, retval
-
-	bl,pn	%xcc, L(end12)
-	 sllx	u1, cnt, %l3
-
-	ldx	[n + u0_off], u0	C WAS: up - 16
-	subcc	n, (2 * 8), n
-
-	ldx	[n + u1_off], u1	C WAS: up - 24
-
-	bl,pn	%xcc, L(end34)
-	 srlx	u0, tcnt, %l4
-
-	b,a	L(top)
-	ALIGN(16)
-L(top):
-	sllx	u0, cnt, %l2
-	or	%l4, %l3, r0
-
-	ldx	[n + u0_off], u0	C WAS: up - 16
-	srlx	u1, tcnt, %l5
-
-	stx	r0, [n + r0_off]	C WAS: rp - 8
-	subcc	n, (2 * 8), n
-
-	sllx	u1, cnt, %l3
-	or	%l2, %l5, r1
-
-	ldx	[n + u1_off], u1	C WAS: up - 24
-	srlx	u0, tcnt, %l4
-
-	bge,pt	%xcc, L(top)
-	 stx	r1, [n + r1_off]	C WAS: rp - 16
-
-L(end34):
-	sllx	u0, cnt, %l2
-	or	%l4, %l3, r0
-
-	srlx	u1, tcnt, %l5
-	stx	r0, [n + r0_off]	C WAS: rp - 8
-
-	or	%l2, %l5, r1
-	sub	n, (2 * 8), %o5
-
-	sllx	u1, cnt, %l3
-	stx	r1, [%o5 + r1_off]	C WAS: rp - 16
-
-L(end12):
-	andcc	n, 8, %g0
-	bz,pn	%xcc, L(done)
-	 nop
-
-	ldx	[n + u0_off], u1
-	srlx	u1, tcnt, %l4
-	or	%l4, %l3, r0
-	stx	r0, [r0_off - 24]
-	sllx	u1, cnt, %l3
-L(done):
-	stx	%l3, [r0_off - 32]
-
+	save	%sp,-160,%sp
+
+	sllx	n,3,%g1
+	sub	%g0,cnt,tnc		C negate shift count
+	add	up,%g1,up		C make up point at end of src
+	add	rp,%g1,rp		C make rp point at end of res
+	ldx	[up-8],u3		C load first limb
+	subcc	n,5,n
+	srlx	u3,tnc,%i5		C compute function result
+	sllx	u3,cnt,%g3
+	bl,pn	%icc,.Lend1234
+	fanop
+
+	subcc	n,4,n
+	ldx	[up-16],u0
+	ldx	[up-24],u1
+	add	up,-32,up
+	ldx	[up-0],u2
+	ldx	[up-8],u3
+	srlx	u0,tnc,%g2
+
+	bl,pn	%icc,.Lend5678
+	fanop
+
+	b,a	.Loop
+	.align	16
+.Loop:
+	sllx	u0,cnt,%g1
+	or	%g3,%g2,%g3
+	ldx	[up-16],u0
+	fanop
+C --
+	srlx	u1,tnc,%g2
+	subcc	n,4,n
+	stx	%g3,[rp-8]
+	fanop
+C --
+	sllx	u1,cnt,%g3
+	or	%g1,%g2,%g1
+	ldx	[up-24],u1
+	fanop
+C --
+	srlx	u2,tnc,%g2
+	stx	%g1,[rp-16]
+	add	up,-32,up
+	fanop
+C --
+	sllx	u2,cnt,%g1
+	or	%g3,%g2,%g3
+	ldx	[up-0],u2
+	fanop
+C --
+	srlx	u3,tnc,%g2
+	stx	%g3,[rp-24]
+	add	rp,-32,rp
+	fanop
+C --
+	sllx	u3,cnt,%g3
+	or	%g1,%g2,%g1
+	ldx	[up-8],u3
+	fanop
+C --
+	srlx	u0,tnc,%g2
+	stx	%g1,[rp-0]
+	bge,pt	%icc,.Loop
+	fanop
+C --
+.Lend5678:
+	sllx	u0,cnt,%g1
+	or	%g3,%g2,%g3
+	srlx	u1,tnc,%g2
+	stx	%g3,[rp-8]
+	sllx	u1,cnt,%g3
+	or	%g1,%g2,%g1
+	srlx	u2,tnc,%g2
+	stx	%g1,[rp-16]
+	sllx	u2,cnt,%g1
+	or	%g3,%g2,%g3
+	srlx	u3,tnc,%g2
+	stx	%g3,[rp-24]
+	add	rp,-32,rp
+	sllx	u3,cnt,%g3		C carry: u3 bits left for the next lower limb
+	or	%g1,%g2,%g1
+	stx	%g1,[rp-0]
+
+.Lend1234:
+	addcc	n,4,n
+	bz,pn	%icc,.Lret
+	fanop
+.Loop0:
+	add	rp,-8,rp
+	subcc	n,1,n
+	ldx	[up-16],u3
+	add	up,-8,up
+	srlx	u3,tnc,%g2
+	or	%g3,%g2,%g3
+	stx	%g3,[rp]
+	sllx	u3,cnt,%g3
+	bnz,pt	%icc,.Loop0
+	fanop
+.Lret:
+	stx	%g3,[rp-8]
+	mov	%i5,%i0
 	ret
-	restore retval, 0, %o0
-EPILOGUE()
+	restore
+EPILOGUE(mpn_lshift)
diff --git a/gmp/mpn/sparc64/lshiftc.asm b/gmp/mpn/sparc64/lshiftc.asm
deleted file mode 100644
index 4a0f0a3e40..0000000000
--- a/gmp/mpn/sparc64/lshiftc.asm
+++ /dev/null
@@ -1,147 +0,0 @@
-dnl  SPARC v9 mpn_lshiftc
-
-dnl  Contributed to the GNU project by David Miller.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		    cycles/limb
-C UltraSPARC 1&2:	 3
-C UltraSPARC 3:		 3
-C UltraSPARC T1:	17
-C UltraSPARC T3:	10
-C UltraSPARC T4:	 3.5
-
-C INPUT PARAMETERS
-define(`rp',     `%i0')
-define(`up',     `%i1')
-define(`n',      `%i2')
-define(`cnt',    `%i3')
-
-define(`tcnt',   `%i4')
-define(`retval', `%i5')
-define(`u0',     `%l0')
-define(`u1',     `%l1')
-define(`r0',     `%l6')
-define(`r1',     `%l7')
-define(`u0_off', `%o0')
-define(`u1_off', `%o1')
-define(`r0_off', `%o2')
-define(`r1_off', `%o3')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_lshiftc)
-	save	%sp, -176, %sp
-
-	sllx	n, 3, n
-	sub	%g0, cnt, tcnt
-
-	sub	up, 8, u1_off
-	add	rp, (5 * 8), r1_off
-
-	ldx	[n + u1_off], u1	C WAS: up - 8
-	add	u1_off, (3 * 8), u1_off
-
-	sub	r1_off, 8, r0_off
-	sub	u1_off, 8, u0_off
-
-	subcc	n, (3 * 8), n
-	srlx	u1, tcnt, retval
-
-	bl,pn	%xcc, L(end12)
-	 sllx	u1, cnt, %l3
-
-	ldx	[n + u0_off], u0	C WAS: up - 16
-	subcc	n, (2 * 8), n
-
-	ldx	[n + u1_off], u1	C WAS: up - 24
-
-	bl,pn	%xcc, L(end34)
-	 srlx	u0, tcnt, %l4
-
-	b,a	L(top)
-	ALIGN(16)
-L(top):
-	not	%l3, %l3
-	sllx	u0, cnt, %l2
-
-	andn	%l3, %l4, r0
-	ldx	[n + u0_off], u0	C WAS: up - 16
-
-	srlx	u1, tcnt, %l5
-	stx	r0, [n + r0_off]	C WAS: rp - 8
-
-	subcc	n, (2 * 8), n
-	not	%l2, %l2
-
-	sllx	u1, cnt, %l3
-	andn	%l2, %l5, r1
-
-	ldx	[n + u1_off], u1	C WAS: up - 24
-	srlx	u0, tcnt, %l4
-
-	bge,pt	%xcc, L(top)
-	 stx	r1, [n + r1_off]	C WAS: rp - 16
-
-L(end34):
-	not	%l3, %l3
-	sllx	u0, cnt, %l2
-
-	andn	%l3, %l4, r0
-	srlx	u1, tcnt, %l5
-
-	stx	r0, [n + r0_off]	C WAS: rp - 8
-	not	%l2, %l2
-
-	andn	%l2, %l5, r1
-	sub	n, (2 * 8), %o5
-
-	sllx	u1, cnt, %l3
-	stx	r1, [%o5 + r1_off]	C WAS: rp - 16
-
-L(end12):
-	andcc	n, 8, %g0
-	bz	%xcc, L(done)+4
-	 not	%l3, %l3
-
-	ldx	[n + u0_off], u1
-	srlx	u1, tcnt, %l4
-	andn	%l3, %l4, r0
-	stx	r0, [r0_off - 24]
-	sllx	u1, cnt, %l3
-L(done):
-	not	%l3, %l3
-	stx	%l3, [r0_off - 32]
-
-	ret
-	restore retval, 0, %o0
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/mod_1.c b/gmp/mpn/sparc64/mod_1.c
index f1c51970d9..757ae01b95 100644
--- a/gmp/mpn/sparc64/mod_1.c
+++ b/gmp/mpn/sparc64/mod_1.c
@@ -1,33 +1,22 @@
 /* UltraSPARC 64 mpn_mod_1 -- mpn by limb remainder.
 
-Copyright 1991, 1993, 1994, 1999-2001, 2003, 2010 Free Software Foundation,
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2003 Free Software Foundation,
 Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
@@ -57,8 +46,8 @@ see https://www.gnu.org/licenses/.  */
    sizes, but at size==2 it was only about the same speed and at size==3 was
    slower.  */
 
-static mp_limb_t
-mpn_mod_1_anynorm (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
+mp_limb_t
+mpn_mod_1 (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
 {
   int        norm, norm_rshift;
   mp_limb_t  src_high_limb;
@@ -186,54 +175,3 @@ mpn_mod_1_anynorm (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb
       return r >> norm;
     }
 }
-
-mp_limb_t
-mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
-{
-  ASSERT (n >= 0);
-  ASSERT (b != 0);
-
-  /* Should this be handled at all?  Rely on callers?  Note un==0 is currently
-     required by mpz/fdiv_r_ui.c and possibly other places.  */
-  if (n == 0)
-    return 0;
-
-  if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
-    {
-      if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
-	{
-	  return mpn_mod_1_anynorm (ap, n, b);
-	}
-      else
-	{
-	  mp_limb_t pre[4];
-	  mpn_mod_1_1p_cps (pre, b);
-	  return mpn_mod_1_1p (ap, n, b, pre);
-	}
-    }
-  else
-    {
-      if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
-	{
-	  return mpn_mod_1_anynorm (ap, n, b);
-	}
-      else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
-	{
-	  mp_limb_t pre[4];
-	  mpn_mod_1_1p_cps (pre, b);
-	  return mpn_mod_1_1p (ap, n, b << pre[1], pre);
-	}
-      else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
-	{
-	  mp_limb_t pre[5];
-	  mpn_mod_1s_2p_cps (pre, b);
-	  return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
-	}
-      else
-	{
-	  mp_limb_t pre[7];
-	  mpn_mod_1s_4p_cps (pre, b);
-	  return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
-	}
-    }
-}
diff --git a/gmp/mpn/sparc64/mod_1_4.c b/gmp/mpn/sparc64/mod_1_4.c
deleted file mode 100644
index cc1b9484bc..0000000000
--- a/gmp/mpn/sparc64/mod_1_4.c
+++ /dev/null
@@ -1,236 +0,0 @@
-/* mpn_mod_1s_4p (ap, n, b, cps)
-   Divide (ap,,n) by b.  Return the single-limb remainder.
-   Requires that d < B / 4.
-
-   Contributed to the GNU project by Torbjorn Granlund.
-   Based on a suggestion by Peter L. Montgomery.
-
-   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
-   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
-   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2008-2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#include "mpn/sparc64/sparc64.h"
-
-void
-mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
-{
-  mp_limb_t bi;
-  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
-  int cnt;
-
-  ASSERT (b <= (~(mp_limb_t) 0) / 4);
-
-  count_leading_zeros (cnt, b);
-
-  b <<= cnt;
-  invert_limb (bi, b);
-
-  cps[0] = bi;
-  cps[1] = cnt;
-
-  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
-  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
-  cps[2] = B1modb >> cnt;
-
-  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
-  cps[3] = B2modb >> cnt;
-
-  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
-  cps[4] = B3modb >> cnt;
-
-  udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
-  cps[5] = B4modb >> cnt;
-
-  udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
-  cps[6] = B5modb >> cnt;
-
-#if WANT_ASSERT
-  {
-    int i;
-    b = cps[2];
-    for (i = 3; i <= 6; i++)
-      {
-	b += cps[i];
-	ASSERT (b >= cps[i]);
-      }
-  }
-#endif
-}
-
-mp_limb_t
-mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
-{
-  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
-  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
-  mp_size_t i;
-  int cnt;
-
-  ASSERT (n >= 1);
-
-  B1modb = cps[2];
-  B2modb = cps[3];
-  B3modb = cps[4];
-  B4modb = cps[5];
-  B5modb = cps[6];
-
-  if ((b >> 32) == 0)
-    {
-      switch (n & 3)
-	{
-	case 0:
-	  umul_ppmm_s (ph, pl, ap[n - 3], B1modb);
-	  add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
-	  umul_ppmm_s (ch, cl, ap[n - 2], B2modb);
-	  add_ssaaaa (ph, pl, ph, pl, ch, cl);
-	  umul_ppmm_s (rh, rl, ap[n - 1], B3modb);
-	  add_ssaaaa (rh, rl, rh, rl, ph, pl);
-	  n -= 4;
-	  break;
-	case 1:
-	  rh = 0;
-	  rl = ap[n - 1];
-	  n -= 1;
-	  break;
-	case 2:
-	  rh = ap[n - 1];
-	  rl = ap[n - 2];
-	  n -= 2;
-	  break;
-	case 3:
-	  umul_ppmm_s (ph, pl, ap[n - 2], B1modb);
-	  add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
-	  umul_ppmm_s (rh, rl, ap[n - 1], B2modb);
-	  add_ssaaaa (rh, rl, rh, rl, ph, pl);
-	  n -= 3;
-	  break;
-	}
-
-      for (i = n - 4; i >= 0; i -= 4)
-	{
-	  /* rr = ap[i]				< B
-		+ ap[i+1] * (B mod b)		<= (B-1)(b-1)
-		+ ap[i+2] * (B^2 mod b)		<= (B-1)(b-1)
-		+ ap[i+3] * (B^3 mod b)		<= (B-1)(b-1)
-		+ LO(rr)  * (B^4 mod b)		<= (B-1)(b-1)
-		+ HI(rr)  * (B^5 mod b)		<= (B-1)(b-1)
-	  */
-	  umul_ppmm_s (ph, pl, ap[i + 1], B1modb);
-	  add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
-
-	  umul_ppmm_s (ch, cl, ap[i + 2], B2modb);
-	  add_ssaaaa (ph, pl, ph, pl, ch, cl);
-
-	  umul_ppmm_s (ch, cl, ap[i + 3], B3modb);
-	  add_ssaaaa (ph, pl, ph, pl, ch, cl);
-
-	  umul_ppmm_s (ch, cl, rl, B4modb);
-	  add_ssaaaa (ph, pl, ph, pl, ch, cl);
-
-	  umul_ppmm_s (rh, rl, rh, B5modb);
-	  add_ssaaaa (rh, rl, rh, rl, ph, pl);
-	}
-
-      umul_ppmm_s (rh, cl, rh, B1modb);
-      add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
-    }
-  else
-    {
-      switch (n & 3)
-	{
-	case 0:
-	  umul_ppmm (ph, pl, ap[n - 3], B1modb);
-	  add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
-	  umul_ppmm (ch, cl, ap[n - 2], B2modb);
-	  add_ssaaaa (ph, pl, ph, pl, ch, cl);
-	  umul_ppmm (rh, rl, ap[n - 1], B3modb);
-	  add_ssaaaa (rh, rl, rh, rl, ph, pl);
-	  n -= 4;
-	  break;
-	case 1:
-	  rh = 0;
-	  rl = ap[n - 1];
-	  n -= 1;
-	  break;
-	case 2:
-	  rh = ap[n - 1];
-	  rl = ap[n - 2];
-	  n -= 2;
-	  break;
-	case 3:
-	  umul_ppmm (ph, pl, ap[n - 2], B1modb);
-	  add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
-	  umul_ppmm (rh, rl, ap[n - 1], B2modb);
-	  add_ssaaaa (rh, rl, rh, rl, ph, pl);
-	  n -= 3;
-	  break;
-	}
-
-      for (i = n - 4; i >= 0; i -= 4)
-	{
-	  /* rr = ap[i]				< B
-		+ ap[i+1] * (B mod b)		<= (B-1)(b-1)
-		+ ap[i+2] * (B^2 mod b)		<= (B-1)(b-1)
-		+ ap[i+3] * (B^3 mod b)		<= (B-1)(b-1)
-		+ LO(rr)  * (B^4 mod b)		<= (B-1)(b-1)
-		+ HI(rr)  * (B^5 mod b)		<= (B-1)(b-1)
-	  */
-	  umul_ppmm (ph, pl, ap[i + 1], B1modb);
-	  add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
-
-	  umul_ppmm (ch, cl, ap[i + 2], B2modb);
-	  add_ssaaaa (ph, pl, ph, pl, ch, cl);
-
-	  umul_ppmm (ch, cl, ap[i + 3], B3modb);
-	  add_ssaaaa (ph, pl, ph, pl, ch, cl);
-
-	  umul_ppmm (ch, cl, rl, B4modb);
-	  add_ssaaaa (ph, pl, ph, pl, ch, cl);
-
-	  umul_ppmm (rh, rl, rh, B5modb);
-	  add_ssaaaa (rh, rl, rh, rl, ph, pl);
-	}
-
-      umul_ppmm (rh, cl, rh, B1modb);
-      add_ssaaaa (rh, rl, rh, rl, 0, cl);
-    }
-
-  bi = cps[0];
-  cnt = cps[1];
-
-  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
-
-  return r >> cnt;
-}
diff --git a/gmp/mpn/sparc64/mode1o.c b/gmp/mpn/sparc64/mode1o.c
index 7c8fc1cf3d..5ec97c5cd4 100644
--- a/gmp/mpn/sparc64/mode1o.c
+++ b/gmp/mpn/sparc64/mode1o.c
@@ -4,33 +4,22 @@
    CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
    FUTURE GNU MP RELEASES.
 
-Copyright 2000-2003 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/mpn/sparc64/ultrasparc1234/mul_1.asm b/gmp/mpn/sparc64/mul_1.asm
index 871d562fcb..e57e822bae 100644
--- a/gmp/mpn/sparc64/ultrasparc1234/mul_1.asm
+++ b/gmp/mpn/sparc64/mul_1.asm
@@ -1,33 +1,22 @@
 dnl  SPARC v9 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and store
 dnl  the result in a second limb vector.
 
-dnl  Copyright 1998, 2000-2003 Free Software Foundation, Inc.
+dnl  Copyright 1998, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -147,7 +136,7 @@ C The software pipeline is very deep, requiring 4 feed-in stages.
 	fmuld	u32, v00, r32
 	fmuld	u00, v48, p48
 	addcc	%i2, 8, %i2
-	bnz,pt	%xcc, .L_two_or_more
+	bnz,pt	%icc, .L_two_or_more
 	fmuld	u32, v16, r48
 
 .L_one:
@@ -222,7 +211,7 @@ C The software pipeline is very deep, requiring 4 feed-in stages.
 	faddd	p16, r80, a16
 	fmuld	u00, v48, p48
 	addcc	%i2, 8, %i2
-	bnz,pt	%xcc, .L_three_or_more
+	bnz,pt	%icc, .L_three_or_more
 	fmuld	u32, v16, r48
 
 .L_two:
@@ -299,7 +288,7 @@ C The software pipeline is very deep, requiring 4 feed-in stages.
 	faddd	p16, r80, a16
 	fmuld	u00, v48, p48
 	addcc	%i2, 8, %i2
-	bnz,pt	%xcc, .L_four_or_more
+	bnz,pt	%icc, .L_four_or_more
 	fmuld	u32, v16, r48
 
 .L_three:
@@ -381,7 +370,7 @@ C The software pipeline is very deep, requiring 4 feed-in stages.
 	fmuld	u00, v48, p48
 	add	cy, %g5, %o4		C x = prev(i00) + cy
 	addcc	%i2, 8, %i2
-	bnz,pt	%xcc, .Loop
+	bnz,pt	%icc, .Loop
 	fmuld	u32, v16, r48
 
 .L_four:
@@ -455,7 +444,7 @@ C 12
 C 13
 	add	cy, %g5, %o4		C x = prev(i00) + cy
 	addcc	%i2, 8, %i2
-	bnz,pt	%xcc, .Loop
+	bnz,pt	%icc, .Loop
 	fmuld	u32, v16, r48
 C END MAIN LOOP
 
diff --git a/gmp/mpn/sparc64/rshift.asm b/gmp/mpn/sparc64/rshift.asm
index 3f8e11fee7..691fe012d3 100644
--- a/gmp/mpn/sparc64/rshift.asm
+++ b/gmp/mpn/sparc64/rshift.asm
@@ -1,142 +1,149 @@
 dnl  SPARC v9 mpn_rshift
 
-dnl  Contributed to the GNU project by David Miller.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
+dnl  Copyright 1996, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
 
 include(`../config.m4')
 
-C		    cycles/limb
-C UltraSPARC 1&2:	 2
-C UltraSPARC 3:		 2.5
-C UltraSPARC T1:	17.5
-C UltraSPARC T3:	 8
-C UltraSPARC T4:	 3
+C		   cycles/limb
+C UltraSPARC 1&2:     2
+C UltraSPARC 3:	      3.25
 
 C INPUT PARAMETERS
-define(`rp',     `%i0')
-define(`up',     `%i1')
-define(`n',      `%i2')
-define(`cnt',    `%i3')
-
-define(`tcnt',   `%i4')
-define(`retval', `%i5')
-define(`u0',     `%l0')
-define(`u1',     `%l1')
-define(`r0',     `%l6')
-define(`r1',     `%l7')
-define(`u0_off', `%o0')
-define(`u1_off', `%o1')
-define(`r0_off', `%o2')
-define(`r1_off', `%o3')
+define(`rp',`%i0')
+define(`up',`%i1')
+define(`n',`%i2')
+define(`cnt',`%i3')
+
+define(`u0',`%l0')
+define(`u1',`%l2')
+define(`u2',`%l4')
+define(`u3',`%l6')
+
+define(`tnc',`%i4')
+
+define(`fanop',`fitod %f0,%f2')		dnl  A quasi nop running in the FA pipe
+define(`fmnop',`fmuld %f0,%f0,%f4')	dnl  A quasi nop running in the FM pipe
 
 ASM_START()
 	REGISTER(%g2,#scratch)
 	REGISTER(%g3,#scratch)
 PROLOGUE(mpn_rshift)
-	save	%sp, -176, %sp
-
-	sllx	n, 3, n
-	sub	%g0, cnt, tcnt
-
-	add	up, n, up
-	add	rp, n, rp
-
-	neg	n, n
-	sub	up, (2 * 8), u0_off
-	sub	rp, (5 * 8), r0_off
-
-	ldx	[n + up], u1		C WAS: up + 0
-	sub	u0_off, (1 * 8), u1_off
-	sub	r0_off, (1 * 8), r1_off
-
-	subcc	n, -(3 * 8), n
-	sllx	u1, tcnt, retval
-
-	bg,pn	%xcc, L(end12)
-	 srlx	u1, cnt, %l3
-
-	ldx	[n + u0_off], u0	C WAS: up + 0
-	subcc	n, -(2 * 8), n
-
-	ldx	[n + u1_off], u1	C WAS: up + 8
-
-	bg,pn	%xcc, L(end34)
-	 sllx	u0, tcnt, %l4
-
-	b,a	L(top)
-	ALIGN(16)
-L(top):
-	srlx	u0, cnt, %l2
-	or	%l3, %l4, r0
-
-	ldx	[n + u0_off], u0	C WAS: up + 0
-	sllx	u1, tcnt, %l5
-
-	stx	r0, [n + r0_off]	C WAS: rp + 0
-	subcc	n, -(2 * 8), n
-
-	srlx	u1, cnt, %l3
-	or	%l2, %l5, r1
-
-	ldx	[n + u1_off], u1	C WAS: up + 8
-	sllx	u0, tcnt, %l4
-
-	ble,pt	%xcc, L(top)
-	 stx	r1, [n + r1_off]	C WAS: rp + 8
-
-L(end34):
-	srlx	u0, cnt, %l2
-	or	%l3, %l4, r0
-
-	sllx	u1, tcnt, %l5
-	stx	r0, [n + r0_off]	C WAS: rp + 0
-
-	or	%l2, %l5, r1
-	sub	n, -(2 * 8), %o5
-
-	srlx	u1, cnt, %l3
-	stx	r1, [%o5 + r1_off]	C WAS: rp + 8
-
-L(end12):
-	andcc	n, 8, %g0
-	bz,pn	%xcc, L(done)
-	 nop
-
-	ldx	[n + u0_off], u1
-	sllx	u1, tcnt, %l4
-	or	%l3, %l4, r0
-	stx	r0, [r0_off + 24]
-	srlx	u1, cnt, %l3
-L(done):
-	stx	%l3, [r0_off + 32]
-
+	save	%sp,-160,%sp
+
+	sub	%g0,cnt,tnc		C negate shift count
+	ldx	[up],u3			C load first limb
+	subcc	n,5,n
+	sllx	u3,tnc,%i5		C compute function result
+	srlx	u3,cnt,%g3
+	bl,pn	%icc,.Lend1234
+	fanop
+
+	subcc	n,4,n
+	ldx	[up+8],u0
+	ldx	[up+16],u1
+	add	up,32,up
+	ldx	[up-8],u2
+	ldx	[up+0],u3
+	sllx	u0,tnc,%g2
+
+	bl,pn	%icc,.Lend5678
+	fanop
+
+	b,a	.Loop
+	.align	16
+.Loop:
+	srlx	u0,cnt,%g1
+	or	%g3,%g2,%g3
+	ldx	[up+8],u0
+	fanop
+C --
+	sllx	u1,tnc,%g2
+	subcc	n,4,n
+	stx	%g3,[rp+0]
+	fanop
+C --
+	srlx	u1,cnt,%g3
+	or	%g1,%g2,%g1
+	ldx	[up+16],u1
+	fanop
+C --
+	sllx	u2,tnc,%g2
+	stx	%g1,[rp+8]
+	add	up,32,up
+	fanop
+C --
+	srlx	u2,cnt,%g1
+	or	%g3,%g2,%g3
+	ldx	[up-8],u2
+	fanop
+C --
+	sllx	u3,tnc,%g2
+	stx	%g3,[rp+16]
+	add	rp,32,rp
+	fanop
+C --
+	srlx	u3,cnt,%g3
+	or	%g1,%g2,%g1
+	ldx	[up+0],u3
+	fanop
+C --
+	sllx	u0,tnc,%g2
+	stx	%g1,[rp-8]
+	bge,pt	%icc,.Loop
+	fanop
+C --
+.Lend5678:
+	srlx	u0,cnt,%g1
+	or	%g3,%g2,%g3
+	sllx	u1,tnc,%g2
+	stx	%g3,[rp+0]
+	srlx	u1,cnt,%g3
+	or	%g1,%g2,%g1
+	sllx	u2,tnc,%g2
+	stx	%g1,[rp+8]
+	srlx	u2,cnt,%g1
+	or	%g3,%g2,%g3
+	sllx	u3,tnc,%g2
+	stx	%g3,[rp+16]
+	add	rp,32,rp
+	srlx	u3,cnt,%g3		C carry...
+	or	%g1,%g2,%g1
+	stx	%g1,[rp-8]
+
+.Lend1234:
+	addcc	n,4,n
+	bz,pn	%icc,.Lret
+	fanop
+.Loop0:
+	add	rp,8,rp
+	subcc	n,1,n
+	ldx	[up+8],u3
+	add	up,8,up
+	sllx	u3,tnc,%g2
+	or	%g3,%g2,%g3
+	stx	%g3,[rp-8]
+	srlx	u3,cnt,%g3
+	bnz,pt	%icc,.Loop0
+	fanop
+.Lret:
+	stx	%g3,[rp+0]
+	mov	%i5,%i0
 	ret
-	restore retval, 0, %o0
-EPILOGUE()
+	restore
+EPILOGUE(mpn_rshift)
diff --git a/gmp/mpn/sparc64/sec_tabselect.asm b/gmp/mpn/sparc64/sec_tabselect.asm
deleted file mode 100644
index 22e0dc5ef1..0000000000
--- a/gmp/mpn/sparc64/sec_tabselect.asm
+++ /dev/null
@@ -1,162 +0,0 @@
-dnl  SPARC v9 mpn_sec_tabselect.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund and David Miller.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC 1&2:	 2 hopefully
-C UltraSPARC 3:		 3
-C UltraSPARC T1:	17
-C UltraSPARC T3:	 ?
-C UltraSPARC T4/T5:	 2.25 hopefully
-
-C INPUT PARAMETERS
-define(`rp',     `%i0')
-define(`tp',     `%i1')
-define(`n',      `%i2')
-define(`nents',  `%i3')
-define(`which',  `%i4')
-
-define(`i',      `%g1')
-define(`j',      `%g3')
-define(`stride', `%g4')
-define(`tporig', `%g5')
-define(`mask',   `%o0')
-
-define(`data0',  `%l0')
-define(`data1',  `%l1')
-define(`data2',  `%l2')
-define(`data3',  `%l3')
-define(`t0',     `%l4')
-define(`t1',     `%l5')
-define(`t2',     `%l6')
-define(`t3',     `%l7')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_sec_tabselect)
-	save	%sp, -176, %sp
-
-	sllx	n, 3, stride
-	sub	n, 4, j
-	brlz	j, L(outer_end)
-	 mov	tp, tporig
-
-L(outer_loop):
-	clr	data0
-	clr	data1
-	clr	data2
-	clr	data3
-	mov	tporig, tp
-	mov	nents, i
-	mov	which, %o1
-
-L(top):	subcc	%o1, 1, %o1		C set carry iff o1 = 0
-	ldx	[tp + 0], t0
-	subc	%g0, %g0, mask
-	ldx	[tp + 8], t1
-	sub	i, 1, i
-	ldx	[tp + 16], t2
-	ldx	[tp + 24], t3
-	add	tp, stride, tp
-	and	t0, mask, t0
-	and	t1, mask, t1
-	or	t0, data0, data0
-	and	t2, mask, t2
-	or	t1, data1, data1
-	and	t3, mask, t3
-	or	t2, data2, data2
-	brnz	i, L(top)
-	 or	t3, data3, data3
-
-	stx	data0, [rp + 0]
-	subcc	j, 4, j
-	stx	data1, [rp + 8]
-	stx	data2, [rp + 16]
-	stx	data3, [rp + 24]
-	add	tporig, (4 * 8), tporig
-
-	brgez	j, L(outer_loop)
-	 add	rp, (4 * 8), rp
-L(outer_end):
-
-
-	andcc	n, 2, %g0
-	be	L(b0x)
-	 nop
-L(b1x):	clr	data0
-	clr	data1
-	mov	tporig, tp
-	mov	nents, i
-	mov	which, %o1
-
-L(tp2):	subcc	%o1, 1, %o1
-	ldx	[tp + 0], t0
-	subc	%g0, %g0, mask
-	ldx	[tp + 8], t1
-	sub	i, 1, i
-	add	tp, stride, tp
-	and	t0, mask, t0
-	and	t1, mask, t1
-	or	t0, data0, data0
-	brnz	i, L(tp2)
-	 or	t1, data1, data1
-
-	stx	data0, [rp + 0]
-	stx	data1, [rp + 8]
-	add	tporig, (2 * 8), tporig
-	add	rp, (2 * 8), rp
-
-
-L(b0x):	andcc	n, 1, %g0
-	be	L(b00)
-	 nop
-L(b01):	clr	data0
-	mov	tporig, tp
-	mov	nents, i
-	mov	which, %o1
-
-L(tp1):	subcc	%o1, 1, %o1
-	ldx	[tp + 0], t0
-	subc	%g0, %g0, mask
-	sub	i, 1, i
-	add	tp, stride, tp
-	and	t0, mask, t0
-	brnz	i, L(tp1)
-	 or	t0, data0, data0
-
-	stx	data0, [rp + 0]
-
-L(b00):	 ret
-	  restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/sparc64.h b/gmp/mpn/sparc64/sparc64.h
index 09fc16d46a..945e422f5a 100644
--- a/gmp/mpn/sparc64/sparc64.h
+++ b/gmp/mpn/sparc64/sparc64.h
@@ -9,28 +9,17 @@ Copyright 2003 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 #define LOW32(x)   ((x) & 0xFFFFFFFF)
@@ -140,24 +129,6 @@ Error, error, unknown limb endianness;
 #endif
 
 
-/* Multiply u anv v, where v < 2^32.  */
-#define umul_ppmm_s(w1, w0, u, v)					\
-  do {									\
-    UWtype __x0, __x2;							\
-    UWtype __ul, __vl, __uh;						\
-    UWtype __u = (u), __v = (v);					\
-									\
-    __ul = __ll_lowpart (__u);						\
-    __uh = __ll_highpart (__u);						\
-    __vl = __ll_lowpart (__v);						\
-									\
-    __x0 = (UWtype) __ul * __vl;					\
-    __x2 = (UWtype) __uh * __vl;					\
-									\
-    (w1) = (__x2 + (__x0 >> W_TYPE_SIZE/2)) >> W_TYPE_SIZE/2;		\
-    (w0) = (__x2 << W_TYPE_SIZE/2) + __x0;				\
-  } while (0)
-
 /* Count the leading zeros on a limb, but assuming it fits in 32 bits.
    The count returned will be in the range 32 to 63.
    This is the 32-bit generic C count_leading_zeros from longlong.h. */
diff --git a/gmp/mpn/sparc64/ultrasparc1234/sqr_diagonal.asm b/gmp/mpn/sparc64/sqr_diagonal.asm
index 43c69d31d1..fbbb4ff456 100644
--- a/gmp/mpn/sparc64/ultrasparc1234/sqr_diagonal.asm
+++ b/gmp/mpn/sparc64/sqr_diagonal.asm
@@ -3,30 +3,19 @@ dnl  SPARC v9 64-bit mpn_sqr_diagonal.
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/sparc64/ultrasparc1234/sub_n.asm b/gmp/mpn/sparc64/sub_n.asm
index 9fb7f70747..e6fe9ee62c 100644
--- a/gmp/mpn/sparc64/ultrasparc1234/sub_n.asm
+++ b/gmp/mpn/sparc64/sub_n.asm
@@ -1,33 +1,22 @@
 dnl  SPARC v9 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
 dnl  store difference in a third limb vector.
 
-dnl  Copyright 2001-2003, 2011 Free Software Foundation, Inc.
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -65,24 +54,14 @@ define(`fmnop',`fmuld %f0,%f0,%f4')	dnl  A quasi nop running in the FM pipe
 ASM_START()
 	REGISTER(%g2,#scratch)
 	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_sub_nc)
-	save	%sp,-160,%sp
-
-	fitod	%f0,%f0		C make sure f0 contains small, quiet number
-	subcc	n,4,%g0
-	bl,pn	%xcc,.Loop0
-	nop
-	b,a	L(com)
-EPILOGUE()
-
 PROLOGUE(mpn_sub_n)
 	save	%sp,-160,%sp
 
 	fitod	%f0,%f0		C make sure f0 contains small, quiet number
 	subcc	n,4,%g0
-	bl,pn	%xcc,.Loop0
+	bl,pn	%icc,.Loop0
 	mov	0,cy
-L(com):
+
 	ldx	[up+0],u0
 	ldx	[vp+0],v0
 	add	up,32,up
@@ -95,15 +74,15 @@ L(com):
 	ldx	[vp-8],v3
 	subcc	n,8,n
 	sub	u0,v0,%g1	C main sub
-	sub	%g1,cy,%g5	C carry sub
+	sub	%g1,cy,%g4	C carry sub
 	orn	u0,v0,%g2
-	bl,pn	%xcc,.Lend4567
+	bl,pn	%icc,.Lend4567
 	fanop
 	b,a	.Loop
 
 	.align	16
 C START MAIN LOOP
-.Loop:	orn	%g5,%g2,%g2
+.Loop:	orn	%g4,%g2,%g2
 	andn	u0,v0,%g3
 	ldx	[up+0],u0
 	fanop
@@ -115,15 +94,15 @@ C --
 C --
 	srlx	%g2,63,cy
 	sub	u1,v1,%g1
-	stx	%g5,[rp+0]
+	stx	%g4,[rp+0]
 	fanop
 C --
-	sub	%g1,cy,%g5
+	sub	%g1,cy,%g4
 	orn	u1,v1,%g2
 	fmnop
 	fanop
 C --
-	orn	%g5,%g2,%g2
+	orn	%g4,%g2,%g2
 	andn	u1,v1,%g3
 	ldx	[up-24],u1
 	fanop
@@ -135,15 +114,15 @@ C --
 C --
 	srlx	%g2,63,cy
 	sub	u2,v2,%g1
-	stx	%g5,[rp+8]
+	stx	%g4,[rp+8]
 	fanop
 C --
-	sub	%g1,cy,%g5
+	sub	%g1,cy,%g4
 	orn	u2,v2,%g2
 	fmnop
 	fanop
 C --
-	orn	%g5,%g2,%g2
+	orn	%g4,%g2,%g2
 	andn	u2,v2,%g3
 	ldx	[up-16],u2
 	fanop
@@ -155,15 +134,15 @@ C --
 C --
 	srlx	%g2,63,cy
 	sub	u3,v3,%g1
-	stx	%g5,[rp-16]
+	stx	%g4,[rp-16]
 	fanop
 C --
-	sub	%g1,cy,%g5
+	sub	%g1,cy,%g4
 	orn	u3,v3,%g2
 	fmnop
 	fanop
 C --
-	orn	%g5,%g2,%g2
+	orn	%g4,%g2,%g2
 	andn	u3,v3,%g3
 	ldx	[up-8],u3
 	fanop
@@ -175,48 +154,48 @@ C --
 C --
 	srlx	%g2,63,cy
 	sub	u0,v0,%g1
-	stx	%g5,[rp-8]
+	stx	%g4,[rp-8]
 	fanop
 C --
-	sub	%g1,cy,%g5
+	sub	%g1,cy,%g4
 	orn	u0,v0,%g2
-	bge,pt	%xcc,.Loop
+	bge,pt	%icc,.Loop
 	fanop
 C END MAIN LOOP
 .Lend4567:
-	orn	%g5,%g2,%g2
+	orn	%g4,%g2,%g2
 	andn	u0,v0,%g3
 	andn	%g2,%g3,%g2
 	srlx	%g2,63,cy
 	sub	u1,v1,%g1
-	stx	%g5,[rp+0]
-	sub	%g1,cy,%g5
+	stx	%g4,[rp+0]
+	sub	%g1,cy,%g4
 	orn	u1,v1,%g2
-	orn	%g5,%g2,%g2
+	orn	%g4,%g2,%g2
 	andn	u1,v1,%g3
 	andn	%g2,%g3,%g2
 	srlx	%g2,63,cy
 	sub	u2,v2,%g1
-	stx	%g5,[rp+8]
-	sub	%g1,cy,%g5
+	stx	%g4,[rp+8]
+	sub	%g1,cy,%g4
 	orn	u2,v2,%g2
-	orn	%g5,%g2,%g2
+	orn	%g4,%g2,%g2
 	andn	u2,v2,%g3
 	andn	%g2,%g3,%g2
 	add	rp,32,rp
 	srlx	%g2,63,cy
 	sub	u3,v3,%g1
-	stx	%g5,[rp-16]
-	sub	%g1,cy,%g5
+	stx	%g4,[rp-16]
+	sub	%g1,cy,%g4
 	orn	u3,v3,%g2
-	orn	%g5,%g2,%g2
+	orn	%g4,%g2,%g2
 	andn	u3,v3,%g3
 	andn	%g2,%g3,%g2
 	srlx	%g2,63,cy
-	stx	%g5,[rp-8]
+	stx	%g4,[rp-8]
 
 	addcc	n,4,n
-	bz,pn	%xcc,.Lret
+	bz,pn	%icc,.Lret
 	fanop
 
 .Loop0:	ldx	[up],u0
@@ -227,12 +206,12 @@ C END MAIN LOOP
 	subcc	n,1,n
 	sub	u0,v0,%g1
 	orn	u0,v0,%g2
-	sub	%g1,cy,%g5
+	sub	%g1,cy,%g4
 	andn	u0,v0,%g3
-	orn	%g5,%g2,%g2
-	stx	%g5,[rp-8]
+	orn	%g4,%g2,%g2
+	stx	%g4,[rp-8]
 	andn	%g2,%g3,%g2
-	bnz,pt	%xcc,.Loop0
+	bnz,pt	%icc,.Loop0
 	srlx	%g2,63,cy
 
 .Lret:	mov	cy,%i0
diff --git a/gmp/mpn/sparc64/ultrasparc1234/submul_1.asm b/gmp/mpn/sparc64/submul_1.asm
index 0bdb566b9f..ba91200315 100644
--- a/gmp/mpn/sparc64/ultrasparc1234/submul_1.asm
+++ b/gmp/mpn/sparc64/submul_1.asm
@@ -1,33 +1,22 @@
 dnl  SPARC v9 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
 dnl  subtract the result from a second limb vector.
 
-dnl  Copyright 2001-2003 Free Software Foundation, Inc.
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/sparc64/ultrasparc1234/lshiftc.asm b/gmp/mpn/sparc64/ultrasparc1234/lshiftc.asm
deleted file mode 100644
index 47286d569e..0000000000
--- a/gmp/mpn/sparc64/ultrasparc1234/lshiftc.asm
+++ /dev/null
@@ -1,165 +0,0 @@
-dnl  SPARC v9 mpn_lshiftc
-
-dnl  Copyright 1996, 2000-2003, 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC 1&2:     3
-C UltraSPARC 3:	      2.67
-
-C INPUT PARAMETERS
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`n',  `%i2')
-define(`cnt',`%i3')
-
-define(`u0', `%l0')
-define(`u1', `%l2')
-define(`u2', `%l4')
-define(`u3', `%l6')
-
-define(`tnc',`%i4')
-
-define(`fanop',`fitod %f0,%f2')		dnl  A quasi nop running in the FA pipe
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_lshiftc)
-	save	%sp,-160,%sp
-
-	sllx	n,3,%g1
-	sub	%g0,cnt,tnc		C negate shift count
-	add	up,%g1,up		C make %o1 point at end of src
-	add	rp,%g1,rp		C make %o0 point at end of res
-	ldx	[up-8],u3		C load first limb
-	subcc	n,5,n
-	srlx	u3,tnc,%i5		C compute function result
-	bl,pn	%xcc,.Lend1234
-	sllx	u3,cnt,%g3
-
-	subcc	n,4,n
-	ldx	[up-16],u0
-	ldx	[up-24],u1
-	add	up,-32,up
-	ldx	[up-0],u2
-	ldx	[up-8],u3
-	srlx	u0,tnc,%g2
-	bl,pn	%xcc,.Lend5678
-	not	%g3, %g3
-
-	b,a	.Loop
-	ALIGN(16)
-.Loop:
-	sllx	u0,cnt,%g1
-	andn	%g3,%g2,%g3
-	ldx	[up-16],u0
-	fanop
-C --
-	srlx	u1,tnc,%g2
-	subcc	n,4,n
-	stx	%g3,[rp-8]
-	not	%g1, %g1
-C --
-	sllx	u1,cnt,%g3
-	andn	%g1,%g2,%g1
-	ldx	[up-24],u1
-	fanop
-C --
-	srlx	u2,tnc,%g2
-	stx	%g1,[rp-16]
-	add	up,-32,up
-	not	%g3, %g3
-C --
-	sllx	u2,cnt,%g1
-	andn	%g3,%g2,%g3
-	ldx	[up-0],u2
-	fanop
-C --
-	srlx	u3,tnc,%g2
-	stx	%g3,[rp-24]
-	add	rp,-32,rp
-	not	%g1, %g1
-C --
-	sllx	u3,cnt,%g3
-	andn	%g1,%g2,%g1
-	ldx	[up-8],u3
-	fanop
-C --
-	srlx	u0,tnc,%g2
-	stx	%g1,[rp-0]
-	bge,pt	%xcc,.Loop
-	not	%g3, %g3
-C --
-.Lend5678:
-	sllx	u0,cnt,%g1
-	andn	%g3,%g2,%g3
-	srlx	u1,tnc,%g2
-	stx	%g3,[rp-8]
-	not	%g1, %g1
-	sllx	u1,cnt,%g3
-	andn	%g1,%g2,%g1
-	srlx	u2,tnc,%g2
-	stx	%g1,[rp-16]
-	not	%g3, %g3
-	sllx	u2,cnt,%g1
-	andn	%g3,%g2,%g3
-	srlx	u3,tnc,%g2
-	stx	%g3,[rp-24]
-	add	rp,-32,rp
-	not	%g1, %g1
-	sllx	u3,cnt,%g3		C carry...
-	andn	%g1,%g2,%g1
-	stx	%g1,[rp-0]
-
-.Lend1234:
-	addcc	n,4,n
-	bz,pn	%xcc,.Lret
-	fanop
-.Loop0:
-	add	rp,-8,rp
-	subcc	n,1,n
-	ldx	[up-16],u3
-	add	up,-8,up
-	srlx	u3,tnc,%g2
-	not	%g3, %g3
-	andn	%g3,%g2,%g3
-	stx	%g3,[rp]
-	sllx	u3,cnt,%g3
-	bnz,pt	%xcc,.Loop0
-	fanop
-.Lret:
-	not	%g3, %g3
-	stx	%g3,[rp-8]
-	mov	%i5,%i0
-	ret
-	restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparc34/gmp-mparam.h b/gmp/mpn/sparc64/ultrasparc34/gmp-mparam.h
deleted file mode 100644
index 0c525bbdcf..0000000000
--- a/gmp/mpn/sparc64/ultrasparc34/gmp-mparam.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/* ultrasparc3/4 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2002, 2004, 2006, 2008-2010, 2014 Free
-Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 1593 MHz ultrasparc3 running Solaris 10 (swift.nada.kth.se) */
-/* FFT tuning limit = 60000000 */
-/* Generated by tuneup.c, 2014-03-14, gcc 3.4 */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        20
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     25
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              2
-#define DIV_QR_1_UNNORM_THRESHOLD            1
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
-
-#define MUL_TOOM22_THRESHOLD                28
-#define MUL_TOOM33_THRESHOLD                93
-#define MUL_TOOM44_THRESHOLD               139
-#define MUL_TOOM6H_THRESHOLD               165
-#define MUL_TOOM8H_THRESHOLD               278
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      93
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     104
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      85
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      51
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      67
-
-#define SQR_BASECASE_THRESHOLD               7
-#define SQR_TOOM2_THRESHOLD                 71
-#define SQR_TOOM3_THRESHOLD                 98
-#define SQR_TOOM4_THRESHOLD                175
-#define SQR_TOOM6_THRESHOLD                190
-#define SQR_TOOM8_THRESHOLD                339
-
-#define MULMID_TOOM42_THRESHOLD             40
-
-#define MULMOD_BNM1_THRESHOLD               15
-#define SQRMOD_BNM1_THRESHOLD                9
-
-#define MUL_FFT_MODF_THRESHOLD             220  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    220, 5}, {     13, 6}, {     17, 7}, {      9, 6}, \
-    {     19, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
-    {     11, 7}, {     24, 8}, {     13, 9}, {      7, 8}, \
-    {     19, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
-    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
-    {     23, 8}, {     47, 9}, {     27,10}, {     15, 9}, \
-    {     39,10}, {     23, 9}, {     47,11}, {     15,10}, \
-    {     31, 9}, {     63, 8}, {    127, 7}, {    255, 9}, \
-    {     67,10}, {     39, 9}, {     79, 8}, {    159, 9}, \
-    {     83,10}, {     47, 9}, {     95, 8}, {    191, 7}, \
-    {    383,10}, {     55,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255, 7}, {    511,10}, {     71, 9}, \
-    {    143, 8}, {    287, 7}, {    575,10}, {     79, 9}, \
-    {    159, 8}, {    319,11}, {     47,10}, {     95, 9}, \
-    {    191, 8}, {    383,10}, {    103, 9}, {    207, 8}, \
-    {    415,10}, {    111,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255,10}, {    143, 9}, {    287, 8}, \
-    {    575,11}, {     79,10}, {    175, 9}, {    351,11}, \
-    {     95,10}, {    191, 9}, {    383,10}, {    207, 9}, \
-    {    415,11}, {    111,10}, {    223,12}, {     63,11}, \
-    {    127,10}, {    255,11}, {    143,10}, {    287, 9}, \
-    {    575, 8}, {   1151,11}, {    159,10}, {    319, 9}, \
-    {    639,11}, {    175,10}, {    351, 9}, {    703,12}, \
-    {     95,11}, {    191,10}, {    383,11}, {    207,10}, \
-    {    415,11}, {    223,10}, {    447,13}, {     63,12}, \
-    {    127,11}, {    287,10}, {    575,12}, {    159,11}, \
-    {    351,10}, {    703,12}, {    191,11}, {    415,12}, \
-    {    223,11}, {    479,13}, {    127,12}, {    287,11}, \
-    {    575,12}, {    351,13}, {    191,12}, {    415,11}, \
-    {    831,12}, {    479,14}, {    127,13}, {    255,12}, \
-    {    575,13}, {    319,12}, {    703,11}, {   1407,13}, \
-    {    383,12}, {    831,13}, {    447,12}, {    895,14}, \
-    {    255,13}, {    511,12}, {   1023,13}, {    575,12}, \
-    {   1215,13}, {    703,14}, {    383,13}, {    831,12}, \
-    {   1663,13}, {    895,15}, {    255,14}, {    511,13}, \
-    {   1151,14}, {    639,13}, {   1407,14}, {    767,13}, \
-    {   1663,14}, {    895,13}, {   1791,15}, {    511,14}, \
-    {   1023,13}, {   2047,14}, {   1151,13}, {   2303,14}, \
-    {   1407,15}, {    767,14}, {   1791,16}, {    511,15}, \
-    {   1023,14}, {   2303,15}, {   1279,14}, {   2815,15}, \
-    {   1535,14}, {   3199,15}, {   1791,14}, {   3583,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 172
-#define MUL_FFT_THRESHOLD                 2240
-
-#define SQR_FFT_MODF_THRESHOLD             244  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    244, 5}, {      8, 4}, {     17, 5}, {     15, 6}, \
-    {      8, 5}, {     17, 6}, {     17, 7}, {      9, 6}, \
-    {     19, 7}, {     10, 6}, {     21, 7}, {     17, 8}, \
-    {      9, 7}, {     20, 8}, {     11, 7}, {     23, 8}, \
-    {     21, 9}, {     11, 8}, {     25, 9}, {     15, 8}, \
-    {     31, 9}, {     19, 8}, {     39, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     63, 9}, {    127, 8}, {    255,10}, \
-    {     71, 9}, {    143, 8}, {    287, 7}, {    575,10}, \
-    {     79, 9}, {    159, 8}, {    319,11}, {     47, 9}, \
-    {    191, 8}, {    383, 7}, {    767, 9}, {    207,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511, 9}, {    271,10}, {    143, 9}, {    287, 8}, \
-    {    575,11}, {     79,10}, {    159, 9}, {    319, 8}, \
-    {    639,10}, {    175, 9}, {    351, 8}, {    703,10}, \
-    {    191, 9}, {    383, 8}, {    767,10}, {    207, 9}, \
-    {    415, 8}, {    831,10}, {    223, 9}, {    447, 8}, \
-    {    895,12}, {     63,11}, {    127,10}, {    271,11}, \
-    {    143,10}, {    287, 9}, {    575, 8}, {   1215,11}, \
-    {    159,10}, {    319, 9}, {    639,11}, {    175,10}, \
-    {    351, 9}, {    703,11}, {    191,10}, {    383,11}, \
-    {    207,10}, {    415, 9}, {    831, 8}, {   1663,10}, \
-    {    447,13}, {     63,12}, {    127,11}, {    271,10}, \
-    {    543, 9}, {   1087,11}, {    287,10}, {    575, 9}, \
-    {   1151,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    351,10}, {    703,12}, {    191,11}, {    415,10}, \
-    {    831,12}, {    223,11}, {    447,13}, {    127,12}, \
-    {    255,11}, {    543,12}, {    287,11}, {    607,12}, \
-    {    319,11}, {    639,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    415,11}, {    831,12}, {    479,14}, \
-    {    127,13}, {    255,12}, {    607,13}, {    319,12}, \
-    {    703,11}, {   1407,13}, {    383,12}, {    831,13}, \
-    {    447,12}, {    959,14}, {    255,13}, {    511,12}, \
-    {   1023,13}, {    575,12}, {   1151,13}, {    639,12}, \
-    {   1279,13}, {    703,14}, {    383,13}, {    831,12}, \
-    {   1663,13}, {    895,15}, {    255,14}, {    511,13}, \
-    {   1151,14}, {    639,13}, {   1407,14}, {    767,13}, \
-    {   1663,14}, {    895,13}, {   1791,15}, {    511,14}, \
-    {   1023,13}, {   2047,14}, {   1151,13}, {   2303,14}, \
-    {   1407,15}, {    767,14}, {   1791,16}, {    511,15}, \
-    {   1023,14}, {   2303,15}, {   1279,14}, {   2815,15}, \
-    {   1535,14}, {   3199,15}, {   1791,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 183
-#define SQR_FFT_THRESHOLD                 1728
-
-#define MULLO_BASECASE_THRESHOLD            19
-#define MULLO_DC_THRESHOLD                   0  /* never mpn_mullo_basecase */
-#define MULLO_MUL_N_THRESHOLD             4392
-
-#define DC_DIV_QR_THRESHOLD                 15
-#define DC_DIVAPPR_Q_THRESHOLD              64
-#define DC_BDIV_QR_THRESHOLD                29
-#define DC_BDIV_Q_THRESHOLD                 86
-
-#define INV_MULMOD_BNM1_THRESHOLD           54
-#define INV_NEWTON_THRESHOLD                17
-#define INV_APPR_THRESHOLD                  17
-
-#define BINV_NEWTON_THRESHOLD              111
-#define REDC_1_TO_REDC_2_THRESHOLD           0  /* always */
-#define REDC_2_TO_REDC_N_THRESHOLD         115
-
-#define MU_DIV_QR_THRESHOLD                680
-#define MU_DIVAPPR_Q_THRESHOLD             618
-#define MUPI_DIV_QR_THRESHOLD                0  /* always */
-#define MU_BDIV_QR_THRESHOLD               680
-#define MU_BDIV_Q_THRESHOLD                807
-
-#define POWM_SEC_TABLE  1,16,102,386,1985,2079
-
-#define MATRIX22_STRASSEN_THRESHOLD         12
-#define HGCD_THRESHOLD                      46
-#define HGCD_APPR_THRESHOLD                 50
-#define HGCD_REDUCE_THRESHOLD             1012
-#define GCD_DC_THRESHOLD                   124
-#define GCDEXT_DC_THRESHOLD                138
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                20
-#define GET_STR_PRECOMPUTE_THRESHOLD        28
-#define SET_STR_DC_THRESHOLD               324
-#define SET_STR_PRECOMPUTE_THRESHOLD      1043
-
-#define FAC_DSC_THRESHOLD                  422
-#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/gmp/mpn/sparc64/ultrasparct1/add_n.asm b/gmp/mpn/sparc64/ultrasparct1/add_n.asm
deleted file mode 100644
index 954c7f6d35..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/add_n.asm
+++ /dev/null
@@ -1,68 +0,0 @@
-dnl  SPARC v9 mpn_add_n for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T1:	 ?
-C UltraSPARC T2:	 ?
-
-C INPUT PARAMETERS
-define(`rp', `%o0')
-define(`up', `%o1')
-define(`vp', `%o2')
-define(`n',  `%o3')
-define(`cy', `%o4')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_add_nc)
-	b,a	L(ent)
-EPILOGUE()
-PROLOGUE(mpn_add_n)
-	mov	0, cy
-L(ent):	cmp	%g0, cy
-L(top):	ldx	[up+0], %o4
-	add	up, 8, up
-	ldx	[vp+0], %o5
-	add	vp, 8, vp
-	add	rp, 8, rp
-	add	n, -1, n
-	srlx	%o4, 32, %g1
-	srlx	%o5, 32, %g2
-	addccc	%o4, %o5, %g3
-	addccc	%g1, %g2, %g0
-	brgz	n, L(top)
-	 stx	%g3, [rp-8]
-
-	retl
-	addc	%g0, %g0, %o0
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct1/addlsh1_n.asm b/gmp/mpn/sparc64/ultrasparct1/addlsh1_n.asm
deleted file mode 100644
index 313479773f..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/addlsh1_n.asm
+++ /dev/null
@@ -1,41 +0,0 @@
-dnl  SPARC v9 mpn_addlsh1_n for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-define(LSH,             1)
-define(RSH,             63)
-
-define(func, mpn_addlsh1_n)
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n)
-
-include_mpn(`sparc64/ultrasparct1/addlshC_n.asm')
diff --git a/gmp/mpn/sparc64/ultrasparct1/addlsh2_n.asm b/gmp/mpn/sparc64/ultrasparct1/addlsh2_n.asm
deleted file mode 100644
index ee1afd0116..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/addlsh2_n.asm
+++ /dev/null
@@ -1,41 +0,0 @@
-dnl  SPARC v9 mpn_addlsh2_n for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-define(LSH,             2)
-define(RSH,             62)
-
-define(func, mpn_addlsh2_n)
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n)
-
-include_mpn(`sparc64/ultrasparct1/addlshC_n.asm')
diff --git a/gmp/mpn/sparc64/ultrasparct1/addlshC_n.asm b/gmp/mpn/sparc64/ultrasparct1/addlshC_n.asm
deleted file mode 100644
index 5be9a0d30a..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/addlshC_n.asm
+++ /dev/null
@@ -1,69 +0,0 @@
-dnl  SPARC v9 mpn_addlshC_n for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-C		   cycles/limb
-C UltraSPARC T1:	21
-C UltraSPARC T2:	 ?
-
-C INPUT PARAMETERS
-define(`rp', `%o0')
-define(`up', `%o1')
-define(`vp', `%o2')
-define(`n',  `%o3')
-define(`cy', `%o4')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(func)
-	mov	0, cy
-	mov	0, %g5
-	cmp	%g0, cy
-L(top):	ldx	[up+0], %o4
-	add	up, 8, up
-	ldx	[vp+0], %o5
-	add	vp, 8, vp
-	add	rp, 8, rp
-
-	sllx	%o5, LSH, %g4
-	add	n, -1, n
-	or	%g5, %g4, %g4
-	srlx	%o5, RSH, %g5
-
-	srlx	%o4, 32, %g1
-	srlx	%g4, 32, %g2
-	addccc	%o4, %g4, %g3
-	addccc	%g1, %g2, %g0
-	brgz	n, L(top)
-	 stx	%g3, [rp-8]
-
-	retl
-	addc	%g5, %g0, %o0
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct1/addmul_1.asm b/gmp/mpn/sparc64/ultrasparct1/addmul_1.asm
deleted file mode 100644
index 29dba966f3..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/addmul_1.asm
+++ /dev/null
@@ -1,86 +0,0 @@
-dnl  SPARC v9 mpn_addmul_1 for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T1:	74
-C UltraSPARC T2:	 ?
-
-C INPUT PARAMETERS
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`n',  `%i2')
-define(`v0', `%i3')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_addmul_1)
-	save	%sp, -176, %sp
-	mov	1, %o2
-	mov	%i0, %g2
-	srlx	%i3, 32, %o4
-	sllx	%o2, 32, %o2
-	srl	%i3, 0, %i3
-	mov	0, %g3
-	mov	0, %i0
-
-L(top):	ldx	[%i1+%g3], %g1
-	srl	%g1, 0, %g4
-	mulx	%g4, %i3, %o5
-	srlx	%g1, 32, %g1
-	mulx	%g1, %i3, %g5
-	mulx	%g4, %o4, %g4
-	mulx	%g1, %o4, %g1
-	srlx	%o5, 32, %o1
-	add	%g5, %o1, %o1
-	addcc	%o1, %g4, %g4
-	srl	%o5, 0, %o0
-	ldx	[%g2+%g3], %o5
-	sllx	%g4, 32, %o1
-	add	%g1, %o2, %l1
-	movlu	%xcc, %l1, %g1
-	add	%o1, %o0, %l0
-	addcc	%l0, %i0, %g5
-	srlx	%g4, 32, %i0
-	add	%i0, 1, %g4
-	movlu	%xcc, %g4, %i0
-	addcc	%o5, %g5, %g5
-	stx	%g5, [%g2+%g3]
-	add	%i0, 1, %g4
-	movlu	%xcc, %g4, %i0
-	add	%i2, -1, %i2
-	add	%i0, %g1, %i0
-	brnz,pt	%i2, L(top)
-	 add	%g3, 8, %g3
-	return	%i7+8
-	 nop
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct1/gmp-mparam.h b/gmp/mpn/sparc64/ultrasparct1/gmp-mparam.h
deleted file mode 100644
index 99db78ac0f..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/gmp-mparam.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/* Sparc64 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2002, 2004, 2006, 2008-2010 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 1000 MHz ultrasparc t1 running GNU/Linux */
-
-#define DIVREM_1_NORM_THRESHOLD              0  /* always */
-#define DIVREM_1_UNNORM_THRESHOLD            0  /* always */
-#define MOD_1_1P_METHOD                      2
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         13
-#define MOD_1U_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     34
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
-
-#define MUL_TOOM22_THRESHOLD                 8
-#define MUL_TOOM33_THRESHOLD                50
-#define MUL_TOOM44_THRESHOLD                99
-#define MUL_TOOM6H_THRESHOLD               125
-#define MUL_TOOM8H_THRESHOLD               187
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      65
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      77
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      65
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      50
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      34
-
-#define SQR_BASECASE_THRESHOLD               0  /* always */
-#define SQR_TOOM2_THRESHOLD                 14
-#define SQR_TOOM3_THRESHOLD                 57
-#define SQR_TOOM4_THRESHOLD                133
-#define SQR_TOOM6_THRESHOLD                156
-#define SQR_TOOM8_THRESHOLD                260
-
-#define MULMID_TOOM42_THRESHOLD             12
-
-#define MULMOD_BNM1_THRESHOLD                7
-#define SQRMOD_BNM1_THRESHOLD                7
-
-#define MUL_FFT_MODF_THRESHOLD             176  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    176, 5}, {      7, 6}, {      4, 5}, {      9, 6}, \
-    {      5, 5}, {     11, 6}, {     11, 7}, {      6, 6}, \
-    {     13, 7}, {      7, 6}, {     15, 7}, {      9, 8}, \
-    {      5, 7}, {     13, 8}, {      7, 7}, {     15, 6}, \
-    {     32, 7}, {     24, 8}, {     21, 9}, {     11, 8}, \
-    {     23,10}, {      7, 9}, {     15, 8}, {     33, 9}, \
-    {     19, 8}, {     39, 9}, {     23,10}, {     15, 9}, \
-    {     43,10}, {     23,11}, {     15,10}, {     31, 9}, \
-    {     63, 8}, {    127, 9}, {     67,10}, {     39, 9}, \
-    {     79, 8}, {    159,10}, {     47, 9}, {     95,11}, \
-    {   2048,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 53
-#define MUL_FFT_THRESHOLD                 1728
-
-
-#define SQR_FFT_MODF_THRESHOLD             148  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    148, 5}, {      7, 6}, {      4, 5}, {      9, 6}, \
-    {      5, 5}, {     11, 6}, {     11, 7}, {      6, 6}, \
-    {     13, 7}, {      7, 6}, {     15, 7}, {     13, 8}, \
-    {      7, 7}, {     16, 8}, {      9, 6}, {     38, 7}, \
-    {     20, 8}, {     11, 7}, {     24, 8}, {     13, 9}, \
-    {      7, 7}, {     30, 8}, {     19, 9}, {     11, 8}, \
-    {     25,10}, {      7, 9}, {     15, 8}, {     31, 9}, \
-    {     19, 8}, {     39, 9}, {     27,10}, {     15, 9}, \
-    {     39,10}, {     23, 9}, {     47, 8}, {     95, 9}, \
-    {     51,11}, {     15,10}, {     31, 8}, {    127,10}, \
-    {     39, 9}, {     79, 8}, {    159,10}, {     47, 9}, \
-    {     95,11}, {   2048,12}, {   4096,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 58
-#define SQR_FFT_THRESHOLD                 1344
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  28
-#define MULLO_MUL_N_THRESHOLD             3176
-
-#define DC_DIV_QR_THRESHOLD                 27
-#define DC_DIVAPPR_Q_THRESHOLD             106
-#define DC_BDIV_QR_THRESHOLD                27
-#define DC_BDIV_Q_THRESHOLD                 62
-
-#define INV_MULMOD_BNM1_THRESHOLD           14
-#define INV_NEWTON_THRESHOLD               163
-#define INV_APPR_THRESHOLD                 117
-
-#define BINV_NEWTON_THRESHOLD              166
-#define REDC_1_TO_REDC_N_THRESHOLD          31
-
-#define MU_DIV_QR_THRESHOLD                734
-#define MU_DIVAPPR_Q_THRESHOLD             748
-#define MUPI_DIV_QR_THRESHOLD               67
-#define MU_BDIV_QR_THRESHOLD               562
-#define MU_BDIV_Q_THRESHOLD                734
-
-#define POWM_SEC_TABLE  4,29,188,643,2741
-
-#define MATRIX22_STRASSEN_THRESHOLD         11
-#define HGCD_THRESHOLD                      58
-#define HGCD_APPR_THRESHOLD                 55
-#define HGCD_REDUCE_THRESHOLD              637
-#define GCD_DC_THRESHOLD                   186
-#define GCDEXT_DC_THRESHOLD                140
-#define JACOBI_BASE_METHOD                   3
-
-#define GET_STR_DC_THRESHOLD                20
-#define GET_STR_PRECOMPUTE_THRESHOLD        33
-#define SET_STR_DC_THRESHOLD               268
-#define SET_STR_PRECOMPUTE_THRESHOLD       960
-
-#define FAC_DSC_THRESHOLD                  268
-#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/gmp/mpn/sparc64/ultrasparct1/mul_1.asm b/gmp/mpn/sparc64/ultrasparct1/mul_1.asm
deleted file mode 100644
index 1fea2a19ef..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/mul_1.asm
+++ /dev/null
@@ -1,82 +0,0 @@
-dnl  SPARC v9 mpn_mul_1 for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T1:	68
-C UltraSPARC T2:	 ?
-
-C INPUT PARAMETERS
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`n',  `%i2')
-define(`v0', `%i3')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_mul_1)
-	save	%sp, -176, %sp
-	mov	1, %o2
-	mov	%i0, %g2
-	srlx	%i3, 32, %o4
-	sllx	%o2, 32, %o2
-	srl	%i3, 0, %i3
-	mov	0, %g3
-	mov	0, %i0
-
-L(top):	ldx	[%i1+%g3], %g1
-	srl	%g1, 0, %g4
-	mulx	%g4, %i3, %o5
-	srlx	%g1, 32, %g1
-	mulx	%g1, %i3, %g5
-	mulx	%g4, %o4, %g4
-	mulx	%g1, %o4, %g1
-	srlx	%o5, 32, %o1
-	add	%g5, %o1, %o1
-	addcc	%o1, %g4, %g4
-	srl	%o5, 0, %o0
-	sllx	%g4, 32, %o1
-	add	%g1, %o2, %l1
-	movlu	%xcc, %l1, %g1
-	add	%o1, %o0, %l0
-	addcc	%l0, %i0, %g5
-	srlx	%g4, 32, %i0
-	add	%i0, 1, %g4
-	movlu	%xcc, %g4, %i0
-	stx	%g5, [%g2+%g3]
-	add	%i2, -1, %i2
-	add	%i0, %g1, %i0
-	brnz,pt	%i2, L(top)
-	 add	%g3, 8, %g3
-	return	%i7+8
-	 nop
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct1/rsblsh1_n.asm b/gmp/mpn/sparc64/ultrasparct1/rsblsh1_n.asm
deleted file mode 100644
index 51bd4ab45b..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/rsblsh1_n.asm
+++ /dev/null
@@ -1,41 +0,0 @@
-dnl  SPARC v9 mpn_rsblsh1_n for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-define(LSH,             1)
-define(RSH,             63)
-
-define(func, mpn_rsblsh1_n)
-
-MULFUNC_PROLOGUE(mpn_rsblsh1_n)
-
-include_mpn(`sparc64/ultrasparct1/rsblshC_n.asm')
diff --git a/gmp/mpn/sparc64/ultrasparct1/rsblsh2_n.asm b/gmp/mpn/sparc64/ultrasparct1/rsblsh2_n.asm
deleted file mode 100644
index f0d208e198..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/rsblsh2_n.asm
+++ /dev/null
@@ -1,41 +0,0 @@
-dnl  SPARC v9 mpn_rsblsh2_n for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-define(LSH,             2)
-define(RSH,             62)
-
-define(func, mpn_rsblsh2_n)
-
-MULFUNC_PROLOGUE(mpn_rsblsh2_n)
-
-include_mpn(`sparc64/ultrasparct1/rsblshC_n.asm')
diff --git a/gmp/mpn/sparc64/ultrasparct1/rsblshC_n.asm b/gmp/mpn/sparc64/ultrasparct1/rsblshC_n.asm
deleted file mode 100644
index 7c03e9f97f..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/rsblshC_n.asm
+++ /dev/null
@@ -1,69 +0,0 @@
-dnl  SPARC v9 mpn_rsblshC_n for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-C		   cycles/limb
-C UltraSPARC T1:	21
-C UltraSPARC T2:	 ?
-
-C INPUT PARAMETERS
-define(`rp', `%o0')
-define(`up', `%o1')
-define(`vp', `%o2')
-define(`n',  `%o3')
-define(`cy', `%o4')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(func)
-	mov	0, cy
-	mov	0, %g5
-	cmp	%g0, cy
-L(top):	ldx	[up+0], %o4
-	add	up, 8, up
-	ldx	[vp+0], %o5
-	add	vp, 8, vp
-	add	rp, 8, rp
-
-	sllx	%o5, LSH, %g4
-	add	n, -1, n
-	or	%g5, %g4, %g4
-	srlx	%o5, RSH, %g5
-
-	srlx	%o4, 32, %g1
-	srlx	%g4, 32, %g2
-	subccc	%g4, %o4, %g3
-	subccc	%g2, %g1, %g0
-	brgz	n, L(top)
-	 stx	%g3, [rp-8]
-
-	retl
-	subc	%g5, %g0, %o0
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct1/sub_n.asm b/gmp/mpn/sparc64/ultrasparct1/sub_n.asm
deleted file mode 100644
index c2af89f08f..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/sub_n.asm
+++ /dev/null
@@ -1,68 +0,0 @@
-dnl  SPARC v9 mpn_sub_n for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T1:	 ?
-C UltraSPARC T2:	 ?
-
-C INPUT PARAMETERS
-define(`rp', `%o0')
-define(`up', `%o1')
-define(`vp', `%o2')
-define(`n',  `%o3')
-define(`cy', `%o4')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_sub_nc)
-	b,a	L(ent)
-EPILOGUE()
-PROLOGUE(mpn_sub_n)
-	mov	0, cy
-L(ent):	cmp	%g0, cy
-L(top):	ldx	[up+0], %o4
-	add	up, 8, up
-	ldx	[vp+0], %o5
-	add	vp, 8, vp
-	add	rp, 8, rp
-	add	n, -1, n
-	srlx	%o4, 32, %g1
-	srlx	%o5, 32, %g2
-	subccc	%o4, %o5, %g3
-	subccc	%g1, %g2, %g0
-	brgz	n, L(top)
-	 stx	%g3, [rp-8]
-
-	retl
-	addc	%g0, %g0, %o0
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct1/sublsh1_n.asm b/gmp/mpn/sparc64/ultrasparct1/sublsh1_n.asm
deleted file mode 100644
index 8c8fa80401..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/sublsh1_n.asm
+++ /dev/null
@@ -1,41 +0,0 @@
-dnl  SPARC v9 mpn_sublsh1_n for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-define(LSH,             1)
-define(RSH,             63)
-
-define(func, mpn_sublsh1_n)
-
-MULFUNC_PROLOGUE(mpn_sublsh1_n)
-
-include_mpn(`sparc64/ultrasparct1/sublshC_n.asm')
diff --git a/gmp/mpn/sparc64/ultrasparct1/sublsh2_n.asm b/gmp/mpn/sparc64/ultrasparct1/sublsh2_n.asm
deleted file mode 100644
index 2fd5eee71a..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/sublsh2_n.asm
+++ /dev/null
@@ -1,41 +0,0 @@
-dnl  SPARC v9 mpn_sublsh2_n for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-define(LSH,             2)
-define(RSH,             62)
-
-define(func, mpn_sublsh2_n)
-
-MULFUNC_PROLOGUE(mpn_sublsh2_n)
-
-include_mpn(`sparc64/ultrasparct1/sublshC_n.asm')
diff --git a/gmp/mpn/sparc64/ultrasparct1/sublshC_n.asm b/gmp/mpn/sparc64/ultrasparct1/sublshC_n.asm
deleted file mode 100644
index 01eafef1bc..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/sublshC_n.asm
+++ /dev/null
@@ -1,69 +0,0 @@
-dnl  SPARC v9 mpn_sublshC_n for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-C		   cycles/limb
-C UltraSPARC T1:	21
-C UltraSPARC T2:	 ?
-
-C INPUT PARAMETERS
-define(`rp', `%o0')
-define(`up', `%o1')
-define(`vp', `%o2')
-define(`n',  `%o3')
-define(`cy', `%o4')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(func)
-	mov	0, cy
-	mov	0, %g5
-	cmp	%g0, cy
-L(top):	ldx	[up+0], %o4
-	add	up, 8, up
-	ldx	[vp+0], %o5
-	add	vp, 8, vp
-	add	rp, 8, rp
-
-	sllx	%o5, LSH, %g4
-	add	n, -1, n
-	or	%g5, %g4, %g4
-	srlx	%o5, RSH, %g5
-
-	srlx	%o4, 32, %g1
-	srlx	%g4, 32, %g2
-	subccc	%o4, %g4, %g3
-	subccc	%g1, %g2, %g0
-	brgz	n, L(top)
-	 stx	%g3, [rp-8]
-
-	retl
-	addc	%g5, %g0, %o0
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct1/submul_1.asm b/gmp/mpn/sparc64/ultrasparct1/submul_1.asm
deleted file mode 100644
index 4f553a8063..0000000000
--- a/gmp/mpn/sparc64/ultrasparct1/submul_1.asm
+++ /dev/null
@@ -1,86 +0,0 @@
-dnl  SPARC v9 mpn_submul_1 for T1/T2.
-
-dnl  Copyright 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T1:	74
-C UltraSPARC T2:	 ?
-
-C INPUT PARAMETERS
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`n',  `%i2')
-define(`v0', `%i3')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_submul_1)
-	save	%sp, -176, %sp
-	mov	1, %o2
-	mov	%i0, %g2
-	srlx	%i3, 32, %o4
-	sllx	%o2, 32, %o2
-	srl	%i3, 0, %i3
-	mov	0, %g3
-	mov	0, %i0
-
-L(top):	ldx	[%i1+%g3], %g1
-	srl	%g1, 0, %g4
-	mulx	%g4, %i3, %o5
-	srlx	%g1, 32, %g1
-	mulx	%g1, %i3, %g5
-	mulx	%g4, %o4, %g4
-	mulx	%g1, %o4, %g1
-	srlx	%o5, 32, %o1
-	add	%g5, %o1, %o1
-	addcc	%o1, %g4, %g4
-	srl	%o5, 0, %o0
-	ldx	[%g2+%g3], %o5
-	sllx	%g4, 32, %o1
-	add	%g1, %o2, %l1
-	movlu	%xcc, %l1, %g1
-	add	%o1, %o0, %l0
-	addcc	%l0, %i0, %g5
-	srlx	%g4, 32, %i0
-	add	%i0, 1, %g4
-	movlu	%xcc, %g4, %i0
-	subcc	%o5, %g5, %g5
-	stx	%g5, [%g2+%g3]
-	add	%i0, 1, %g4
-	movlu	%xcc, %g4, %i0
-	add	%i2, -1, %i2
-	add	%i0, %g1, %i0
-	brnz,pt	%i2, L(top)
-	 add	%g3, 8, %g3
-	return	%i7+8
-	 nop
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/add_n.asm b/gmp/mpn/sparc64/ultrasparct3/add_n.asm
deleted file mode 100644
index 0170746895..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/add_n.asm
+++ /dev/null
@@ -1,126 +0,0 @@
-dnl  SPARC v9 mpn_add_n for T3/T4.
-
-dnl  Contributed to the GNU project by David Miller.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T3:	 8
-C UltraSPARC T4:	 3
-
-C INPUT PARAMETERS
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`vp', `%i2')
-define(`n',  `%i3')
-define(`cy', `%i4')
-
-define(`u0_off', `%l2')
-define(`u1_off', `%l3')
-define(`loop_n', `%l6')
-define(`tmp', `%l7')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_add_nc)
-	save	%sp, -176, %sp
-	b,a	L(ent)
-EPILOGUE()
-PROLOGUE(mpn_add_n)
-	save	%sp, -176, %sp
-
-	mov	0, cy
-L(ent):
-	subcc	n, 1, n
-	be	L(final_one)
-	 cmp	%g0, cy
-
-	ldx	[up + 0], %o4
-	sllx	n, 3, tmp
-
-	ldx	[vp + 0], %o5
-	add	up, tmp, u0_off
-
-	ldx	[up + 8], %g5
-	neg	tmp, loop_n
-
-	ldx	[vp + 8], %g1
-	add	u0_off, 8, u1_off
-
-	sub	loop_n, -(2 * 8), loop_n
-
-	brgez,pn loop_n, L(loop_tail)
-	 add	vp, (2 * 8), vp
-
-	b,a	L(top)
-	ALIGN(16)
-L(top):
-	addxccc(%o4, %o5, tmp)
-	ldx	[vp + 0], %o5
-
-	add	rp, (2 * 8), rp
-	ldx	[loop_n + u0_off], %o4
-
-	add	vp, (2 * 8), vp
-	stx	tmp, [rp - 16]
-
-	addxccc(%g1, %g5, tmp)
-	ldx	[vp - 8], %g1
-
-	ldx	[loop_n + u1_off], %g5
-	sub	loop_n, -(2 * 8), loop_n
-
-	brlz	loop_n, L(top)
-	 stx	tmp, [rp - 8]
-
-L(loop_tail):
-	addxccc(%o4, %o5, %g3)
-	add	loop_n, u0_off, up
-
-	addxccc(%g1, %g5, %g5)
-	stx	%g3, [rp + 0]
-
-	brgz,pt	loop_n, L(done)
-	 stx	%g5, [rp + 8]
-
-	add	rp, (2 * 8), rp
-L(final_one):
-	ldx	[up+0], %o4
-	ldx	[vp+0], %o5
-	addxccc(%o4, %o5, %g3)
-	stx	%g3, [rp+0]
-
-L(done):
-	addxc(%g0, %g0, %i0)
-	ret
-	 restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/addmul_1.asm b/gmp/mpn/sparc64/ultrasparct3/addmul_1.asm
deleted file mode 100644
index 939811e1ce..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/addmul_1.asm
+++ /dev/null
@@ -1,182 +0,0 @@
-dnl  SPARC v9 mpn_addmul_1 for T3/T4/T5.
-
-dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T3:	26
-C UltraSPARC T4:	4.5
-
-C INPUT PARAMETERS
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`n',  `%i2')
-define(`v0', `%i3')
-
-define(`u0',  `%l0')
-define(`u1',  `%l1')
-define(`u2',  `%l2')
-define(`u3',  `%l3')
-define(`r0',  `%l4')
-define(`r1',  `%l5')
-define(`r2',  `%l6')
-define(`r3',  `%l7')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_addmul_1)
-	save	%sp, -176, %sp
-	ldx	[up+0], %g1
-
-	and	n, 3, %g3
-	brz	%g3, L(b0)
-	 addcc	%g0, %g0, %g5			C clear carry limb, flag
-	cmp	%g3, 2
-	bcs	%xcc, L(b01)
-	 nop
-	be	%xcc, L(b10)
-	 ldx	[up+8], %g5
-
-L(b11):	ldx	[up+16], u3
-	mulx	%g1, v0, %o2
-	umulxhi(%g1, v0, %o3)
-	ldx	[rp+0], r1
-	mulx	%g5, v0, %o4
-	ldx	[rp+8], r2
-	umulxhi(%g5, v0, %o5)
-	ldx	[rp+16], r3
-	mulx	u3, v0, %g4
-	umulxhi(u3, v0, %g5)
-	addcc	%o3, %o4, %o4
-	addxccc(%o5, %g4, %g4)
-	addxc(	%g0, %g5, %g5)
-	addcc	r1, %o2, r1
-	stx	r1, [rp+0]
-	addxccc(r2, %o4, r2)
-	stx	r2, [rp+8]
-	addxccc(r3, %g4, r3)
-	stx	r3, [rp+16]
-	add	n, -3, n
-	add	up, 24, up
-	brz	n, L(xit)
-	 add	rp, 24, rp
-	b	L(com)
-	 nop
-
-L(b10):	mulx	%g1, v0, %o4
-	ldx	[rp+0], r2
-	umulxhi(%g1, v0, %o5)
-	ldx	[rp+8], r3
-	mulx	%g5, v0, %g4
-	umulxhi(%g5, v0, %g5)
-	addcc	%o5, %g4, %g4
-	addxc(	%g0, %g5, %g5)
-	addcc	r2, %o4, r2
-	stx	r2, [rp+0]
-	addxccc(r3, %g4, r3)
-	stx	r3, [rp+8]
-	add	n, -2, n
-	add	up, 16, up
-	brz	n, L(xit)
-	 add	rp, 16, rp
-	b	L(com)
-	 nop
-
-L(b01):	ldx	[rp+0], r3
-	mulx	%g1, v0, %g4
-	umulxhi(%g1, v0, %g5)
-	addcc	r3, %g4, r3
-	stx	r3, [rp+0]
-	add	n, -1, n
-	add	up, 8, up
-	brz	n, L(xit)
-	 add	rp, 8, rp
-
-L(com):	ldx	[up+0], %g1
-L(b0):	ldx	[up+8], u1
-	ldx	[up+16], u2
-	ldx	[up+24], u3
-	mulx	%g1, v0, %o0
-	umulxhi(%g1, v0, %o1)
-	b	L(lo0)
-	 nop
-
-	ALIGN(16)
-L(top):	ldx	[up+0], u0
-	addxc(	%g0, %g5, %g5)		C propagate carry into carry limb
-	ldx	[up+8], u1
-	addcc	r0, %o0, r0
-	ldx	[up+16], u2
-	addxccc(r1, %o2, r1)
-	ldx	[up+24], u3
-	addxccc(r2, %o4, r2)
-	stx	r0, [rp-32]
-	addxccc(r3, %g4, r3)
-	stx	r1, [rp-24]
-	mulx	u0, v0, %o0
-	stx	r2, [rp-16]
-	umulxhi(u0, v0, %o1)
-	stx	r3, [rp-8]
-L(lo0):	mulx	u1, v0, %o2
-	ldx	[rp+0], r0
-	umulxhi(u1, v0, %o3)
-	ldx	[rp+8], r1
-	mulx	u2, v0, %o4
-	ldx	[rp+16], r2
-	umulxhi(u2, v0, %o5)
-	ldx	[rp+24], r3
-	mulx	u3, v0, %g4
-	addxccc(%g5, %o0, %o0)
-	umulxhi(u3, v0, %g5)
-	add	up, 32, up
-	addxccc(%o1, %o2, %o2)
-	add	rp, 32, rp
-	addxccc(%o3, %o4, %o4)
-	add	n, -4, n
-	addxccc(%o5, %g4, %g4)
-	brgz	n, L(top)
-	 nop
-
-	addxc(	%g0, %g5, %g5)
-	addcc	r0, %o0, r0
-	stx	r0, [rp-32]
-	addxccc(r1, %o2, r1)
-	stx	r1, [rp-24]
-	addxccc(r2, %o4, r2)
-	stx	r2, [rp-16]
-	addxccc(r3, %g4, r3)
-	stx	r3, [rp-8]
-L(xit):	addxc(	%g0, %g5, %i0)
-	ret
-	 restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/aormul_2.asm b/gmp/mpn/sparc64/ultrasparct3/aormul_2.asm
deleted file mode 100644
index ccc6a4408d..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/aormul_2.asm
+++ /dev/null
@@ -1,228 +0,0 @@
-dnl  SPARC v9 mpn_mul_2 and mpn_addmul_2 for T3/T4/T5.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C		    cycles/limb      cycles/limb
-C		       mul_2           addmul_2
-C UltraSPARC T3:	22.5		 23.5
-C UltraSPARC T4:	 3.25		 3.75
-
-
-C The code is reasonably scheduled but also relies on OoO.  There was hope that
-C this could run at around 3.0 and 3.5 c/l respectively, on T4.  Two cycles per
-C iteration needs to be removed.
-C
-C We could almost use 2-way unrolling, but currently the wN registers live too
-C long.  By changing add x,w1,w1 to add x,w1,w0, i.e. migrate the values down-
-C wards, 2-way unrolling should become possible.  With n-indexed addressing it
-C should run no slower.
-C
-C The rp loads to g1/g3 are very much over-scheduled.  Presumably, they could
-C be postponed a full way, and then just one register could be used.
-
-C INPUT PARAMETERS
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`n',  `%i2')
-define(`vp', `%i3')
-
-define(`v0', `%o0')
-define(`v1', `%o1')
-
-define(`w0', `%o2')
-define(`w1', `%o3')
-define(`w2', `%o4')
-define(`w3', `%o5')
-
-ifdef(`OPERATION_mul_2',`
-      define(`AM2',      `')
-      define(`ADDX',	 `addcc`'$1')
-      define(`func',     `mpn_mul_2')
-')
-ifdef(`OPERATION_addmul_2',`
-      define(`AM2',      `$1')
-      define(`ADDX',	 `addxccc($1,$2,$3)')
-      define(`func',     `mpn_addmul_2')
-')
-
-
-MULFUNC_PROLOGUE(mpn_mul_2 mpn_addmul_2)
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(func)
-	save	%sp, -176, %sp
-
-	ldx	[vp+0], v0		C load v0
-	and	n, 3, %g5
-	ldx	[vp+8], v1		C load v1
-	add	n, -6, n
-	ldx	[up+0], %g4
-	brz	%g5, L(b0)
-	 cmp	%g5, 2
-	bcs	L(b1)
-	 nop
-	be	L(b2)
-	 nop
-
-L(b3):
-AM2(`	ldx	[rp+0], %g1')
-	mulx	%g4, v0, w2
-	umulxhi(%g4, v0, w3)
-	ldx	[up+8], %i5
-	mulx	%g4, v1, %l3
-	umulxhi(%g4, v1, %l7)
-AM2(`	ldx	[rp+8], %g3')
-	add	up, -8, up
-	add	rp, -8, rp
-	b	L(lo3)
-	 mov	0, w0
-
-L(b2):
-AM2(`	ldx	[rp+0], %g3')
-	mulx	%g4, v0, w3
-	umulxhi(%g4, v0, w0)
-	ldx	[up+8], %i4
-	mulx	%g4, v1, %l1
-	umulxhi(%g4, v1, %l5)
-AM2(`	ldx	[rp+8], %g1')
-	add	rp, 16, rp
-	brlz	n, L(end)
-	 mov	0, w1
-	ba	L(top)
-	 add	up, 16, up
-
-L(b1):
-AM2(`	ldx	[rp+0], %g1')
-	mulx	%g4, v0, w0
-	umulxhi(%g4, v0, w1)
-	ldx	[up+8], %i5
-	mulx	%g4, v1, %l3
-	umulxhi(%g4, v1, %l7)
-AM2(`	ldx	[rp+8], %g3')
-	add	up, 8, up
-	add	rp, 8, rp
-	b	L(lo1)
-	 mov	0, w2
-
-L(b0):
-AM2(`	ldx	[rp+0], %g3')
-	mulx	%g4, v0, w1
-	umulxhi(%g4, v0, w2)
-	ldx	[up+8], %i4
-	mulx	%g4, v1, %l1
-	umulxhi(%g4, v1, %l5)
-AM2(`	ldx	[rp+8], %g1')
-	b	L(lo0)
-	 mov	0, w3
-
-	ALIGN(16)			C cycle
-L(top):	mulx	%i4, v0, %l2		C 0->5
-	umulxhi(%i4, v0, %l6)		C 0->5
-	ldx	[up+0], %i5		C 1->6
-AM2(`	addcc	w3, %g3, w3')		C 1
-	stx	w3, [rp-16]		C 2
-	ADDX(`	%l1, w0, w0')		C 2
-	addxccc(%l5, w1, w1)		C 3
-	mulx	%i4, v1, %l3		C 3->9
-	umulxhi(%i4, v1, %l7)		C 4->9
-AM2(`	ldx	[rp+0], %g3')		C 4
-	addcc	%l2, w0, w0		C 5
-	addxccc(%l6, w1, w1)		C 5
-	addxc(	%g0, %g0, w2)		C 6
-L(lo1):	mulx	%i5, v0, %l0		C 6
-	umulxhi(%i5, v0, %l4)		C 7
-	ldx	[up+8], %i4		C 7
-AM2(`	addcc	w0, %g1, w0')		C 8
-	stx	w0, [rp-8]		C 8
-	ADDX(`	%l3, w1, w1')		C 9
-	addxccc(%l7, w2, w2)		C 9
-	mulx	%i5, v1, %l1		C 10
-	umulxhi(%i5, v1, %l5)		C 10
-AM2(`	ldx	[rp+8], %g1')		C 11
-	addcc	%l0, w1, w1		C 11
-	addxccc(%l4, w2, w2)		C 12
-	addxc(	%g0, %g0, w3)		C 12
-L(lo0):	mulx	%i4, v0, %l2		C 13
-	umulxhi(%i4, v0, %l6)		C 13
-	ldx	[up+16], %i5		C 14
-AM2(`	addcc	w1, %g3, w1')		C 14
-	stx	w1, [rp+0]		C 15
-	ADDX(`	%l1, w2, w2')		C 15
-	addxccc(%l5, w3, w3)		C 16
-	mulx	%i4, v1, %l3		C 16
-	umulxhi(%i4, v1, %l7)		C 17
-AM2(`	ldx	[rp+16], %g3')		C 17
-	addcc	%l2, w2, w2		C 18
-	addxccc(%l6, w3, w3)		C 18
-	addxc(	%g0, %g0, w0)		C 19
-L(lo3):	mulx	%i5, v0, %l0		C 19
-	umulxhi(%i5, v0, %l4)		C 20
-	ldx	[up+24], %i4		C 20
-AM2(`	addcc	w2, %g1, w2')		C 21
-	stx	w2, [rp+8]		C 21
-	ADDX(`	%l3, w3, w3')		C 22
-	addxccc(%l7, w0, w0)		C 22
-	mulx	%i5, v1, %l1		C 23
-	umulxhi(%i5, v1, %l5)		C 23
-AM2(`	ldx	[rp+24], %g1')		C 24
-	addcc	%l0, w3, w3		C 24
-	addxccc(%l4, w0, w0)		C 25
-	addxc(	%g0, %g0, w1)		C 25
-	add	up, 32, up
-	add	rp, 32, rp
-	brgz	n, L(top)
-	 add	n, -4, n
-
-L(end):	mulx	%i4, v0, %l2
-	umulxhi(%i4, v0, %l6)
-AM2(`	addcc	w3, %g3, w3')
-	stx	w3, [rp-16]
-	ADDX(`	%l1, w0, w0')
-	addxccc(%l5, w1, w1)
-	mulx	%i4, v1, %l3
-	umulxhi(%i4, v1, %l7)
-	addcc	%l2, w0, w0
-	addxccc(%l6, w1, w1)
-	addxc(	%g0, %g0, w2)
-AM2(`	addcc	w0, %g1, w0')
-	stx	w0, [rp-8]
-	ADDX(`	%l3, w1, w1')
-	stx	w1, [rp+0]
-	addxc(%l7, w2, %i0)
-
-	ret
-	 restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/aormul_4.asm b/gmp/mpn/sparc64/ultrasparct3/aormul_4.asm
deleted file mode 100644
index 845f6d6d69..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/aormul_4.asm
+++ /dev/null
@@ -1,219 +0,0 @@
-dnl  SPARC v9 mpn_mul_4 and mpn_addmul_4 for T3/T4/T5.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C		    cycles/limb      cycles/limb
-C		       mul_4           addmul_4
-C UltraSPARC T3:	21.5		22.0
-C UltraSPARC T4:	 2.625		 2.75
-
-
-C The code is well-scheduled and relies on OoO very little.  There is hope that
-C this will run at around 2.5 and 2.75 c/l respectively, on T4.
-
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`n',  `%i2')
-define(`vp', `%i3')
-
-define(`v0', `%g1')
-define(`v1', `%o7')
-define(`v2', `%g2')
-define(`v3', `%i3')
-
-define(`w0', `%o0')
-define(`w1', `%o1')
-define(`w2', `%o2')
-define(`w3', `%o3')
-define(`w4', `%o4')
-
-define(`r0', `%o5')
-
-define(`u0', `%i4')
-define(`u1', `%i5')
-
-define(`rp0', `rp')
-define(`rp1', `%g3')
-define(`rp2', `%g4')
-define(`up0', `up')
-define(`up1', `%g5')
-
-ifdef(`OPERATION_mul_4',`
-      define(`AM4',      `')
-      define(`ADDX',	 `addcc`'$1')
-      define(`func',     `mpn_mul_4')
-')
-ifdef(`OPERATION_addmul_4',`
-      define(`AM4',      `$1')
-      define(`ADDX',	 `addxccc($1,$2,$3)')
-      define(`func',     `mpn_addmul_4')
-')
-
-
-MULFUNC_PROLOGUE(mpn_mul_4 mpn_addmul_4)
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(func)
-	save	%sp, -176, %sp
-
-	ldx	[up + 0], u1		C load up[0] early
-	andcc	n, 1, %g0		C is n odd?
-	ldx	[vp + 0], v0
-	sllx	n, 3, n
-	ldx	[vp + 8], v1
-	add	n, -28, n
-	ldx	[vp + 16], v2
-	add	rp, -16, rp
-	ldx	[vp + 24], v3
-	add	up, n, up0
-	add	rp, n, rp0
-	add	up0, 8, up1
-	add	rp0, 8, rp1
-	add	rp0, 16, rp2
-	mulx	u1, v0, %l0
-	mov	0, w0
-	mulx	u1, v1, %l1
-	mov	0, w1
-	mulx	u1, v2, %l2
-	mov	0, w2
-	mulx	u1, v3, %l3
-	mov	0, w3
-
-	be	L(evn)
-	 neg	n, n
-
-L(odd):	mov	u1, u0
-	ldx	[up1 + n], u1
-AM4(`	ldx	[rp2 + n], r0')
-	umulxhi(u0, v0, %l4)
-	umulxhi(u0, v1, %l5)
-	umulxhi(u0, v2, %l6)
-	umulxhi(u0, v3, %l7)
-	b	L(mid)
-	 add	n, 8, n
-
-L(evn):	ldx	[up1 + n], u0
-AM4(`	ldx	[rp2 + n], r0')
-	umulxhi(u1, v0, %l4)
-	umulxhi(u1, v1, %l5)
-	umulxhi(u1, v2, %l6)
-	umulxhi(u1, v3, %l7)
-	add	n, 16, n
-
-	ALIGN(16)
-L(top):	addcc	%l0, w0, w0
-	mulx	u0, v0, %l0	C w 0
-	addxccc(%l1, w1, w1)
-	mulx	u0, v1, %l1	C w 1
-	addxccc(%l2, w2, w2)
-	mulx	u0, v2, %l2	C w 2
-	addxccc(%l3, w3, w3)
-	mulx	u0, v3, %l3	C w 3
-	ldx	[up0 + n], u1
-	addxc(	%g0, %g0, w4)
-AM4(`	addcc	r0, w0, w0')
-	stx	w0, [rp0 + n]
-	ADDX(`	%l4, w1, w0')
-	umulxhi(u0, v0, %l4)	C w 1
-AM4(`	ldx	[rp1 + n], r0')
-	addxccc(%l5, w2, w1)
-	umulxhi(u0, v1, %l5)	C w 2
-	addxccc(%l6, w3, w2)
-	umulxhi(u0, v2, %l6)	C w 3
-	addxc(	%l7, w4, w3)
-	umulxhi(u0, v3, %l7)	C w 4
-L(mid):	addcc	%l0, w0, w0
-	mulx	u1, v0, %l0	C w 1
-	addxccc(%l1, w1, w1)
-	mulx	u1, v1, %l1	C w 2
-	addxccc(%l2, w2, w2)
-	mulx	u1, v2, %l2	C w 3
-	addxccc(%l3, w3, w3)
-	mulx	u1, v3, %l3	C w 4
-	ldx	[up1 + n], u0
-	addxc(	%g0, %g0, w4)
-AM4(`	addcc	r0, w0, w0')
-	stx	w0, [rp1 + n]
-	ADDX(`	%l4, w1, w0')
-	umulxhi(u1, v0, %l4)	C w 2
-AM4(`	ldx	[rp2 + n], r0')
-	addxccc(%l5, w2, w1)
-	umulxhi(u1, v1, %l5)	C w 3
-	addxccc(%l6, w3, w2)
-	umulxhi(u1, v2, %l6)	C w 4
-	addxc(	%l7, w4, w3)
-	umulxhi(u1, v3, %l7)	C w 5
-	brlz	n, L(top)
-	 add	n, 16, n
-
-L(end):	addcc	%l0, w0, w0
-	mulx	u0, v0, %l0
-	addxccc(%l1, w1, w1)
-	mulx	u0, v1, %l1
-	addxccc(%l2, w2, w2)
-	mulx	u0, v2, %l2
-	addxccc(%l3, w3, w3)
-	mulx	u0, v3, %l3
-	addxc(	%g0, %g0, w4)
-AM4(`	addcc	r0, w0, w0')
-	stx	w0, [rp0 + n]
-	ADDX(`	%l4, w1, w0')
-	umulxhi(u0, v0, %l4)
-AM4(`	ldx	[rp1 + n], r0')
-	addxccc(%l5, w2, w1)
-	umulxhi(u0, v1, %l5)
-	addxccc(%l6, w3, w2)
-	umulxhi(u0, v2, %l6)
-	addxc(	%l7, w4, w3)
-	umulxhi(u0, v3, %l7)
-	addcc	%l0, w0, w0
-	addxccc(%l1, w1, w1)
-	addxccc(%l2, w2, w2)
-	addxccc(%l3, w3, w3)
-	addxc(	%g0, %g0, w4)
-AM4(`	addcc	r0, w0, w0')
-	stx	w0, [rp1 + n]
-	ADDX(`	%l4, w1, w0')
-	addxccc(%l5, w2, w1)
-	addxccc(%l6, w3, w2)
-	stx	w0, [rp2 + n]
-	add	n, 16, n
-	stx	w1, [rp1 + n]
-	stx	w2, [rp2 + n]
-	addxc(	%l7, w4, %i0)
-	ret
-	 restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/aorslsh_n.asm b/gmp/mpn/sparc64/ultrasparct3/aorslsh_n.asm
deleted file mode 100644
index 1014b1ba23..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/aorslsh_n.asm
+++ /dev/null
@@ -1,147 +0,0 @@
-dnl  SPARC v9 mpn_addlsh_n and mpn_sublsh_n for T3/T4/T5.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T3:	11
-C UltraSPARC T4:	 4
-
-C For sublsh_n we combine the two shifted limbs using xnor, using the identity
-C (a xor not b) = (not (a xor b)) which equals (not (a or b)) when (a and b) =
-C 0 as it is in our usage.  This gives us the ones complement for free.
-C Unfortunately, the same trick will not work for rsblsh_n, which will instead
-C require a separate negation.
-C
-C FIXME: Add rsblsh_n to this file.
-
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`vp', `%i2')
-define(`n',  `%i3')
-define(`cnt',`%i4')
-
-define(`tnc',`%o5')
-
-ifdef(`OPERATION_addlsh_n',`
-  define(`INITCY', `subcc	%g0, 0, %g0')
-  define(`MERGE',  `or')
-  define(`func',   `mpn_addlsh_n')
-')
-ifdef(`OPERATION_sublsh_n',`
-  define(`INITCY', `subcc	%g0, 1, %g0')
-  define(`MERGE',  `xnor')
-  define(`func',   `mpn_sublsh_n')
-')
-
-define(`rp0',  `rp')
-define(`rp1',  `%o2')
-define(`up0',  `up')
-define(`up1',  `%o3')
-define(`vp0',  `vp')
-define(`vp1',  `%o4')
-
-MULFUNC_PROLOGUE(mpn_addlsh_n mpn_sublsh_n)
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(func)
-	save	%sp, -176, %sp
-	mov	64, tnc
-	sub	tnc, cnt, tnc
-
-	andcc	n, 1, %g0
-	sllx	n, 3, n
-	add	n, -16, n
-	add	up, n, up0
-	add	vp, n, vp0
-	add	rp, n, rp0
-	add	up0, 8, up1
-	add	vp0, 8, vp1
-	add	rp0, -8, rp1
-	add	rp0, -16, rp0
-	neg	n, n
-	be	L(evn)
-	 INITCY
-
-L(odd):	ldx	[vp0 + n], %l1
-	mov	0, %l2
-	ldx	[up0 + n], %l5
-	sllx	%l1, cnt, %g3
-	brgez	n, L(wd1)
-	 add	n, 8, n
-	ldx	[vp0 + n], %l0
-	b	L(lo1)
-	 sllx	%l1, cnt, %g3
-
-L(evn):	ldx	[vp0 + n], %l0
-	mov	0, %l3
-	ldx	[up0 + n], %l4
-	ldx	[vp1 + n], %l1
-	b	L(lo0)
-	 sllx	%l0, cnt, %g1
-
-L(top):	addxccc(%l6, %l4, %o0)
-	ldx	[vp0 + n], %l0
-	sllx	%l1, cnt, %g3
-	stx	%o0, [rp0 + n]
-L(lo1):	srlx	%l1, tnc, %l3
-	MERGE	%l2, %g3, %l7
-	ldx	[up0 + n], %l4
-	addxccc(%l7, %l5, %o1)
-	ldx	[vp1 + n], %l1
-	sllx	%l0, cnt, %g1
-	stx	%o1, [rp1 + n]
-L(lo0):	srlx	%l0, tnc, %l2
-	MERGE	%l3, %g1, %l6
-	ldx	[up1 + n], %l5
-	brlz,pt	n, L(top)
-	 add	n, 16, n
-
-	addxccc(%l6, %l4, %o0)
-	sllx	%l1, cnt, %g3
-	stx	%o0, [rp0 + n]
-L(wd1):	srlx	%l1, tnc, %l3
-	MERGE	%l2, %g3, %l7
-	addxccc(%l7, %l5, %o1)
-	stx	%o1, [rp1 + n]
-
-ifdef(`OPERATION_addlsh_n',
-`	addxc(	%l3, %g0, %i0)')
-ifdef(`OPERATION_sublsh_n',
-`	addxc(	%g0, %g0, %g1)
-	add	%g1, -1, %g1
-	sub	%l3, %g1, %i0')
-
-	ret
-	 restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm b/gmp/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm
deleted file mode 100644
index 550860d368..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm
+++ /dev/null
@@ -1,147 +0,0 @@
-dnl  SPARC T3/T4/T5 mpn_bdiv_dbm1c.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                  cycles/limb
-C UltraSPARC T3:	25
-C UltraSPARC T4/T5:	 4
-
-C INPUT PARAMETERS
-define(`qp',  `%i0')
-define(`ap',  `%i1')
-define(`n',   `%i2')
-define(`bd',  `%i3')
-define(`h',   `%i4')
-
-define(`plo0',`%g4')  define(`plo1',`%g5')
-define(`phi0',`%l0')  define(`phi1',`%l1')
-define(`a0',  `%g1')  define(`a1',  `%g3')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_bdiv_dbm1c)
-	save	%sp, -176, %sp
-
-	and	n, 3, %g5
-	ldx	[ap + 0], %g2
-	add	n, -5, n
-	brz	%g5, L(b0)
-	 cmp	%g5, 2
-	bcs	%xcc, L(b1)
-	 nop
-	be	%xcc, L(b2)
-	 nop
-
-L(b3):	ldx	[ap + 8], a0
-	mulx	bd, %g2, plo1
-	umulxhi(bd, %g2, phi1)
-	ldx	[ap + 16], a1
-	add	qp, -24, qp
-	b	L(lo3)
-	 add	ap, -8, ap
-
-L(b2):	ldx	[ap + 8], a1
-	mulx	bd, %g2, plo0
-	umulxhi(bd, %g2, phi0)
-	brlz,pt n, L(wd2)
-	 nop
-L(gt2):	ldx	[ap + 16], a0
-	add	ap, 16, ap
-	b	L(lo2)
-	 add	n, -1, n
-
-L(b1):	mulx	bd, %g2, plo1
-	 umulxhi(bd, %g2, phi1)
-	brlz,pn	n, L(wd1)
-	 add	qp, -8, qp
-L(gt1):	ldx	[ap + 8], a0
-	ldx	[ap + 16], a1
-	b	L(lo1)
-	 add	ap, 8, ap
-
-L(b0):	ldx	[ap + 8], a1
-	mulx	bd, %g2, plo0
-	umulxhi(bd, %g2, phi0)
-	ldx	[ap + 16], a0
-	b	L(lo0)
-	 add	qp, -16, qp
-
-L(top):	ldx	[ap + 0], a0
-	sub	h, phi1, h
-L(lo2):	mulx	bd, a1, plo1
-	umulxhi(bd, a1, phi1)
-	subcc	h, plo0, h
-	addxc(	phi0, %g0, phi0)
-	stx	h, [qp + 0]
-	ldx	[ap + 8], a1
-	sub	h, phi0, h
-L(lo1):	mulx	bd, a0, plo0
-	umulxhi(bd, a0, phi0)
-	subcc	h, plo1, h
-	addxc(	phi1, %g0, phi1)
-	stx	h, [qp + 8]
-	ldx	[ap + 16], a0
-	sub	h, phi1, h
-L(lo0):	mulx	bd, a1, plo1
-	umulxhi(bd, a1, phi1)
-	subcc	h, plo0, h
-	addxc(	phi0, %g0, phi0)
-	stx	h, [qp + 16]
-	ldx	[ap + 24], a1
-	sub	h, phi0, h
-L(lo3):	mulx	bd, a0, plo0
-	umulxhi(bd, a0, phi0)
-	subcc	h, plo1, h
-	addxc(	phi1, %g0, phi1)
-	stx	h, [qp + 24]
-	add	ap, 32, ap
-	add	qp, 32, qp
-	brgz,pt	n, L(top)
-	 add	n, -4, n
-
-L(end):	sub	h, phi1, h
-L(wd2):	mulx	bd, a1, plo1
-	umulxhi(bd, a1, phi1)
-	subcc	h, plo0, h
-	addxc(	phi0, %g0, phi0)
-	stx	h, [qp + 0]
-	sub	h, phi0, h
-L(wd1):	subcc	h, plo1, h
-	addxc(	phi1, %g0, phi1)
-	stx	h, [qp + 8]
-	sub	h, phi1, %i0
-
-	ret
-	 restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/cnd_aors_n.asm b/gmp/mpn/sparc64/ultrasparct3/cnd_aors_n.asm
deleted file mode 100644
index f10ee72c1f..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/cnd_aors_n.asm
+++ /dev/null
@@ -1,143 +0,0 @@
-dnl  SPARC v9 mpn_cnd_add_n and mpn_cnd_sub_n for T3/T4/T5.
-
-dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T3:	 8.5
-C UltraSPARC T4:	 3
-
-C We use a double-pointer trick to allow indexed addressing.  Its setup
-C cost might be a problem in these functions, since we don't expect huge n
-C arguments.
-C
-C For sub we need ~(a & mask) = (~a | ~mask) but by complementing mask we can
-C instead do ~(a & ~mask) = (~a | mask), allowing us to use the orn insn.
-
-C INPUT PARAMETERS
-define(`cnd', `%i0')
-define(`rp',  `%i1')
-define(`up',  `%i2')
-define(`vp',  `%i3')
-define(`n',   `%i4')
-
-define(`mask',   `cnd')
-define(`up0', `%l0')  define(`up1', `%l1')
-define(`vp0', `%l2')  define(`vp1', `%l3')
-define(`rp0', `%g4')  define(`rp1', `%g5')
-define(`u0',  `%l4')  define(`u1',  `%l5')
-define(`v0',  `%l6')  define(`v1',  `%l7')
-define(`x0',  `%g1')  define(`x1',  `%g3')
-define(`w0',  `%g1')  define(`w1',  `%g3')
-
-ifdef(`OPERATION_cnd_add_n',`
-  define(`LOGOP',   `and	$1, $2, $3')
-  define(`MAKEMASK',`cmp	%g0, $1
-		     subc	%g0, %g0, $2')
-  define(`INITCY',  `addcc	%g0, 0, %g0')
-  define(`RETVAL',  `addxc(	%g0, %g0, %i0)')
-  define(`func',    `mpn_cnd_add_n')
-')
-ifdef(`OPERATION_cnd_sub_n',`
-  define(`LOGOP',   `orn	$2, $1, $3')
-  define(`MAKEMASK',`cmp	$1, 1
-		     subc	%g0, %g0, $2')
-  define(`INITCY',  `subcc	%g0, 1, %g0')
-  define(`RETVAL',  `addxc(	%g0, %g0, %i0)
-		     xor	%i0, 1, %i0')
-  define(`func',    `mpn_cnd_sub_n')
-')
-
-MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(func)
-	save	%sp, -176, %sp
-
-	MAKEMASK(cnd,mask)
-
-	andcc	n, 1, %g0
-	sllx	n, 3, n
-	add	n, -16, n
-	add	vp, n, vp0
-	add	up, n, up0
-	add	rp, n, rp0
-	neg	n, n
-	be	L(evn)
-	 INITCY
-
-L(odd):	ldx	[vp0 + n], v1
-	ldx	[up0 + n], u1
-	LOGOP(	v1, mask, x1)
-	addxccc(u1, x1, w1)
-	stx	w1, [rp0 + n]
-	add	n, 8, n
-	brgz	n, L(rtn)
-	 nop
-
-L(evn):	add	vp0, 8, vp1
-	add	up0, 8, up1
-	add	rp0, -24, rp1
-	ldx	[vp0 + n], v0
-	ldx	[vp1 + n], v1
-	ldx	[up0 + n], u0
-	ldx	[up1 + n], u1
-	add	n, 16, n
-	brgz	n, L(end)
-	 add	rp0, -16, rp0
-
-L(top):	LOGOP(	v0, mask, x0)
-	ldx	[vp0 + n], v0
-	LOGOP(	v1, mask, x1)
-	ldx	[vp1 + n], v1
-	addxccc(u0, x0, w0)
-	ldx	[up0 + n], u0
-	addxccc(u1, x1, w1)
-	ldx	[up1 + n], u1
-	stx	w0, [rp0 + n]
-	add	n, 16, n
-	brlez	n, L(top)
-	 stx	w1, [rp1 + n]
-
-L(end):	LOGOP(	v0, mask, x0)
-	LOGOP(	v1, mask, x1)
-	addxccc(u0, x0, w0)
-	addxccc(u1, x1, w1)
-	stx	w0, [rp0 + n]
-	stx	w1, [rp1 + 32]
-
-L(rtn):	RETVAL
-	ret
-	 restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/dive_1.asm b/gmp/mpn/sparc64/ultrasparct3/dive_1.asm
deleted file mode 100644
index d7dbdf953c..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/dive_1.asm
+++ /dev/null
@@ -1,129 +0,0 @@
-dnl  SPARC T3/T4/T5 mpn_divexact_1.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                  cycles/limb
-C UltraSPARC T3:	31
-C UltraSPARC T4/T5:	20-26  hits 20 early, then sharply drops
-
-C INPUT PARAMETERS
-define(`qp',  `%i0')
-define(`ap',  `%i1')
-define(`n',   `%i2')
-define(`d',   `%i3')
-
-define(`dinv',`%o4')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_divexact_1)
-	save	%sp, -176, %sp
-	cmp	n, 1
-	bne,pt	%xcc, L(gt1)
-	 ldx	[ap], %o5
-	udivx	%o5, d, %g1
-	stx	%g1, [qp]
-	return	%i7+8
-	 nop
-
-L(gt1):	add	d, -1, %g1
-	andn	%g1, d, %g1
-	popc	%g1, %i4		C i4 = count_trailing_zeros(d)
-
-	srlx	d, %i4, d
-	srlx	d, 1, %g1
-	and	%g1, 127, %g1
-
-	LEA64(binvert_limb_table, g2, g4)
-	ldub	[%g2+%g1], %g1
-	add	%g1, %g1, %g2
-	mulx	%g1, %g1, %g1
-	mulx	%g1, d, %g1
-	sub	%g2, %g1, %g2
-	add	%g2, %g2, %g1
-	mulx	%g2, %g2, %g2
-	mulx	%g2, d, %g2
-	sub	%g1, %g2, %g1
-	add	%g1, %g1, %o7
-	mulx	%g1, %g1, %g1
-	mulx	%g1, d, %g1
-	add	n, -2, n
-	brz,pt	%i4, L(norm)
-	 sub	%o7, %g1, dinv
-
-L(unnorm):
-	mov	0, %g4
-	sub	%g0, %i4, %o2
-	srlx	%o5, %i4, %o5
-L(top_unnorm):
-	ldx	[ap+8], %g3
-	add	ap, 8, ap
-	sllx	%g3, %o2, %g5
-	or	%g5, %o5, %g5
-	srlx	%g3, %i4, %o5
-	subcc	%g5, %g4, %g4
-	mulx	%g4, dinv, %g1
-	stx	%g1, [qp]
-	add	qp, 8, qp
-	umulxhi(d, %g1, %g1)
-	addxc(	%g1, %g0, %g4)
-	brgz,pt	n, L(top_unnorm)
-	 add	n, -1, n
-
-	sub	%o5, %g4, %g4
-	mulx	%g4, dinv, %g1
-	stx	%g1, [qp]
-	return	%i7+8
-	 nop
-
-L(norm):
-	mulx	dinv, %o5, %g1
-	stx	%g1, [qp]
-	add	qp, 8, qp
-	addcc	%g0, 0, %g4
-L(top_norm):
-	umulxhi(d, %g1, %g1)
-	ldx	[ap+8], %g5
-	add	ap, 8, ap
-	addxc(	%g1, %g0, %g1)
-	subcc	%g5, %g1, %g1
-	mulx	%g1, dinv, %g1
-	stx	%g1, [qp]
-	add	qp, 8, qp
-	brgz,pt	n, L(top_norm)
-	 add	n, -1, n
-
-	return	%i7+8
-	 nop
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/hamdist.asm b/gmp/mpn/sparc64/ultrasparct3/hamdist.asm
deleted file mode 100644
index 20ed8bf15b..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/hamdist.asm
+++ /dev/null
@@ -1,78 +0,0 @@
-dnl  SPARC v9 mpn_hamdist for T3/T4.
-
-dnl  Contributed to the GNU project by David Miller.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T3:	18
-C UltraSPARC T4:	 3.5
-
-C INPUT PARAMETERS
-define(`up',   `%o0')
-define(`vp',   `%o1')
-define(`n',    `%o2')
-define(`pcnt', `%o5')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_hamdist)
-	subcc	n, 1, n
-	be	L(final_one)
-	 clr	pcnt
-L(top):
-	ldx	[up + 0], %g1
-	ldx	[vp + 0], %g2
-	ldx	[up + 8], %o4
-	ldx	[vp + 8], %g3
-	sub	n, 2, n
-	xor	%g1, %g2, %g1
-	add	up, 16, up
-	popc	%g1, %g2
-	add	vp, 16, vp
-	xor	%o4, %g3, %o4
-	add	pcnt, %g2, pcnt
-	popc	%o4, %g3
-	brgz	n, L(top)
-	 add	pcnt, %g3, pcnt
-	brlz,pt	n, L(done)
-	 nop
-L(final_one):
-	ldx	[up + 0], %g1
-	ldx	[vp + 0], %g2
-	xor	%g1,%g2, %g1
-	popc	%g1, %g2
-	add	pcnt, %g2, pcnt
-L(done):
-	retl
-	 mov	pcnt, %o0
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/invert_limb.asm b/gmp/mpn/sparc64/ultrasparct3/invert_limb.asm
deleted file mode 100644
index 4da49cf030..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/invert_limb.asm
+++ /dev/null
@@ -1,92 +0,0 @@
-dnl  SPARC T3/T4/T5 mpn_invert_limb.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                  cycles/limb
-C UltraSPARC T3:	 ?
-C UltraSPARC T4/T5:	 ?
-
-C INPUT PARAMETERS
-define(`d',  `%o0')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_invert_limb)
-	srlx	d, 54, %g1
-	LEA64(approx_tab, g2, g3)
-	and	%g1, 0x1fe, %g1
-	srlx	d, 24, %g4
-	lduh	[%g2+%g1], %g3
-	add	%g4, 1, %g4
-	sllx	%g3, 11, %g2
-	add	%g2, -1, %g2
-	mulx	%g3, %g3, %g3
-	mulx	%g3, %g4, %g3
-	srlx	%g3, 40, %g3
-	sub	%g2, %g3, %g2
-	sllx	%g2, 60, %g1
-	mulx	%g2, %g2, %g3
-	mulx	%g3, %g4, %g4
-	sub	%g1, %g4, %g1
-	srlx	%g1, 47, %g1
-	sllx	%g2, 13, %g2
-	add	%g1, %g2, %g1
-	and	d, 1, %g2
-	srlx	%g1, 1, %g4
-	sub	%g0, %g2, %g3
-	and	%g4, %g3, %g3
-	srlx	d, 1, %g4
-	add	%g4, %g2, %g2
-	mulx	%g1, %g2, %g2
-	sub	%g3, %g2, %g2
-	umulxhi(%g1, %g2, %g2)
-	srlx	%g2, 1, %g2
-	sllx	%g1, 31, %g1
-	add	%g2, %g1, %g1
-	mulx	%g1, d, %g3
-	umulxhi(d, %g1, %g4)
-	addcc	%g3, d, %g0
-	addxc(	%g4, d, %o0)
-	jmp	%o7+8
-	 sub	%g1, %o0, %o0
-EPILOGUE()
-
-	RODATA
-	ALIGN(2)
-	TYPE(	approx_tab, object)
-	SIZE(	approx_tab, 512)
-approx_tab:
-forloop(i,256,512-1,dnl
-`	.half	eval(0x7fd00/i)
-')dnl
diff --git a/gmp/mpn/sparc64/ultrasparct3/missing.asm b/gmp/mpn/sparc64/ultrasparct3/missing.asm
deleted file mode 100644
index c79032dd38..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/missing.asm
+++ /dev/null
@@ -1,77 +0,0 @@
-dnl  SPARC v9-2011 simulation support.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ASM_START()
-PROLOGUE(__gmpn_umulh)
-	save	%sp, -176, %sp
-	ldx	[%sp+2047+176+256], %o0
-	ldx	[%sp+2047+176+256+8], %o1
-	rd	%ccr, %o4
-	srl	%o0, 0, %l4
-	srl	%o1, 0, %l1
-	srlx	%o1, 32, %o1
-	mulx	%o1, %l4, %l2
-	srlx	%o0, 32, %o0
-	mulx	%o0, %l1, %l3
-	mulx	%l1, %l4, %l1
-	srlx	%l1, 32, %l1
-	add	%l2, %l1, %l2
-	addcc	%l2, %l3, %l2
-	mulx	%o1, %o0, %o1
-	mov	0, %l1
-	movcs	%xcc, 1, %l1
-	sllx	%l1, 32, %l1
-	add	%o1, %l1, %o1
-	srlx	%l2, 32, %o0
-	add	%o1, %o0, %o0
-	stx	%o0, [%sp+2047+176+256]
-	wr	%o4, 0, %ccr
-	ret
-	 restore
-EPILOGUE()
-
-PROLOGUE(__gmpn_lzcnt)
-	save	%sp, -176, %sp
-	ldx	[%sp+2047+176+256], %o0
-	brz,a	%o0, 2f
-	 mov	64, %o1
-	brlz	%o0, 2f
-	 mov	0, %o1
-1:	sllx	%o0, 1, %o0
-	brgz	%o0, 1b
-	 add	%o1, 1, %o1
-	stx	%o1, [%sp+2047+176+256]
-2:	ret
-	 restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/missing.m4 b/gmp/mpn/sparc64/ultrasparct3/missing.m4
deleted file mode 100644
index e5d6d8e98e..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/missing.m4
+++ /dev/null
@@ -1,88 +0,0 @@
-dnl  SPARC v9-2011 simulation support.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-dnl Usage addxccc(r1,r2,r3, t1)
-dnl  64-bit add with carry-in and carry-out
-dnl  FIXME: Register g2 must not be destination
-
-define(`addxccc',`dnl
-	add	%sp, -512, %sp
-	stx	%g2, [%sp+2047+256+16]
-	mov	0, %g2
-	movcs	%xcc, -1, %g2
-	addcc	%g2, 1, %g0
-	addccc	$1, $2, $3
-	ldx	[%sp+2047+256+16], %g2
-	sub	%sp, -512, %sp
-')
-
-
-dnl Usage addxc(r1,r2,r3, t1,t2)
-dnl  64-bit add with carry-in
-
-define(`addxc',`dnl
-	bcc	%xcc, 1f
-	 add	$1, $2, $3
-	add	$3, 1, $3
-1:
-')
-
-
-dnl Usage umulxhi(r1,r2,r3)
-dnl  64-bit multiply returning upper 64 bits
-dnl  Calls __gmpn_umulh using a non-standard calling convention
-
-define(`umulxhi',`dnl
-	add	%sp, -512, %sp
-	stx	$1, [%sp+2047+256]
-	stx	$2, [%sp+2047+256+8]
-	stx	%o7, [%sp+2047+256+16]
-	call	__gmpn_umulh
-	 nop
-	ldx	[%sp+2047+256+16], %o7
-	ldx	[%sp+2047+256], $3
-	sub	%sp, -512, %sp
-')
-dnl Usage lzcnt(r1,r2)
-dnl  Plain count leading zeros
-dnl  Calls __gmpn_lzcnt using a non-standard calling convention
-
-define(`lzcnt',`dnl
-	add	%sp, -512, %sp
-	stx	%o7, [%sp+2047+256+16]
-	call	__gmpn_lzcnt
-	 stx	$1, [%sp+2047+256]
-	ldx	[%sp+2047+256+16], %o7
-	ldx	[%sp+2047+256], $2
-	sub	%sp, -512, %sp
-')
diff --git a/gmp/mpn/sparc64/ultrasparct3/mod_1_4.asm b/gmp/mpn/sparc64/ultrasparct3/mod_1_4.asm
deleted file mode 100644
index 08facbd1cc..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/mod_1_4.asm
+++ /dev/null
@@ -1,233 +0,0 @@
-dnl  SPARC T3/T4/T5 mpn_mod_1s_4p.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                  cycles/limb
-C UltraSPARC T3:	30
-C UltraSPARC T4/T5:	 4
-
-C INPUT PARAMETERS
-define(`ap',  `%o0')
-define(`n',   `%o1')
-define(`d',   `%o2')
-define(`cps', `%o3')
-
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_mod_1s_4p)
-	save	%sp, -176, %sp
-	ldx	[%i3+16], %o4
-	ldx	[%i3+24], %o3
-	ldx	[%i3+32], %o2
-	ldx	[%i3+40], %o1
-	ldx	[%i3+48], %o0
-
-	and	%i1, 3, %g3
-	sllx	%i1, 3, %g1
-	add	%i0, %g1, %i0
-	brz	%g3, L(b00)
-	 cmp	%g3, 2
-	bcs	%xcc, L(b01)
-	 nop
-	be	%xcc, L(b10)
-	 nop
-
-L(b11):	ldx	[%i0-16], %g2
-	mulx	%g2, %o4, %g5
-	umulxhi(%g2, %o4, %g3)
-	ldx	[%i0-24], %g4
-	addcc	%g5, %g4, %g5
-	addxc(	%g3, %g0, %g4)
-	ldx	[%i0-8], %g2
-	mulx	%g2, %o3, %g1
-	umulxhi(%g2, %o3, %g3)
-	addcc	%g1, %g5, %g1
-	addxc(	%g3, %g4, %g2)
-	ba,pt	%xcc, .L8
-	 add	%i0, -32, %i0
-
-L(b00):	ldx	[%i0-24], %g3
-	mulx	%g3, %o4, %g2
-	umulxhi(%g3, %o4, %g5)
-	ldx	[%i0-32], %g4
-	addcc	%g2, %g4, %g2
-	addxc(	%g5, %g0, %g3)
-	ldx	[%i0-16], %g4
-	mulx	%g4, %o3, %g5
-	umulxhi(%g4, %o3, %i5)
-	addcc	%g2, %g5, %g5
-	addxc(	%g3, %i5, %g4)
-	ldx	[%i0-8], %g2
-	mulx	%g2, %o2, %g1
-	umulxhi(%g2, %o2, %g3)
-	addcc	%g1, %g5, %g1
-	addxc(	%g3, %g4, %g2)
-	ba,pt	%xcc, .L8
-	 add	%i0, -40, %i0
-
-L(b01):	ldx	[%i0-8], %g1
-	mov	0, %g2
-	ba,pt	%xcc, .L8
-	 add	%i0, -16, %i0
-
-L(b10):	ldx	[%i0-8], %g2
-	ldx	[%i0-16], %g1
-	add	%i0, -24, %i0
-
-.L8:	add	%i1, -5, %g3
-	brlz,pn	%g3, L(end)
-	 nop
-
-L(top):	ldx	[%i0-16], %i4
-	mulx	%i4, %o4, %o5
-	umulxhi(%i4, %o4, %i1)
-	ldx	[%i0-24], %i5
-	addcc	%o5, %i5, %o5
-	addxc(	%i1, %g0, %i4)
-	ldx	[%i0-8], %i5
-	mulx	%i5, %o3, %o7
-	umulxhi(%i5, %o3, %i1)
-	addcc	%o5, %o7, %o7
-	addxc(	%i4, %i1, %i5)
-	ldx	[%i0+0], %g4
-	mulx	%g4, %o2, %i1
-	umulxhi(%g4, %o2, %i4)
-	addcc	%o7, %i1, %i1
-	addxc(	%i5, %i4, %g4)
-	mulx	%g1, %o1, %i5
-	umulxhi(%g1, %o1, %i4)
-	addcc	%i1, %i5, %i5
-	addxc(	%g4, %i4, %g5)
-	mulx	%g2, %o0, %g1
-	umulxhi(%g2, %o0, %g4)
-	addcc	%g1, %i5, %g1
-	addxc(	%g4, %g5, %g2)
-	add	%g3, -4, %g3
-	brgez,pt %g3, L(top)
-	 add	%i0, -32, %i0
-
-L(end):	mulx	%g2, %o4, %g5
-	umulxhi(%g2, %o4, %g3)
-	addcc	%g1, %g5, %g5
-	addxc(	%g3, %g0, %g2)
-	ldx	[%i3+8], %i0
-	ldx	[%i3], %g4
-	sub	%g0, %i0, %i5
-	srlx	%g5, %i5, %i5
-	sllx	%g2, %i0, %g2
-	or	%i5, %g2, %g1
-	mulx	%g1, %g4, %l7
-	umulxhi(%g1, %g4, %g3)
-	sllx	%g5, %i0, %g2
-	add	%g1, 1, %g1
-	addcc	%l7, %g2, %g5
-	addxc(	%g3, %g1, %g1)
-	mulx	%g1, %i2, %g1
-	sub	%g2, %g1, %g2
-	cmp	%g2, %g5
-	add	%i2, %g2, %g1
-	movlu	%xcc, %g2, %g1
-	subcc	%g1, %i2, %g2
-	movgeu	%xcc, %g2, %g1
-	return	%i7+8
-	 srlx	%g1, %o0, %o0
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1s_4p_cps)
-	save	%sp, -176, %sp
-	lzcnt(	%i1, %i5)
-	sllx	%i1, %i5, %i1
-	call	mpn_invert_limb, 0
-	 mov	%i1, %o0
-	stx	%o0, [%i0]
-	sra	%i5, 0, %g1
-	stx	%g1, [%i0+8]
-	sub	%g0, %i5, %g2
-	srlx	%o0, %g2, %g2
-	mov	1, %g1
-	sllx	%g1, %i5, %g1
-	or	%g2, %g1, %g2
-	sub	%g0, %i1, %g1
-	mulx	%g2, %g1, %g2
-	srlx	%g2, %i5, %g1
-	stx	%g1, [%i0+16]
-
-	umulxhi(%o0, %g2, %g3)
-	add	%g2, %g3, %g3
-	xnor	%g0, %g3, %g3
-	mulx	%g3, %i1, %g3
-	mulx	%g2, %o0, %g2
-	cmp	%g2, %g3
-	add	%i1, %g3, %g1
-	movgeu	%xcc, %g3, %g1
-	srlx	%g1, %i5, %g2
-	stx	%g2, [%i0+24]
-
-	umulxhi(%o0, %g1, %g3)
-	add	%g1, %g3, %g3
-	xnor	%g0, %g3, %g3
-	mulx	%g3, %i1, %g3
-	mulx	%g1, %o0, %g1
-	cmp	%g1, %g3
-	add	%i1, %g3, %g2
-	movgeu	%xcc, %g3, %g2
-	srlx	%g2, %i5, %g1
-	stx	%g1, [%i0+32]
-
-	umulxhi(%o0, %g2, %g3)
-	add	%g2, %g3, %g3
-	xnor	%g0, %g3, %g3
-	mulx	%g3, %i1, %g3
-	mulx	%g2, %o0, %g2
-	cmp	%g2, %g3
-	add	%i1, %g3, %g1
-	movgeu	%xcc, %g3, %g1
-	srlx	%g1, %i5, %g2
-	stx	%g2, [%i0+40]
-
-	umulxhi(%o0, %g1, %g2)
-	add	%g1, %g2, %g2
-	xnor	%g0, %g2, %g2
-	mulx	%g2, %i1, %g2
-	mulx	%g1, %o0, %o0
-	cmp	%o0, %g2
-	add	%i1, %g2, %g3
-	movgeu	%xcc, %g2, %g3
-	srlx	%g3, %i5, %i5
-	stx	%i5, [%i0+48]
-
-	return	%i7+8
-	 nop
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/mod_34lsub1.asm b/gmp/mpn/sparc64/ultrasparct3/mod_34lsub1.asm
deleted file mode 100644
index 874428069e..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/mod_34lsub1.asm
+++ /dev/null
@@ -1,117 +0,0 @@
-dnl  SPARC v9 mpn_mod_34lsub1 for T3/T4/T5.
-
-dnl  Copyright 2005, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		    cycles/limb
-C UltraSPARC T1:	 -
-C UltraSPARC T3:	 5
-C UltraSPARC T4:	 1.57
-
-C This is based on the powerpc64/mode64 code.
-
-C INPUT PARAMETERS
-define(`up', `%i0')
-define(`n',  `%i1')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_mod_34lsub1)
-	save	%sp, -176, %sp
-
-	mov	0, %g1
-	mov	0, %g3
-	mov	0, %g4
-	addcc	%g0, 0, %g5
-
-	add	n, -3, n
-	brlz	n, L(lt3)
-	 nop
-
-	add	n, -3, n
-	ldx	[up+0], %l5
-	ldx	[up+8], %l6
-	ldx	[up+16], %l7
-	brlz	n, L(end)
-	 add	up, 24, up
-
-	ALIGN(16)
-L(top):	addxccc(%g1, %l5, %g1)
-	ldx	[up+0], %l5
-	addxccc(%g3, %l6, %g3)
-	ldx	[up+8], %l6
-	addxccc(%g4, %l7, %g4)
-	ldx	[up+16], %l7
-	add	n, -3, n
-	brgez	n, L(top)
-	 add	up, 24, up
-
-L(end):	addxccc(	%g1, %l5, %g1)
-	addxccc(%g3, %l6, %g3)
-	addxccc(%g4, %l7, %g4)
-	addxc(	%g5, %g0, %g5)
-
-L(lt3):	cmp	n, -2
-	blt	L(2)
-	 nop
-
-	ldx	[up+0], %l5
-	mov	0, %l6
-	beq	L(1)
-	 addcc	%g1, %l5, %g1
-
-	ldx	[up+8], %l6
-L(1):	addxccc(%g3, %l6, %g3)
-	addxccc(%g4, %g0, %g4)
-	addxc(	%g5, %g0, %g5)
-
-L(2):	sllx	%g1, 16, %l0
-	srlx	%l0, 16, %l0		C %l0 = %g1 mod 2^48
-	srlx	%g1, 48, %l3		C %l3 = %g1 div 2^48
-	srl	%g3, 0, %g1
-	sllx	%g1, 16, %l4		C %l4 = (%g3 mod 2^32) << 16
-	srlx	%g3, 32, %l5		C %l5 = %g3 div 2^32
-	sethi	%hi(0xffff0000), %g1
-	andn	%g4, %g1, %g1
-	sllx	%g1, 32, %l6		C %l6 = (%g4 mod 2^16) << 32
-	srlx	%g4, 16, %l7		C %l7 = %g4 div 2^16
-
-	add	%l0, %l3, %l0
-	add	%l4, %l5, %l4
-	add	%l6, %l7, %l6
-
-	add	%l0, %l4, %l0
-	add	%l6, %g5, %l6
-
-	add	%l0, %l6, %i0
-	ret
-	 restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/mode1o.asm b/gmp/mpn/sparc64/ultrasparct3/mode1o.asm
deleted file mode 100644
index 494e1d3f4f..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/mode1o.asm
+++ /dev/null
@@ -1,82 +0,0 @@
-dnl  SPARC T3/T4/T5 mpn_modexact_1c_odd.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C                  cycles/limb
-C UltraSPARC T3:	30
-C UltraSPARC T4/T5:	26
-
-C INPUT PARAMETERS
-define(`ap',  `%o0')
-define(`n',   `%o1')
-define(`d',   `%o2')
-define(`cy',  `%o3')
-
-define(`dinv',`%o5')
-define(`a0',  `%g1')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_modexact_1c_odd)
-	srlx	d, 1, %g1
-	and	%g1, 127, %g1
-
-	LEA64(binvert_limb_table, g2, g4)
-	ldub	[%g2+%g1], %g1
-	add	%g1, %g1, %g2
-	mulx	%g1, %g1, %g1
-	mulx	%g1, d, %g1
-	sub	%g2, %g1, %g2
-	add	%g2, %g2, %g1
-	mulx	%g2, %g2, %g2
-	mulx	%g2, d, %g2
-	sub	%g1, %g2, %g1
-	add	%g1, %g1, %o5
-	mulx	%g1, %g1, %g1
-	mulx	%g1, d, %g1
-	sub	%o5, %g1, dinv
-	add	n, -1, n
-
-L(top):	ldx	[ap], a0
-	add	ap, 8, ap
-	subcc	a0, cy, %g3
-	mulx	%g3, dinv, %g5
-	umulxhi(d, %g5, %g5)
-	addxc(	%g5, %g0, cy)
-	brnz,pt	n, L(top)
-	 add	n, -1, n
-
-	retl
-	 mov	cy, %o0
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/mul_1.asm b/gmp/mpn/sparc64/ultrasparct3/mul_1.asm
deleted file mode 100644
index af05d627bc..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/mul_1.asm
+++ /dev/null
@@ -1,174 +0,0 @@
-dnl  SPARC v9 mpn_mul_1 for T3/T4/T5.
-
-dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T3:	23
-C UltraSPARC T4:	 3
-
-C INPUT PARAMETERS
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`n',  `%i2')
-define(`v0', `%i3')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_mul_1)
-	save	%sp, -176, %sp
-
-	and	n, 3, %g5
-	add	n, -4, n
-	brz	%g5, L(b0)
-	 cmp	%g5, 2
-	bcs	%xcc, L(b1)
-	 nop
-	be	%xcc, L(b2)
-	 nop
-
-L(b3):	addcc	%g0, %g0, %i5
-	ldx	[up+0], %l0
-	ldx	[up+8], %l1
-	ldx	[up+16], %l2
-	mulx	%l0, v0, %o0
-	umulxhi(%l0, v0, %o1)
-	brgz	n, L(gt3)
-	 add	rp, -8, rp
-	mulx	%l1, v0, %o2
-	umulxhi(%l1, v0, %o3)
-	b	L(wd3)
-	 nop
-L(gt3):	ldx	[up+24], %l3
-	mulx	%l1, v0, %o2
-	umulxhi(%l1, v0, %o3)
-	add	up, 24, up
-	b	L(lo3)
-	 add	n, -3, n
-
-L(b2):	addcc	%g0, %g0, %o1
-	ldx	[up+0], %l1
-	ldx	[up+8], %l2
-	brgz	n, L(gt2)
-	 add	rp, -16, rp
-	mulx	%l1, v0, %o2
-	umulxhi(%l1, v0, %o3)
-	mulx	%l2, v0, %o4
-	umulxhi(%l2, v0, %o5)
-	b	L(wd2)
-	 nop
-L(gt2):	ldx	[up+16], %l3
-	mulx	%l1, v0, %o2
-	umulxhi(%l1, v0, %o3)
-	ldx	[up+24], %l0
-	mulx	%l2, v0, %o4
-	umulxhi(%l2, v0, %o5)
-	add	up, 16, up
-	b	L(lo2)
-	 add	n, -2, n
-
-L(b1):	addcc	%g0, %g0, %o3
-	ldx	[up+0], %l2
-	brgz	n, L(gt1)
-	nop
-	mulx	%l2, v0, %o4
-	stx	%o4, [rp+0]
-	umulxhi(%l2, v0, %i0)
-	ret
-	 restore
-L(gt1):	ldx	[up+8], %l3
-	ldx	[up+16], %l0
-	mulx	%l2, v0, %o4
-	umulxhi(%l2, v0, %o5)
-	ldx	[up+24], %l1
-	mulx	%l3, v0, %i4
-	umulxhi(%l3, v0, %i5)
-	add	rp, -24, rp
-	add	up, 8, up
-	b	L(lo1)
-	 add	n, -1, n
-
-L(b0):	addcc	%g0, %g0, %o5
-	ldx	[up+0], %l3
-	ldx	[up+8], %l0
-	ldx	[up+16], %l1
-	mulx	%l3, v0, %i4
-	umulxhi(%l3, v0, %i5)
-	ldx	[up+24], %l2
-	mulx	%l0, v0, %o0
-	umulxhi(%l0, v0, %o1)
-	b	L(lo0)
-	 nop
-
-	ALIGN(16)
-L(top):	ldx	[up+0], %l3	C 0
-	addxccc(%i4, %o5, %i4)	C 0
-	mulx	%l1, v0, %o2	C 1
-	stx	%i4, [rp+0]	C 1
-	umulxhi(%l1, v0, %o3)	C 2
-L(lo3):	ldx	[up+8], %l0	C 2
-	addxccc(%o0, %i5, %o0)	C 3
-	mulx	%l2, v0, %o4	C 3
-	stx	%o0, [rp+8]	C 4
-	umulxhi(%l2, v0, %o5)	C 4
-L(lo2):	ldx	[up+16], %l1	C 5
-	addxccc(%o2, %o1, %o2)	C 5
-	mulx	%l3, v0, %i4	C 6
-	stx	%o2, [rp+16]	C 6
-	umulxhi(%l3, v0, %i5)	C 7
-L(lo1):	ldx	[up+24], %l2	C 7
-	addxccc(%o4, %o3, %o4)	C 8
-	mulx	%l0, v0, %o0	C 8
-	stx	%o4, [rp+24]	C 9
-	umulxhi(%l0, v0, %o1)	C 9
-	add	rp, 32, rp	C 10
-L(lo0):	add	up, 32, up	C 10
-	brgz	n, L(top)	C 11
-	 add	n, -4, n	C 11
-
-L(end):	addxccc(%i4, %o5, %i4)
-	mulx	%l1, v0, %o2
-	stx	%i4, [rp+0]
-	umulxhi(%l1, v0, %o3)
-	addxccc(%o0, %i5, %o0)
-L(wd3):	mulx	%l2, v0, %o4
-	stx	%o0, [rp+8]
-	umulxhi(%l2, v0, %o5)
-	addxccc(%o2, %o1, %o2)
-L(wd2):	stx	%o2, [rp+16]
-	addxccc(%o4, %o3, %o4)
-	stx	%o4, [rp+24]
-	addxc(	%g0, %o5, %i0)
-	ret
-	 restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/popcount.asm b/gmp/mpn/sparc64/ultrasparct3/popcount.asm
deleted file mode 100644
index de80f3c809..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/popcount.asm
+++ /dev/null
@@ -1,70 +0,0 @@
-dnl  SPARC v9 mpn_popcount for T3/T4.
-
-dnl  Contributed to the GNU project by David Miller.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T3:	15
-C UltraSPARC T4:	 2.5
-
-C INPUT PARAMETERS
-define(`up',   `%o0')
-define(`n',    `%o1')
-define(`pcnt', `%o5')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_popcount)
-	subcc	n, 1, n
-	be	L(final_one)
-	 clr	pcnt
-L(top):
-	ldx	[up + 0], %g1
-	sub	n, 2, n
-	ldx	[up + 8], %o4
-	add	up, 16, up
-	popc	%g1, %g2
-	popc	%o4, %g3
-	add	pcnt, %g2, pcnt
-	brgz	n, L(top)
-	 add	pcnt, %g3, pcnt
-	brlz,pt	n, L(done)
-	 nop
-L(final_one):
-	ldx	[up + 0], %g1
-	popc	%g1, %g2
-	add	pcnt, %g2, pcnt
-L(done):
-	retl
-	 mov	pcnt, %o0
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm b/gmp/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm
deleted file mode 100644
index 216ddc0ba1..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm
+++ /dev/null
@@ -1,93 +0,0 @@
-dnl  SPARC v9 mpn_sqr_dial_addlsh1 for T3/T4/T5.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T3:	?
-C UltraSPARC T4:	>= 4.5
-
-
-define(`rp', `%i0')
-define(`tp', `%i1')
-define(`up', `%i2')
-define(`n',  `%i3')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_sqr_diag_addlsh1)
-	save	%sp, -176, %sp
-
-	ldx	[up+0], %g1
-	mulx	%g1, %g1, %o0
-	umulxhi(%g1, %g1, %g2)
-	stx	%o0, [rp+0]
-
-	ldx	[up+8], %g1
-	ldx	[tp+0], %g4
-	ldx	[tp+8], %g5
-	mulx	%g1, %g1, %o0
-	orcc	%g0, %g0, %o5
-	b	L(dm)
-	 add	n, -2, n
-
-	ALIGN(16)
-L(top):	ldx	[up+8], %g1
-	addcc	%g4, %o2, %o2
-	addxccc(%g5, %o0, %g3)
-	ldx	[tp+16], %g4
-	ldx	[tp+24], %g5
-	mulx	%g1, %g1, %o0
-	stx	%o2, [rp+8]
-	stx	%g3, [rp+16]
-	add	rp, 16, rp
-	add	tp, 16, tp
-L(dm):	add	%g2, %o5, %o2
-	umulxhi(%g1, %g1, %g2)
-	addxccc(%g4, %g4, %g4)
-	addxccc(%g5, %g5, %g5)
-	add	up, 8, up
-	addxc(	%g0, %g0, %o5)
-	brnz	n, L(top)
-	 add	n, -1, n
-
-	addcc	%o2, %g4, %g4
-	addxccc(%o0, %g5, %g5)
-	stx	%g4, [rp+8]
-	stx	%g5, [rp+16]
-	addxc(	%o5, %g2, %g2)
-	stx	%g2, [rp+24]
-
-	ret
-	 restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/sub_n.asm b/gmp/mpn/sparc64/ultrasparct3/sub_n.asm
deleted file mode 100644
index 0e4bc939e3..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/sub_n.asm
+++ /dev/null
@@ -1,144 +0,0 @@
-dnl  SPARC v9 mpn_sub_n for T3/T4.
-
-dnl  Contributed to the GNU project by David Miller.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T3:	 8
-C UltraSPARC T4:	 3
-
-C INPUT PARAMETERS
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`vp', `%i2')
-define(`n',  `%i3')
-define(`cy', `%i4')
-
-define(`u0_off', `%l0')
-define(`u1_off', `%l1')
-define(`v0_off', `%l2')
-define(`v1_off', `%l3')
-define(`r0_off', `%l4')
-define(`r1_off', `%l5')
-define(`loop_n', `%l6')
-define(`tmp', `%l7')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_sub_nc)
-	save	%sp, -176, %sp
-	ba,pt	%xcc, L(ent)
-	 xor	cy, 1, cy
-EPILOGUE()
-PROLOGUE(mpn_sub_n)
-	save	%sp, -176, %sp
-	mov	1, cy
-L(ent):
-	subcc	n, 1, n
-	be	L(final_one)
-	 cmp	%g0, cy
-
-	ldx	[up + 0], %o4
-	sllx	n, 3, tmp
-
-	ldx	[vp + 0], %o5
-	add	up, tmp, u0_off
-
-	ldx	[up + 8], %g5
-	add	vp, tmp, v0_off
-
-	ldx	[vp + 8], %g1
-	add	rp, tmp, r0_off
-
-	neg	tmp, loop_n
-	add	u0_off, 8, u1_off
-
-	add	v0_off, 8, v1_off
-	sub	loop_n, -(2 * 8), loop_n
-
-	sub	r0_off, 16, r0_off
-	brgez,pn loop_n, L(loop_tail)
-	 sub	r0_off, 8, r1_off
-
-	b,a	L(top)
-	ALIGN(16)
-L(top):
-	xnor	%o5, 0, tmp
-	ldx	[loop_n + v0_off], %o5
-
-	addxccc(%o4, tmp, %g3)
-	ldx	[loop_n + u0_off], %o4
-
-	xnor	%g1, 0, %g1
-	stx	%g3, [loop_n + r0_off]
-
-	addxccc(%g5, %g1, tmp)
-	ldx	[loop_n + v1_off], %g1
-
-	ldx	[loop_n + u1_off], %g5
-	sub	loop_n, -(2 * 8), loop_n
-
-	brlz	loop_n, L(top)
-	 stx	tmp, [loop_n + r1_off]
-
-L(loop_tail):
-	xnor	%o5, 0, tmp
-	xnor	%g1, 0, %g1
-
-	addxccc(%o4, tmp, %g3)
-	add	loop_n, u0_off, up
-
-	addxccc(%g5, %g1, %g5)
-	add	loop_n, r0_off, rp
-
-	stx	%g3, [rp + 0]
-	add	loop_n, v0_off, vp
-
-	brgz,pt	loop_n, L(done)
-	 stx	%g5, [rp + 8]
-
-	add	rp, (2 * 8), rp
-
-L(final_one):
-	ldx	[up+0], %o4
-	ldx	[vp+0], %o5
-	xnor	%o5, %g0, %o5
-	addxccc(%o4, %o5, %g3)
-	stx	%g3, [rp+0]
-
-L(done):
-	clr	%i0
-	movcc	%xcc, 1, %i0
-	ret
-	 restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/submul_1.asm b/gmp/mpn/sparc64/ultrasparct3/submul_1.asm
deleted file mode 100644
index 5635d1bdbd..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/submul_1.asm
+++ /dev/null
@@ -1,170 +0,0 @@
-dnl  SPARC v9 mpn_submul_1 for T3/T4/T5.
-
-dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T3:	26
-C UltraSPARC T4:	 4.5
-
-C INPUT PARAMETERS
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`n',  `%i2')
-define(`v0', `%i3')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_submul_1)
-	save	%sp, -176, %sp
-	ldx	[up+0], %g1
-
-	and	n, 3, %g5
-	add	n, -4, n
-	brz	%g5, L(b00)
-	 cmp	%g5, 2
-	bcs	%xcc, L(b01)
-	 nop
-	bne	%xcc, L(b11)
-	 ldx	[up+8], %g4
-
-L(b10):	add	up, 16, up
-	addcc	%g0, 0, %g3
-	mulx	%g1, v0, %l4
-	umulxhi(%g1, v0, %l5)
-	ldx	[rp+0], %o2
-	mulx	%g4, v0, %l6
-	umulxhi(%g4, v0, %l7)
-	brlz	n, L(wd2)
-	 nop
-L(gt2):	ldx	[up+0], %o0
-	b	L(lo2)
-	 nop
-
-L(b00):	add	rp, -16, rp
-	addcc	%g0, 0, %g3
-	ldx	[up+8], %o1
-	mulx	%g1, v0, %l0
-	umulxhi(%g1, v0, %l1)
-	ldx	[up+16], %o0
-	ldx	[rp+16], %o2
-	mulx	%o1, v0, %l2
-	umulxhi(%o1, v0, %l3)
-	b	     L(lo0)
-	 nop
-
-L(b01):	add	up, 8, up
-	add	rp, -8, rp
-	addcc	%g0, 0, %g3
-	ldx	[rp+8], %o3
-	mulx	%g1, v0, %l6
-	umulxhi(%g1, v0, %l7)
-	brlz	n, L(wd1)
-	 nop
-	ldx	[up+0], %o0
-	ldx	[up+8], %o1
-	mulx	%o0, v0, %l0
-	umulxhi(%o0, v0, %l1)
-	b	L(lo1)
-	 nop
-
-L(b11):	add	up, 24, up
-	add	rp, 8, rp
-	addcc	%g0, 0, %g3
-	mulx	%g1, v0, %l2
-	umulxhi(%g1, v0, %l3)
-	ldx	[up-8], %o1
-	ldx	[rp-8], %o3
-	mulx	%g4, v0, %l4
-	umulxhi(%g4, v0, %l5)
-	brlz	n, L(end)
-	 nop
-
-	ALIGN(16)
-L(top):	ldx	[up+0], %o0
-	addxccc(%g3, %l2, %g1)
-	ldx	[rp+0], %o2
-	addxc(	%g0, %l3, %g3)
-	mulx	%o1, v0, %l6
-	subcc	%o3, %g1, %g4
-	umulxhi(%o1, v0, %l7)
-	stx	%g4, [rp-8]
-L(lo2):	ldx	[up+8], %o1
-	addxccc(%g3, %l4, %g1)
-	ldx	[rp+8], %o3
-	addxc(	%g0, %l5, %g3)
-	mulx	%o0, v0, %l0
-	subcc	%o2, %g1, %g4
-	umulxhi(%o0, v0, %l1)
-	stx	%g4, [rp+0]
-L(lo1):	ldx	[up+16], %o0
-	addxccc(%g3, %l6, %g1)
-	ldx	[rp+16], %o2
-	addxc(	%g0, %l7, %g3)
-	mulx	%o1, v0, %l2
-	subcc	%o3, %g1, %g4
-	umulxhi(%o1, v0, %l3)
-	stx	%g4, [rp+8]
-L(lo0):	ldx	[up+24], %o1
-	addxccc(%g3, %l0, %g1)
-	ldx	[rp+24], %o3
-	addxc(	%g0, %l1, %g3)
-	mulx	%o0, v0, %l4
-	subcc	%o2, %g1, %g4
-	umulxhi(%o0, v0, %l5)
-	stx	%g4, [rp+16]
-	add	n, -4, n
-	add	up, 32, up
-	brgez	n, L(top)
-	 add	rp, 32, rp
-
-L(end):	addxccc(%g3, %l2, %g1)
-	ldx	[rp+0], %o2
-	addxc(	%g0, %l3, %g3)
-	mulx	%o1, v0, %l6
-	subcc	%o3, %g1, %g4
-	umulxhi(%o1, v0, %l7)
-	stx	%g4, [rp-8]
-L(wd2):	addxccc(%g3, %l4, %g1)
-	ldx	[rp+8], %o3
-	addxc(	%g0, %l5, %g3)
-	subcc	%o2, %g1, %g4
-	stx	%g4, [rp+0]
-L(wd1):	addxccc(%g3, %l6, %g1)
-	addxc(	%g0, %l7, %g3)
-	subcc	%o3, %g1, %g4
-	stx	%g4, [rp+8]
-	addxc(	%g0, %g3, %i0)
-	ret
-	 restore
-EPILOGUE()
diff --git a/gmp/mpn/thumb/add_n.asm b/gmp/mpn/thumb/add_n.asm
deleted file mode 100644
index 08ed60b9be..0000000000
--- a/gmp/mpn/thumb/add_n.asm
+++ /dev/null
@@ -1,63 +0,0 @@
-dnl  ARM/Thumb mpn_add_n.
-
-dnl  Copyright 1997, 2000, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C INPUT PARAMETERS
-define(`rp',	r0)
-define(`up',	r1)
-define(`vp',	r2)
-define(`n',	r3)
-
-ASM_START()
-	.thumb
-PROLOGUE(mpn_add_nc)
-	push	{r4, r5, r6}
-	ldr	r6, [sp, #12]		C init carry save register
-	sub	r6, #1
-	b	L(top)
-EPILOGUE()
-PROLOGUE(mpn_add_n)
-	push	{r4, r5, r6}
-	neg	r6, n			C init carry save register
-
-L(top):	ldmia	up!, {r4}		C load next limb from S1
-	cmp	n, r6			C tricky carry restore
-	ldmia	vp!, {r5}		C load next limb from S2
-	adc	r4, r5
-	stmia	rp!, {r4}		C store result limb to RES
-	sbc	r6, r6			C save negated carry
-	sub	n, #1
-	bne	L(top)
-
-	add	r0, r6, #1
-	pop	{r4, r5, r6}
-	bx	lr
-EPILOGUE()
diff --git a/gmp/mpn/thumb/add_n.s b/gmp/mpn/thumb/add_n.s
new file mode 100644
index 0000000000..294cfe6ae2
--- /dev/null
+++ b/gmp/mpn/thumb/add_n.s
@@ -0,0 +1,48 @@
+@ ARM/Thumb mpn_add_n -- Add two limb vectors of the same length > 0 and store
+@ sum in a third limb vector.
+
+@ Copyright 1997, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 3 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+@ INPUT PARAMETERS
+@ RES_ptr	r0
+@ S1_ptr	r1
+@ S2_ptr	r2
+@ SIZE		r3
+
+@ NOT TESTED CODE
+
+	.text
+	.thumb
+	.align	0
+	.global	___gmpn_add_n
+___gmpn_add_n:
+	push	{r4, r5, r6, lr}	@ save scratch registers and return address
+	mov	r6, #1			@ init carry save register
+
+Loop:	sub	r6, #1			@ restore carry from r6 (NOTE(review): ARM sets C when there is NO borrow -- confirm polarity)
+	ldmia	r1!, {r4}		@ load next limb from S1, advance r1
+	ldmia	r2!, {r5}		@ load next limb from S2, advance r2
+	adc	r4, r5			@ r4 = S1 limb + S2 limb + carry flag
+	stmia	r0!, {r4}		@ store result limb to RES, advance r0
+	sbc	r6, r6			@ save carry flag in r6 (r6 - r6 - borrow) for the next pass
+	sub	r3, #1			@ one limb done: decrement SIZE
+	bge	Loop			@ loop back while decremented SIZE >= 0 (NOTE(review): looks like SIZE+1 passes -- confirm)
+
+	mov	r0, r6			@ return value = carry save register
+	pop	{r4, r5, r6, pc}	@ restore registers; popping saved lr into pc returns
diff --git a/gmp/mpn/thumb/sub_n.asm b/gmp/mpn/thumb/sub_n.asm
deleted file mode 100644
index a38572048e..0000000000
--- a/gmp/mpn/thumb/sub_n.asm
+++ /dev/null
@@ -1,63 +0,0 @@
-dnl  ARM/Thumb mpn_sub_n.
-
-dnl  Copyright 1997, 2000, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C INPUT PARAMETERS
-define(`rp',	r0)
-define(`up',	r1)
-define(`vp',	r2)
-define(`n',	r3)
-
-ASM_START()
-	.thumb
-PROLOGUE(mpn_sub_nc)
-	push	{r4, r5, r6}
-	ldr	r6, [sp, #12]		C init carry save register
-	neg	r6, r6
-	b	L(top)
-EPILOGUE()
-PROLOGUE(mpn_sub_n)
-	push	{r4, r5, r6}
-	mov	r6, n			C init carry save register
-
-L(top):	ldmia	up!, {r4}		C load next limb from S1
-	cmp	n, r6			C tricky carry restore
-	ldmia	vp!, {r5}		C load next limb from S2
-	sbc	r4, r5
-	stmia	rp!, {r4}		C store result limb to RES
-	sbc	r6, r6			C save negated carry
-	sub	n, #1
-	bne	L(top)
-
-	neg	r0, r6
-	pop	{r4, r5, r6}
-	bx	lr
-EPILOGUE()
diff --git a/gmp/mpn/thumb/sub_n.s b/gmp/mpn/thumb/sub_n.s
new file mode 100644
index 0000000000..fbd4c98194
--- /dev/null
+++ b/gmp/mpn/thumb/sub_n.s
@@ -0,0 +1,48 @@
+@ ARM/Thumb mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+@ store difference in a third limb vector.
+
+@ Copyright 1997, 2000 Free Software Foundation, Inc.
+
+@ This file is part of the GNU MP Library.
+
+@ The GNU MP Library is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU Lesser General Public License as published by
+@ the Free Software Foundation; either version 3 of the License, or (at your
+@ option) any later version.
+
+@ The GNU MP Library is distributed in the hope that it will be useful, but
+@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+@ or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+@ License for more details.
+
+@ You should have received a copy of the GNU Lesser General Public License
+@ along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+@ INPUT PARAMETERS
+@ RES_ptr	r0
+@ S1_ptr	r1
+@ S2_ptr	r2
+@ SIZE		r3
+
+@ NOT TESTED CODE
+
+	.text
+	.thumb
+	.align	0
+	.global	___gmpn_sub_n
+___gmpn_sub_n:
+	push	{r4, r5, r6, lr}	@ save scratch registers and return address
+	mov	r6, #1			@ init carry save register
+
+Loop:	sub	r6, #1			@ restore carry from r6 (NOTE(review): ARM sets C when there is NO borrow -- confirm polarity)
+	ldmia	r1!, {r4}		@ load next limb from S1, advance r1
+	ldmia	r2!, {r5}		@ load next limb from S2, advance r2
+	sbc	r4, r5			@ r4 = S1 limb - S2 limb - borrow
+	stmia	r0!, {r4}		@ store result limb to RES, advance r0
+	sbc	r6, r6			@ save carry flag in r6 (r6 - r6 - borrow) for the next pass
+	sub	r3, #1			@ one limb done: decrement SIZE
+	bge	Loop			@ loop back while decremented SIZE >= 0 (NOTE(review): looks like SIZE+1 passes -- confirm)
+
+	mov	r0, r6			@ return value = carry save register
+	pop	{r4, r5, r6, pc}	@ restore registers; popping saved lr into pc returns
diff --git a/gmp/mpn/vax/add_n.asm b/gmp/mpn/vax/add_n.asm
deleted file mode 100644
index 0a0bf78ab3..0000000000
--- a/gmp/mpn/vax/add_n.asm
+++ /dev/null
@@ -1,64 +0,0 @@
-dnl  VAX mpn_add_n -- Add two limb vectors of the same length > 0 and store sum
-dnl  in a third limb vector.
-
-dnl  Copyright 1999, 2000, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ASM_START()
-PROLOGUE(mpn_add_n)
-	.word	0x0
-	movl	16(ap), r0
-	movl	12(ap), r1
-	movl	8(ap), r2
-	movl	4(ap), r3
-	mnegl	r0, r5
-	addl2	$3, r0
-	ashl	$-2, r0, r0	C unroll loop count
-	bicl2	$-4, r5		C mask out low 2 bits
-	movaq	(r5)[r5], r5	C 9x
-	jmp	L(top)[r5]
-
-L(top):	movl	(r2)+, r4
-	adwc	(r1)+, r4
-	movl	r4, (r3)+
-	movl	(r2)+, r4
-	adwc	(r1)+, r4
-	movl	r4, (r3)+
-	movl	(r2)+, r4
-	adwc	(r1)+, r4
-	movl	r4, (r3)+
-	movl	(r2)+, r4
-	adwc	(r1)+, r4
-	movl	r4, (r3)+
-	sobgtr	r0, L(top)
-
-	adwc	r0, r0
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/vax/add_n.s b/gmp/mpn/vax/add_n.s
new file mode 100644
index 0000000000..60773cc348
--- /dev/null
+++ b/gmp/mpn/vax/add_n.s
@@ -0,0 +1,59 @@
+# VAX __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS (read through the argument pointer, ap)
+# res_ptr	4(ap)
+# s1_ptr	8(ap)
+# s2_ptr	12(ap)
+# size		16(ap)
+
+.text
+	.align 1
+.globl ___gmpn_add_n
+___gmpn_add_n:
+	.word	0x0		# entry mask word (0: presumably no registers saved on entry -- confirm)
+	movl	16(ap),r0	# r0 = size
+	movl	12(ap),r1	# r1 = source pointer read by the adwc below
+	movl	8(ap),r2	# r2 = source pointer read by the movl below
+	movl	4(ap),r3	# r3 = result pointer
+	mnegl	r0,r5		# r5 = -size
+	addl2	$3,r0
+	ashl	$-2,r0,r0	# unroll loop count: r0 = (size + 3) / 4 passes
+	bicl2	$-4,r5		# keep only the low 2 bits: r5 = (-size) & 3
+	movaq	(r5)[r5],r5	# 9x: r5 = r5 + 8*r5
+	jmp	Loop(r5)	# enter the 4-way unrolled loop r5 bytes in (presumably 9 bytes per limb group -- confirm)
+
+Loop:	movl	(r2)+,r4	# limb group 1: load, add with carry, store
+	adwc	(r1)+,r4
+	movl	r4,(r3)+
+	movl	(r2)+,r4	# limb group 2
+	adwc	(r1)+,r4
+	movl	r4,(r3)+
+	movl	(r2)+,r4	# limb group 3
+	adwc	(r1)+,r4
+	movl	r4,(r3)+
+	movl	(r2)+,r4	# limb group 4
+	adwc	(r1)+,r4
+	movl	r4,(r3)+
+	sobgtr	r0,Loop		# decrement pass count, loop while it is > 0
+
+	adwc	r0,r0		# r0 = r0 + r0 + carry; r0 is 0 after the loop, so this returns the carry-out
+	ret
diff --git a/gmp/mpn/vax/addmul_1.asm b/gmp/mpn/vax/addmul_1.asm
deleted file mode 100644
index 8a6f636bdf..0000000000
--- a/gmp/mpn/vax/addmul_1.asm
+++ /dev/null
@@ -1,124 +0,0 @@
-dnl  VAX mpn_addmul_1 -- Multiply a limb vector with a limb and add the result
-dnl  to a second limb vector.
-
-dnl  Copyright 1992, 1994, 1996, 2000, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ASM_START()
-PROLOGUE(mpn_addmul_1)
-	.word	0xfc0
-	movl	12(ap), r4
-	movl	8(ap), r8
-	movl	4(ap), r9
-	clrl	r3
-	incl	r4
-	ashl	$-1, r4, r7
-	clrl	r11
-	movl	16(ap), r6
-	jlss	L(v0_big)
-	jlbc	r4, L(1)
-
-C Loop for v0 < 0x80000000
-L(tp1):	movl	(r8)+, r1
-	jlss	L(1n0)
-	emul	r1, r6, $0, r2
-	addl2	r11, r2
-	adwc	$0, r3
-	addl2	r2, (r9)+
-	adwc	$0, r3
-L(1):	movl	(r8)+, r1
-	jlss	L(1n1)
-L(1p1):	emul	r1, r6, $0, r10
-	addl2	r3, r10
-	adwc	$0, r11
-	addl2	r10, (r9)+
-	adwc	$0, r11
-
-	sobgtr	r7, L(tp1)
-	movl	r11, r0
-	ret
-
-L(1n0):	emul	r1, r6, $0, r2
-	addl2	r11, r2
-	adwc	r6, r3
-	addl2	r2, (r9)+
-	adwc	$0, r3
-	movl	(r8)+, r1
-	jgeq	L(1p1)
-L(1n1):	emul	r1, r6, $0, r10
-	addl2	r3, r10
-	adwc	r6, r11
-	addl2	r10, (r9)+
-	adwc	$0, r11
-
-	sobgtr	r7, L(tp1)
-	movl	r11, r0
-	ret
-
-L(v0_big):
-	jlbc	r4, L(2)
-
-C Loop for v0 >= 0x80000000
-L(tp2):	movl	(r8)+, r1
-	jlss	L(2n0)
-	emul	r1, r6, $0, r2
-	addl2	r11, r2
-	adwc	r1, r3
-	addl2	r2, (r9)+
-	adwc	$0, r3
-L(2):	movl	(r8)+, r1
-	jlss	L(2n1)
-L(2p1):	emul	r1, r6, $0, r10
-	addl2	r3, r10
-	adwc	r1, r11
-	addl2	r10, (r9)+
-	adwc	$0, r11
-
-	sobgtr	r7, L(tp2)
-	movl	r11, r0
-	ret
-
-L(2n0):	emul	r1, r6, $0, r2
-	addl2	r11, r2
-	adwc	r6, r3
-	addl2	r2, (r9)+
-	adwc	r1, r3
-	movl	(r8)+, r1
-	jgeq	L(2p1)
-L(2n1):	emul	r1, r6, $0, r10
-	addl2	r3, r10
-	adwc	r6, r11
-	addl2	r10, (r9)+
-	adwc	r1, r11
-
-	sobgtr	r7, L(tp2)
-	movl	r11, r0
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/vax/addmul_1.s b/gmp/mpn/vax/addmul_1.s
new file mode 100644
index 0000000000..e2f86e074d
--- /dev/null
+++ b/gmp/mpn/vax/addmul_1.s
@@ -0,0 +1,124 @@
+# VAX __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS (read through the argument pointer, ap)
+# res_ptr	4(ap)
+# s1_ptr	8(ap)
+# size		12(ap)
+# s2_limb	16(ap)
+
+.text
+	.align 1
+.globl ___gmpn_addmul_1
+___gmpn_addmul_1:
+	.word	0xfc0		# entry mask word (bits 6-11 set: presumably saves r6-r11 -- confirm)
+	movl	12(ap),r4	# r4 = size
+	movl	8(ap),r8	# r8 = s1_ptr (source limbs)
+	movl	4(ap),r9	# r9 = res_ptr (limbs updated in place)
+	movl	16(ap),r6	# r6 = s2_limb (multiplier); the move sets the condition codes
+	jlss	s2_big		# multiplier has its top bit set: use the second loop
+
+	clrl	r3		# carry-in limb used when entering at L1
+	incl	r4
+	ashl	$-1,r4,r7	# r7 = (size + 1) / 2 passes of the 2-way unrolled loop
+	jlbc	r4,L1		# size + 1 even, i.e. size odd: enter at the second half
+	clrl	r11		# carry-in limb used when entering at Loop1
+
+# Loop for S2_LIMB < 0x80000000
+Loop1:	movl	(r8)+,r1	# r1 = next source limb
+	jlss	L1n0		# limb has its top bit set: take the fix-up path
+	emul	r1,r6,$0,r2	# r3:r2 = r1 * r6 (64-bit product, low word in r2)
+	addl2	r11,r2		# add carry limb from the previous step
+	adwc	$0,r3
+	addl2	r2,(r9)+	# add low word into the result limb
+	adwc	$0,r3		# r3 = carry limb for the next step
+L1:	movl	(r8)+,r1	# second half: same steps with r11:r10 as the product
+	jlss	L1n1
+L1p1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	$0,r11
+	addl2	r10,(r9)+
+	adwc	$0,r11		# r11 = carry limb for the next step
+
+	sobgtr	r7,Loop1	# decrement pass count, loop while it is > 0
+	movl	r11,r0		# return the final carry limb
+	ret
+
+L1n0:	emul	r1,r6,$0,r2	# first half, source limb with top bit set
+	addl2	r11,r2
+	adwc	r6,r3		# add r6 to the high word (presumably corrects emul's signed product -- confirm)
+	addl2	r2,(r9)+
+	adwc	$0,r3
+	movl	(r8)+,r1
+	jgeq	L1p1		# next limb has its top bit clear: rejoin the plain path
+L1n1:	emul	r1,r6,$0,r10	# second half, source limb with top bit set
+	addl2	r3,r10
+	adwc	r6,r11		# same high-word fix-up as in L1n0
+	addl2	r10,(r9)+
+	adwc	$0,r11
+
+	sobgtr	r7,Loop1
+	movl	r11,r0		# return the final carry limb
+	ret
+
+
+s2_big:	clrl	r3		# same setup as above, for the S2_LIMB >= 0x80000000 loop
+	incl	r4
+	ashl	$-1,r4,r7	# r7 = (size + 1) / 2 passes
+	jlbc	r4,L2		# size odd: enter at the second half
+	clrl	r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2:	movl	(r8)+,r1	# r1 = next source limb
+	jlss	L2n0		# limb has its top bit set: take the fix-up path
+	emul	r1,r6,$0,r2	# r3:r2 = r1 * r6 (64-bit product, low word in r2)
+	addl2	r11,r2
+	adwc	r1,r3		# add r1 to the high word (presumably corrects for r6 being treated as negative -- confirm)
+	addl2	r2,(r9)+
+	adwc	$0,r3
+L2:	movl	(r8)+,r1
+	jlss	L2n1
+L2p1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r1,r11		# same high-word fix-up, second half
+	addl2	r10,(r9)+
+	adwc	$0,r11
+
+	sobgtr	r7,Loop2	# decrement pass count, loop while it is > 0
+	movl	r11,r0		# return the final carry limb
+	ret
+
+L2n0:	emul	r1,r6,$0,r2	# first half, both operands have their top bit set
+	addl2	r11,r2
+	adwc	r6,r3		# fix-up for the source limb's top bit
+	addl2	r2,(r9)+
+	adwc	r1,r3		# fix-up for the multiplier's top bit
+	movl	(r8)+,r1
+	jgeq	L2p1		# next limb has its top bit clear: rejoin the plain path
+L2n1:	emul	r1,r6,$0,r10	# second half, both operands have their top bit set
+	addl2	r3,r10
+	adwc	r6,r11
+	addl2	r10,(r9)+
+	adwc	r1,r11
+
+	sobgtr	r7,Loop2
+	movl	r11,r0		# return the final carry limb
+	ret
diff --git a/gmp/mpn/vax/elf.m4 b/gmp/mpn/vax/elf.m4
deleted file mode 100644
index e04f0bafc9..0000000000
--- a/gmp/mpn/vax/elf.m4
+++ /dev/null
@@ -1,54 +0,0 @@
-divert(-1)
-
-dnl  m4 macros for VAX assembler.
-
-dnl  Copyright 2001, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-defreg(r0,`%r``''0')
-defreg(r1,`%r``''1')
-defreg(r2,`%r``''2')
-defreg(r3,`%r``''3')
-defreg(r4,`%r``''4')
-defreg(r5,`%r``''5')
-defreg(r6,`%r``''6')
-defreg(r7,`%r``''7')
-defreg(r8,`%r``''8')
-defreg(r9,`%r``''9')
-defreg(r10,`%r``''10')
-defreg(r11,`%r``''11')
-defreg(r12,`%r``''12')
-defreg(r13,`%r``''13')
-defreg(r14,`%r``''14')
-defreg(r15,`%r``''15')
-defreg(ap,`%a``''p')
-
-define(`foo', blablabla)
-
-divert
diff --git a/gmp/mpn/vax/gmp-mparam.h b/gmp/mpn/vax/gmp-mparam.h
index 9f20b9b783..ea262ddc40 100644
--- a/gmp/mpn/vax/gmp-mparam.h
+++ b/gmp/mpn/vax/gmp-mparam.h
@@ -1,41 +1,30 @@
 /* VAX gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 2000-2002 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* These numbers were measured manually using the tune/speed program.
-   The standard tune/tuneup takes too long.  (VAX 8800) */
+   The standard tune/tuneup takes too long.  (VAX 8800) */
 
-#define MUL_TOOM22_THRESHOLD             14
-#define MUL_TOOM33_THRESHOLD            110
+#define MUL_KARATSUBA_THRESHOLD          14
+#define MUL_TOOM3_THRESHOLD             110
 
 #define SQR_BASECASE_THRESHOLD            6
-#define SQR_TOOM2_THRESHOLD              42
+#define SQR_KARATSUBA_THRESHOLD          42
 #define SQR_TOOM3_THRESHOLD             250
 
 /* #define DIV_SB_PREINV_THRESHOLD         */
diff --git a/gmp/mpn/vax/lshift.asm b/gmp/mpn/vax/lshift.asm
deleted file mode 100644
index 941e9994b8..0000000000
--- a/gmp/mpn/vax/lshift.asm
+++ /dev/null
@@ -1,59 +0,0 @@
-dnl  VAX mpn_lshift -- left shift.
-
-dnl  Copyright 1999-2001, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ASM_START()
-PROLOGUE(mpn_lshift)
-	.word	0x1c0
-	movl	4(ap), r7
-	movl	8(ap), r6
-	movl	12(ap), r1
-	movl	16(ap), r8
-
-	moval	(r6)[r1], r6
-	moval	(r7)[r1], r7
-	clrl	r3
-	movl	-(r6), r2
-	ashq	r8, r2, r4
-	movl	r5, r0
-	movl	r2, r3
-	decl	r1
-	jeql	L(end)
-
-L(top):	movl	-(r6), r2
-	ashq	r8, r2, r4
-	movl	r5, -(r7)
-	movl	r2, r3
-	sobgtr	r1, L(top)
-
-L(end):	movl	r4, -4(r7)
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/vax/lshift.s b/gmp/mpn/vax/lshift.s
new file mode 100644
index 0000000000..6f3d600be0
--- /dev/null
+++ b/gmp/mpn/vax/lshift.s
@@ -0,0 +1,56 @@
+# VAX mpn_lshift -- left shift.
+
+# Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS (read from the argument list through ap)
+# rptr		(ap + 4)
+# sptr		(ap + 8)
+# size		(ap + 12)
+# cnt		(ap + 16)
+# r0=retval r1=size r2,r3=itmp r4,r5=otmp	call-used registers
+# r6=sptr r7=rptr r8=cnt r9 r10 r11		call-saved registers
+
+.text
+	.align 1
+.globl ___gmpn_lshift
+___gmpn_lshift:
+	.word	0x1c0		# register save mask: bits 6-8, i.e. r6-r8
+	movl	4(ap),r7	# r7 = rptr
+	movl	8(ap),r6	# r6 = sptr
+	movl	12(ap),r1	# r1 = size
+	movl	16(ap),r8	# r8 = cnt
+
+	moval	(r6)[r1],r6	# r6 = sptr + 4*size; limbs are walked from the top down
+	moval	(r7)[r1],r7	# r7 = rptr + 4*size
+	clrl	r3		# high half of the first pair is zero
+	movl	-(r6),r2	# r2 = most significant source limb
+	ashq	r8,r2,r4	# r5:r4 = r3:r2 << cnt
+	movl	r5,r0		# return value = bits shifted out of the top limb
+	movl	r2,r3		# current limb becomes the high half of the next pair
+	decl	r1
+	jeql	Lend		# size was 1: only the low word is left to store
+
+Loop:	movl	-(r6),r2	# r2 = next lower source limb
+	ashq	r8,r2,r4	# r5:r4 = r3:r2 << cnt; r5 is the finished result limb
+	movl	r5,-(r7)
+	movl	r2,r3
+	sobgtr	r1,Loop
+
+Lend:	movl	r4,-4(r7)	# lowest result limb = lowest source limb << cnt
+	ret
diff --git a/gmp/mpn/vax/mul_1.asm b/gmp/mpn/vax/mul_1.asm
deleted file mode 100644
index 8e4dcd2177..0000000000
--- a/gmp/mpn/vax/mul_1.asm
+++ /dev/null
@@ -1,118 +0,0 @@
-dnl  VAX mpn_mul_1 -- Multiply a limb vector with a limb and store the result
-dnl  in a second limb vector.
-
-dnl  Copyright 1992, 1994, 1996, 2000, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ASM_START()
-PROLOGUE(mpn_mul_1)
-	.word	0xfc0
-	movl	12(ap), r4
-	movl	8(ap), r8
-	movl	4(ap), r9
-	clrl	r3
-	incl	r4
-	ashl	$-1, r4, r7
-	clrl	r11
-	movl	16(ap), r6
-	jlss	L(v0_big)
-	jlbc	r4, L(1)
-
-C Loop for v0 < 0x80000000
-L(tp1):	movl	(r8)+, r1
-	jlss	L(1n0)
-	emul	r1, r6, $0, r2
-	addl2	r11, r2
-	adwc	$0, r3
-	movl	r2, (r9)+
-L(1):	movl	(r8)+, r1
-	jlss	L(1n1)
-L(1p1):	emul	r1, r6, $0, r10
-	addl2	r3, r10
-	adwc	$0, r11
-	movl	r10, (r9)+
-
-	sobgtr	r7, L(tp1)
-	movl	r11, r0
-	ret
-
-L(1n0):	emul	r1, r6, $0, r2
-	addl2	r11, r2
-	adwc	r6, r3
-	movl	r2, (r9)+
-	movl	(r8)+, r1
-	jgeq	L(1p1)
-L(1n1):	emul	r1, r6, $0, r10
-	addl2	r3, r10
-	adwc	r6, r11
-	movl	r10, (r9)+
-
-	sobgtr	r7, L(tp1)
-	movl	r11, r0
-	ret
-
-L(v0_big):
-	jlbc	r4, L(2)
-
-C Loop for v0 >= 0x80000000
-L(tp2):	movl	(r8)+, r1
-	jlss	L(2n0)
-	emul	r1, r6, $0, r2
-	addl2	r11, r2
-	adwc	r1, r3
-	movl	r2, (r9)+
-L(2):	movl	(r8)+, r1
-	jlss	L(2n1)
-L(2p1):	emul	r1, r6, $0, r10
-	addl2	r3, r10
-	adwc	r1, r11
-	movl	r10, (r9)+
-
-	sobgtr	r7, L(tp2)
-	movl	r11, r0
-	ret
-
-L(2n0):	emul	r1, r6, $0, r2
-	addl2	r1, r3
-	addl2	r11, r2
-	adwc	r6, r3
-	movl	r2, (r9)+
-	movl	(r8)+, r1
-	jgeq	L(2p1)
-L(2n1):	emul	r1, r6, $0, r10
-	addl2	r1, r11
-	addl2	r3, r10
-	adwc	r6, r11
-	movl	r10, (r9)+
-
-	sobgtr	r7, L(tp2)
-	movl	r11, r0
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/vax/mul_1.s b/gmp/mpn/vax/mul_1.s
new file mode 100644
index 0000000000..c6f4594bd8
--- /dev/null
+++ b/gmp/mpn/vax/mul_1.s
@@ -0,0 +1,121 @@
+# VAX __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS (read from the argument list through ap; carry limb returned in r0)
+# res_ptr	(ap + 4)
+# s1_ptr	(ap + 8)
+# size		(ap + 12)
+# s2_limb	(ap + 16)
+
+.text
+	.align 1
+.globl ___gmpn_mul_1
+___gmpn_mul_1:
+	.word	0xfc0		# register save mask: bits 6-11, i.e. r6-r11
+	movl	12(ap),r4	# r4 = size
+	movl	8(ap),r8	# r8 = s1_ptr
+	movl	4(ap),r9	# r9 = res_ptr
+	movl	16(ap),r6	# r6 = s2_limb; sets N for the test below
+	jlss	s2_big		# s2_limb has its top bit set: use Loop2
+
+# One might want to combine the addl2 and the store below, but that
+# is actually just slower according to my timing tests.  (VAX 3600)
+
+	clrl	r3		# carry-in when entering at L1 (odd size)
+	incl	r4
+	ashl	$-1,r4,r7	# r7 = (size + 1) >> 1 = passes through the 2-limb loop
+	jlbc	r4,L1		# size + 1 even, i.e. size odd: enter at the second half
+	clrl	r11		# carry-in when entering at Loop1 (even size)
+
+# Loop for S2_LIMB < 0x80000000
+Loop1:	movl	(r8)+,r1	# r1 = next s1 limb; sets N for the test below
+	jlss	L1n0		# limb is negative as a signed value: corrected path
+	emul	r1,r6,$0,r2	# r3:r2 = signed r1 * r6
+	addl2	r11,r2		# add the carry limb from the previous step
+	adwc	$0,r3		# r3 = carry limb for the next step
+	movl	r2,(r9)+	# store the result limb
+L1:	movl	(r8)+,r1
+	jlss	L1n1
+L1p1:	emul	r1,r6,$0,r10	# r11:r10 = signed r1 * r6
+	addl2	r3,r10
+	adwc	$0,r11		# r11 = carry limb for the next step
+	movl	r10,(r9)+
+
+	sobgtr	r7,Loop1
+	movl	r11,r0		# return the final carry limb
+	ret
+
+L1n0:	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	r6,r3		# s1 limb was negative for emul: add r6 to the high word
+	movl	r2,(r9)+
+	movl	(r8)+,r1
+	jgeq	L1p1		# next limb non-negative: rejoin the uncorrected path
+L1n1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r6,r11		# same high-word correction, second half of the loop
+	movl	r10,(r9)+
+
+	sobgtr	r7,Loop1
+	movl	r11,r0		# return the final carry limb
+	ret
+
+
+s2_big:	clrl	r3		# same setup as above, for the s2_limb >= 0x80000000 loop
+	incl	r4
+	ashl	$-1,r4,r7	# r7 = (size + 1) >> 1
+	jlbc	r4,L2		# size odd: enter at the second half
+	clrl	r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2:	movl	(r8)+,r1
+	jlss	L2n0
+	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	r1,r3		# r6 is negative for emul: add r1 to the high word
+	movl	r2,(r9)+
+L2:	movl	(r8)+,r1
+	jlss	L2n1
+L2p1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r1,r11		# r6 is negative for emul: add r1 to the high word
+	movl	r10,(r9)+
+
+	sobgtr	r7,Loop2
+	movl	r11,r0		# return the final carry limb
+	ret
+
+L2n0:	emul	r1,r6,$0,r2
+	addl2	r1,r3		# both operands negative: add r1 to the high word ...
+	addl2	r11,r2
+	adwc	r6,r3		# ... and r6 together with the carry from the low word
+	movl	r2,(r9)+
+	movl	(r8)+,r1
+	jgeq	L2p1
+L2n1:	emul	r1,r6,$0,r10
+	addl2	r1,r11		# both operands negative: add r1 to the high word ...
+	addl2	r3,r10
+	adwc	r6,r11		# ... and r6 together with the carry from the low word
+	movl	r10,(r9)+
+
+	sobgtr	r7,Loop2
+	movl	r11,r0		# return the final carry limb
+	ret
diff --git a/gmp/mpn/vax/rshift.asm b/gmp/mpn/vax/rshift.asm
deleted file mode 100644
index 00b2daac01..0000000000
--- a/gmp/mpn/vax/rshift.asm
+++ /dev/null
@@ -1,57 +0,0 @@
-dnl  VAX mpn_rshift -- right shift.
-
-dnl  Copyright 1999-2001, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ASM_START()
-PROLOGUE(mpn_rshift)
-	.word	0x1c0
-	movl	4(ap), r7
-	movl	8(ap), r6
-	movl	12(ap), r1
-	movl	16(ap), r8
-
-	movl	(r6)+, r2
-	subl3	r8, $32, r8
-	ashl	r8, r2, r0
-	decl	r1
-	jeql	L(end)
-
-L(top):	movl	(r6)+, r3
-	ashq	r8, r2, r4
-	movl	r5, (r7)+
-	movl	r3, r2
-	sobgtr	r1, L(top)
-
-L(end):	clrl	r3
-	ashq	r8, r2, r4
-	movl	r5, (r7)
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/vax/rshift.s b/gmp/mpn/vax/rshift.s
new file mode 100644
index 0000000000..ae27208e2c
--- /dev/null
+++ b/gmp/mpn/vax/rshift.s
@@ -0,0 +1,54 @@
+# VAX mpn_rshift -- right shift.
+
+# Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS (read from the argument list through ap)
+# rptr		(ap + 4)
+# sptr		(ap + 8)
+# size		(ap + 12)
+# cnt		(ap + 16)
+# r0=retval r1=size r2,r3=itmp r4,r5=otmp	call-used registers
+# r6=sptr r7=rptr r8=cnt r9 r10 r11		call-saved registers
+
+.text
+	.align 1
+.globl ___gmpn_rshift
+___gmpn_rshift:
+	.word	0x1c0		# register save mask: bits 6-8, i.e. r6-r8
+	movl	4(ap),r7	# r7 = rptr
+	movl	8(ap),r6	# r6 = sptr
+	movl	12(ap),r1	# r1 = size
+	movl	16(ap),r8	# r8 = cnt
+
+	movl	(r6)+,r2	# r2 = least significant source limb
+	subl3	r8,$32,r8	# r8 = 32 - cnt; the right shift is done as a left shift of a limb pair
+	ashl	r8,r2,r0	# return value = lowest limb << (32 - cnt), the bits shifted out
+	decl	r1
+	jeql	Lend		# size was 1: only the top limb is left to produce
+
+Loop:	movl	(r6)+,r3	# r3 = next higher source limb
+	ashq	r8,r2,r4	# r5:r4 = r3:r2 << (32 - cnt); r5 is the finished result limb
+	movl	r5,(r7)+
+	movl	r3,r2		# higher limb becomes the low half of the next pair
+	sobgtr	r1,Loop
+
+Lend:	clrl	r3		# zeros are shifted into the top limb
+	ashq	r8,r2,r4
+	movl	r5,(r7)		# store the most significant result limb
+	ret
diff --git a/gmp/mpn/vax/sub_n.asm b/gmp/mpn/vax/sub_n.asm
deleted file mode 100644
index 2844ef2cc1..0000000000
--- a/gmp/mpn/vax/sub_n.asm
+++ /dev/null
@@ -1,64 +0,0 @@
-dnl  VAX mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
-dnl  store difference in a third limb vector.
-
-dnl  Copyright 1999, 2000, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ASM_START()
-PROLOGUE(mpn_sub_n)
-	.word	0x0
-	movl	16(ap), r0
-	movl	12(ap), r1
-	movl	8(ap), r2
-	movl	4(ap), r3
-	mnegl	r0, r5
-	addl2	$3, r0
-	ashl	$-2, r0, r0	C unroll loop count
-	bicl2	$-4, r5		C mask out low 2 bits
-	movaq	(r5)[r5], r5	C 9x
-	jmp	L(top)[r5]
-
-L(top):	movl	(r2)+, r4
-	sbwc	(r1)+, r4
-	movl	r4, (r3)+
-	movl	(r2)+, r4
-	sbwc	(r1)+, r4
-	movl	r4, (r3)+
-	movl	(r2)+, r4
-	sbwc	(r1)+, r4
-	movl	r4, (r3)+
-	movl	(r2)+, r4
-	sbwc	(r1)+, r4
-	movl	r4, (r3)+
-	sobgtr	r0, L(top)
-
-	adwc	r0, r0
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/vax/sub_n.s b/gmp/mpn/vax/sub_n.s
new file mode 100644
index 0000000000..c9ad1ecfb8
--- /dev/null
+++ b/gmp/mpn/vax/sub_n.s
@@ -0,0 +1,59 @@
+# VAX __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
+# difference in a third limb vector.
+
+# Copyright 1999, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS (read from the argument list through ap; borrow returned in r0)
+# res_ptr	(ap + 4)
+# s1_ptr	(ap + 8)
+# s2_ptr	(ap + 12)
+# size		(ap + 16)
+
+.text
+	.align 1
+.globl ___gmpn_sub_n
+___gmpn_sub_n:
+	.word	0x0		# register save mask: empty, only r0-r5 are used
+	movl	16(ap),r0	# r0 = size
+	movl	12(ap),r1	# r1 = s2_ptr
+	movl	8(ap),r2	# r2 = s1_ptr
+	movl	4(ap),r3	# r3 = res_ptr
+	mnegl	r0,r5		# r5 = -size
+	addl2	$3,r0
+	ashl	$-2,r0,r0	# r0 = (size + 3) >> 2 = unrolled loop count
+	bicl2	$-4,r5		# r5 = (-size) & 3 = limb groups to skip on the first pass
+	movaq	(r5)[r5],r5	# r5 = r5 + 8*r5 = 9x; presumably each 3-instruction group is 9 bytes -- TODO confirm
+	jmp	Loop(r5)	# enter the 4-way unrolled loop part-way through
+
+Loop:	movl	(r2)+,r4	# r4 = next s1 limb
+	sbwc	(r1)+,r4	# r4 = r4 - s2 limb - borrow
+	movl	r4,(r3)+	# store the difference limb
+	movl	(r2)+,r4
+	sbwc	(r1)+,r4
+	movl	r4,(r3)+
+	movl	(r2)+,r4
+	sbwc	(r1)+,r4
+	movl	r4,(r3)+
+	movl	(r2)+,r4
+	sbwc	(r1)+,r4
+	movl	r4,(r3)+
+	sobgtr	r0,Loop
+
+	adwc	r0,r0		# r0 is 0 after the loop, so r0 = final borrow (C flag)
+	ret
diff --git a/gmp/mpn/vax/submul_1.asm b/gmp/mpn/vax/submul_1.asm
deleted file mode 100644
index 60d47fcd6f..0000000000
--- a/gmp/mpn/vax/submul_1.asm
+++ /dev/null
@@ -1,124 +0,0 @@
-dnl  VAX mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
-dnl  result from a second limb vector.
-
-dnl  Copyright 1992, 1994, 1996, 2000, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ASM_START()
-PROLOGUE(mpn_submul_1)
-	.word	0xfc0
-	movl	12(ap), r4
-	movl	8(ap), r8
-	movl	4(ap), r9
-	clrl	r3
-	incl	r4
-	ashl	$-1, r4, r7
-	clrl	r11
-	movl	16(ap), r6
-	jlss	L(v0_big)
-	jlbc	r4, L(1)
-
-C Loop for v0 < 0x80000000
-L(tp1):	movl	(r8)+, r1
-	jlss	L(1n0)
-	emul	r1, r6, $0, r2
-	addl2	r11, r2
-	adwc	$0, r3
-	subl2	r2, (r9)+
-	adwc	$0, r3
-L(1):	movl	(r8)+, r1
-	jlss	L(1n1)
-L(1p1):	emul	r1, r6, $0, r10
-	addl2	r3, r10
-	adwc	$0, r11
-	subl2	r10, (r9)+
-	adwc	$0, r11
-
-	sobgtr	r7, L(tp1)
-	movl	r11, r0
-	ret
-
-L(1n0):	emul	r1, r6, $0, r2
-	addl2	r11, r2
-	adwc	r6, r3
-	subl2	r2, (r9)+
-	adwc	$0, r3
-	movl	(r8)+, r1
-	jgeq	L(1p1)
-L(1n1):	emul	r1, r6, $0, r10
-	addl2	r3, r10
-	adwc	r6, r11
-	subl2	r10, (r9)+
-	adwc	$0, r11
-
-	sobgtr	r7, L(tp1)
-	movl	r11, r0
-	ret
-
-L(v0_big):
-	jlbc	r4, L(2)
-
-C Loop for v0 >= 0x80000000
-L(tp2):	movl	(r8)+, r1
-	jlss	L(2n0)
-	emul	r1, r6, $0, r2
-	addl2	r11, r2
-	adwc	r1, r3
-	subl2	r2, (r9)+
-	adwc	$0, r3
-L(2):	movl	(r8)+, r1
-	jlss	L(2n1)
-L(2p1):	emul	r1, r6, $0, r10
-	addl2	r3, r10
-	adwc	r1, r11
-	subl2	r10, (r9)+
-	adwc	$0, r11
-
-	sobgtr	r7, L(tp2)
-	movl	r11, r0
-	ret
-
-L(2n0):	emul	r1, r6, $0, r2
-	addl2	r11, r2
-	adwc	r6, r3
-	subl2	r2, (r9)+
-	adwc	r1, r3
-	movl	(r8)+, r1
-	jgeq	L(2p1)
-L(2n1):	emul	r1, r6, $0, r10
-	addl2	r3, r10
-	adwc	r6, r11
-	subl2	r10, (r9)+
-	adwc	r1, r11
-
-	sobgtr	r7, L(tp2)
-	movl	r11, r0
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/vax/submul_1.s b/gmp/mpn/vax/submul_1.s
new file mode 100644
index 0000000000..ad0ddbbacb
--- /dev/null
+++ b/gmp/mpn/vax/submul_1.s
@@ -0,0 +1,124 @@
+# VAX __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
+
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+# INPUT PARAMETERS (read from the argument list through ap; borrow limb returned in r0)
+# res_ptr	(ap + 4)
+# s1_ptr	(ap + 8)
+# size		(ap + 12)
+# s2_limb	(ap + 16)
+
+.text
+	.align 1
+.globl ___gmpn_submul_1
+___gmpn_submul_1:
+	.word	0xfc0		# register save mask: bits 6-11, i.e. r6-r11
+	movl	12(ap),r4	# r4 = size
+	movl	8(ap),r8	# r8 = s1_ptr
+	movl	4(ap),r9	# r9 = res_ptr
+	movl	16(ap),r6	# r6 = s2_limb; sets N for the test below
+	jlss	s2_big		# s2_limb has its top bit set: use Loop2
+
+	clrl	r3		# carry-in when entering at L1 (odd size)
+	incl	r4
+	ashl	$-1,r4,r7	# r7 = (size + 1) >> 1 = passes through the 2-limb loop
+	jlbc	r4,L1		# size + 1 even, i.e. size odd: enter at the second half
+	clrl	r11		# carry-in when entering at Loop1 (even size)
+
+# Loop for S2_LIMB < 0x80000000
+Loop1:	movl	(r8)+,r1	# r1 = next s1 limb; sets N for the test below
+	jlss	L1n0		# limb is negative as a signed value: corrected path
+	emul	r1,r6,$0,r2	# r3:r2 = signed r1 * r6
+	addl2	r11,r2		# add the carry limb from the previous step
+	adwc	$0,r3
+	subl2	r2,(r9)+	# subtract low product word from the result limb
+	adwc	$0,r3		# fold the borrow in; r3 = carry limb for the next step
+L1:	movl	(r8)+,r1
+	jlss	L1n1
+L1p1:	emul	r1,r6,$0,r10	# r11:r10 = signed r1 * r6
+	addl2	r3,r10
+	adwc	$0,r11
+	subl2	r10,(r9)+
+	adwc	$0,r11		# fold the borrow in; r11 = carry limb for the next step
+
+	sobgtr	r7,Loop1
+	movl	r11,r0		# return the final borrow limb
+	ret
+
+L1n0:	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	r6,r3		# s1 limb was negative for emul: add r6 to the high word
+	subl2	r2,(r9)+
+	adwc	$0,r3
+	movl	(r8)+,r1
+	jgeq	L1p1		# next limb non-negative: rejoin the uncorrected path
+L1n1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r6,r11		# same high-word correction, second half of the loop
+	subl2	r10,(r9)+
+	adwc	$0,r11
+
+	sobgtr	r7,Loop1
+	movl	r11,r0		# return the final borrow limb
+	ret
+
+
+s2_big:	clrl	r3		# same setup as above, for the s2_limb >= 0x80000000 loop
+	incl	r4
+	ashl	$-1,r4,r7	# r7 = (size + 1) >> 1
+	jlbc	r4,L2		# size odd: enter at the second half
+	clrl	r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2:	movl	(r8)+,r1
+	jlss	L2n0
+	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	r1,r3		# r6 is negative for emul: add r1 to the high word
+	subl2	r2,(r9)+
+	adwc	$0,r3
+L2:	movl	(r8)+,r1
+	jlss	L2n1
+L2p1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r1,r11		# r6 is negative for emul: add r1 to the high word
+	subl2	r10,(r9)+
+	adwc	$0,r11
+
+	sobgtr	r7,Loop2
+	movl	r11,r0		# return the final borrow limb
+	ret
+
+L2n0:	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	r6,r3		# both operands negative: add r6 here ...
+	subl2	r2,(r9)+
+	adwc	r1,r3		# ... and r1 together with the borrow from the limb subtract
+	movl	(r8)+,r1
+	jgeq	L2p1
+L2n1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r6,r11		# both operands negative: add r6 here ...
+	subl2	r10,(r9)+
+	adwc	r1,r11		# ... and r1 together with the borrow from the limb subtract
+
+	sobgtr	r7,Loop2
+	movl	r11,r0		# return the final borrow limb
+	ret
diff --git a/gmp/mpn/x86/README b/gmp/mpn/x86/README
index 8d7ac9080d..883db227d2 100644
--- a/gmp/mpn/x86/README
+++ b/gmp/mpn/x86/README
@@ -1,30 +1,19 @@
-Copyright 1999-2002 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/x86/aors_n.asm b/gmp/mpn/x86/aors_n.asm
index 5d359f59b6..c8969995c8 100644
--- a/gmp/mpn/x86/aors_n.asm
+++ b/gmp/mpn/x86/aors_n.asm
@@ -1,42 +1,32 @@
 dnl  x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
 
-dnl  Copyright 1992, 1994-1996, 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl  Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C     cycles/limb
-C P5	3.375
-C P6	3.125
-C K6	3.5
-C K7	2.25
-C P4	8.75
+C P5:   3.375
+C P6:   3.125
+C K6:   3.5
+C K7:   2.25
+C P4:   8.75
 
 
 ifdef(`OPERATION_add_n',`
@@ -109,7 +99,7 @@ L(0a):	leal	(%eax,%eax,8),%eax
 	C possible to simplify.
 	pushl	%ebp		FRAME_pushl()
 	movl	PARAM_CARRY,%ebp
-	shrl	%ebp			C shift bit 0 into carry
+	shrl	$1,%ebp			C shift bit 0 into carry
 	popl	%ebp		FRAME_popl()
 
 	jmp	*%eax			C jump into loop
@@ -158,7 +148,7 @@ L(0b):	leal	(%eax,%eax,8),%eax
 L(oopgo):
 	pushl	%ebp		FRAME_pushl()
 	movl	PARAM_CARRY,%ebp
-	shrl	%ebp			C shift bit 0 into carry
+	shrl	$1,%ebp			C shift bit 0 into carry
 	popl	%ebp		FRAME_popl()
 
 	ALIGN(16)
diff --git a/gmp/mpn/x86/aorsmul_1.asm b/gmp/mpn/x86/aorsmul_1.asm
index 54a8905441..b4db427657 100644
--- a/gmp/mpn/x86/aorsmul_1.asm
+++ b/gmp/mpn/x86/aorsmul_1.asm
@@ -1,51 +1,40 @@
 dnl  x86 __gmpn_addmul_1 (for 386 and 486) -- Multiply a limb vector with a
 dnl  limb and add the result to a second limb vector.
 
-dnl  Copyright 1992, 1994, 1997, 1999-2002, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
+dnl  Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002, 2005 Free Software
+dnl  Foundation, Inc.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C			    cycles/limb
-C P5				14.75
-C P6 model 0-8,10-12		 7.5
-C P6 model 9  (Banias)		 6.7
-C P6 model 13 (Dothan)		 6.75
-C P4 model 0  (Willamette)	24.0
-C P4 model 1  (?)		24.0
-C P4 model 2  (Northwood)	24.0
+
+C                           cycles/limb
+C P5:                           14.75
+C P6 model 0-8,10-12             7.5
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)           6.75
+C P4 model 0  (Willamette)      24.0
+C P4 model 1  (?)               24.0
+C P4 model 2  (Northwood)       24.0
 C P4 model 3  (Prescott)
 C P4 model 4  (Nocona)
-C Intel Atom
-C AMD K6			12.5
-C AMD K7			 5.25
-C AMD K8
-C AMD K10
+C K6:                           12.5
+C K7:                            5.25
+C K8:
 
 
 ifdef(`OPERATION_addmul_1',`
diff --git a/gmp/mpn/x86/atom/aorrlsh1_n.asm b/gmp/mpn/x86/atom/aorrlsh1_n.asm
deleted file mode 100644
index cd1a650022..0000000000
--- a/gmp/mpn/x86/atom/aorrlsh1_n.asm
+++ /dev/null
@@ -1,53 +0,0 @@
-dnl  Intel Atom mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
-
-dnl  Contributed to the GNU project by Marco Bodrato.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 1)
-define(RSH, 31)
-
-ifdef(`OPERATION_addlsh1_n', `
-	define(M4_inst,        adc)
-	define(M4_opp,         sub)
-	define(M4_function,    mpn_addlsh1_n)
-	define(M4_function_c,  mpn_addlsh1_nc)
-',`ifdef(`OPERATION_rsblsh1_n', `
-	define(M4_inst,        sbb)
-	define(M4_opp,         add)
-	define(M4_function,    mpn_rsblsh1_n)
-	define(M4_function_c,  mpn_rsblsh1_nc)
-',`m4_error(`Need OPERATION_addlsh1_n or OPERATION_rsblsh1_n
-')')')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
-
-include_mpn(`x86/atom/aorrlshC_n.asm')
diff --git a/gmp/mpn/x86/atom/aorrlsh2_n.asm b/gmp/mpn/x86/atom/aorrlsh2_n.asm
deleted file mode 100644
index 10f4419de9..0000000000
--- a/gmp/mpn/x86/atom/aorrlsh2_n.asm
+++ /dev/null
@@ -1,53 +0,0 @@
-dnl  Intel Atom mpn_addlsh2_n/mpn_rsblsh2_n -- rp[] = (vp[] << 2) +- up[]
-
-dnl  Contributed to the GNU project by Marco Bodrato.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 2)
-define(RSH, 30)
-
-ifdef(`OPERATION_addlsh2_n', `
-	define(M4_inst,        adcl)
-	define(M4_opp,         subl)
-	define(M4_function,    mpn_addlsh2_n)
-	define(M4_function_c,  mpn_addlsh2_nc)
-',`ifdef(`OPERATION_rsblsh2_n', `
-	define(M4_inst,        sbbl)
-	define(M4_opp,         addl)
-	define(M4_function,    mpn_rsblsh2_n)
-	define(M4_function_c,  mpn_rsblsh2_nc)
-',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_rsblsh2_n
-')')')
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc)
-
-include_mpn(`x86/atom/aorrlshC_n.asm')
diff --git a/gmp/mpn/x86/atom/aorrlshC_n.asm b/gmp/mpn/x86/atom/aorrlshC_n.asm
deleted file mode 100644
index 71cfe490d6..0000000000
--- a/gmp/mpn/x86/atom/aorrlshC_n.asm
+++ /dev/null
@@ -1,156 +0,0 @@
-dnl  Intel Atom mpn_addlshC_n/mpn_rsblshC_n -- rp[] = (vp[] << C) +- up[]
-
-dnl  Contributed to the GNU project by Marco Bodrato.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                          mp_size_t size);
-C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                           mp_size_t size, mp_limb_t carry);
-C mp_limb_t mpn_rsblshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                          mp_size_t size);
-C mp_limb_t mpn_rsblshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                           mp_size_t size, mp_signed_limb_t carry);
-
-C				cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C Intel Atom			 6
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-defframe(PARAM_CORB,	20)
-defframe(PARAM_SIZE,	16)
-defframe(PARAM_DBLD,	12)
-defframe(PARAM_SRC,	 8)
-defframe(PARAM_DST,	 4)
-
-dnl  re-use parameter space
-define(VAR_COUNT,`PARAM_SIZE')
-define(SAVE_EBP,`PARAM_DBLD')
-define(SAVE_VP,`PARAM_SRC')
-define(SAVE_UP,`PARAM_DST')
-
-define(M, eval(m4_lshift(1,LSH)))
-define(`rp',  `%edi')
-define(`up',  `%esi')
-define(`vp',  `%ebx')
-
-ASM_START()
-	TEXT
-	ALIGN(8)
-
-PROLOGUE(M4_function_c)
-deflit(`FRAME',0)
-	movl	PARAM_CORB, %eax
-	movl	%eax, %edx
-	shr	$LSH, %edx
-	andl	$1, %edx
-	M4_opp	%edx, %eax
-	jmp	L(start_nc)
-EPILOGUE()
-
-PROLOGUE(M4_function)
-deflit(`FRAME',0)
-
-	xor	%eax, %eax
-	xor	%edx, %edx
-L(start_nc):
-	push	rp			FRAME_pushl()
-
-	mov	PARAM_SIZE, %ecx	C size
-	mov	PARAM_DST, rp
-	mov	up, SAVE_UP
-	incl	%ecx			C size + 1
-	mov	PARAM_SRC, up
-	mov	vp, SAVE_VP
-	shr	%ecx			C (size+1)\2
-	mov	PARAM_DBLD, vp
-	mov	%ebp, SAVE_EBP
-	mov	%ecx, VAR_COUNT
-	jnc	L(entry)		C size odd
-
-	shr	%edx			C size even
-	mov	(vp), %ecx
-	lea	4(vp), vp
-	lea	(%eax,%ecx,M), %edx
-	mov	%ecx, %eax
-	lea	-4(up), up
-	lea	-4(rp), rp
-	jmp	L(enteven)
-
-	ALIGN(16)
-L(oop):
-	lea	(%eax,%ecx,M), %ebp
-	shr	$RSH, %ecx
-	mov	4(vp), %eax
-	shr	%edx
-	lea	8(vp), vp
-	M4_inst	(up), %ebp
-	lea	(%ecx,%eax,M), %edx
-	mov	%ebp, (rp)
-L(enteven):
-	M4_inst	4(up), %edx
-	lea	8(up), up
-	mov	%edx, 4(rp)
-	adc	%edx, %edx
-	shr	$RSH, %eax
-	lea	8(rp), rp
-L(entry):
-	mov	(vp), %ecx
-	decl	VAR_COUNT
-	jnz	L(oop)
-
-	lea	(%eax,%ecx,M), %ebp
-	shr	$RSH, %ecx
-	shr	%edx
-	mov	SAVE_VP, vp
-	M4_inst	(up), %ebp
-	mov	%ecx, %eax
-	mov	SAVE_UP, up
-	M4_inst	$0, %eax
-	mov	%ebp, (rp)
-	mov	SAVE_EBP, %ebp
-	pop	rp			FRAME_popl()
-	ret
-EPILOGUE()
-
-ASM_END()
diff --git a/gmp/mpn/x86/atom/aors_n.asm b/gmp/mpn/x86/atom/aors_n.asm
deleted file mode 100644
index 45ec287c3a..0000000000
--- a/gmp/mpn/x86/atom/aors_n.asm
+++ /dev/null
@@ -1,159 +0,0 @@
-dnl  Intel Atom mpn_add_n/mpn_sub_n -- rp[] = up[] +- vp[].
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  Contributed to the GNU project by Marco Bodrato.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C			    cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C Intel Atom			 3
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-ifdef(`OPERATION_add_n', `
-	define(M4_inst,        adcl)
-	define(M4_function_n,  mpn_add_n)
-	define(M4_function_nc, mpn_add_nc)
-	define(M4_description, add)
-',`ifdef(`OPERATION_sub_n', `
-	define(M4_inst,        sbbl)
-	define(M4_function_n,  mpn_sub_n)
-	define(M4_function_nc, mpn_sub_nc)
-	define(M4_description, subtract)
-',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
-')')')
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                         mp_size_t size);
-C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C	                   mp_size_t size, mp_limb_t carry);
-C
-C Calculate src1,size M4_description src2,size, and store the result in
-C dst,size.  The return value is the carry bit from the top of the result (1
-C or 0).
-C
-C The _nc version accepts 1 or 0 for an initial carry into the low limb of
-C the calculation.  Note values other than 1 or 0 here will lead to garbage
-C results.
-
-defframe(PARAM_CARRY,20)
-defframe(PARAM_SIZE, 16)
-defframe(PARAM_SRC2, 12)
-defframe(PARAM_SRC1, 8)
-defframe(PARAM_DST,  4)
-
-dnl  re-use parameter space
-define(SAVE_RP,`PARAM_SIZE')
-define(SAVE_VP,`PARAM_SRC1')
-define(SAVE_UP,`PARAM_DST')
-
-define(`rp',  `%edi')
-define(`up',  `%esi')
-define(`vp',  `%ebx')
-define(`cy',  `%ecx')
-define(`r1',  `%ecx')
-define(`r2',  `%edx')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-deflit(`FRAME',0)
-
-PROLOGUE(M4_function_n)
-	xor	cy, cy			C carry
-L(start):
-	mov	PARAM_SIZE, %eax	C size
-	mov	rp, SAVE_RP
-	mov	PARAM_DST, rp
-	mov	up, SAVE_UP
-	mov	PARAM_SRC1, up
-	shr	%eax			C size >> 1
-	mov	vp, SAVE_VP
-	mov	PARAM_SRC2, vp
-	jz	L(one)			C size == 1
-	jc	L(three)		C size % 2 == 1
-
-	shr	cy
-	mov	(up), r2
-	lea	4(up), up
-	lea	4(vp), vp
-	lea	-4(rp), rp
-	jmp	L(entry)
-L(one):
-	shr	cy
-	mov	(up), r1
-	jmp	L(end)
-L(three):
-	shr	cy
-	mov	(up), r1
-
-	ALIGN(16)
-L(oop):
-	M4_inst	(vp), r1
-	lea	8(up), up
-	mov	-4(up), r2
-	lea	8(vp), vp
-	mov	r1, (rp)
-L(entry):
-	M4_inst	-4(vp), r2
-	lea	8(rp), rp
-	dec	%eax
-	mov	(up), r1
-	mov	r2, -4(rp)
-	jnz	L(oop)
-
-L(end):					C %eax is zero here
-	mov	SAVE_UP, up
-	M4_inst	(vp), r1
-	mov	SAVE_VP, vp
-	mov	r1, (rp)
-	adc	%eax, %eax
-	mov	SAVE_RP, rp
-	ret
-EPILOGUE()
-
-PROLOGUE(M4_function_nc)
-	mov	PARAM_CARRY, cy		C carry
-	jmp	L(start)
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/aorslshC_n.asm b/gmp/mpn/x86/atom/aorslshC_n.asm
deleted file mode 100644
index 75ace65e51..0000000000
--- a/gmp/mpn/x86/atom/aorslshC_n.asm
+++ /dev/null
@@ -1,247 +0,0 @@
-dnl  Intel Atom mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C)
-
-dnl  Contributed to the GNU project by Marco Bodrato.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C mp_limb_t mpn_addlshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
-C mp_limb_t mpn_addlshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C				mp_limb_t carry);
-C mp_limb_t mpn_sublshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,);
-C mp_limb_t mpn_sublshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C				mp_signed_limb_t borrow);
-
-defframe(PARAM_CORB,	16)
-defframe(PARAM_SIZE,	12)
-defframe(PARAM_SRC,	 8)
-defframe(PARAM_DST,	 4)
-
-C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                          mp_size_t size,);
-C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                           mp_size_t size, mp_limb_t carry);
-C mp_limb_t mpn_sublshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                          mp_size_t size,);
-C mp_limb_t mpn_sublshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C                           mp_size_t size, mp_limb_t borrow);
-
-C if src1 == dst, _ip1 is used
-
-C					cycles/limb
-C				dst!=src1,src2	dst==src1
-C P5
-C P6 model 0-8,10-12
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C Intel Atom			 7		 6
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-defframe(GPARAM_CORB,	20)
-defframe(GPARAM_SIZE,	16)
-defframe(GPARAM_SRC2,	12)
-
-dnl  re-use parameter space
-define(SAVE_EBP,`PARAM_SIZE')
-define(SAVE_EBX,`PARAM_SRC')
-define(SAVE_UP,`PARAM_DST')
-
-define(M, eval(m4_lshift(1,LSH)))
-define(`rp',  `%edi')
-define(`up',  `%esi')
-
-ASM_START()
-	TEXT
-	ALIGN(8)
-
-PROLOGUE(M4_ip_function_c)
-deflit(`FRAME',0)
-	movl	PARAM_CORB, %ecx
-	movl	%ecx, %edx
-	shr	$LSH, %edx
-	andl	$1, %edx
-	M4_opp	%edx, %ecx
-	jmp	L(start_nc)
-EPILOGUE()
-
-PROLOGUE(M4_ip_function)
-deflit(`FRAME',0)
-
-	xor	%ecx, %ecx
-	xor	%edx, %edx
-L(start_nc):
-	push	rp			FRAME_pushl()
-	mov	PARAM_DST, rp
-	mov	up, SAVE_UP
-	mov	PARAM_SRC, up
-	mov	%ebx, SAVE_EBX
-	mov	PARAM_SIZE, %ebx	C size
-L(inplace):
-	incl	%ebx			C size + 1
-	shr	%ebx			C (size+1)\2
-	mov	%ebp, SAVE_EBP
-	jnc	L(entry)		C size odd
-
-	add	%edx, %edx		C size even
-	mov	%ecx, %ebp
-	mov	(up), %ecx
-	lea	-4(rp), rp
-	lea	(%ebp,%ecx,M), %eax
-	lea	4(up), up
-	jmp	L(enteven)
-
-	ALIGN(16)
-L(oop):
-	lea	(%ecx,%eax,M), %ebp
-	shr	$RSH, %eax
-	mov	4(up), %ecx
-	add	%edx, %edx
-	lea	8(up), up
-	M4_inst	%ebp, (rp)
-	lea	(%eax,%ecx,M), %eax
-
-L(enteven):
-	M4_inst	%eax, 4(rp)
-	lea	8(rp), rp
-
-	sbb	%edx, %edx
-	shr	$RSH, %ecx
-
-L(entry):
-	mov	(up), %eax
-	decl	%ebx
-	jnz	L(oop)
-
-	lea	(%ecx,%eax,M), %ebp
-	shr	$RSH, %eax
-	shr	%edx
-	M4_inst	%ebp, (rp)
-	mov	SAVE_UP, up
-	adc	$0, %eax
-	mov	SAVE_EBP, %ebp
-	mov	SAVE_EBX, %ebx
-	pop	rp			FRAME_popl()
-	ret
-EPILOGUE()
-
-PROLOGUE(M4_function_c)
-deflit(`FRAME',0)
-	movl	GPARAM_CORB, %ecx
-	movl	%ecx, %edx
-	shr	$LSH, %edx
-	andl	$1, %edx
-	M4_opp	%edx, %ecx
-	jmp	L(generic_nc)
-EPILOGUE()
-
-PROLOGUE(M4_function)
-deflit(`FRAME',0)
-
-	xor	%ecx, %ecx
-	xor	%edx, %edx
-L(generic_nc):
-	push	rp			FRAME_pushl()
-	mov	PARAM_DST, rp
-	mov	up, SAVE_UP
-	mov	PARAM_SRC, up
-	cmp	rp, up
-	mov	%ebx, SAVE_EBX
-	jne	L(general)
-	mov	GPARAM_SIZE, %ebx	C size
-	mov	GPARAM_SRC2, up
-	jmp	L(inplace)
-
-L(general):
-	mov	GPARAM_SIZE, %eax	C size
-	mov	%ebx, SAVE_EBX
-	incl	%eax			C size + 1
-	mov	up, %ebx		C vp
-	mov	GPARAM_SRC2, up		C up
-	shr	%eax			C (size+1)\2
-	mov	%ebp, SAVE_EBP
-	mov	%eax, GPARAM_SIZE
-	jnc	L(entry2)		C size odd
-
-	add	%edx, %edx		C size even
-	mov	%ecx, %ebp
-	mov	(up), %ecx
-	lea	-4(rp), rp
-	lea	-4(%ebx), %ebx
-	lea	(%ebp,%ecx,M), %eax
-	lea	4(up), up
-	jmp	L(enteven2)
-
-	ALIGN(16)
-L(oop2):
-	lea	(%ecx,%eax,M), %ebp
-	shr	$RSH, %eax
-	mov	4(up), %ecx
-	add	%edx, %edx
-	lea	8(up), up
-	mov	(%ebx), %edx
-	M4_inst	%ebp, %edx
-	lea	(%eax,%ecx,M), %eax
-	mov	%edx, (rp)
-L(enteven2):
-	mov	4(%ebx), %edx
-	lea	8(%ebx), %ebx
-	M4_inst	%eax, %edx
-	mov	%edx, 4(rp)
-	sbb	%edx, %edx
-	shr	$RSH, %ecx
-	lea	8(rp), rp
-L(entry2):
-	mov	(up), %eax
-	decl	GPARAM_SIZE
-	jnz	L(oop2)
-
-	lea	(%ecx,%eax,M), %ebp
-	shr	$RSH, %eax
-	shr	%edx
-	mov	(%ebx), %edx
-	M4_inst	%ebp, %edx
-	mov	%edx, (rp)
-	mov	SAVE_UP, up
-	adc	$0, %eax
-	mov	SAVE_EBP, %ebp
-	mov	SAVE_EBX, %ebx
-	pop	rp			FRAME_popl()
-	ret
-EPILOGUE()
-
-ASM_END()
diff --git a/gmp/mpn/x86/atom/bdiv_q_1.asm b/gmp/mpn/x86/atom/bdiv_q_1.asm
deleted file mode 100644
index 31e908ec44..0000000000
--- a/gmp/mpn/x86/atom/bdiv_q_1.asm
+++ /dev/null
@@ -1,35 +0,0 @@
-dnl  Intel Atom mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel
-dnl  division by 1-limb divisor, returning quotient only.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
-include_mpn(`x86/pentium/bdiv_q_1.asm')
diff --git a/gmp/mpn/x86/atom/cnd_add_n.asm b/gmp/mpn/x86/atom/cnd_add_n.asm
deleted file mode 100644
index 50bf2ad64b..0000000000
--- a/gmp/mpn/x86/atom/cnd_add_n.asm
+++ /dev/null
@@ -1,113 +0,0 @@
-dnl  X86 mpn_cnd_add_n optimised for Intel Atom.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C			    cycles/limb
-C P5				 ?
-C P6 model 0-8,10-12		 ?
-C P6 model 9   (Banias)		 ?
-C P6 model 13  (Dothan)		 ?
-C P4 model 0-1 (Willamette)	 ?
-C P4 model 2   (Northwood)	 ?
-C P4 model 3-4 (Prescott)	 ?
-C Intel atom			 4.67
-C AMD K6			 ?
-C AMD K7			 ?
-C AMD K8			 ?
-
-
-define(`rp',  `%edi')
-define(`up',  `%esi')
-define(`vp',  `%ebp')
-define(`n',   `%ecx')
-define(`cnd', `20(%esp)')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_cnd_add_n)
-	push	%edi
-	push	%esi
-	push	%ebx
-	push	%ebp
-
-	mov	cnd, %eax		C make cnd into a mask (1)
-	mov	24(%esp), rp
-	neg	%eax			C make cnd into a mask (1)
-	mov	28(%esp), up
-	sbb	%eax, %eax		C make cnd into a mask (1)
-	mov	32(%esp), vp
-	mov	%eax, cnd		C make cnd into a mask (1)
-	mov	36(%esp), n
-
-	xor	%edx, %edx
-
-	shr	$1, n
-	jnc	L(top)
-
-	mov	0(vp), %eax
-	and	cnd, %eax
-	lea	4(vp), vp
-	add	0(up), %eax
-	lea	4(rp), rp
-	lea	4(up), up
-	sbb	%edx, %edx
-	mov	%eax, -4(rp)
-	inc	n
-	dec	n
-	je	L(end)
-
-L(top):	sbb	%edx, %edx
-	mov	0(vp), %eax
-	and	cnd, %eax
-	lea	8(vp), vp
-	lea	8(rp), rp
-	mov	-4(vp), %ebx
-	and	cnd, %ebx
-	add	%edx, %edx
-	adc	0(up), %eax
-	lea	8(up), up
-	mov	%eax, -8(rp)
-	adc	-4(up), %ebx
-	dec	n
-	mov	%ebx, -4(rp)
-	jne	L(top)
-
-L(end):	mov	$0, %eax
-	adc	%eax, %eax
-
-	pop	%ebp
-	pop	%ebx
-	pop	%esi
-	pop	%edi
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/cnd_sub_n.asm b/gmp/mpn/x86/atom/cnd_sub_n.asm
deleted file mode 100644
index 221bedca37..0000000000
--- a/gmp/mpn/x86/atom/cnd_sub_n.asm
+++ /dev/null
@@ -1,124 +0,0 @@
-dnl  X86 mpn_cnd_sub_n optimised for Intel Atom.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C			    cycles/limb
-C P5				 ?
-C P6 model 0-8,10-12		 ?
-C P6 model 9   (Banias)		 ?
-C P6 model 13  (Dothan)		 ?
-C P4 model 0-1 (Willamette)	 ?
-C P4 model 2   (Northwood)	 ?
-C P4 model 3-4 (Prescott)	 ?
-C Intel atom			 5.67
-C AMD K6			 ?
-C AMD K7			 ?
-C AMD K8			 ?
-
-
-define(`rp',  `%edi')
-define(`up',  `%esi')
-define(`vp',  `%ebp')
-define(`n',   `%ecx')
-define(`cnd', `20(%esp)')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_cnd_sub_n)
-	push	%edi
-	push	%esi
-	push	%ebx
-	push	%ebp
-
-	mov	cnd, %eax		C make cnd into a mask (1)
-	mov	24(%esp), rp
-	neg	%eax			C make cnd into a mask (1)
-	mov	28(%esp), up
-	sbb	%eax, %eax		C make cnd into a mask (1)
-	mov	32(%esp), vp
-	mov	%eax, cnd		C make cnd into a mask (1)
-	mov	36(%esp), n
-
-	xor	%edx, %edx
-
-	inc	n
-	shr	n
-	jnc	L(ent)
-
-	mov	0(vp), %eax
-	and	cnd, %eax
-	lea	4(vp), vp
-	mov	0(up), %edx
-	sub	%eax, %edx
-	lea	4(rp), rp
-	lea	4(up), up
-	mov	%edx, -4(rp)
-	sbb	%edx, %edx		C save cy
-
-L(ent):	mov	0(vp), %ebx
-	and	cnd, %ebx
-	add	%edx, %edx		C restore cy
-	mov	0(up), %edx
-	dec	n
-	je	L(end)
-
-L(top):	sbb	%ebx, %edx
-	mov	4(vp), %eax
-	mov	%edx, 0(rp)
-	sbb	%edx, %edx		C save cy
-	mov	8(vp), %ebx
-	lea	8(up), up
-	and	cnd, %ebx
-	and	cnd, %eax
-	add	%edx, %edx		C restore cy
-	mov	-4(up), %edx
-	lea	8(rp), rp
-	sbb	%eax, %edx
-	mov	%edx, -4(rp)
-	dec	n
-	mov	0(up), %edx
-	lea	8(vp), vp
-	jne	L(top)
-
-L(end):	sbb	%ebx, %edx
-	mov	%edx, 0(rp)
-
-	mov	$0, %eax
-	adc	%eax, %eax
-
-	pop	%ebp
-	pop	%ebx
-	pop	%esi
-	pop	%edi
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/dive_1.asm b/gmp/mpn/x86/atom/dive_1.asm
deleted file mode 100644
index 71036a15a4..0000000000
--- a/gmp/mpn/x86/atom/dive_1.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl  Intel Atom mpn_divexact_1 -- mpn by limb exact division.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_divexact_1)
-include_mpn(`x86/pentium/dive_1.asm')
diff --git a/gmp/mpn/x86/atom/gmp-mparam.h b/gmp/mpn/x86/atom/gmp-mparam.h
deleted file mode 100644
index 45df12806c..0000000000
--- a/gmp/mpn/x86/atom/gmp-mparam.h
+++ /dev/null
@@ -1,201 +0,0 @@
-/* Intel Atom/32 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 1667 MHz Pineview (Atom D510) */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-14, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 3
-#define MOD_1_UNNORM_THRESHOLD               5
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         11
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        10
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     13
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              4
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           31
-
-#define MUL_TOOM22_THRESHOLD                20
-#define MUL_TOOM33_THRESHOLD                74
-#define MUL_TOOM44_THRESHOLD               178
-#define MUL_TOOM6H_THRESHOLD               270
-#define MUL_TOOM8H_THRESHOLD               399
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     122
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     115
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     127
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     106
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 30
-#define SQR_TOOM3_THRESHOLD                105
-#define SQR_TOOM4_THRESHOLD                178
-#define SQR_TOOM6_THRESHOLD                303
-#define SQR_TOOM8_THRESHOLD                527
-
-#define MULMID_TOOM42_THRESHOLD             54
-
-#define MULMOD_BNM1_THRESHOLD               13
-#define SQRMOD_BNM1_THRESHOLD               18
-
-#define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    380, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
-    {     31, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
-    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
-    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
-    {     47, 8}, {     95,10}, {     31, 9}, {     79,10}, \
-    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255, 9}, {    135,10}, {     79, 9}, \
-    {    159,10}, {     95, 9}, {    191,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    511, 9}, {    271,10}, \
-    {    143, 9}, {    287, 8}, {    575,10}, {    159,11}, \
-    {     95,10}, {    191, 9}, {    383,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
-    {    543,10}, {    287, 9}, {    575,10}, {    303,11}, \
-    {    159,10}, {    319, 9}, {    639,10}, {    335, 9}, \
-    {    671,10}, {    351, 9}, {    703,11}, {    191,10}, \
-    {    383, 9}, {    767,10}, {    415, 9}, {    831,11}, \
-    {    223,10}, {    447,12}, {    127,11}, {    255,10}, \
-    {    543,11}, {    287,10}, {    607, 9}, {   1215,11}, \
-    {    319,10}, {    671,11}, {    351,10}, {    703,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
-    {    831,11}, {    447,13}, {    127,12}, {    255,11}, \
-    {    543,10}, {   1087,11}, {    607,10}, {   1215,12}, \
-    {    319,11}, {    735,12}, {    383,11}, {    831,12}, \
-    {    447,11}, {    959,13}, {    255,12}, {    511,11}, \
-    {   1087,12}, {    575,11}, {   1151,12}, {    703,11}, \
-    {   1471,13}, {    383,12}, {    831,11}, {   1663,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1215,13}, \
-    {    639,12}, {   1471,11}, {   2943,13}, {    767,12}, \
-    {   1663,13}, {    895,12}, {   1919,14}, {    511,13}, \
-    {   1023,12}, {   2111,13}, {   1151,12}, {   2431,13}, \
-    {   1407,12}, {   2943,14}, {    767,13}, {   1663,12}, \
-    {   3455,13}, {   1919,15}, {    511,14}, {   1023,13}, \
-    {   2431,14}, {   1279,13}, {   2943,12}, {   5887,14}, \
-    {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 150
-#define MUL_FFT_THRESHOLD                 4544
-
-#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    340, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     12, 5}, {     25, 6}, {     21, 7}, {     11, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
-    {     31, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
-    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
-    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
-    {     47,10}, {     31, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255,10}, {     79, 9}, {    159, 8}, {    319,10}, \
-    {     95, 9}, {    191,11}, {     63,10}, {    127, 9}, \
-    {    255, 8}, {    511, 9}, {    271,10}, {    143, 9}, \
-    {    287, 8}, {    575, 9}, {    303, 8}, {    607,10}, \
-    {    159, 9}, {    319,11}, {     95,10}, {    191, 9}, \
-    {    383,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543,10}, {    287, 9}, \
-    {    575,10}, {    303, 9}, {    607,10}, {    319, 9}, \
-    {    639,10}, {    335, 9}, {    671,10}, {    351, 9}, \
-    {    703,11}, {    191,10}, {    383, 9}, {    767,10}, \
-    {    415,11}, {    223,10}, {    447,12}, {    127,11}, \
-    {    255,10}, {    543,11}, {    287,10}, {    607,11}, \
-    {    319,10}, {    671,11}, {    351,10}, {    703,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
-    {    831,11}, {    479,13}, {    127,12}, {    255,11}, \
-    {    543,10}, {   1087,11}, {    607,12}, {    319,11}, \
-    {    671,10}, {   1343,11}, {    735,12}, {    383,11}, \
-    {    831,12}, {    447,11}, {    959,13}, {    255,12}, \
-    {    511,11}, {   1087,12}, {    575,11}, {   1215,12}, \
-    {    639,11}, {   1343,12}, {    703,11}, {   1407,13}, \
-    {    383,12}, {    831,11}, {   1663,12}, {    959,14}, \
-    {    255,13}, {    511,12}, {   1215,13}, {    639,12}, \
-    {   1471,13}, {    767,12}, {   1663,13}, {    895,12}, \
-    {   1791,14}, {    511,13}, {   1023,12}, {   2111,13}, \
-    {   1151,12}, {   2431,13}, {   1407,14}, {    767,13}, \
-    {   1663,12}, {   3455,13}, {   1791,15}, {    511,14}, \
-    {   1023,13}, {   2431,14}, {   1279,13}, {   2943,12}, \
-    {   5887,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 151
-#define SQR_FFT_THRESHOLD                 2880
-
-#define MULLO_BASECASE_THRESHOLD             6
-#define MULLO_DC_THRESHOLD                  48
-#define MULLO_MUL_N_THRESHOLD             8907
-
-#define DC_DIV_QR_THRESHOLD                 59
-#define DC_DIVAPPR_Q_THRESHOLD             250
-#define DC_BDIV_QR_THRESHOLD                59
-#define DC_BDIV_Q_THRESHOLD                169
-
-#define INV_MULMOD_BNM1_THRESHOLD           38
-#define INV_NEWTON_THRESHOLD               246
-#define INV_APPR_THRESHOLD                 246
-
-#define BINV_NEWTON_THRESHOLD              276
-#define REDC_1_TO_REDC_N_THRESHOLD          67
-
-#define MU_DIV_QR_THRESHOLD               1334
-#define MU_DIVAPPR_Q_THRESHOLD            1442
-#define MUPI_DIV_QR_THRESHOLD              114
-#define MU_BDIV_QR_THRESHOLD              1142
-#define MU_BDIV_Q_THRESHOLD               1334
-
-#define POWM_SEC_TABLE  1,22,98,416,1378
-
-#define MATRIX22_STRASSEN_THRESHOLD         13
-#define HGCD_THRESHOLD                     133
-#define HGCD_APPR_THRESHOLD                169
-#define HGCD_REDUCE_THRESHOLD             2479
-#define GCD_DC_THRESHOLD                   460
-#define GCDEXT_DC_THRESHOLD                342
-#define JACOBI_BASE_METHOD                   3
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        23
-#define SET_STR_DC_THRESHOLD               321
-#define SET_STR_PRECOMPUTE_THRESHOLD      1099
-
-#define FAC_DSC_THRESHOLD                  198
-#define FAC_ODD_THRESHOLD                   34
diff --git a/gmp/mpn/x86/atom/logops_n.asm b/gmp/mpn/x86/atom/logops_n.asm
deleted file mode 100644
index 3cb6d7310c..0000000000
--- a/gmp/mpn/x86/atom/logops_n.asm
+++ /dev/null
@@ -1,151 +0,0 @@
-dnl  Intel Atom mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  Contributed to the GNU project by Marco Bodrato.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C				   cycles/limb
-C				op	nop	opn
-C P5
-C P6 model 0-8,10-12
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C Intel Atom			 3	 3.5	 3.5
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-define(M4_choose_op,
-`ifdef(`OPERATION_$1',`
-define(`M4_function', `mpn_$1')
-define(`M4_want_pre', `$4')
-define(`M4_inst',     `$3')
-define(`M4_want_post',`$2')
-')')
-define(M4pre, `ifelse(M4_want_pre, yes,`$1')')
-define(M4post,`ifelse(M4_want_post,yes,`$1')')
-
-M4_choose_op( and_n,     , andl,    )
-M4_choose_op( andn_n,    , andl, yes)
-M4_choose_op( nand_n, yes, andl,    )
-M4_choose_op( ior_n,     ,  orl,    )
-M4_choose_op( iorn_n,    ,  orl, yes)
-M4_choose_op( nior_n, yes,  orl,    )
-M4_choose_op( xor_n,     , xorl,    )
-M4_choose_op( xnor_n, yes, xorl,    )
-
-ifdef(`M4_function',,
-`m4_error(`Unrecognised or undefined OPERATION symbol
-')')
-
-MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
-
-C void M4_function (mp_ptr dst, mp_srcptr src2, mp_srcptr src1, mp_size_t size);
-C
-
-defframe(PARAM_SIZE, 16)
-defframe(PARAM_SRC1, 12)
-defframe(PARAM_SRC2, 8)
-defframe(PARAM_DST,  4)
-
-dnl  re-use parameter space
-define(SAVE_RP,`PARAM_SIZE')
-define(SAVE_VP,`PARAM_SRC1')
-define(SAVE_UP,`PARAM_DST')
-
-define(`rp',  `%edi')
-define(`up',  `%esi')
-define(`vp',  `%ebx')
-define(`cnt', `%eax')
-define(`r1',  `%ecx')
-define(`r2',  `%edx')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-deflit(`FRAME',0)
-
-PROLOGUE(M4_function)
-	mov	PARAM_SIZE, cnt		C size
-	mov	rp, SAVE_RP
-	mov	PARAM_DST, rp
-	mov	up, SAVE_UP
-	mov	PARAM_SRC1, up
-	shr	cnt			C size >> 1
-	mov	vp, SAVE_VP
-	mov	PARAM_SRC2, vp
-	mov	(up), r1
-	jz	L(end)			C size == 1
-	jnc	L(even)			C size % 2 == 0
-
-	ALIGN(16)
-L(oop):
-M4pre(`	notl_or_xorl_GMP_NUMB_MASK(r1)')
-	M4_inst	(vp), r1
-	lea	8(up), up
-	mov	-4(up), r2
-M4post(`	notl_or_xorl_GMP_NUMB_MASK(r1)')
-	lea	8(vp), vp
-	mov	r1, (rp)
-L(entry):
-M4pre(`	notl_or_xorl_GMP_NUMB_MASK(r2)')
-	M4_inst	-4(vp), r2
-	lea	8(rp), rp
-M4post(`	notl_or_xorl_GMP_NUMB_MASK(r2)')
-	dec	cnt
-	mov	(up), r1
-	mov	r2, -4(rp)
-	jnz	L(oop)
-
-L(end):
-M4pre(`	notl_or_xorl_GMP_NUMB_MASK(r1)')
-	mov	SAVE_UP, up
-	M4_inst	(vp), r1
-M4post(`notl_or_xorl_GMP_NUMB_MASK(r1)')
-	mov	SAVE_VP, vp
-	mov	r1, (rp)
-	mov	SAVE_RP, rp
-	ret
-
-L(even):
-	mov	r1, r2
-	lea	4(up), up
-	lea	4(vp), vp
-	lea	-4(rp), rp
-	jmp	L(entry)
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/lshift.asm b/gmp/mpn/x86/atom/lshift.asm
deleted file mode 100644
index f2c70dd3e8..0000000000
--- a/gmp/mpn/x86/atom/lshift.asm
+++ /dev/null
@@ -1,218 +0,0 @@
-dnl  Intel Atom mpn_lshift -- mpn left shift.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C			unsigned cnt);
-
-C				  cycles/limb
-C				cnt!=1	cnt==1
-C P5
-C P6 model 0-8,10-12
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C Intel Atom			 5	 2.5
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-defframe(PARAM_CNT, 16)
-defframe(PARAM_SIZE,12)
-defframe(PARAM_SRC,  8)
-defframe(PARAM_DST,  4)
-
-dnl  re-use parameter space
-define(SAVE_UP,`PARAM_CNT')
-define(VAR_COUNT,`PARAM_SIZE')
-define(SAVE_EBX,`PARAM_SRC')
-define(SAVE_EBP,`PARAM_DST')
-
-define(`rp',  `%edi')
-define(`up',  `%esi')
-define(`cnt',  `%ecx')
-
-ASM_START()
-	TEXT
-	ALIGN(8)
-deflit(`FRAME',0)
-PROLOGUE(mpn_lshift)
-	mov	PARAM_CNT, cnt
-	mov	PARAM_SIZE, %edx
-	mov	up, SAVE_UP
-	mov	PARAM_SRC, up
-	push	rp			FRAME_pushl()
-	mov	PARAM_DST, rp
-
-C We can use faster code for shift-by-1 under certain conditions.
-	cmp	$1,cnt
-	jne	L(normal)
-	cmpl	rp, up
-	jnc	L(special)		C jump if s_ptr + 1 >= res_ptr
-	leal	(up,%edx,4),%eax
-	cmpl	%eax,rp
-	jnc	L(special)		C jump if res_ptr >= s_ptr + size
-
-L(normal):
-	lea	-4(up,%edx,4), up
-	mov	%ebx, SAVE_EBX
-	lea	-4(rp,%edx,4), rp
-
-	shr	%edx
-	mov	(up), %eax
-	mov	%edx, VAR_COUNT
-	jnc	L(evn)
-
-	mov	%eax, %ebx
-	shl	%cl, %ebx
-	neg	cnt
-	shr	%cl, %eax
-	test	%edx, %edx
-	jnz	L(gt1)
-	mov	%ebx, (rp)
-	jmp	L(quit)
-
-L(gt1):	mov	%ebp, SAVE_EBP
-	push	%eax
-	mov	-4(up), %eax
-	mov	%eax, %ebp
-	shr	%cl, %eax
-	jmp	L(lo1)
-
-L(evn):	mov	%ebp, SAVE_EBP
-	neg	cnt
-	mov	%eax, %ebp
-	mov	-4(up), %edx
-	shr	%cl, %eax
-	mov	%edx, %ebx
-	shr	%cl, %edx
-	neg	cnt
-	decl	VAR_COUNT
-	lea	4(rp), rp
-	lea	-4(up), up
-	jz	L(end)
-	push	%eax			FRAME_pushl()
-
-	ALIGN(8)
-L(top):	shl	%cl, %ebp
-	or	%ebp, %edx
-	shl	%cl, %ebx
-	neg	cnt
-	mov	-4(up), %eax
-	mov	%eax, %ebp
-	mov	%edx, -4(rp)
-	shr	%cl, %eax
-	lea	-8(rp), rp
-L(lo1):	mov	-8(up), %edx
-	or	%ebx, %eax
-	mov	%edx, %ebx
-	shr	%cl, %edx
-	lea	-8(up), up
-	neg	cnt
-	mov	%eax, (rp)
-	decl	VAR_COUNT
-	jg	L(top)
-
-	pop	%eax			FRAME_popl()
-L(end):
-	shl	%cl, %ebp
-	shl	%cl, %ebx
-	or	%ebp, %edx
-	mov	SAVE_EBP, %ebp
-	mov	%edx, -4(rp)
-	mov	%ebx, -8(rp)
-
-L(quit):
-	mov	SAVE_UP, up
-	mov	SAVE_EBX, %ebx
-	pop	rp			FRAME_popl()
-	ret
-
-L(special):
-deflit(`FRAME',4)
-	lea	3(%edx), %eax		C size + 3
-	dec	%edx			C size - 1
-	mov	(up), %ecx
-	shr	$2, %eax		C (size + 3) / 4
-	and	$3, %edx		C (size - 1) % 4
-	jz	L(goloop)		C jmp if  size == 1 (mod 4)
-	shr	%edx
-	jnc	L(odd)			C jum if  size == 3 (mod 4)
-
-	add	%ecx, %ecx
-	lea	4(up), up
-	mov	%ecx, (rp)
-	mov	(up), %ecx
-	lea	4(rp), rp
-
-	dec	%edx
-	jnz	L(goloop)		C jump if  size == 0 (mod 4)
-L(odd):	lea	-8(up), up
-	lea	-8(rp), rp
-	jmp	L(sentry)		C reached if size == 2 or 3 (mod 4)
-
-L(sloop):
-	adc	%ecx, %ecx
-	mov	4(up), %edx
-	mov	%ecx, (rp)
-	adc	%edx, %edx
-	mov	8(up), %ecx
-	mov	%edx, 4(rp)
-L(sentry):
-	adc	%ecx, %ecx
-	mov	12(up), %edx
-	mov	%ecx, 8(rp)
-	adc	%edx, %edx
-	lea	16(up), up
-	mov	%edx, 12(rp)
-	lea	16(rp), rp
-	mov	(up), %ecx
-L(goloop):
-	decl	%eax
-	jnz	L(sloop)
-
-L(squit):
-	adc	%ecx, %ecx
-	mov	%ecx, (rp)
-	adc	%eax, %eax
-
-	mov	SAVE_UP, up
-	pop	rp			FRAME_popl()
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/lshiftc.asm b/gmp/mpn/x86/atom/lshiftc.asm
deleted file mode 100644
index 5be53ed19d..0000000000
--- a/gmp/mpn/x86/atom/lshiftc.asm
+++ /dev/null
@@ -1,159 +0,0 @@
-dnl  Intel Atom mpn_lshiftc -- mpn left shift with complement.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C mp_limb_t mpn_lshiftc (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C			 unsigned cnt);
-
-C				cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C Intel Atom			 5.5
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-defframe(PARAM_CNT, 16)
-defframe(PARAM_SIZE,12)
-defframe(PARAM_SRC,  8)
-defframe(PARAM_DST,  4)
-
-dnl  re-use parameter space
-define(SAVE_UP,`PARAM_CNT')
-define(VAR_COUNT,`PARAM_SIZE')
-define(SAVE_EBX,`PARAM_SRC')
-define(SAVE_EBP,`PARAM_DST')
-
-define(`rp',  `%edi')
-define(`up',  `%esi')
-define(`cnt',  `%ecx')
-
-ASM_START()
-	TEXT
-
-PROLOGUE(mpn_lshiftc)
-deflit(`FRAME',0)
-	mov	PARAM_CNT, cnt
-	mov	PARAM_SIZE, %edx
-	mov	up, SAVE_UP
-	mov	PARAM_SRC, up
-	push	rp			FRAME_pushl()
-	mov	PARAM_DST, rp
-
-	lea	-4(up,%edx,4), up
-	mov	%ebx, SAVE_EBX
-	lea	-4(rp,%edx,4), rp
-
-	shr	%edx
-	mov	(up), %eax
-	mov	%edx, VAR_COUNT
-	jnc	L(evn)
-
-	mov	%eax, %ebx
-	shl	%cl, %ebx
-	neg	cnt
-	shr	%cl, %eax
-	test	%edx, %edx
-	jnz	L(gt1)
-	not	%ebx
-	mov	%ebx, (rp)
-	jmp	L(quit)
-
-L(gt1):	mov	%ebp, SAVE_EBP
-	push	%eax
-	mov	-4(up), %eax
-	mov	%eax, %ebp
-	shr	%cl, %eax
-	jmp	L(lo1)
-
-L(evn):	mov	%ebp, SAVE_EBP
-	neg	cnt
-	mov	%eax, %ebp
-	mov	-4(up), %edx
-	shr	%cl, %eax
-	mov	%edx, %ebx
-	shr	%cl, %edx
-	neg	cnt
-	decl	VAR_COUNT
-	lea	4(rp), rp
-	lea	-4(up), up
-	jz	L(end)
-	push	%eax			FRAME_pushl()
-
-L(top):	shl	%cl, %ebp
-	or	%ebp, %edx
-	shl	%cl, %ebx
-	neg	cnt
-	not	%edx
-	mov	-4(up), %eax
-	mov	%eax, %ebp
-	mov	%edx, -4(rp)
-	shr	%cl, %eax
-	lea	-8(rp), rp
-L(lo1):	mov	-8(up), %edx
-	or	%ebx, %eax
-	mov	%edx, %ebx
-	shr	%cl, %edx
-	not	%eax
-	lea	-8(up), up
-	neg	cnt
-	mov	%eax, (rp)
-	decl	VAR_COUNT
-	jg	L(top)
-
-	pop	%eax			FRAME_popl()
-L(end):
-	shl	%cl, %ebp
-	shl	%cl, %ebx
-	or	%ebp, %edx
-	mov	SAVE_EBP, %ebp
-	not	%edx
-	not	%ebx
-	mov	%edx, -4(rp)
-	mov	%ebx, -8(rp)
-
-L(quit):
-	mov	SAVE_UP, up
-	mov	SAVE_EBX, %ebx
-	pop	rp			FRAME_popl()
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/mmx/copyd.asm b/gmp/mpn/x86/atom/mmx/copyd.asm
deleted file mode 100644
index b80fb033fe..0000000000
--- a/gmp/mpn/x86/atom/mmx/copyd.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl  Intel Atom mpn_copyd -- copy limb vector, decrementing.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_copyd)
-include_mpn(`x86/k7/mmx/copyd.asm')
diff --git a/gmp/mpn/x86/atom/mmx/copyi.asm b/gmp/mpn/x86/atom/mmx/copyi.asm
deleted file mode 100644
index 49b6b8d662..0000000000
--- a/gmp/mpn/x86/atom/mmx/copyi.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl  Intel Atom mpn_copyi -- copy limb vector, incrementing.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_copyi)
-include_mpn(`x86/k7/mmx/copyi.asm')
diff --git a/gmp/mpn/x86/atom/mmx/hamdist.asm b/gmp/mpn/x86/atom/mmx/hamdist.asm
deleted file mode 100644
index 3fe8253240..0000000000
--- a/gmp/mpn/x86/atom/mmx/hamdist.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl  Intel Atom mpn_hamdist -- hamming distance.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_hamdist)
-include_mpn(`x86/k7/mmx/popham.asm')
diff --git a/gmp/mpn/x86/atom/mod_34lsub1.asm b/gmp/mpn/x86/atom/mod_34lsub1.asm
deleted file mode 100644
index 6d57ba385d..0000000000
--- a/gmp/mpn/x86/atom/mod_34lsub1.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl  Intel Atom mpn_mod_34lsub1 -- remainder modulo 2^24-1.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_mod_34lsub1)
-include_mpn(`x86/p6/mod_34lsub1.asm')
diff --git a/gmp/mpn/x86/atom/mode1o.asm b/gmp/mpn/x86/atom/mode1o.asm
deleted file mode 100644
index c9ee6bd2db..0000000000
--- a/gmp/mpn/x86/atom/mode1o.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl  Intel Atom mpn_modexact_1_odd -- exact division style remainder.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_modexact_1_odd mpn_modexact_1c_odd)
-include_mpn(`x86/pentium/mode1o.asm')
diff --git a/gmp/mpn/x86/atom/rshift.asm b/gmp/mpn/x86/atom/rshift.asm
deleted file mode 100644
index 1cb5dbefe9..0000000000
--- a/gmp/mpn/x86/atom/rshift.asm
+++ /dev/null
@@ -1,152 +0,0 @@
-dnl  Intel Atom mpn_rshift -- mpn right shift.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  Converted from AMD64 by Marco Bodrato.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C			unsigned cnt);
-
-C				cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C Intel Atom			 5
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-defframe(PARAM_CNT, 16)
-defframe(PARAM_SIZE,12)
-defframe(PARAM_SRC,  8)
-defframe(PARAM_DST,  4)
-
-dnl  re-use parameter space
-define(SAVE_UP,`PARAM_CNT')
-define(VAR_COUNT,`PARAM_SIZE')
-define(SAVE_EBX,`PARAM_SRC')
-define(SAVE_EBP,`PARAM_DST')
-
-define(`rp',  `%edi')
-define(`up',  `%esi')
-define(`cnt',  `%ecx')
-
-ASM_START()
-	TEXT
-	ALIGN(8)
-deflit(`FRAME',0)
-PROLOGUE(mpn_rshift)
-	mov	PARAM_CNT, cnt
-	mov	PARAM_SIZE, %edx
-	mov	up, SAVE_UP
-	mov	PARAM_SRC, up
-	push	rp			FRAME_pushl()
-	mov	PARAM_DST, rp
-	mov	%ebx, SAVE_EBX
-
-	shr	%edx
-	mov	(up), %eax
-	mov	%edx, VAR_COUNT
-	jnc	L(evn)
-
-	mov	%eax, %ebx
-	shr	%cl, %ebx
-	neg	cnt
-	shl	%cl, %eax
-	test	%edx, %edx
-	jnz	L(gt1)
-	mov	%ebx, (rp)
-	jmp	L(quit)
-
-L(gt1):	mov	%ebp, SAVE_EBP
-	push	%eax
-	mov	4(up), %eax
-	mov	%eax, %ebp
-	shl	%cl, %eax
-	jmp	L(lo1)
-
-L(evn):	mov	%ebp, SAVE_EBP
-	neg	cnt
-	mov	%eax, %ebp
-	mov	4(up), %edx
-	shl	%cl, %eax
-	mov	%edx, %ebx
-	shl	%cl, %edx
-	neg	cnt
-	decl	VAR_COUNT
-	lea	-4(rp), rp
-	lea	4(up), up
-	jz	L(end)
-	push	%eax			FRAME_pushl()
-
-	ALIGN(8)
-L(top):	shr	%cl, %ebp
-	or	%ebp, %edx
-	shr	%cl, %ebx
-	neg	cnt
-	mov	4(up), %eax
-	mov	%eax, %ebp
-	mov	%edx, 4(rp)
-	shl	%cl, %eax
-	lea	8(rp), rp
-L(lo1):	mov	8(up), %edx
-	or	%ebx, %eax
-	mov	%edx, %ebx
-	shl	%cl, %edx
-	lea	8(up), up
-	neg	cnt
-	mov	%eax, (rp)
-	decl	VAR_COUNT
-	jg	L(top)
-
-	pop	%eax			FRAME_popl()
-L(end):
-	shr	%cl, %ebp
-	shr	%cl, %ebx
-	or	%ebp, %edx
-	mov	SAVE_EBP, %ebp
-	mov	%edx, 4(rp)
-	mov	%ebx, 8(rp)
-
-L(quit):
-	mov	SAVE_UP, up
-	mov	SAVE_EBX, %ebx
-	pop	rp			FRAME_popl()
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/sse2/aorsmul_1.asm b/gmp/mpn/x86/atom/sse2/aorsmul_1.asm
deleted file mode 100644
index 969a14a919..0000000000
--- a/gmp/mpn/x86/atom/sse2/aorsmul_1.asm
+++ /dev/null
@@ -1,174 +0,0 @@
-dnl x86-32 mpn_addmul_1 and mpn_submul_1 optimised for Intel Atom.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C			    cycles/limb
-C			    cycles/limb
-C P5				 -
-C P6 model 0-8,10-12		 -
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C Intel Atom			 8
-C AMD K6
-C AMD K7			 -
-C AMD K8
-C AMD K10
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`n',  `%ecx')
-
-ifdef(`OPERATION_addmul_1',`
-	define(ADDSUB,  add)
-	define(func_1,  mpn_addmul_1)
-	define(func_1c, mpn_addmul_1c)')
-ifdef(`OPERATION_submul_1',`
-	define(ADDSUB,  sub)
-	define(func_1,  mpn_submul_1)
-	define(func_1c, mpn_submul_1c)')
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
-
-	TEXT
-	ALIGN(16)
-PROLOGUE(func_1)
-	xor	%edx, %edx
-L(ent):	push	%edi
-	push	%esi
-	push	%ebx
-	mov	16(%esp), rp
-	mov	20(%esp), up
-	mov	24(%esp), n
-	movd	28(%esp), %mm7
-	test	$1, n
-	jz	L(fi0or2)
-	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	shr	$2, n
-	jnc	L(fi1)
-
-L(fi3):	lea	-8(up), up
-	lea	-8(rp), rp
-	movd	12(up), %mm1
-	movd	%mm0, %ebx
-	pmuludq	%mm7, %mm1
-	add	$1, n			C increment and clear carry
-	jmp	L(lo3)
-
-L(fi1):	movd	%mm0, %ebx
-	jz	L(wd1)
-	movd	4(up), %mm1
-	pmuludq	%mm7, %mm1
-	jmp	L(lo1)
-
-L(fi0or2):
-	movd	(up), %mm1
-	pmuludq	%mm7, %mm1
-	shr	$2, n
-	movd	4(up), %mm0
-	jc	L(fi2)
-	lea	-4(up), up
-	lea	-4(rp), rp
-	movd	%mm1, %eax
-	pmuludq	%mm7, %mm0
-	jmp	L(lo0)
-
-L(fi2):	lea	4(up), up
-	add	$1, n			C increment and clear carry
-	movd	%mm1, %eax
-	lea	-12(rp), rp
-	jmp	L(lo2)
-
-C	ALIGN(16)			C alignment seems irrelevant
-L(top):	movd	4(up), %mm1
-	adc	$0, %edx
-	ADDSUB	%eax, 12(rp)
-	movd	%mm0, %ebx
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-L(lo1):	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %edx
-	movd	%mm1, %eax
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	adc	$0, %edx
-	ADDSUB	%ebx, (rp)
-L(lo0):	psrlq	$32, %mm1
-	adc	%edx, %eax
-	movd	%mm1, %edx
-	movd	%mm0, %ebx
-	movd	12(up), %mm1
-	pmuludq	%mm7, %mm1
-	adc	$0, %edx
-	ADDSUB	%eax, 4(rp)
-L(lo3):	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %edx
-	movd	%mm1, %eax
-	lea	16(up), up
-	movd	(up), %mm0
-	adc	$0, %edx
-	ADDSUB	%ebx, 8(rp)
-L(lo2):	psrlq	$32, %mm1
-	adc	%edx, %eax
-	movd	%mm1, %edx
-	pmuludq	%mm7, %mm0
-	dec	n
-	jnz	L(top)
-
-L(end):	adc	n, %edx			C n is zero here
-	ADDSUB	%eax, 12(rp)
-	movd	%mm0, %ebx
-	lea	16(rp), rp
-L(wd1):	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %eax
-	adc	n, %eax
-	ADDSUB	%ebx, (rp)
-	emms
-	adc	n, %eax
-	pop	%ebx
-	pop	%esi
-	pop	%edi
-	ret
-EPILOGUE()
-PROLOGUE(func_1c)
-	mov	20(%esp), %edx		C carry
-	jmp	L(ent)
-EPILOGUE()
diff --git a/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm b/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm
deleted file mode 100644
index 782e914019..0000000000
--- a/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl  Intel Atom  mpn_bdiv_dbm1.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_bdiv_dbm1c)
-include_mpn(`x86/pentium4/sse2/bdiv_dbm1c.asm')
diff --git a/gmp/mpn/x86/atom/sse2/divrem_1.asm b/gmp/mpn/x86/atom/sse2/divrem_1.asm
deleted file mode 100644
index f84709a22e..0000000000
--- a/gmp/mpn/x86/atom/sse2/divrem_1.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl  Intel Atom mpn_divrem_1 -- mpn by limb division.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_preinv_divrem_1 mpn_divrem_1c mpn_divrem_1)
-include_mpn(`x86/pentium4/sse2/divrem_1.asm')
diff --git a/gmp/mpn/x86/atom/sse2/mod_1_1.asm b/gmp/mpn/x86/atom/sse2/mod_1_1.asm
deleted file mode 100644
index ae6581d9b6..0000000000
--- a/gmp/mpn/x86/atom/sse2/mod_1_1.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl  Intel Atom/SSE2 mpn_mod_1_1.
-
-dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_mod_1_1p)
-include_mpn(`x86/pentium4/sse2/mod_1_1.asm')
diff --git a/gmp/mpn/x86/atom/sse2/mod_1_4.asm b/gmp/mpn/x86/atom/sse2/mod_1_4.asm
deleted file mode 100644
index 31faa3f0a3..0000000000
--- a/gmp/mpn/x86/atom/sse2/mod_1_4.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl  Intel Atom/SSE2 mpn_mod_1_4.
-
-dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_mod_1s_4p)
-include_mpn(`x86/pentium4/sse2/mod_1_4.asm')
diff --git a/gmp/mpn/x86/atom/sse2/mul_1.asm b/gmp/mpn/x86/atom/sse2/mul_1.asm
deleted file mode 100644
index aa3bb974bb..0000000000
--- a/gmp/mpn/x86/atom/sse2/mul_1.asm
+++ /dev/null
@@ -1,124 +0,0 @@
-dnl  Intel Atom mpn_mul_1.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C			    cycles/limb
-C			    cycles/limb
-C P5				 -
-C P6 model 0-8,10-12		 -
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C Intel Atom			 7.5
-C AMD K6			 -
-C AMD K7			 -
-C AMD K8
-C AMD K10
-
-defframe(PARAM_CARRY,20)
-defframe(PARAM_MUL,  16)
-defframe(PARAM_SIZE, 12)
-defframe(PARAM_SRC,  8)
-defframe(PARAM_DST,  4)
-
-define(`rp', `%edx')
-define(`up', `%esi')
-define(`n',  `%ecx')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-deflit(`FRAME',0)
-
-PROLOGUE(mpn_mul_1c)
-	movd	PARAM_CARRY, %mm6	C carry
-	jmp	L(ent)
-EPILOGUE()
-
-	ALIGN(8)			C for compact code
-PROLOGUE(mpn_mul_1)
-	pxor	%mm6, %mm6
-L(ent):	push	%esi			FRAME_pushl()
-	mov	PARAM_SRC, up
-	mov	PARAM_SIZE, %eax	C size
-	movd	PARAM_MUL, %mm7
-	movd	(up), %mm0
-	mov	%eax, n
-	and	$3, %eax
-	pmuludq	%mm7, %mm0
-	mov	PARAM_DST, rp
-	jz	L(lo0)
-	cmp	$2, %eax
-	lea	-16(up,%eax,4),up
-	lea	-16(rp,%eax,4),rp
-	jc	L(lo1)
-	jz	L(lo2)
-	jmp	L(lo3)
-
-	ALIGN(16)
-L(top):	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	psrlq	$32, %mm6
-	lea	16(rp), rp
-L(lo0):	paddq	%mm0, %mm6
-	movd	4(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, (rp)
-	psrlq	$32, %mm6
-L(lo3):	paddq	%mm0, %mm6
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, 4(rp)
-	psrlq	$32, %mm6
-L(lo2):	paddq	%mm0, %mm6
-	movd	12(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, 8(rp)
-	psrlq	$32, %mm6
-L(lo1):	paddq	%mm0, %mm6
-	sub	$4, n
-	movd	%mm6, 12(rp)
-	lea	16(up), up
-	ja	L(top)
-
-	psrlq	$32, %mm6
-	movd	%mm6, %eax
-	emms
-	pop	%esi			FRAME_popl()
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/sse2/mul_basecase.asm b/gmp/mpn/x86/atom/sse2/mul_basecase.asm
deleted file mode 100644
index 97d3aeb5ad..0000000000
--- a/gmp/mpn/x86/atom/sse2/mul_basecase.asm
+++ /dev/null
@@ -1,501 +0,0 @@
-dnl  x86 mpn_mul_basecase -- Multiply two limb vectors and store the result in
-dnl  a third limb vector.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C TODO
-C  * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the
-C    4 large loops into one; we could use it for the outer loop branch.
-C  * Optimise code outside of inner loops.
-C  * Write combined addmul_1 feed-in a wind-down code, and use when iterating
-C    outer each loop.  ("Overlapping software pipelining")
-C  * Postpone push of ebx until we know vn > 1.  Perhaps use caller-saves regs
-C    for inlined mul_1, allowing us to postpone all pushes.
-C  * Perhaps write special code for vn <= un < M, for some small M.
-
-C void mpn_mul_basecase (mp_ptr wp,
-C                        mp_srcptr xp, mp_size_t xn,
-C                        mp_srcptr yp, mp_size_t yn);
-C
-
-define(`rp',  `%edi')
-define(`up',  `%esi')
-define(`un',  `%ecx')
-define(`vp',  `%ebp')
-define(`vn',  `36(%esp)')
-
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mul_basecase)
-	push	%edi
-	push	%esi
-	push	%ebx
-	push	%ebp
-	mov	20(%esp), rp
-	mov	24(%esp), up
-	mov	28(%esp), un
-	mov	32(%esp), vp
-
-	movd	(up), %mm0
-	movd	(vp), %mm7
-	pmuludq	%mm7, %mm0
-	pxor	%mm6, %mm6
-
-	mov	un, %eax
-	and	$3, %eax
-	jz	L(of0)
-	cmp	$2, %eax
-	jc	L(of1)
-	jz	L(of2)
-
-C ================================================================
-	jmp	L(m3)
-	ALIGN(16)
-L(lm3):	movd	-4(up), %mm0
-	pmuludq	%mm7, %mm0
-	psrlq	$32, %mm6
-	lea	16(rp), rp
-	paddq	%mm0, %mm6
-	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, -4(rp)
-	psrlq	$32, %mm6
-L(m3):	paddq	%mm0, %mm6
-	movd	4(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, (rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, 4(rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	sub	$4, un
-	movd	%mm6, 8(rp)
-	lea	16(up), up
-	ja	L(lm3)
-
-	psrlq	$32, %mm6
-	movd	%mm6, 12(rp)
-
-	decl	vn
-	jz	L(done)
-	lea	-8(rp), rp
-
-L(ol3):	mov	28(%esp), un
-	neg	un
-	lea	4(vp), vp
-	movd	(vp), %mm7	C read next V limb
-	mov	24(%esp), up
-	lea	16(rp,un,4), rp
-
-	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	sar	$2, un
-	movd	4(up), %mm1
-	movd	%mm0, %ebx
-	pmuludq	%mm7, %mm1
-	lea	-8(up), up
-	xor	%edx, %edx	C zero edx and CF
-	jmp	L(a3)
-
-L(la3):	movd	4(up), %mm1
-	adc	$0, %edx
-	add	%eax, 12(rp)
-	movd	%mm0, %ebx
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %edx
-	movd	%mm1, %eax
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	adc	$0, %edx
-	add	%ebx, (rp)
-	psrlq	$32, %mm1
-	adc	%edx, %eax
-	movd	%mm1, %edx
-	movd	%mm0, %ebx
-	movd	12(up), %mm1
-	pmuludq	%mm7, %mm1
-	adc	$0, %edx
-	add	%eax, 4(rp)
-L(a3):	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %edx
-	movd	%mm1, %eax
-	lea	16(up), up
-	movd	(up), %mm0
-	adc	$0, %edx
-	add	%ebx, 8(rp)
-	psrlq	$32, %mm1
-	adc	%edx, %eax
-	movd	%mm1, %edx
-	pmuludq	%mm7, %mm0
-	inc	un
-	jnz	L(la3)
-
-	adc	un, %edx	C un is zero here
-	add	%eax, 12(rp)
-	movd	%mm0, %ebx
-	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %eax
-	adc	un, %eax
-	add	%ebx, 16(rp)
-	adc	un, %eax
-	mov	%eax, 20(rp)
-
-	decl	vn
-	jnz	L(ol3)
-	jmp	L(done)
-
-C ================================================================
-	ALIGN(16)
-L(lm0):	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	psrlq	$32, %mm6
-	lea	16(rp), rp
-L(of0):	paddq	%mm0, %mm6
-	movd	4(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, (rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, 4(rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	movd	12(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, 8(rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	sub	$4, un
-	movd	%mm6, 12(rp)
-	lea	16(up), up
-	ja	L(lm0)
-
-	psrlq	$32, %mm6
-	movd	%mm6, 16(rp)
-
-	decl	vn
-	jz	L(done)
-	lea	-4(rp), rp
-
-L(ol0):	mov	28(%esp), un
-	neg	un
-	lea	4(vp), vp
-	movd	(vp), %mm7	C read next V limb
-	mov	24(%esp), up
-	lea	20(rp,un,4), rp
-
-	movd	(up), %mm1
-	pmuludq	%mm7, %mm1
-	sar	$2, un
-	movd	4(up), %mm0
-	lea	-4(up), up
-	movd	%mm1, %eax
-	pmuludq	%mm7, %mm0
-	xor	%edx, %edx	C zero edx and CF
-	jmp	L(a0)
-
-L(la0):	movd	4(up), %mm1
-	adc	$0, %edx
-	add	%eax, 12(rp)
-	movd	%mm0, %ebx
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %edx
-	movd	%mm1, %eax
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	adc	$0, %edx
-	add	%ebx, (rp)
-L(a0):	psrlq	$32, %mm1
-	adc	%edx, %eax
-	movd	%mm1, %edx
-	movd	%mm0, %ebx
-	movd	12(up), %mm1
-	pmuludq	%mm7, %mm1
-	adc	$0, %edx
-	add	%eax, 4(rp)
-	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %edx
-	movd	%mm1, %eax
-	lea	16(up), up
-	movd	(up), %mm0
-	adc	$0, %edx
-	add	%ebx, 8(rp)
-	psrlq	$32, %mm1
-	adc	%edx, %eax
-	movd	%mm1, %edx
-	pmuludq	%mm7, %mm0
-	inc	un
-	jnz	L(la0)
-
-	adc	un, %edx	C un is zero here
-	add	%eax, 12(rp)
-	movd	%mm0, %ebx
-	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %eax
-	adc	un, %eax
-	add	%ebx, 16(rp)
-	adc	un, %eax
-	mov	%eax, 20(rp)
-
-	decl	vn
-	jnz	L(ol0)
-	jmp	L(done)
-
-C ================================================================
-	ALIGN(16)
-L(lm1):	movd	-12(up), %mm0
-	pmuludq	%mm7, %mm0
-	psrlq	$32, %mm6
-	lea	16(rp), rp
-	paddq	%mm0, %mm6
-	movd	-8(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, -12(rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	movd	-4(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, -8(rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, -4(rp)
-	psrlq	$32, %mm6
-L(of1):	paddq	%mm0, %mm6
-	sub	$4, un
-	movd	%mm6, (rp)
-	lea	16(up), up
-	ja	L(lm1)
-
-	psrlq	$32, %mm6
-	movd	%mm6, 4(rp)
-
-	decl	vn
-	jz	L(done)
-	lea	-16(rp), rp
-
-L(ol1):	mov	28(%esp), un
-	neg	un
-	lea	4(vp), vp
-	movd	(vp), %mm7	C read next V limb
-	mov	24(%esp), up
-	lea	24(rp,un,4), rp
-
-	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	sar	$2, un
-	movd	%mm0, %ebx
-	movd	4(up), %mm1
-	pmuludq	%mm7, %mm1
-	xor	%edx, %edx	C zero edx and CF
-	inc	un
-	jmp	L(a1)
-
-L(la1):	movd	4(up), %mm1
-	adc	$0, %edx
-	add	%eax, 12(rp)
-	movd	%mm0, %ebx
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-L(a1):	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %edx
-	movd	%mm1, %eax
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	adc	$0, %edx
-	add	%ebx, (rp)
-	psrlq	$32, %mm1
-	adc	%edx, %eax
-	movd	%mm1, %edx
-	movd	%mm0, %ebx
-	movd	12(up), %mm1
-	pmuludq	%mm7, %mm1
-	adc	$0, %edx
-	add	%eax, 4(rp)
-	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %edx
-	movd	%mm1, %eax
-	lea	16(up), up
-	movd	(up), %mm0
-	adc	$0, %edx
-	add	%ebx, 8(rp)
-	psrlq	$32, %mm1
-	adc	%edx, %eax
-	movd	%mm1, %edx
-	pmuludq	%mm7, %mm0
-	inc	un
-	jnz	L(la1)
-
-	adc	un, %edx	C un is zero here
-	add	%eax, 12(rp)
-	movd	%mm0, %ebx
-	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %eax
-	adc	un, %eax
-	add	%ebx, 16(rp)
-	adc	un, %eax
-	mov	%eax, 20(rp)
-
-	decl	vn
-	jnz	L(ol1)
-	jmp	L(done)
-
-C ================================================================
-	ALIGN(16)
-L(lm2):	movd	-8(up), %mm0
-	pmuludq	%mm7, %mm0
-	psrlq	$32, %mm6
-	lea	16(rp), rp
-	paddq	%mm0, %mm6
-	movd	-4(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, -8(rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, -4(rp)
-	psrlq	$32, %mm6
-L(of2):	paddq	%mm0, %mm6
-	movd	4(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, (rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	sub	$4, un
-	movd	%mm6, 4(rp)
-	lea	16(up), up
-	ja	L(lm2)
-
-	psrlq	$32, %mm6
-	movd	%mm6, 8(rp)
-
-	decl	vn
-	jz	L(done)
-	lea	-12(rp), rp
-
-L(ol2):	mov	28(%esp), un
-	neg	un
-	lea	4(vp), vp
-	movd	(vp), %mm7	C read next V limb
-	mov	24(%esp), up
-	lea	12(rp,un,4), rp
-
-	movd	(up), %mm1
-	pmuludq	%mm7, %mm1
-	sar	$2, un
-	movd	4(up), %mm0
-	lea	4(up), up
-	movd	%mm1, %eax
-	xor	%edx, %edx	C zero edx and CF
-	jmp	L(lo2)
-
-L(la2):	movd	4(up), %mm1
-	adc	$0, %edx
-	add	%eax, 12(rp)
-	movd	%mm0, %ebx
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %edx
-	movd	%mm1, %eax
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	adc	$0, %edx
-	add	%ebx, (rp)
-	psrlq	$32, %mm1
-	adc	%edx, %eax
-	movd	%mm1, %edx
-	movd	%mm0, %ebx
-	movd	12(up), %mm1
-	pmuludq	%mm7, %mm1
-	adc	$0, %edx
-	add	%eax, 4(rp)
-	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %edx
-	movd	%mm1, %eax
-	lea	16(up), up
-	movd	(up), %mm0
-	adc	$0, %edx
-	add	%ebx, 8(rp)
-L(lo2):	psrlq	$32, %mm1
-	adc	%edx, %eax
-	movd	%mm1, %edx
-	pmuludq	%mm7, %mm0
-	inc	un
-	jnz	L(la2)
-
-	adc	un, %edx	C un is zero here
-	add	%eax, 12(rp)
-	movd	%mm0, %ebx
-	psrlq	$32, %mm0
-	adc	%edx, %ebx
-	movd	%mm0, %eax
-	adc	un, %eax
-	add	%ebx, 16(rp)
-	adc	un, %eax
-	mov	%eax, 20(rp)
-
-	decl	vn
-	jnz	L(ol2)
-C	jmp	L(done)
-
-C ================================================================
-L(done):
-	emms
-	pop	%ebp
-	pop	%ebx
-	pop	%esi
-	pop	%edi
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/atom/sse2/popcount.asm b/gmp/mpn/x86/atom/sse2/popcount.asm
deleted file mode 100644
index 7847aec8e6..0000000000
--- a/gmp/mpn/x86/atom/sse2/popcount.asm
+++ /dev/null
@@ -1,35 +0,0 @@
-dnl  Intel Atom mpn_popcount -- population count.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-MULFUNC_PROLOGUE(mpn_popcount)
-include_mpn(`x86/pentium4/sse2/popcount.asm')
diff --git a/gmp/mpn/x86/atom/sse2/sqr_basecase.asm b/gmp/mpn/x86/atom/sse2/sqr_basecase.asm
deleted file mode 100644
index af19ed854d..0000000000
--- a/gmp/mpn/x86/atom/sse2/sqr_basecase.asm
+++ /dev/null
@@ -1,634 +0,0 @@
-dnl  x86 mpn_sqr_basecase -- square an mpn number, optimised for atom.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C TODO
-C  * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the
-C    4 large loops into one; we could use it for the outer loop branch.
-C  * Optimise code outside of inner loops.
-C  * Write combined addmul_1 feed-in a wind-down code, and use when iterating
-C    outer each loop.  ("Overlapping software pipelining")
-C  * Perhaps use caller-saves regs for inlined mul_1, allowing us to postpone
-C    all pushes.
-C  * Perhaps write special code for n < M, for some small M.
-C  * Replace inlined addmul_1 with smaller code from aorsmul_1.asm, or perhaps
-C    with even less pipelined code.
-C  * We run the outer loop until we have a 2-limb by 1-limb addmul_1 left.
-C    Consider breaking out earlier, saving high the cost of short loops.
-
-C void mpn_sqr_basecase (mp_ptr wp,
-C                        mp_srcptr xp, mp_size_t xn);
-
-define(`rp',  `%edi')
-define(`up',  `%esi')
-define(`n',   `%ecx')
-
-define(`un',  `%ebp')
-
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_sqr_basecase)
-	push	%edi
-	push	%esi
-	mov	12(%esp), rp
-	mov	16(%esp), up
-	mov	20(%esp), n
-
-	lea	4(rp), rp	C write triangular product starting at rp[1]
-	dec	n
-	movd	(up), %mm7
-
-	jz	L(one)
-	lea	4(up), up
-	push	%ebx
-	push	%ebp
-	mov	n, %eax
-
-	movd	(up), %mm0
-	neg	n
-	pmuludq	%mm7, %mm0
-	pxor	%mm6, %mm6
-	mov	n, un
-
-	and	$3, %eax
-	jz	L(of0)
-	cmp	$2, %eax
-	jc	L(of1)
-	jz	L(of2)
-
-C ================================================================
-	jmp	L(m3)
-	ALIGN(16)
-L(lm3):	movd	-4(up), %mm0
-	pmuludq	%mm7, %mm0
-	psrlq	$32, %mm6
-	lea	16(rp), rp
-	paddq	%mm0, %mm6
-	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, -4(rp)
-	psrlq	$32, %mm6
-L(m3):	paddq	%mm0, %mm6
-	movd	4(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, (rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, 4(rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	add	$4, un
-	movd	%mm6, 8(rp)
-	lea	16(up), up
-	js	L(lm3)
-
-	psrlq	$32, %mm6
-	movd	%mm6, 12(rp)
-
-	inc	n
-C	jz	L(done)
-  lea	-12(up), up
-  lea	4(rp), rp
-	jmp	L(ol2)
-
-C ================================================================
-	ALIGN(16)
-L(lm0):	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	psrlq	$32, %mm6
-	lea	16(rp), rp
-L(of0):	paddq	%mm0, %mm6
-	movd	4(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, (rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, 4(rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	movd	12(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, 8(rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	add	$4, un
-	movd	%mm6, 12(rp)
-	lea	16(up), up
-	js	L(lm0)
-
-	psrlq	$32, %mm6
-	movd	%mm6, 16(rp)
-
-	inc	n
-C	jz	L(done)
-  lea	-8(up), up
-  lea	8(rp), rp
-	jmp	L(ol3)
-
-C ================================================================
-	ALIGN(16)
-L(lm1):	movd	-12(up), %mm0
-	pmuludq	%mm7, %mm0
-	psrlq	$32, %mm6
-	lea	16(rp), rp
-	paddq	%mm0, %mm6
-	movd	-8(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, -12(rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	movd	-4(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, -8(rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, -4(rp)
-	psrlq	$32, %mm6
-L(of1):	paddq	%mm0, %mm6
-	add	$4, un
-	movd	%mm6, (rp)
-	lea	16(up), up
-	js	L(lm1)
-
-	psrlq	$32, %mm6
-	movd	%mm6, 4(rp)
-
-	inc	n
-	jz	L(done)		C goes away when we add special n=2 code
-  lea	-20(up), up
-  lea	-4(rp), rp
-	jmp	L(ol0)
-
-C ================================================================
-	ALIGN(16)
-L(lm2):	movd	-8(up), %mm0
-	pmuludq	%mm7, %mm0
-	psrlq	$32, %mm6
-	lea	16(rp), rp
-	paddq	%mm0, %mm6
-	movd	-4(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, -8(rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, -4(rp)
-	psrlq	$32, %mm6
-L(of2):	paddq	%mm0, %mm6
-	movd	4(up), %mm0
-	pmuludq	%mm7, %mm0
-	movd	%mm6, (rp)
-	psrlq	$32, %mm6
-	paddq	%mm0, %mm6
-	add	$4, un
-	movd	%mm6, 4(rp)
-	lea	16(up), up
-	js	L(lm2)
-
-	psrlq	$32, %mm6
-	movd	%mm6, 8(rp)
-
-	inc	n
-C	jz	L(done)
-  lea	-16(up), up
-C  lea	(rp), rp
-C	jmp	L(ol1)
-
-C ================================================================
-
-L(ol1):	lea	4(up,n,4), up
-	movd	(up), %mm7	C read next U invariant limb
-	lea	8(rp,n,4), rp
-	mov	n, un
-
-	movd	4(up), %mm1
-	pmuludq	%mm7, %mm1
-	sar	$2, un
-	movd	%mm1, %ebx
-	inc	un
-	jz	L(re1)
-
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	xor	%edx, %edx	C zero edx and CF
-	jmp	L(a1)
-
-L(la1):	adc	$0, %edx
-	add	%ebx, 12(rp)
-	movd	%mm0, %eax
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-	psrlq	$32, %mm0
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	adc	$0, %edx
-	add	%eax, (rp)
-L(a1):	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %edx
-	movd	%mm0, %eax
-	movd	12(up), %mm1
-	pmuludq	%mm7, %mm1
-	adc	$0, %edx
-	add	%ebx, 4(rp)
-	psrlq	$32, %mm0
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	lea	16(up), up
-	movd	(up), %mm0
-	adc	$0, %edx
-	add	%eax, 8(rp)
-	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %edx
-	pmuludq	%mm7, %mm0
-	inc	un
-	movd	4(up), %mm1
-	jnz	L(la1)
-
-	adc	un, %edx	C un is zero here
-	add	%ebx, 12(rp)
-	movd	%mm0, %eax
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-	psrlq	$32, %mm0
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	adc	un, %edx
-	add	%eax, (rp)
-	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %eax
-	adc	un, %eax
-	add	%ebx, 4(rp)
-	adc	un, %eax
-	mov	%eax, 8(rp)
-
-	inc	n
-
-C ================================================================
-
-L(ol0):	lea	(up,n,4), up
-	movd	4(up), %mm7	C read next U invariant limb
-	lea	4(rp,n,4), rp
-	mov	n, un
-
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	sar	$2, un
-	movd	12(up), %mm1
-	movd	%mm0, %eax
-	pmuludq	%mm7, %mm1
-	xor	%edx, %edx	C zero edx and CF
-	jmp	L(a0)
-
-L(la0):	adc	$0, %edx
-	add	%ebx, 12(rp)
-	movd	%mm0, %eax
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-	psrlq	$32, %mm0
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	adc	$0, %edx
-	add	%eax, (rp)
-	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %edx
-	movd	%mm0, %eax
-	movd	12(up), %mm1
-	pmuludq	%mm7, %mm1
-	adc	$0, %edx
-	add	%ebx, 4(rp)
-L(a0):	psrlq	$32, %mm0
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	lea	16(up), up
-	movd	(up), %mm0
-	adc	$0, %edx
-	add	%eax, 8(rp)
-	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %edx
-	pmuludq	%mm7, %mm0
-	inc	un
-	movd	4(up), %mm1
-	jnz	L(la0)
-
-	adc	un, %edx	C un is zero here
-	add	%ebx, 12(rp)
-	movd	%mm0, %eax
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-	psrlq	$32, %mm0
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	adc	un, %edx
-	add	%eax, (rp)
-	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %eax
-	adc	un, %eax
-	add	%ebx, 4(rp)
-	adc	un, %eax
-	mov	%eax, 8(rp)
-
-	inc	n
-
-C ================================================================
-
-L(ol3):	lea	12(up,n,4), up
-	movd	-8(up), %mm7	C read next U invariant limb
-	lea	(rp,n,4), rp	C put rp back
-	mov	n, un
-
-	movd	-4(up), %mm1
-	pmuludq	%mm7, %mm1
-	sar	$2, un
-	movd	%mm1, %ebx
-	movd	(up), %mm0
-	xor	%edx, %edx	C zero edx and CF
-	jmp	L(a3)
-
-L(la3):	adc	$0, %edx
-	add	%ebx, 12(rp)
-	movd	%mm0, %eax
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-	psrlq	$32, %mm0
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	adc	$0, %edx
-	add	%eax, (rp)
-	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %edx
-	movd	%mm0, %eax
-	movd	12(up), %mm1
-	pmuludq	%mm7, %mm1
-	adc	$0, %edx
-	add	%ebx, 4(rp)
-	psrlq	$32, %mm0
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	lea	16(up), up
-	movd	(up), %mm0
-	adc	$0, %edx
-	add	%eax, 8(rp)
-L(a3):	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %edx
-	pmuludq	%mm7, %mm0
-	inc	un
-	movd	4(up), %mm1
-	jnz	L(la3)
-
-	adc	un, %edx	C un is zero here
-	add	%ebx, 12(rp)
-	movd	%mm0, %eax
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-	psrlq	$32, %mm0
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	adc	un, %edx
-	add	%eax, (rp)
-	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %eax
-	adc	un, %eax
-	add	%ebx, 4(rp)
-	adc	un, %eax
-	mov	%eax, 8(rp)
-
-	inc	n
-
-C ================================================================
-
-L(ol2):	lea	8(up,n,4), up
-	movd	-4(up), %mm7	C read next U invariant limb
-	lea	12(rp,n,4), rp
-	mov	n, un
-
-	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	xor	%edx, %edx
-	sar	$2, un
-	movd	4(up), %mm1
-	test	un, un		C clear carry
-	movd	%mm0, %eax
-	pmuludq	%mm7, %mm1
-	inc	un
-	jnz	L(a2)
-	jmp	L(re2)
-
-L(la2):	adc	$0, %edx
-	add	%ebx, 12(rp)
-	movd	%mm0, %eax
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-L(a2):	psrlq	$32, %mm0
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	movd	8(up), %mm0
-	pmuludq	%mm7, %mm0
-	adc	$0, %edx
-	add	%eax, (rp)
-	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %edx
-	movd	%mm0, %eax
-	movd	12(up), %mm1
-	pmuludq	%mm7, %mm1
-	adc	$0, %edx
-	add	%ebx, 4(rp)
-	psrlq	$32, %mm0
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	lea	16(up), up
-	movd	(up), %mm0
-	adc	$0, %edx
-	add	%eax, 8(rp)
-	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %edx
-	pmuludq	%mm7, %mm0
-	inc	un
-	movd	4(up), %mm1
-	jnz	L(la2)
-
-	adc	un, %edx	C un is zero here
-	add	%ebx, 12(rp)
-	movd	%mm0, %eax
-	pmuludq	%mm7, %mm1
-	lea	16(rp), rp
-	psrlq	$32, %mm0
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	adc	un, %edx
-	add	%eax, (rp)
-	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %eax
-	adc	un, %eax
-	add	%ebx, 4(rp)
-	adc	un, %eax
-	mov	%eax, 8(rp)
-
-	inc	n
-	jmp	L(ol1)
-
-C ================================================================
-L(re2):	psrlq	$32, %mm0
-	movd	(up), %mm7	C read next U invariant limb
-	adc	%edx, %eax
-	movd	%mm0, %edx
-	movd	%mm1, %ebx
-	adc	un, %edx
-	add	%eax, (rp)
-	lea	4(rp), rp
-	psrlq	$32, %mm1
-	adc	%edx, %ebx
-	movd	%mm1, %eax
-	movd	4(up), %mm1
-	adc	un, %eax
-	add	%ebx, (rp)
-	pmuludq	%mm7, %mm1
-	adc	un, %eax
-	mov	%eax, 4(rp)
-	movd	%mm1, %ebx
-
-L(re1):	psrlq	$32, %mm1
-	add	%ebx, 4(rp)
-	movd	%mm1, %eax
-	adc	un, %eax
-	xor	n, n		C make n zeroness assumption below true
-	mov	%eax, 8(rp)
-
-L(done):			C n is zero here
-	mov	24(%esp), up
-	mov	28(%esp), %eax
-
-	movd	(up), %mm0
-	inc	%eax
-	pmuludq	%mm0, %mm0
-	lea	4(up), up
-	mov	20(%esp), rp
-	shr	%eax
-	movd	%mm0, (rp)
-	psrlq	$32, %mm0
-	lea	-12(rp), rp
-	mov	%eax, 28(%esp)
-	jnc	L(odd)
-
-	movd	%mm0, %ebp
-	movd	(up), %mm0
-	lea	8(rp), rp
-	pmuludq	%mm0, %mm0
-	lea	-4(up), up
-	add	8(rp), %ebp
-	movd	%mm0, %edx
-	adc	12(rp), %edx
-	rcr	n
-	jmp	L(ent)
-
-C	ALIGN(16)		C alignment seems irrelevant
-L(top):	movd	(up), %mm1
-	adc	n, n
-	movd	%mm0, %eax
-	pmuludq	%mm1, %mm1
-	movd	4(up), %mm0
-	adc	(rp), %eax
-	movd	%mm1, %ebx
-	pmuludq	%mm0, %mm0
-	psrlq	$32, %mm1
-	adc	4(rp), %ebx
-	movd	%mm1, %ebp
-	movd	%mm0, %edx
-	adc	8(rp), %ebp
-	adc	12(rp), %edx
-	rcr	n		C FIXME: isn't this awfully slow on atom???
-	adc	%eax, (rp)
-	adc	%ebx, 4(rp)
-L(ent):	lea	8(up), up
-	adc	%ebp, 8(rp)
-	psrlq	$32, %mm0
-	adc	%edx, 12(rp)
-L(odd):	decl	28(%esp)
-	lea	16(rp), rp
-	jnz	L(top)
-
-L(end):	adc	n, n
-	movd	%mm0, %eax
-	adc	n, %eax
-	mov	%eax, (rp)
-
-L(rtn):	emms
-	pop	%ebp
-	pop	%ebx
-	pop	%esi
-	pop	%edi
-	ret
-
-L(one):	pmuludq	%mm7, %mm7
-	movq	%mm7, -4(rp)
-	emms
-	pop	%esi
-	pop	%edi
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/atom/sublsh1_n.asm b/gmp/mpn/x86/atom/sublsh1_n.asm
deleted file mode 100644
index d3e7e5b5cb..0000000000
--- a/gmp/mpn/x86/atom/sublsh1_n.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl  Intel Atom mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_sublsh1_n_ip1)
-include_mpn(`x86/k7/sublsh1_n.asm')
diff --git a/gmp/mpn/x86/atom/sublsh2_n.asm b/gmp/mpn/x86/atom/sublsh2_n.asm
deleted file mode 100644
index 79405cf9f4..0000000000
--- a/gmp/mpn/x86/atom/sublsh2_n.asm
+++ /dev/null
@@ -1,57 +0,0 @@
-dnl  Intel Atom mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).
-
-dnl  Contributed to the GNU project by Marco Bodrato.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 2)
-define(RSH, 30)
-
-ifdef(`OPERATION_addlsh2_n', `
-	define(M4_inst,		adcl)
-	define(M4_opp,		subl)
-	define(M4_function,	mpn_addlsh2_n)
-	define(M4_function_c,	mpn_addlsh2_nc)
-	define(M4_ip_function_c, mpn_addlsh2_nc_ip1)
-	define(M4_ip_function,	mpn_addlsh2_n_ip1)
-',`ifdef(`OPERATION_sublsh2_n', `
-	define(M4_inst,		sbbl)
-	define(M4_opp,		addl)
-	define(M4_function,	mpn_sublsh2_n)
-	define(M4_function_c,	mpn_sublsh2_nc)
-	define(M4_ip_function_c, mpn_sublsh2_nc_ip1)
-	define(M4_ip_function,	mpn_sublsh2_n_ip1)
-',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_sublsh2_n
-')')')
-
-MULFUNC_PROLOGUE(mpn_sublsh2_n mpn_sublsh2_nc mpn_sublsh2_n_ip1 mpn_sublsh2_nc_ip1)
-
-include_mpn(`x86/atom/aorslshC_n.asm')
diff --git a/gmp/mpn/x86/bd1/gmp-mparam.h b/gmp/mpn/x86/bd1/gmp-mparam.h
deleted file mode 100644
index 7d80a1cb4c..0000000000
--- a/gmp/mpn/x86/bd1/gmp-mparam.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/* AMD bd2 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2005, 2008-2010, 2014 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 3600 MHz Bulldozer Zambezi */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               3
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        16
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              3
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           27
-
-#define MUL_TOOM22_THRESHOLD                32
-#define MUL_TOOM33_THRESHOLD                65
-#define MUL_TOOM44_THRESHOLD               154
-#define MUL_TOOM6H_THRESHOLD               230
-#define MUL_TOOM8H_THRESHOLD               354
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     110
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      93
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     102
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     130
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 48
-#define SQR_TOOM3_THRESHOLD                 87
-#define SQR_TOOM4_THRESHOLD                204
-#define SQR_TOOM6_THRESHOLD                315
-#define SQR_TOOM8_THRESHOLD                430
-
-#define MULMID_TOOM42_THRESHOLD             48
-
-#define MULMOD_BNM1_THRESHOLD               21
-#define SQRMOD_BNM1_THRESHOLD               23
-
-#define MUL_FFT_MODF_THRESHOLD             840  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    840, 5}, {     28, 6}, {     15, 5}, {     33, 6}, \
-    {     28, 7}, {     15, 6}, {     32, 7}, {     17, 6}, \
-    {     35, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
-    {     47, 7}, {     29, 8}, {     15, 7}, {     31, 6}, \
-    {     63, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     51, 8}, {     27, 7}, {     55, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 7}, {     79, 9}, \
-    {     23, 8}, {     55, 9}, {     31, 8}, {     67, 9}, \
-    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
-    {     55,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
-    {     79,10}, {     47, 9}, {    103,11}, {     31,10}, \
-    {     63, 9}, {    135,10}, {     79, 9}, {    159,10}, \
-    {     95, 9}, {    191,10}, {    111,11}, {     63,10}, \
-    {    127, 9}, {    255,10}, {    159,11}, {     95,10}, \
-    {    191,12}, {     63,11}, {    127,10}, {    271,11}, \
-    {    159,10}, {    319, 9}, {    639,11}, {    191,10}, \
-    {    383, 9}, {    767,11}, {    223,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    287,10}, {    607,11}, \
-    {    319,10}, {    639,12}, {    191,11}, {    383,10}, \
-    {    799,13}, {    127,12}, {    255,11}, {    543,10}, \
-    {   1087,11}, {    607,12}, {    319,11}, {    671,10}, \
-    {   1343,11}, {    735,10}, {   1471, 9}, {   2943,12}, \
-    {    383,11}, {    799,10}, {   1599,11}, {    863,10}, \
-    {   1727,12}, {    447,13}, {    255,12}, {    511,11}, \
-    {   1087,12}, {    575,11}, {   1215,10}, {   2431,12}, \
-    {    639,11}, {   1343,12}, {    703,11}, {   1471,10}, \
-    {   2943,13}, {    383,12}, {    767,11}, {   1599,12}, \
-    {    831,11}, {   1727,10}, {   3455,14}, {    255,13}, \
-    {    511,12}, {   1087,11}, {   2239,12}, {   1215,11}, \
-    {   2431,13}, {    639,12}, {   1471,11}, {   2943,13}, \
-    {    767,12}, {   1727,11}, {   3455,13}, {    895,12}, \
-    {   1919,11}, {   3839,12}, {   1983,11}, {   3967,10}, \
-    {   7935,14}, {    511,13}, {   1023,12}, {   2239,13}, \
-    {   1151,12}, {   2495,11}, {   4991,13}, {   1279,12}, \
-    {   2623,13}, {   1407,12}, {   2943,14}, {    767,13}, \
-    {   1535,12}, {   3071,13}, {   1663,12}, {   3455,13}, \
-    {   1791,12}, {   3583,13}, {   1919,12}, {   3967,11}, \
-    {   7935,15}, {    511,14}, {   1023,13}, {   2175,12}, \
-    {   4479,13}, {   2431,12}, {   4991,14}, {   1279,13}, \
-    {   2943,12}, {   6015,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 160
-#define MUL_FFT_THRESHOLD                 7808
-
-#define SQR_FFT_MODF_THRESHOLD             690  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    690, 5}, {     28, 6}, {     15, 5}, {     32, 6}, \
-    {     29, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
-    {     35, 7}, {     19, 6}, {     40, 7}, {     21, 6}, \
-    {     43, 7}, {     23, 6}, {     47, 7}, {     35, 8}, \
-    {     19, 7}, {     43, 8}, {     23, 7}, {     49, 8}, \
-    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
-    {     39, 7}, {     79, 8}, {     43, 9}, {     23, 8}, \
-    {     55, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
-    {     31, 9}, {     63, 8}, {    127, 9}, {     79,10}, \
-    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
-    {    127,10}, {     79, 9}, {    167,10}, {     95, 9}, \
-    {    191,10}, {    111,11}, {     63,10}, {    159,11}, \
-    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
-    {    271,11}, {    159,10}, {    319, 9}, {    639,11}, \
-    {    191,10}, {    383,11}, {    223,12}, {    127,11}, \
-    {    255,10}, {    511, 9}, {   1023,10}, {    543,11}, \
-    {    287,10}, {    607, 9}, {   1215,11}, {    319,10}, \
-    {    639,12}, {    191,11}, {    383,10}, {    799,11}, \
-    {    415,13}, {    127,12}, {    255,11}, {    511,10}, \
-    {   1023,11}, {    543,10}, {   1087,11}, {    607,10}, \
-    {   1215,12}, {    319,11}, {    671,10}, {   1343,11}, \
-    {    735,10}, {   1471,12}, {    383,11}, {    799,10}, \
-    {   1599,11}, {    863,12}, {    447,11}, {    927,13}, \
-    {    255,12}, {    511,11}, {   1055,10}, {   2111,11}, \
-    {   1087,12}, {    575,11}, {   1215,10}, {   2431,12}, \
-    {    639,11}, {   1343,12}, {    703,11}, {   1471,13}, \
-    {    383,12}, {    767,11}, {   1599,12}, {    831,11}, \
-    {   1727,10}, {   3455,12}, {    895,14}, {    255,13}, \
-    {    511,12}, {   1023,11}, {   2111,12}, {   1087,11}, \
-    {   2239,10}, {   4479,12}, {   1215,11}, {   2431,13}, \
-    {    639,12}, {   1471,11}, {   2943,13}, {    767,12}, \
-    {   1727,11}, {   3455,13}, {    895,12}, {   1983,11}, \
-    {   3967,14}, {    511,13}, {   1023,12}, {   2239,11}, \
-    {   4479,13}, {   1151,12}, {   2495,11}, {   4991,10}, \
-    {   9983,13}, {   1279,12}, {   2623,13}, {   1407,12}, \
-    {   2943,14}, {    767,13}, {   1663,12}, {   3455,13}, \
-    {   1791,12}, {   3583,13}, {   1919,12}, {   3967,15}, \
-    {    511,14}, {   1023,13}, {   2175,12}, {   4479,13}, \
-    {   2431,12}, {   4991,11}, {   9983,14}, {   1279,13}, \
-    {   2687,12}, {   5375,13}, {   2943,12}, {   5887,14}, \
-    {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 166
-#define SQR_FFT_THRESHOLD                 6784
-
-#define MULLO_BASECASE_THRESHOLD             5
-#define MULLO_DC_THRESHOLD                  31
-#define MULLO_MUL_N_THRESHOLD            14709
-
-#define DC_DIV_QR_THRESHOLD                 53
-#define DC_DIVAPPR_Q_THRESHOLD             230
-#define DC_BDIV_QR_THRESHOLD                50
-#define DC_BDIV_Q_THRESHOLD                136
-
-#define INV_MULMOD_BNM1_THRESHOLD           78
-#define INV_NEWTON_THRESHOLD               202
-#define INV_APPR_THRESHOLD                 202
-
-#define BINV_NEWTON_THRESHOLD              236
-#define REDC_1_TO_REDC_N_THRESHOLD          55
-
-#define MU_DIV_QR_THRESHOLD               1442
-#define MU_DIVAPPR_Q_THRESHOLD            1652
-#define MUPI_DIV_QR_THRESHOLD               81
-#define MU_BDIV_QR_THRESHOLD              1787
-#define MU_BDIV_Q_THRESHOLD               1685
-
-#define POWM_SEC_TABLE  1,22,194,376,692,2657
-
-#define MATRIX22_STRASSEN_THRESHOLD         21
-#define HGCD_THRESHOLD                      85
-#define HGCD_APPR_THRESHOLD                 50
-#define HGCD_REDUCE_THRESHOLD             4455
-#define GCD_DC_THRESHOLD                   456
-#define GCDEXT_DC_THRESHOLD                345
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                17
-#define GET_STR_PRECOMPUTE_THRESHOLD        27
-#define SET_STR_DC_THRESHOLD               100
-#define SET_STR_PRECOMPUTE_THRESHOLD       960
-
-#define FAC_DSC_THRESHOLD                  208
-#define FAC_ODD_THRESHOLD                   26
diff --git a/gmp/mpn/x86/bd2/gmp-mparam.h b/gmp/mpn/x86/bd2/gmp-mparam.h
deleted file mode 100644
index c5a53f2f9f..0000000000
--- a/gmp/mpn/x86/bd2/gmp-mparam.h
+++ /dev/null
@@ -1,209 +0,0 @@
-/* AMD bd2 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2005, 2008-2010, 2014 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 4000 MHz Piledriver Vishera  */
-/* FFT tuning limit = 40000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.8 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               3
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        19
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              3
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           24
-
-#define MUL_TOOM22_THRESHOLD                30
-#define MUL_TOOM33_THRESHOLD                81
-#define MUL_TOOM44_THRESHOLD               153
-#define MUL_TOOM6H_THRESHOLD               222
-#define MUL_TOOM8H_THRESHOLD               357
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      99
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      96
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     130
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 38
-#define SQR_TOOM3_THRESHOLD                 89
-#define SQR_TOOM4_THRESHOLD                196
-#define SQR_TOOM6_THRESHOLD                290
-#define SQR_TOOM8_THRESHOLD                454
-
-#define MULMID_TOOM42_THRESHOLD             68
-
-#define MULMOD_BNM1_THRESHOLD               19
-#define SQRMOD_BNM1_THRESHOLD               22
-
-#define MUL_FFT_MODF_THRESHOLD             636  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    636, 5}, {     27, 6}, {     27, 7}, {     15, 6}, \
-    {     33, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
-    {     39, 7}, {     23, 6}, {     47, 7}, {     29, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     49, 8}, {     27, 7}, {     55, 9}, \
-    {     15, 8}, {     31, 7}, {     63, 8}, {     43, 9}, \
-    {     23, 8}, {     55, 9}, {     31, 8}, {     67, 9}, \
-    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
-    {     55,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
-    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
-    {     63, 9}, {    135,10}, {     79, 9}, {    159,10}, \
-    {     95, 9}, {    191,11}, {     63,10}, {    127, 6}, \
-    {   2111, 5}, {   4351, 6}, {   2239, 7}, {   1215, 9}, \
-    {    311, 8}, {    639,10}, {    175, 8}, {    703,10}, \
-    {    191,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543,10}, {    287,11}, \
-    {    159, 9}, {    671,11}, {    191,10}, {    383, 9}, \
-    {    799,11}, {    223,12}, {    127,11}, {    255,10}, \
-    {    543, 9}, {   1087,11}, {    287,10}, {    607,11}, \
-    {    319,10}, {    671,12}, {    191,11}, {    383,10}, \
-    {    799,11}, {    415,13}, {    127,12}, {    255,11}, \
-    {    543,10}, {   1087,11}, {    607,10}, {   1215,12}, \
-    {    319,11}, {    671,10}, {   1343,11}, {    735,10}, \
-    {   1471,12}, {    383,11}, {    799,10}, {   1599,11}, \
-    {    863,12}, {    447,11}, {    895,13}, {    255,12}, \
-    {    511,11}, {   1087,12}, {    575,11}, {   1215,10}, \
-    {   2431,12}, {    639,11}, {   1343,12}, {    703,11}, \
-    {   1471,13}, {    383,12}, {    767,11}, {   1599,12}, \
-    {    831,11}, {   1727,10}, {   3455,12}, {    895,14}, \
-    {    255,13}, {    511,12}, {   1023,11}, {   2047,12}, \
-    {   1087,11}, {   2239,10}, {   4479,12}, {   1215,11}, \
-    {   2431,13}, {    639,12}, {   1471,11}, {   2943,13}, \
-    {    767,12}, {   1727,11}, {   3455,13}, {    895,12}, \
-    {   1919,14}, {    511,13}, {   1023,12}, {   2239,11}, \
-    {   4479,13}, {   1151,12}, {   2495,11}, {   4991,13}, \
-    {   1279,12}, {   2623,13}, {   1407,12}, {   2943,14}, \
-    {    767,13}, {   1535,12}, {   3071,13}, {   1663,12}, \
-    {   3455,13}, {   1919,15}, {    511,14}, {   1023,13}, \
-    {   2175,12}, {   4479,13}, {   2431,12}, {   4991,14}, \
-    {   1279,13}, {   2943,12}, {   5887,14}, {   1535,13}, \
-    {   3455,14}, {   1791,13}, {   3967,12}, {   7935,11}, \
-    {  15871,15}, {   1023,14}, {   2047,13}, {   4479,14}, \
-    {   2303,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 172
-#define MUL_FFT_THRESHOLD                 6784
-
-#define SQR_FFT_MODF_THRESHOLD             606  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    606, 5}, {     28, 6}, {     15, 5}, {     31, 6}, \
-    {     29, 7}, {     15, 6}, {     32, 7}, {     17, 6}, \
-    {     35, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
-    {     47, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     41, 8}, {     23, 7}, {     49, 8}, \
-    {     31, 7}, {     63, 8}, {     43, 9}, {     23, 8}, \
-    {     51, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95,10}, {     31, 9}, \
-    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
-    {     63, 9}, {    135,10}, {     79, 9}, {    159,10}, \
-    {     95, 9}, {    191,11}, {     63,10}, {    159,11}, \
-    {     95,10}, {    191, 6}, {   3135, 5}, {   6399, 6}, \
-    {   3455, 8}, {    895, 9}, {    479, 8}, {    991,10}, \
-    {    255, 9}, {    575,11}, {    159, 9}, {    639,10}, \
-    {    335, 8}, {   1343,10}, {    351,11}, {    191, 9}, \
-    {    799,11}, {    223,12}, {    127,11}, {    255,10}, \
-    {    543,11}, {    287,10}, {    607, 9}, {   1215,10}, \
-    {    671,12}, {    191,11}, {    383,10}, {    767, 9}, \
-    {   1535,10}, {    799,11}, {    415,10}, {    863,13}, \
-    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
-    {    543,10}, {   1087,11}, {    607,12}, {    319,11}, \
-    {    671,10}, {   1343,11}, {    735,12}, {    383,11}, \
-    {    799,10}, {   1599,11}, {    863,12}, {    447,11}, \
-    {    927,13}, {    255,12}, {    511,11}, {   1087,12}, \
-    {    575,11}, {   1215,12}, {    639,11}, {   1343,12}, \
-    {    703,11}, {   1471,13}, {    383,12}, {    767,11}, \
-    {   1599,12}, {    831,11}, {   1727,12}, {    895,11}, \
-    {   1791,12}, {    959,14}, {    255,13}, {    511,12}, \
-    {   1087,11}, {   2239,10}, {   4479,12}, {   1215,13}, \
-    {    639,12}, {   1471,11}, {   2943,13}, {    767,12}, \
-    {   1727,13}, {    895,12}, {   1919,14}, {    511,13}, \
-    {   1023,12}, {   2239,11}, {   4479,13}, {   1151,12}, \
-    {   2495,11}, {   4991,13}, {   1279,12}, {   2623,13}, \
-    {   1407,12}, {   2943,14}, {    767,13}, {   1663,12}, \
-    {   3455,13}, {   1791,12}, {   3583,13}, {   1919,15}, \
-    {    511,14}, {   1023,13}, {   2175,12}, {   4479,13}, \
-    {   2431,12}, {   4991,14}, {   1279,13}, {   2943,12}, \
-    {   5887,14}, {   1535,13}, {   3455,14}, {   1791,13}, \
-    {   3967,15}, {   1023,14}, {   2047,13}, {   4479,14}, \
-    {   2303,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 160
-#define SQR_FFT_THRESHOLD                 5760
-
-#define MULLO_BASECASE_THRESHOLD             3
-#define MULLO_DC_THRESHOLD                  34
-#define MULLO_MUL_N_THRESHOLD            13463
-
-#define DC_DIV_QR_THRESHOLD                 67
-#define DC_DIVAPPR_Q_THRESHOLD             196
-#define DC_BDIV_QR_THRESHOLD                67
-#define DC_BDIV_Q_THRESHOLD                112
-
-#define INV_MULMOD_BNM1_THRESHOLD           70
-#define INV_NEWTON_THRESHOLD               262
-#define INV_APPR_THRESHOLD                 222
-
-#define BINV_NEWTON_THRESHOLD              288
-#define REDC_1_TO_REDC_N_THRESHOLD          67
-
-#define MU_DIV_QR_THRESHOLD               1718
-#define MU_DIVAPPR_Q_THRESHOLD            1652
-#define MUPI_DIV_QR_THRESHOLD              122
-#define MU_BDIV_QR_THRESHOLD              1387
-#define MU_BDIV_Q_THRESHOLD               1528
-
-#define POWM_SEC_TABLE  1,16,69,508,1378,2657,2825
-
-#define MATRIX22_STRASSEN_THRESHOLD         19
-#define HGCD_THRESHOLD                      61
-#define HGCD_APPR_THRESHOLD                 50
-#define HGCD_REDUCE_THRESHOLD             3389
-#define GCD_DC_THRESHOLD                   492
-#define GCDEXT_DC_THRESHOLD                345
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                 9
-#define GET_STR_PRECOMPUTE_THRESHOLD        21
-#define SET_STR_DC_THRESHOLD               189
-#define SET_STR_PRECOMPUTE_THRESHOLD       541
-
-#define FAC_DSC_THRESHOLD                  141
-#define FAC_ODD_THRESHOLD                   29
diff --git a/gmp/mpn/x86/bdiv_dbm1c.asm b/gmp/mpn/x86/bdiv_dbm1c.asm
index 0288c475cd..dbee28fd94 100644
--- a/gmp/mpn/x86/bdiv_dbm1c.asm
+++ b/gmp/mpn/x86/bdiv_dbm1c.asm
@@ -1,51 +1,32 @@
 dnl  x86 mpn_bdiv_dbm1.
 
-dnl  Copyright 2008, 2011 Free Software Foundation, Inc.
+dnl  Copyright 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 
-include(`../config.m4')
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
-C			    cycles/limb
-C P5
-C P6 model 0-8,10-12)
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)		 5.1
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)	13.67
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C Intel Atom
-C AMD K6
-C AMD K7			 3.5
-C AMD K8
-C AMD K10
+include(`../config.m4')
 
+C	    cycles/limb
+C K7:		 3.5
+C P4 m0:	  ?
+C P4 m1:	  ?
+C P4 m2:	13.67
+C P4 m3:	  ?
+C P4 m4:	  ?
+C P6-13:	 5.1
 
 C TODO
 C  * Optimize for more x86 processors
@@ -76,17 +57,18 @@ PROLOGUE(mpn_bdiv_dbm1c)
 	cmp	$2, %eax
 	jc	L(b1)
 	jz	L(b2)
-
-L(b3):	lea	-8(%esi), %esi
-	lea	8(%edi), %edi
-	add	$-3, %ebp
-	jmp	L(3)
+	jmp	L(b3)
 
 L(b0):	mov	4(%esi), %eax
 	lea	-4(%esi), %esi
 	lea	12(%edi), %edi
 	add	$-4, %ebp
 	jmp	L(0)
+L(b3):
+	lea	-8(%esi), %esi
+	lea	8(%edi), %edi
+	add	$-3, %ebp
+	jmp	L(3)
 
 L(b2):	mov	4(%esi), %eax
 	lea	4(%esi), %esi
@@ -95,7 +77,8 @@ L(b2):	mov	4(%esi), %eax
 	jmp	L(2)
 
 	ALIGN(8)
-L(top):	mov	4(%esi), %eax
+L(top):
+	mov	4(%esi), %eax
 	mul	%ecx
 	lea	16(%edi), %edi
 	sub	%eax, %ebx
diff --git a/gmp/mpn/x86/bdiv_q_1.asm b/gmp/mpn/x86/bdiv_q_1.asm
deleted file mode 100644
index 825cd296a1..0000000000
--- a/gmp/mpn/x86/bdiv_q_1.asm
+++ /dev/null
@@ -1,208 +0,0 @@
-dnl  x86 mpn_bdiv_q_1 -- mpn by limb exact division.
-
-dnl  Rearranged from mpn/x86/dive_1.asm by Marco Bodrato.
-
-dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C     cycles/limb
-C P54    30.0
-C P55    29.0
-C P6     13.0 odd divisor, 12.0 even (strangely)
-C K6     14.0
-C K7     12.0
-C P4     42.0
-
-MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
-
-defframe(PARAM_SHIFT,  24)
-defframe(PARAM_INVERSE,20)
-defframe(PARAM_DIVISOR,16)
-defframe(PARAM_SIZE,   12)
-defframe(PARAM_SRC,    8)
-defframe(PARAM_DST,    4)
-
-dnl  re-use parameter space
-define(VAR_INVERSE,`PARAM_SRC')
-
-	TEXT
-
-C mp_limb_t
-C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
-C		    mp_limb_t inverse, int shift)
-
-	ALIGN(16)
-PROLOGUE(mpn_pi1_bdiv_q_1)
-deflit(`FRAME',0)
-
-	movl	PARAM_SHIFT, %ecx
-	pushl	%ebp	FRAME_pushl()
-
-	movl	PARAM_INVERSE, %eax
-	movl	PARAM_SIZE, %ebp
-	pushl	%ebx	FRAME_pushl()
-L(common):
-	pushl	%edi	FRAME_pushl()
-	pushl	%esi	FRAME_pushl()
-
-	movl	PARAM_SRC, %esi
-	movl	PARAM_DST, %edi
-
-	leal	(%esi,%ebp,4), %esi	C src end
-	leal	(%edi,%ebp,4), %edi	C dst end
-	negl	%ebp			C -size
-
-	movl	%eax, VAR_INVERSE
-	movl	(%esi,%ebp,4), %eax	C src[0]
-
-	xorl	%ebx, %ebx
-	xorl	%edx, %edx
-
-	incl	%ebp
-	jz	L(one)
-
-	movl	(%esi,%ebp,4), %edx	C src[1]
-
-	shrdl(	%cl, %edx, %eax)
-
-	movl	VAR_INVERSE, %edx
-	jmp	L(entry)
-
-
-	ALIGN(8)
-	nop	C k6 code alignment
-	nop
-L(top):
-	C eax	q
-	C ebx	carry bit, 0 or -1
-	C ecx	shift
-	C edx	carry limb
-	C esi	src end
-	C edi	dst end
-	C ebp	counter, limbs, negative
-
-	movl	-4(%esi,%ebp,4), %eax
-	subl	%ebx, %edx		C accumulate carry bit
-
-	movl	(%esi,%ebp,4), %ebx
-
-	shrdl(	%cl, %ebx, %eax)
-
-	subl	%edx, %eax		C apply carry limb
-	movl	VAR_INVERSE, %edx
-
-	sbbl	%ebx, %ebx
-
-L(entry):
-	imull	%edx, %eax
-
-	movl	%eax, -4(%edi,%ebp,4)
-	movl	PARAM_DIVISOR, %edx
-
-	mull	%edx
-
-	incl	%ebp
-	jnz	L(top)
-
-
-	movl	-4(%esi), %eax		C src high limb
-L(one):
-	shrl	%cl, %eax
-	popl	%esi	FRAME_popl()
-
-	addl	%ebx, %eax		C apply carry bit
-
-	subl	%edx, %eax		C apply carry limb
-
-	imull	VAR_INVERSE, %eax
-
-	movl	%eax, -4(%edi)
-
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-
-	ret
-
-EPILOGUE()
-
-C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C                           mp_limb_t divisor);
-C
-
-	ALIGN(16)
-PROLOGUE(mpn_bdiv_q_1)
-deflit(`FRAME',0)
-
-	movl	PARAM_DIVISOR, %eax
-	pushl	%ebp	FRAME_pushl()
-
-	movl	$-1, %ecx		C shift count
-	movl	PARAM_SIZE, %ebp
-
-	pushl	%ebx	FRAME_pushl()
-
-L(strip_twos):
-	incl	%ecx
-
-	shrl	%eax
-	jnc	L(strip_twos)
-
-	leal	1(%eax,%eax), %ebx	C d without twos
-	andl	$127, %eax		C d/2, 7 bits
-
-ifdef(`PIC',`
-	LEA(	binvert_limb_table, %edx)
-	movzbl	(%eax,%edx), %eax		C inv 8 bits
-',`
-	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
-')
-
-	leal	(%eax,%eax), %edx	C 2*inv
-	movl	%ebx, PARAM_DIVISOR	C d without twos
-	imull	%eax, %eax		C inv*inv
-	imull	%ebx, %eax		C inv*inv*d
-	subl	%eax, %edx		C inv = 2*inv - inv*inv*d
-
-	leal	(%edx,%edx), %eax	C 2*inv
-	imull	%edx, %edx		C inv*inv
-	imull	%ebx, %edx		C inv*inv*d
-	subl	%edx, %eax		C inv = 2*inv - inv*inv*d
-
-	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
-	pushl	%eax	FRAME_pushl()
-	imull	PARAM_DIVISOR, %eax
-	cmpl	$1, %eax
-	popl	%eax	FRAME_popl()')
-
-	jmp	L(common)
-EPILOGUE()
-
diff --git a/gmp/mpn/x86/bobcat/gmp-mparam.h b/gmp/mpn/x86/bobcat/gmp-mparam.h
deleted file mode 100644
index 198081f9fd..0000000000
--- a/gmp/mpn/x86/bobcat/gmp-mparam.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/* x86/bobcat gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 1600 MHz AMD Bobcat Zacate E-350 */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         12
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        16
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     16
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              2
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           40
-
-#define MUL_TOOM22_THRESHOLD                28
-#define MUL_TOOM33_THRESHOLD                90
-#define MUL_TOOM44_THRESHOLD               154
-#define MUL_TOOM6H_THRESHOLD               270
-#define MUL_TOOM8H_THRESHOLD               490
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     107
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      95
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     110
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     130
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 38
-#define SQR_TOOM3_THRESHOLD                121
-#define SQR_TOOM4_THRESHOLD                212
-#define SQR_TOOM6_THRESHOLD                303
-#define SQR_TOOM8_THRESHOLD                454
-
-#define MULMID_TOOM42_THRESHOLD             74
-
-#define MULMOD_BNM1_THRESHOLD               18
-#define SQRMOD_BNM1_THRESHOLD               23
-
-#define MUL_FFT_MODF_THRESHOLD             660  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    660, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     27, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
-    {     35, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
-    {     47, 7}, {     27, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     41, 8}, {     23, 7}, {     49, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 8}, \
-    {     55, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
-    {     31, 9}, {     63, 8}, {    127, 9}, {     79,10}, \
-    {     47, 9}, {    103,11}, {     31,10}, {     63, 9}, \
-    {    135,10}, {     79, 9}, {    159,10}, {     95, 9}, \
-    {    191,10}, {    111,11}, {     63,10}, {    159,11}, \
-    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,10}, {    271, 9}, {    543,11}, \
-    {    159, 9}, {    639,10}, {    335, 9}, {    671,11}, \
-    {    191,10}, {    383, 9}, {    767,10}, {    399, 9}, \
-    {    799,11}, {    223,12}, {    127,11}, {    255,10}, \
-    {    543, 9}, {   1087,11}, {    287,10}, {    607, 9}, \
-    {   1215,10}, {    671,12}, {    191,11}, {    383,10}, \
-    {    799, 9}, {   1599,11}, {    415,13}, {    127,12}, \
-    {    255,11}, {    543,10}, {   1087,11}, {    607,10}, \
-    {   1215,11}, {    671,10}, {   1343,11}, {    735,10}, \
-    {   1471,12}, {    383,11}, {    799,10}, {   1599,11}, \
-    {    863,12}, {    447,11}, {    991,13}, {    255,12}, \
-    {    511,11}, {   1087,12}, {    575,11}, {   1215,12}, \
-    {    639,11}, {   1343,12}, {    703,11}, {   1471,13}, \
-    {    383,12}, {    767,11}, {   1599,12}, {    831,11}, \
-    {   1727,12}, {    959,14}, {    255,13}, {    511,12}, \
-    {   1215,13}, {    639,12}, {   1471,13}, {    767,12}, \
-    {   1727,13}, {    895,12}, {   1919,14}, {    511,13}, \
-    {   1023,12}, {   2111,13}, {   1151,12}, {   2431,13}, \
-    {   1407,14}, {    767,13}, {   1663,12}, {   3455,13}, \
-    {   1919,15}, {    511,14}, {   1023,13}, {   2175,12}, \
-    {   4479,13}, {   2431,14}, {   1279,13}, {   2943,12}, \
-    {   5887,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 139
-#define MUL_FFT_THRESHOLD                 7552
-
-#define SQR_FFT_MODF_THRESHOLD             606  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    606, 5}, {     28, 6}, {     15, 5}, {     31, 6}, \
-    {     28, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
-    {     35, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
-    {     47, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     41, 8}, {     23, 7}, {     49, 8}, \
-    {     31, 7}, {     63, 8}, {     43, 9}, {     23, 8}, \
-    {     55, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
-    {     31, 9}, {     79,10}, {     47, 9}, {    103,11}, \
-    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
-    {    159,10}, {     95, 9}, {    191,11}, {     63,10}, \
-    {    159,11}, {     95,10}, {    191,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
-    {    543, 8}, {   1087,11}, {    159,10}, {    319, 9}, \
-    {    639,10}, {    335, 9}, {    671, 8}, {   1343,10}, \
-    {    351,11}, {    191,10}, {    383, 9}, {    767,10}, \
-    {    399, 9}, {    799,10}, {    415, 9}, {    831,12}, \
-    {    127,11}, {    255,10}, {    511, 9}, {   1023,10}, \
-    {    543, 9}, {   1087,11}, {    287,10}, {    607, 9}, \
-    {   1215,11}, {    319,10}, {    671, 9}, {   1343,12}, \
-    {    191,11}, {    383,10}, {    799,11}, {    415,10}, \
-    {    831,13}, {    127,12}, {    255,11}, {    511,10}, \
-    {   1023,11}, {    543,10}, {   1087,11}, {    607,10}, \
-    {   1215,12}, {    319,11}, {    671,10}, {   1343,11}, \
-    {    735,10}, {   1471,12}, {    383,11}, {    799,10}, \
-    {   1599,11}, {    863,12}, {    447,11}, {    991,13}, \
-    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
-    {   1215,12}, {    639,11}, {   1343,12}, {    703,11}, \
-    {   1471,13}, {    383,12}, {    767,11}, {   1599,12}, \
-    {    831,11}, {   1727,12}, {    959,14}, {    255,13}, \
-    {    511,12}, {   1215,13}, {    639,12}, {   1471,13}, \
-    {    767,12}, {   1727,13}, {    895,12}, {   1983,14}, \
-    {    511,13}, {   1023,12}, {   2111,13}, {   1151,12}, \
-    {   2431,13}, {   1407,14}, {    767,13}, {   1663,12}, \
-    {   3455,13}, {   1919,15}, {    511,14}, {   1023,13}, \
-    {   2175,12}, {   4479,13}, {   2431,14}, {   1279,13}, \
-    {   2943,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 147
-#define SQR_FFT_THRESHOLD                 5760
-
-#define MULLO_BASECASE_THRESHOLD             5
-#define MULLO_DC_THRESHOLD                  45
-#define MULLO_MUL_N_THRESHOLD            14281
-
-#define DC_DIV_QR_THRESHOLD                 71
-#define DC_DIVAPPR_Q_THRESHOLD             238
-#define DC_BDIV_QR_THRESHOLD                67
-#define DC_BDIV_Q_THRESHOLD                151
-
-#define INV_MULMOD_BNM1_THRESHOLD           66
-#define INV_NEWTON_THRESHOLD               228
-#define INV_APPR_THRESHOLD                 222
-
-#define BINV_NEWTON_THRESHOLD              270
-#define REDC_1_TO_REDC_N_THRESHOLD          71
-
-#define MU_DIV_QR_THRESHOLD               1718
-#define MU_DIVAPPR_Q_THRESHOLD            1718
-#define MUPI_DIV_QR_THRESHOLD               91
-#define MU_BDIV_QR_THRESHOLD              1589
-#define MU_BDIV_Q_THRESHOLD               1718
-
-#define POWM_SEC_TABLE  1,16,96,416,1185
-
-#define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                      88
-#define HGCD_APPR_THRESHOLD                137
-#define HGCD_REDUCE_THRESHOLD             3664
-#define GCD_DC_THRESHOLD                   465
-#define GCDEXT_DC_THRESHOLD                345
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                18
-#define GET_STR_PRECOMPUTE_THRESHOLD        34
-#define SET_STR_DC_THRESHOLD               270
-#define SET_STR_PRECOMPUTE_THRESHOLD       828
-
-#define FAC_DSC_THRESHOLD                  256
-#define FAC_ODD_THRESHOLD                   34
diff --git a/gmp/mpn/x86/cnd_aors_n.asm b/gmp/mpn/x86/cnd_aors_n.asm
deleted file mode 100644
index 74f4917ecc..0000000000
--- a/gmp/mpn/x86/cnd_aors_n.asm
+++ /dev/null
@@ -1,124 +0,0 @@
-dnl  X86 mpn_cnd_add_n, mpn_cnd_sub_n
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C			    cycles/limb
-C P5				 ?
-C P6 model 0-8,10-12		 ?
-C P6 model 9   (Banias)		 ?
-C P6 model 13  (Dothan)		 5.4
-C P4 model 0-1 (Willamette)	 ?
-C P4 model 2   (Northwood)	14.5
-C P4 model 3-4 (Prescott)	21
-C Intel atom			11
-C AMD K6			 ?
-C AMD K7			 3.4
-C AMD K8			 ?
-
-
-define(`rp',  `%edi')
-define(`up',  `%esi')
-define(`vp',  `%ebp')
-define(`n',   `%ecx')
-define(`cnd', `20(%esp)')
-define(`cy',  `%edx')
-
-ifdef(`OPERATION_cnd_add_n', `
-	define(ADDSUB,	      add)
-	define(ADCSBB,	      adc)
-	define(func,	      mpn_cnd_add_n)')
-ifdef(`OPERATION_cnd_sub_n', `
-	define(ADDSUB,	      sub)
-	define(ADCSBB,	      sbb)
-	define(func,	      mpn_cnd_sub_n)')
-
-MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	add	$-16, %esp
-	mov	%ebp, (%esp)
-	mov	%ebx, 4(%esp)
-	mov	%esi, 8(%esp)
-	mov	%edi, 12(%esp)
-
-	C make cnd into a full mask
-	mov	cnd, %eax
-	neg	%eax
-	sbb	%eax, %eax
-	mov	%eax, cnd
-
-	C load parameters into registers
-	mov	24(%esp), rp
-	mov	28(%esp), up
-	mov	32(%esp), vp
-	mov	36(%esp), n
-
-	mov	(vp), %eax
-	mov	(up), %ebx
-
-	C put operand pointers just beyond their last limb
-	lea	(vp,n,4), vp
-	lea	(up,n,4), up
-	lea	-4(rp,n,4), rp
-	neg	n
-
-	and	cnd, %eax
-	ADDSUB	%eax, %ebx
-	sbb	cy, cy
-	inc	n
-	je	L(end)
-
-	ALIGN(16)
-L(top):	mov	(vp,n,4), %eax
-	and	cnd, %eax
-	mov	%ebx, (rp,n,4)
-	mov	(up,n,4), %ebx
-	add	cy, cy
-	ADCSBB	%eax, %ebx
-	sbb	cy, cy
-	inc	n
-	jne	L(top)
-
-L(end):	mov	%ebx, (rp)
-	xor	%eax, %eax
-	sub	cy, %eax
-
-	mov	(%esp), %ebp
-	mov	4(%esp), %ebx
-	mov	8(%esp), %esi
-	mov	12(%esp), %edi
-	add	$16, %esp
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/copyd.asm b/gmp/mpn/x86/copyd.asm
index 51fa19568b..4ce3bbbc69 100644
--- a/gmp/mpn/x86/copyd.asm
+++ b/gmp/mpn/x86/copyd.asm
@@ -1,42 +1,31 @@
 dnl  x86 mpn_copyd -- copy limb vector, decrementing.
 
-dnl  Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C     cycles/limb  startup (approx)
-C P5	  1.0	      40
-C P6	  2.4	      70
-C K6	  1.0	      55
-C K7	  1.3	      75
-C P4	  2.6	     175
+C P5:     1.0         40
+C P6      2.4         70
+C K6      1.0         55
+C K7:     1.3         75
+C P4:     2.6        175
 C
 C (Startup time includes some function call overheads.)
 
diff --git a/gmp/mpn/x86/copyi.asm b/gmp/mpn/x86/copyi.asm
index f6b0354b4f..c6bbaeee65 100644
--- a/gmp/mpn/x86/copyi.asm
+++ b/gmp/mpn/x86/copyi.asm
@@ -1,42 +1,31 @@
 dnl  x86 mpn_copyi -- copy limb vector, incrementing.
 
-dnl  Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C     cycles/limb  startup (approx)
-C P5	  1.0	      35
-C P6	  0.75	      45
-C K6	  1.0	      30
-C K7	  1.3	      65
-C P4	  1.0	     120
+C P5:     1.0         35
+C P6      0.75        45
+C K6      1.0         30
+C K7:     1.3         65
+C P4:     1.0        120
 C
 C (Startup time includes some function call overheads.)
 
diff --git a/gmp/mpn/x86/core2/gmp-mparam.h b/gmp/mpn/x86/core2/gmp-mparam.h
deleted file mode 100644
index b370eb5877..0000000000
--- a/gmp/mpn/x86/core2/gmp-mparam.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/* x86/core2 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2133 MHz Core 2 (65nm) */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-14, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 4
-#define MOD_1_UNNORM_THRESHOLD               4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        10
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              3
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           19
-
-#define MUL_TOOM22_THRESHOLD                26
-#define MUL_TOOM33_THRESHOLD                90
-#define MUL_TOOM44_THRESHOLD               144
-#define MUL_TOOM6H_THRESHOLD               286
-#define MUL_TOOM8H_THRESHOLD               430
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      93
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     140
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      89
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     102
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     136
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 34
-#define SQR_TOOM3_THRESHOLD                114
-#define SQR_TOOM4_THRESHOLD                178
-#define SQR_TOOM6_THRESHOLD                262
-#define SQR_TOOM8_THRESHOLD                357
-
-#define MULMID_TOOM42_THRESHOLD             66
-
-#define MULMOD_BNM1_THRESHOLD               15
-#define SQRMOD_BNM1_THRESHOLD               21
-
-#define MUL_FFT_MODF_THRESHOLD             600  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    600, 5}, {     25, 6}, {     13, 5}, {     28, 6}, \
-    {     25, 7}, {     13, 6}, {     29, 7}, {     15, 6}, \
-    {     33, 7}, {     17, 6}, {     36, 7}, {     19, 6}, \
-    {     39, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     43, 8}, {     23, 7}, {     47, 8}, \
-    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
-    {     43, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
-    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
-    {     47, 8}, {     95, 9}, {     55,10}, {     31, 9}, \
-    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
-    {     63, 9}, {    135,10}, {     79, 9}, {    159,10}, \
-    {     95, 9}, {    191,11}, {     63,10}, {    159,11}, \
-    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
-    {    271, 9}, {    543, 8}, {   1087,11}, {    159,10}, \
-    {    319, 9}, {    639,10}, {    335, 9}, {    671,10}, \
-    {    351,11}, {    191,10}, {    383, 9}, {    767,10}, \
-    {    399, 9}, {    799,11}, {    223,12}, {    127,11}, \
-    {    255,10}, {    543, 9}, {   1087,11}, {    287,10}, \
-    {    607, 9}, {   1215,11}, {    319,10}, {    671,11}, \
-    {    351,12}, {    191,11}, {    383,10}, {    799, 9}, \
-    {   1599,13}, {    127,12}, {    255,11}, {    543,10}, \
-    {   1087,11}, {    607,10}, {   1215,12}, {    319,11}, \
-    {    671,10}, {   1343,11}, {    735,12}, {    383,11}, \
-    {    799,10}, {   1599,11}, {    863,10}, {   1727,12}, \
-    {    447,11}, {    959,13}, {    255,12}, {    511,11}, \
-    {   1087,12}, {    575,11}, {   1215,12}, {    639,11}, \
-    {   1343,12}, {    703,13}, {    383,12}, {    767,11}, \
-    {   1599,12}, {    831,11}, {   1727,12}, {    959,14}, \
-    {    255,13}, {    511,12}, {   1087,11}, {   2239,10}, \
-    {   4479,12}, {   1215,13}, {    639,12}, {   1471,11}, \
-    {   2943,13}, {    767,12}, {   1727,13}, {    895,12}, \
-    {   1983,14}, {    511,13}, {   1023,12}, {   2239,11}, \
-    {   4479,13}, {   1151,12}, {   2495,13}, {   1279,12}, \
-    {   2623,13}, {   1407,12}, {   2815,14}, {    767,13}, \
-    {   1663,12}, {   3455,13}, {   1919,15}, {    511,14}, \
-    {   1023,13}, {   2175,12}, {   4479,13}, {   2431,14}, \
-    {   1279,13}, {   2943,12}, {   5887,14}, {  16384,15}, \
-    {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 149
-#define MUL_FFT_THRESHOLD                 6784
-
-#define SQR_FFT_MODF_THRESHOLD             500  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    500, 5}, {     28, 6}, {     15, 5}, {     31, 6}, \
-    {     28, 7}, {     15, 6}, {     32, 7}, {     17, 6}, \
-    {     35, 7}, {     19, 6}, {     39, 7}, {     29, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     49, 8}, {     27, 9}, {     15, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 8}, \
-    {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
-    {     39, 8}, {     79, 9}, {     55,10}, {     31, 9}, \
-    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
-    {     63, 9}, {    127,10}, {     79, 9}, {    159,10}, \
-    {     95,11}, {     63,10}, {    143, 9}, {    287,10}, \
-    {    159,11}, {     95,12}, {     63,11}, {    127,10}, \
-    {    271, 9}, {    543,10}, {    287,11}, {    159,10}, \
-    {    319, 9}, {    639,10}, {    335, 9}, {    671,10}, \
-    {    351, 9}, {    703,11}, {    191,10}, {    383, 9}, \
-    {    767,10}, {    399, 9}, {    799,10}, {    415, 9}, \
-    {    831,10}, {    431,11}, {    223,12}, {    127,11}, \
-    {    255,10}, {    543, 9}, {   1087,11}, {    287,10}, \
-    {    607,11}, {    319,10}, {    671,11}, {    351,10}, \
-    {    703,12}, {    191,11}, {    383,10}, {    799,11}, \
-    {    415,10}, {    863,13}, {    127,12}, {    255,11}, \
-    {    543,10}, {   1087,11}, {    607,10}, {   1215,12}, \
-    {    319,11}, {    671,10}, {   1343,11}, {    735,10}, \
-    {   1471,12}, {    383,11}, {    799,10}, {   1599,11}, \
-    {    863,12}, {    447,11}, {    959,13}, {    255,12}, \
-    {    511,11}, {   1087,12}, {    575,11}, {   1215,12}, \
-    {    639,11}, {   1343,12}, {    703,11}, {   1471,13}, \
-    {    383,12}, {    831,11}, {   1727,12}, {    959,14}, \
-    {    255,13}, {    511,12}, {   1087,11}, {   2239,12}, \
-    {   1215,13}, {    639,12}, {   1471,11}, {   2943,13}, \
-    {    767,12}, {   1727,13}, {    895,12}, {   1983,14}, \
-    {    511,13}, {   1023,12}, {   2239,13}, {   1151,12}, \
-    {   2495,13}, {   1407,12}, {   2943,14}, {    767,13}, \
-    {   1663,12}, {   3455,13}, {   1919,15}, {    511,14}, \
-    {   1023,13}, {   2175,12}, {   4479,13}, {   2431,14}, \
-    {   1279,13}, {   2943,12}, {   5887,14}, {  16384,15}, \
-    {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 145
-#define SQR_FFT_THRESHOLD                 5312
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  29
-#define MULLO_MUL_N_THRESHOLD            13463
-
-#define DC_DIV_QR_THRESHOLD                 21
-#define DC_DIVAPPR_Q_THRESHOLD              50
-#define DC_BDIV_QR_THRESHOLD                79
-#define DC_BDIV_Q_THRESHOLD                174
-
-#define INV_MULMOD_BNM1_THRESHOLD           50
-#define INV_NEWTON_THRESHOLD                39
-#define INV_APPR_THRESHOLD                  37
-
-#define BINV_NEWTON_THRESHOLD              318
-#define REDC_1_TO_REDC_N_THRESHOLD          87
-
-#define MU_DIV_QR_THRESHOLD               1099
-#define MU_DIVAPPR_Q_THRESHOLD             792
-#define MUPI_DIV_QR_THRESHOLD                0  /* always */
-#define MU_BDIV_QR_THRESHOLD              1442
-#define MU_BDIV_Q_THRESHOLD               1589
-
-#define POWM_SEC_TABLE  3,32,95,480,597,2657
-
-#define MATRIX22_STRASSEN_THRESHOLD         21
-#define HGCD_THRESHOLD                      83
-#define HGCD_APPR_THRESHOLD                159
-#define HGCD_REDUCE_THRESHOLD             3389
-#define GCD_DC_THRESHOLD                   379
-#define GCDEXT_DC_THRESHOLD                309
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                10
-#define GET_STR_PRECOMPUTE_THRESHOLD        25
-#define SET_STR_DC_THRESHOLD               442
-#define SET_STR_PRECOMPUTE_THRESHOLD      1104
-
-#define FAC_DSC_THRESHOLD                  155
-#define FAC_ODD_THRESHOLD                   34
diff --git a/gmp/mpn/x86/coreihwl/gmp-mparam.h b/gmp/mpn/x86/coreihwl/gmp-mparam.h
deleted file mode 100644
index e2b289cc3c..0000000000
--- a/gmp/mpn/x86/coreihwl/gmp-mparam.h
+++ /dev/null
@@ -1,210 +0,0 @@
-/* x86/coreihwl gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2900 MHz Core i5 Haswell */
-/* FFT tuning limit = 40000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                16
-#define MOD_1_UNNORM_THRESHOLD              13
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         11
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          9
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        10
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      5
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD             15
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           19
-
-#define MUL_TOOM22_THRESHOLD                27
-#define MUL_TOOM33_THRESHOLD                90
-#define MUL_TOOM44_THRESHOLD               218
-#define MUL_TOOM6H_THRESHOLD               318
-#define MUL_TOOM8H_THRESHOLD               490
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     153
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     101
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     130
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 44
-#define SQR_TOOM3_THRESHOLD                137
-#define SQR_TOOM4_THRESHOLD                242
-#define SQR_TOOM6_THRESHOLD                351
-#define SQR_TOOM8_THRESHOLD                597
-
-#define MULMID_TOOM42_THRESHOLD             98
-
-#define MULMOD_BNM1_THRESHOLD               17
-#define SQRMOD_BNM1_THRESHOLD               21
-
-#define MUL_FFT_MODF_THRESHOLD             630  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    630, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     15, 5}, {     31, 6}, {     28, 7}, {     15, 6}, \
-    {     33, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
-    {     39, 7}, {     23, 6}, {     47, 7}, {     27, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     49, 8}, {     27, 9}, {     15, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 8}, \
-    {     55,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
-    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
-    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
-    {    103,11}, {     31,10}, {     63, 9}, {    135,10}, \
-    {     79, 9}, {    159,10}, {     95, 9}, {    191,10}, \
-    {    111,11}, {     63,10}, {    159,11}, {     95,10}, \
-    {    191,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543,11}, {    159,10}, \
-    {    319, 9}, {    639,10}, {    335, 9}, {    671,11}, \
-    {    191,10}, {    383, 9}, {    767,10}, {    399, 9}, \
-    {    799,11}, {    223,12}, {    127,11}, {    255,10}, \
-    {    543,11}, {    287,10}, {    607, 9}, {   1215,11}, \
-    {    319,10}, {    671,12}, {    191,11}, {    383,10}, \
-    {    799,11}, {    415,13}, {    127,12}, {    255,11}, \
-    {    543,10}, {   1087,11}, {    607,10}, {   1215,12}, \
-    {    319,11}, {    671,10}, {   1343,11}, {    735,10}, \
-    {   1471,12}, {    383,11}, {    799,10}, {   1599,11}, \
-    {    863,10}, {   1727,12}, {    447,11}, {    959,13}, \
-    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
-    {   1215,10}, {   2431,12}, {    639,11}, {   1343,12}, \
-    {    703,11}, {   1471,13}, {    383,12}, {    767,11}, \
-    {   1599,12}, {    831,11}, {   1727,10}, {   3455,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1087,11}, \
-    {   2239,12}, {   1215,11}, {   2431,13}, {    639,12}, \
-    {   1471,11}, {   2943,10}, {   5887,13}, {    767,12}, \
-    {   1727,11}, {   3455,13}, {    895,12}, {   1983,14}, \
-    {    511,13}, {   1023,12}, {   2239,13}, {   1151,12}, \
-    {   2495,13}, {   1279,12}, {   2559,13}, {   1407,12}, \
-    {   2943,11}, {   5887,14}, {    767,13}, {   1535,12}, \
-    {   3071,13}, {   1663,12}, {   3455,13}, {   1919,15}, \
-    {    511,14}, {   1023,13}, {   2175,12}, {   4479,13}, \
-    {   2431,14}, {   1279,13}, {   2943,12}, {   5887,14}, \
-    {   1535,13}, {   3455,14}, {   1791,13}, {   3967,12}, \
-    {   7935,15}, {   1023,14}, {   2047,13}, {   4479,14}, \
-    {   2303,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 168
-#define MUL_FFT_THRESHOLD                 7424
-
-#define SQR_FFT_MODF_THRESHOLD             530  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    530, 5}, {     28, 6}, {     15, 5}, {     31, 6}, \
-    {     28, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
-    {     36, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
-    {     47, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     41, 8}, {     23, 7}, {     49, 8}, \
-    {     27, 7}, {     55, 9}, {     15, 8}, {     31, 7}, \
-    {     63, 8}, {     39, 9}, {     23, 8}, {     55,10}, \
-    {     15, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
-    {     31, 9}, {     79,10}, {     47, 9}, {     95,11}, \
-    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
-    {    159,10}, {     95, 9}, {    191,10}, {    111,11}, \
-    {     63,10}, {    159,11}, {     95,10}, {    191,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
-    {    271, 9}, {    543,11}, {    159,10}, {    319, 9}, \
-    {    639,10}, {    335, 9}, {    671,10}, {    351,11}, \
-    {    191,10}, {    383, 9}, {    767,10}, {    399, 9}, \
-    {    799,12}, {    127,11}, {    255,10}, {    511, 9}, \
-    {   1023,10}, {    543,11}, {    287,10}, {    607,11}, \
-    {    319,10}, {    671,11}, {    351,12}, {    191,11}, \
-    {    383,10}, {    799,11}, {    415,10}, {    831,13}, \
-    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
-    {    543,10}, {   1087,11}, {    607,12}, {    319,11}, \
-    {    671,10}, {   1343,11}, {    735,10}, {   1471,12}, \
-    {    383,11}, {    799,10}, {   1599,11}, {    863,10}, \
-    {   1727,12}, {    447,11}, {    991,13}, {    255,12}, \
-    {    511,11}, {   1087,12}, {    575,11}, {   1215,12}, \
-    {    639,11}, {   1343,12}, {    703,11}, {   1471,13}, \
-    {    383,12}, {    767,11}, {   1599,12}, {    831,11}, \
-    {   1727,12}, {    959,11}, {   1983,14}, {    255,13}, \
-    {    511,12}, {   1023,11}, {   2047,12}, {   1087,11}, \
-    {   2239,12}, {   1215,11}, {   2431,13}, {    639,12}, \
-    {   1471,11}, {   2943,13}, {    767,12}, {   1727,13}, \
-    {    895,12}, {   1983,14}, {    511,13}, {   1023,12}, \
-    {   2239,13}, {   1151,12}, {   2495,13}, {   1279,12}, \
-    {   2623,13}, {   1407,12}, {   2943,14}, {    767,13}, \
-    {   1535,12}, {   3071,13}, {   1663,12}, {   3455,13}, \
-    {   1919,12}, {   3839,15}, {    511,14}, {   1023,13}, \
-    {   2175,12}, {   4479,13}, {   2431,12}, {   4863,14}, \
-    {   1279,13}, {   2943,12}, {   5887,14}, {   1535,13}, \
-    {   3455,14}, {   1791,13}, {   3967,15}, {   1023,14}, \
-    {   2047,13}, {   4479,14}, {   2303,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 170
-#define SQR_FFT_THRESHOLD                 5760
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  57
-#define MULLO_MUL_N_THRESHOLD            14281
-
-#define DC_DIV_QR_THRESHOLD                 23
-#define DC_DIVAPPR_Q_THRESHOLD              63
-#define DC_BDIV_QR_THRESHOLD                87
-#define DC_BDIV_Q_THRESHOLD                204
-
-#define INV_MULMOD_BNM1_THRESHOLD           54
-#define INV_NEWTON_THRESHOLD                75
-#define INV_APPR_THRESHOLD                  67
-
-#define BINV_NEWTON_THRESHOLD              296
-#define REDC_1_TO_REDC_N_THRESHOLD          79
-
-#define MU_DIV_QR_THRESHOLD                872
-#define MU_DIVAPPR_Q_THRESHOLD             654
-#define MUPI_DIV_QR_THRESHOLD                0  /* always */
-#define MU_BDIV_QR_THRESHOLD              1858
-#define MU_BDIV_Q_THRESHOLD               2089
-
-#define POWM_SEC_TABLE  1,17,127,508,1603
-
-#define MATRIX22_STRASSEN_THRESHOLD         19
-#define HGCD_THRESHOLD                      61
-#define HGCD_APPR_THRESHOLD                 60
-#define HGCD_REDUCE_THRESHOLD             3810
-#define GCD_DC_THRESHOLD                   263
-#define GCDEXT_DC_THRESHOLD                278
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                11
-#define GET_STR_PRECOMPUTE_THRESHOLD        21
-#define SET_STR_DC_THRESHOLD               527
-#define SET_STR_PRECOMPUTE_THRESHOLD      1178
-
-#define FAC_DSC_THRESHOLD                  187
-#define FAC_ODD_THRESHOLD                   34
diff --git a/gmp/mpn/x86/coreinhm/gmp-mparam.h b/gmp/mpn/x86/coreinhm/gmp-mparam.h
deleted file mode 100644
index 13289c0c23..0000000000
--- a/gmp/mpn/x86/coreinhm/gmp-mparam.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/* x86/coreinhm gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2667 MHz Core i7 Nehalem */
-/* FFT tuning limit = 100000000 */
-/* Generated by tuneup.c, 2014-03-19, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                25
-#define MOD_1_UNNORM_THRESHOLD              15
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          8
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      3
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD             18
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           15
-
-#define MUL_TOOM22_THRESHOLD                26
-#define MUL_TOOM33_THRESHOLD                89
-#define MUL_TOOM44_THRESHOLD               214
-#define MUL_TOOM6H_THRESHOLD               327
-#define MUL_TOOM8H_THRESHOLD               466
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     159
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      95
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     101
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     142
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 44
-#define SQR_TOOM3_THRESHOLD                145
-#define SQR_TOOM4_THRESHOLD                232
-#define SQR_TOOM6_THRESHOLD                342
-#define SQR_TOOM8_THRESHOLD                502
-
-#define MULMID_TOOM42_THRESHOLD             78
-
-#define MULMOD_BNM1_THRESHOLD               17
-#define SQRMOD_BNM1_THRESHOLD               21
-
-#define MUL_FFT_MODF_THRESHOLD             606  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    606, 5}, {     25, 6}, {     13, 5}, {     28, 6}, \
-    {     15, 5}, {     33, 6}, {     29, 7}, {     15, 6}, \
-    {     33, 7}, {     17, 6}, {     36, 7}, {     19, 6}, \
-    {     39, 7}, {     23, 6}, {     47, 7}, {     35, 8}, \
-    {     19, 7}, {     43, 8}, {     23, 7}, {     49, 8}, \
-    {     31, 7}, {     63, 8}, {     43, 9}, {     23, 8}, \
-    {     51, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
-    {     31, 9}, {     79,10}, {     47, 9}, {     95,11}, \
-    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
-    {    159,10}, {     95, 9}, {    191,10}, {    111,11}, \
-    {     63,10}, {    159,11}, {     95,10}, {    191,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
-    {    271, 9}, {    543,11}, {    159,10}, {    319, 9}, \
-    {    639,10}, {    335,11}, {    191,10}, {    383, 9}, \
-    {    767,10}, {    399,12}, {    127,11}, {    255,10}, \
-    {    511, 9}, {   1023,10}, {    543,11}, {    287,10}, \
-    {    607,11}, {    319,10}, {    639,12}, {    191,11}, \
-    {    383,10}, {    767,13}, {    127,12}, {    255,11}, \
-    {    511,10}, {   1023,11}, {    543,10}, {   1087,11}, \
-    {    607,12}, {    319,11}, {    671,10}, {   1343,11}, \
-    {    735,12}, {    383,11}, {    799,10}, {   1599,11}, \
-    {    863,10}, {   1727,12}, {    447,11}, {    927,10}, \
-    {   1855,11}, {    991,13}, {    255,12}, {    511,11}, \
-    {   1119,12}, {    575,11}, {   1215,10}, {   2431,12}, \
-    {    639,11}, {   1343,12}, {    703,11}, {   1471,13}, \
-    {    383,12}, {    767,11}, {   1599,12}, {    831,11}, \
-    {   1727,12}, {    895,11}, {   1855,12}, {    959,14}, \
-    {    255,13}, {    511,12}, {   1023,11}, {   2111,12}, \
-    {   1087,11}, {   2239,10}, {   4479,12}, {   1215,11}, \
-    {   2431,13}, {    639,12}, {   1471,13}, {    767,12}, \
-    {   1727,11}, {   3455,13}, {    895,12}, {   1983,11}, \
-    {   3967,14}, {    511,13}, {   1023,12}, {   2239,11}, \
-    {   4479,13}, {   1151,12}, {   2495,11}, {   4991,13}, \
-    {   1279,12}, {   2623,13}, {   1407,12}, {   2943,14}, \
-    {    767,13}, {   1535,12}, {   3071,13}, {   1663,12}, \
-    {   3455,13}, {   1919,12}, {   3967,15}, {    511,14}, \
-    {   1023,13}, {   2175,12}, {   4479,13}, {   2431,12}, \
-    {   4991,14}, {   1279,13}, {   2687,12}, {   5503,13}, \
-    {   2943,12}, {   6015,14}, {   1535,13}, {   3455,14}, \
-    {   1791,13}, {   3967,12}, {   7935,15}, {   1023,14}, \
-    {   2047,13}, {   4479,14}, {   2303,13}, {   4991,12}, \
-    {   9983,14}, {   2559,13}, {   5503,14}, {   2815,13}, \
-    {   6015,15}, {   1535,14}, {   3839,13}, {   7935,16}, \
-    {   1023,15}, {   2047,14}, {   4095,13}, {   8191,12}, \
-    {  16383,11}, {  32767,10}, {  65535, 9}, { 131071, 8}, \
-    {    256, 9}, {    512,10}, {   1024,11}, {   2048,12}, \
-    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 192
-#define MUL_FFT_THRESHOLD                 6784
-
-#define SQR_FFT_MODF_THRESHOLD             555  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    555, 5}, {     28, 6}, {     15, 5}, {     31, 6}, \
-    {     16, 5}, {     33, 6}, {     29, 7}, {     15, 6}, \
-    {     32, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
-    {     39, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     41, 8}, {     23, 7}, {     49, 8}, \
-    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
-    {     43, 9}, {     23, 8}, {     55, 9}, {     31, 8}, \
-    {     67, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
-    {     95, 9}, {     55,10}, {     31, 9}, {     79,10}, \
-    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
-    {    135,10}, {     79, 9}, {    159,10}, {     95,11}, \
-    {     63,10}, {    143, 9}, {    287,10}, {    159,11}, \
-    {     95,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511, 8}, {   1023,10}, {    271, 9}, {    543,10}, \
-    {    287,11}, {    159,10}, {    319, 9}, {    639,10}, \
-    {    335, 9}, {    671,10}, {    351,11}, {    191,10}, \
-    {    383, 9}, {    767,10}, {    399, 9}, {    799,10}, \
-    {    415,12}, {    127,11}, {    255,10}, {    511, 9}, \
-    {   1023,10}, {    543,11}, {    287,10}, {    607,11}, \
-    {    319,10}, {    671,11}, {    351,12}, {    191,11}, \
-    {    383,10}, {    799,11}, {    415,13}, {    127,12}, \
-    {    255,11}, {    511,10}, {   1023,11}, {    543,10}, \
-    {   1087,11}, {    607,12}, {    319,11}, {    671,10}, \
-    {   1343,11}, {    735,10}, {   1471,12}, {    383,11}, \
-    {    799,10}, {   1599,11}, {    863,10}, {   1727,12}, \
-    {    447,11}, {    991,10}, {   1983,13}, {    255,12}, \
-    {    511,11}, {   1023,10}, {   2047,11}, {   1087,12}, \
-    {    575,11}, {   1215,10}, {   2431,12}, {    639,11}, \
-    {   1343,12}, {    703,11}, {   1471,13}, {    383,12}, \
-    {    767,11}, {   1599,12}, {    831,11}, {   1727,10}, \
-    {   3455,12}, {    895,11}, {   1791,12}, {    959,11}, \
-    {   1983,14}, {    255,13}, {    511,12}, {   1023,11}, \
-    {   2111,12}, {   1087,11}, {   2239,10}, {   4479,12}, \
-    {   1215,11}, {   2431,13}, {    639,12}, {   1471,11}, \
-    {   2943,13}, {    767,12}, {   1727,11}, {   3455,13}, \
-    {    895,12}, {   1983,11}, {   3967,14}, {    511,13}, \
-    {   1023,12}, {   2239,11}, {   4479,13}, {   1151,12}, \
-    {   2495,13}, {   1279,12}, {   2623,13}, {   1407,12}, \
-    {   2943,14}, {    767,13}, {   1663,12}, {   3455,13}, \
-    {   1919,12}, {   3967,15}, {    511,14}, {   1023,13}, \
-    {   2175,12}, {   4479,13}, {   2431,12}, {   4863,14}, \
-    {   1279,13}, {   2943,12}, {   5887,14}, {   1535,13}, \
-    {   3455,14}, {   1791,13}, {   3967,12}, {   7935,15}, \
-    {   1023,14}, {   2047,13}, {   4479,14}, {   2303,13}, \
-    {   4991,12}, {   9983,14}, {   2815,13}, {   5887,15}, \
-    {   1535,14}, {   3327,13}, {   6655,14}, {   3839,13}, \
-    {   7935,16}, {   1023,15}, {   2047,14}, {   4095,13}, \
-    {   8191,12}, {  16383,11}, {  32767,10}, {  65535, 9}, \
-    { 131071, 8}, {    256, 9}, {    512,10}, {   1024,11}, \
-    {   2048,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 201
-#define SQR_FFT_THRESHOLD                 5312
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  38
-#define MULLO_MUL_N_THRESHOLD            13463
-
-#define DC_DIV_QR_THRESHOLD                 22
-#define DC_DIVAPPR_Q_THRESHOLD              43
-#define DC_BDIV_QR_THRESHOLD                78
-#define DC_BDIV_Q_THRESHOLD                157
-
-#define INV_MULMOD_BNM1_THRESHOLD           50
-#define INV_NEWTON_THRESHOLD                15
-#define INV_APPR_THRESHOLD                  18
-
-#define BINV_NEWTON_THRESHOLD              351
-#define REDC_1_TO_REDC_N_THRESHOLD          84
-
-#define MU_DIV_QR_THRESHOLD                889
-#define MU_DIVAPPR_Q_THRESHOLD             483
-#define MUPI_DIV_QR_THRESHOLD                0  /* always */
-#define MU_BDIV_QR_THRESHOLD              1589
-#define MU_BDIV_Q_THRESHOLD               1787
-
-#define POWM_SEC_TABLE  2,25,95,473,1357
-
-#define MATRIX22_STRASSEN_THRESHOLD         20
-#define HGCD_THRESHOLD                      52
-#define HGCD_APPR_THRESHOLD                 51
-#define HGCD_REDUCE_THRESHOLD             3524
-#define GCD_DC_THRESHOLD                   213
-#define GCDEXT_DC_THRESHOLD                249
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                13
-#define GET_STR_PRECOMPUTE_THRESHOLD        24
-#define SET_STR_DC_THRESHOLD               145
-#define SET_STR_PRECOMPUTE_THRESHOLD       545
-
-#define FAC_DSC_THRESHOLD                   91
-#define FAC_ODD_THRESHOLD                   29
diff --git a/gmp/mpn/x86/coreisbr/gmp-mparam.h b/gmp/mpn/x86/coreisbr/gmp-mparam.h
deleted file mode 100644
index 9b227a71ba..0000000000
--- a/gmp/mpn/x86/coreisbr/gmp-mparam.h
+++ /dev/null
@@ -1,203 +0,0 @@
-/* x86/coreisbr gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 3300 MHz Core i5 Sandy Bridge */
-/* FFT tuning limit = 40000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                18
-#define MOD_1_UNNORM_THRESHOLD              11
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          9
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      9
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD             16
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           19
-
-#define MUL_TOOM22_THRESHOLD                28
-#define MUL_TOOM33_THRESHOLD                99
-#define MUL_TOOM44_THRESHOLD               160
-#define MUL_TOOM6H_THRESHOLD               268
-#define MUL_TOOM8H_THRESHOLD               490
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     106
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     140
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     109
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     108
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     137
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 48
-#define SQR_TOOM3_THRESHOLD                105
-#define SQR_TOOM4_THRESHOLD                256
-#define SQR_TOOM6_THRESHOLD                366
-#define SQR_TOOM8_THRESHOLD                562
-
-#define MULMID_TOOM42_THRESHOLD             98
-
-#define MULMOD_BNM1_THRESHOLD               19
-#define SQRMOD_BNM1_THRESHOLD               23
-
-#define MUL_FFT_MODF_THRESHOLD             636  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    636, 5}, {     27, 6}, {     28, 7}, {     15, 6}, \
-    {     32, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
-    {     39, 7}, {     23, 6}, {     47, 7}, {     27, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     49, 8}, {     27, 9}, {     15, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 8}, \
-    {     55,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
-    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
-    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
-    {    103,11}, {     31,10}, {     63, 9}, {    135,10}, \
-    {     79, 9}, {    159,10}, {     95, 9}, {    191,11}, \
-    {     63,10}, {    159,11}, {     95,10}, {    191,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
-    {    271, 9}, {    543,11}, {    159,10}, {    319, 9}, \
-    {    639,10}, {    335, 9}, {    671,11}, {    191,10}, \
-    {    383, 9}, {    767,10}, {    399, 9}, {    799,11}, \
-    {    223,12}, {    127,11}, {    255,10}, {    543, 9}, \
-    {   1087,11}, {    287,10}, {    607, 9}, {   1215,11}, \
-    {    319,10}, {    671,12}, {    191,11}, {    383,10}, \
-    {    799,11}, {    415,13}, {    127,12}, {    255,11}, \
-    {    543,10}, {   1087,11}, {    607,10}, {   1215,12}, \
-    {    319,11}, {    671,10}, {   1343,11}, {    735,10}, \
-    {   1471,12}, {    383,11}, {    799,10}, {   1599,11}, \
-    {    863,12}, {    447,11}, {    959,13}, {    255,12}, \
-    {    511,11}, {   1087,12}, {    575,11}, {   1215,10}, \
-    {   2431,12}, {    639,11}, {   1343,12}, {    703,11}, \
-    {   1471,13}, {    383,12}, {    767,11}, {   1599,12}, \
-    {    831,11}, {   1727,12}, {    959,14}, {    255,13}, \
-    {    511,12}, {   1087,11}, {   2239,12}, {   1215,11}, \
-    {   2431,13}, {    639,12}, {   1471,11}, {   2943,13}, \
-    {    767,12}, {   1727,13}, {    895,12}, {   1983,14}, \
-    {    511,13}, {   1023,12}, {   2239,13}, {   1151,12}, \
-    {   2431,13}, {   1279,12}, {   2559,13}, {   1407,12}, \
-    {   2943,14}, {    767,13}, {   1535,12}, {   3071,13}, \
-    {   1663,12}, {   3455,13}, {   1919,15}, {    511,14}, \
-    {   1023,13}, {   2175,12}, {   4479,13}, {   2431,14}, \
-    {   1279,13}, {   2943,12}, {   5887,14}, {  16384,15}, \
-    {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 149
-#define MUL_FFT_THRESHOLD                 7424
-
-#define SQR_FFT_MODF_THRESHOLD             555  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    555, 5}, {     28, 6}, {     15, 5}, {     31, 6}, \
-    {     29, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
-    {     36, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
-    {     47, 7}, {     29, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     43, 8}, {     23, 7}, {     47, 8}, \
-    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
-    {     43, 9}, {     23, 8}, {     51, 9}, {     31, 8}, \
-    {     67, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
-    {     95, 9}, {     55,10}, {     31, 9}, {     79,10}, \
-    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
-    {    135,10}, {     79, 9}, {    159,10}, {     95, 9}, \
-    {    191,10}, {    111,11}, {     63,10}, {    159,11}, \
-    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    543,11}, {    159,10}, {    319, 9}, \
-    {    639,10}, {    335, 9}, {    671,10}, {    351,11}, \
-    {    191,10}, {    383, 9}, {    767,10}, {    399, 9}, \
-    {    799,10}, {    415,12}, {    127,11}, {    255,10}, \
-    {    511, 9}, {   1023,10}, {    543,11}, {    287,10}, \
-    {    607,11}, {    319,10}, {    671,11}, {    351,12}, \
-    {    191,11}, {    383,10}, {    799,11}, {    415,13}, \
-    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
-    {    543,10}, {   1087,11}, {    607,10}, {   1215,12}, \
-    {    319,11}, {    671,10}, {   1343,11}, {    735,10}, \
-    {   1471,12}, {    383,11}, {    799,10}, {   1599,11}, \
-    {    863,10}, {   1727,12}, {    447,11}, {    959,10}, \
-    {   1919,11}, {    991,13}, {    255,12}, {    511,11}, \
-    {   1087,12}, {    575,11}, {   1215,10}, {   2431,12}, \
-    {    639,11}, {   1343,12}, {    703,11}, {   1471,13}, \
-    {    383,12}, {    767,11}, {   1599,12}, {    831,11}, \
-    {   1727,12}, {    959,11}, {   1919,14}, {    255,13}, \
-    {    511,12}, {   1023,11}, {   2047,12}, {   1087,11}, \
-    {   2239,12}, {   1215,11}, {   2431,13}, {    639,12}, \
-    {   1471,11}, {   2943,13}, {    767,12}, {   1727,13}, \
-    {    895,12}, {   1983,14}, {    511,13}, {   1023,12}, \
-    {   2239,13}, {   1151,12}, {   2495,13}, {   1279,12}, \
-    {   2623,13}, {   1407,12}, {   2943,14}, {    767,13}, \
-    {   1663,12}, {   3455,13}, {   1919,12}, {   3839,15}, \
-    {    511,14}, {   1023,13}, {   2175,12}, {   4479,13}, \
-    {   2431,12}, {   4863,14}, {   1279,13}, {   2943,12}, \
-    {   5887,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 159
-#define SQR_FFT_THRESHOLD                 5760
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  62
-#define MULLO_MUL_N_THRESHOLD            14281
-
-#define DC_DIV_QR_THRESHOLD                 25
-#define DC_DIVAPPR_Q_THRESHOLD              43
-#define DC_BDIV_QR_THRESHOLD                99
-#define DC_BDIV_Q_THRESHOLD                240
-
-#define INV_MULMOD_BNM1_THRESHOLD           54
-#define INV_NEWTON_THRESHOLD                14
-#define INV_APPR_THRESHOLD                  13
-
-#define BINV_NEWTON_THRESHOLD              363
-#define REDC_1_TO_REDC_N_THRESHOLD          90
-
-#define MU_DIV_QR_THRESHOLD                998
-#define MU_DIVAPPR_Q_THRESHOLD             667
-#define MUPI_DIV_QR_THRESHOLD                0  /* always */
-#define MU_BDIV_QR_THRESHOLD              1787
-#define MU_BDIV_Q_THRESHOLD               2130
-
-#define POWM_SEC_TABLE  1,16,126,480,1317
-
-#define MATRIX22_STRASSEN_THRESHOLD         21
-#define HGCD_THRESHOLD                      61
-#define HGCD_APPR_THRESHOLD                 56
-#define HGCD_REDUCE_THRESHOLD             3810
-#define GCD_DC_THRESHOLD                   283
-#define GCDEXT_DC_THRESHOLD                309
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        21
-#define SET_STR_DC_THRESHOLD               399
-#define SET_STR_PRECOMPUTE_THRESHOLD      1183
-
-#define FAC_DSC_THRESHOLD                  194
-#define FAC_ODD_THRESHOLD                   34
diff --git a/gmp/mpn/x86/darwin.m4 b/gmp/mpn/x86/darwin.m4
index f8363db3f7..7ef8dfc105 100644
--- a/gmp/mpn/x86/darwin.m4
+++ b/gmp/mpn/x86/darwin.m4
@@ -1,82 +1,40 @@
 divert(-1)
-dnl  Copyright 2007, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
+dnl  Copyright 2007 Free Software Foundation, Inc.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 define(`DARWIN')
 
-
 dnl  Usage LEA(symbol,reg)
 dnl
-dnl  We maintain lists of stuff to append in load_eip and darwin_bd.  The
-dnl  `index' stuff is needed to suppress repeated definitions.  To avoid
-dnl  getting fooled by "var" and "var1", we add 'bol ' (the end of
-dnl  'indirect_symbol') at the beginning and and a newline at the end.  This
-dnl  might be a bit fragile.
+dnl  FIXME: Only handles one symbol per assembly file because of the
+dnl  way EPILOGUE_cpu is handled.
 
-define(`LEA',
-m4_assert_numargs(2)
-`ifdef(`PIC',`
-ifelse(index(defn(`load_eip'), `$2'),-1,
-`m4append(`load_eip',
-`L(movl_eip_`'substr($2,1)):
+define(`LEA',`
+define(`EPILOGUE_cpu',
+`	L(movl_eip_`'substr($2,1)):
 	movl	(%esp), $2
 	ret_internal
-')')
-ifelse(index(defn(`darwin_bd'), `bol $1
-'),-1,
-`m4append(`darwin_bd',
-`	.section __IMPORT,__pointers,non_lazy_symbol_pointers
+	.section __IMPORT,__pointers,non_lazy_symbol_pointers
 L($1`'$non_lazy_ptr):
 	.indirect_symbol $1
 	.long	 0
-')')
+')
 	call	L(movl_eip_`'substr($2,1))
 	movl	L($1`'$non_lazy_ptr)-.($2), $2
-',`
-	movl	`$'$1, $2
-')')
-
-
-dnl EPILOGUE_cpu
-
-define(`EPILOGUE_cpu',`load_eip`'darwin_bd')
-
-define(`load_eip', `')		dnl updated in LEA
-define(`darwin_bd', `')		dnl updated in LEA
-
-
-dnl  Usage: CALL(funcname)
-dnl
-
-define(`CALL',
-m4_assert_numargs(1)
-`call	GSYM_PREFIX`'$1')
-
-undefine(`PIC_WITH_EBX')
+')
 
 divert`'dnl
diff --git a/gmp/mpn/x86/dive_1.asm b/gmp/mpn/x86/dive_1.asm
index 9a6cbb7931..d2d02f9f72 100644
--- a/gmp/mpn/x86/dive_1.asm
+++ b/gmp/mpn/x86/dive_1.asm
@@ -1,32 +1,21 @@
 dnl  x86 mpn_divexact_1 -- mpn by limb exact division.
 
 dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -108,7 +97,7 @@ ifdef(`PIC',`
 
 	subl	%edx, %eax		C inv = 2*inv - inv*inv*d
 
-	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+	ASSERT(e,`	C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
 	pushl	%eax	FRAME_pushl()
 	imull	PARAM_DIVISOR, %eax
 	cmpl	$1, %eax
diff --git a/gmp/mpn/x86/divrem_1.asm b/gmp/mpn/x86/divrem_1.asm
index 255d4935c3..a5fb88071d 100644
--- a/gmp/mpn/x86/divrem_1.asm
+++ b/gmp/mpn/x86/divrem_1.asm
@@ -1,32 +1,22 @@
 dnl  x86 mpn_divrem_1 -- mpn by limb division extending to fractional quotient.
 
-dnl  Copyright 1999-2003, 2007 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl  Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/divrem_2.asm b/gmp/mpn/x86/divrem_2.asm
index 4c38ad0acb..bbadda921c 100644
--- a/gmp/mpn/x86/divrem_2.asm
+++ b/gmp/mpn/x86/divrem_2.asm
@@ -3,30 +3,19 @@ dnl  x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
 dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -92,7 +81,7 @@ PROLOGUE(mpn_divrem_2)
 	seta	%dl
 	cmp	20(%esp), %ebp
 	setae	%al
-	orb	%dl, %al		C "orb" form to placate Sun tools
+	or	%dl, %al
 	jne	L(35)
 L(8):
 	mov	60(%esp), %esi		C fn
@@ -185,7 +174,7 @@ L(9):	mov	64(%esp), %esi		C up
 L(fix):	seta	%dl
 	cmp	20(%esp), %ebp
 	setae	%al
-	orb	%dl, %al		C "orb" form to placate Sun tools
+	or	%dl, %al
 	je	L(bck)
 	inc	%edi
 	sub	20(%esp), %ebp
diff --git a/gmp/mpn/x86/fat/com.c b/gmp/mpn/x86/fat/com.c
deleted file mode 100644
index d359d4ce73..0000000000
--- a/gmp/mpn/x86/fat/com.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_com.
-
-Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/com.c"
diff --git a/gmp/mpn/x86/fat/diveby3.c b/gmp/mpn/x86/fat/diveby3.c
new file mode 100644
index 0000000000..7ea0161b72
--- /dev/null
+++ b/gmp/mpn/x86/fat/diveby3.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_divexact_by3c.
+
+Copyright 2003, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+
+#include "mpn/generic/diveby3.c"
diff --git a/gmp/mpn/x86/fat/fat.c b/gmp/mpn/x86/fat/fat.c
index 1740813886..c3d1866c69 100644
--- a/gmp/mpn/x86/fat/fat.c
+++ b/gmp/mpn/x86/fat/fat.c
@@ -4,33 +4,22 @@
    THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
    COMPLETELY IN FUTURE GNU MP RELEASES.
 
-Copyright 2003, 2004, 2011, 2012 Free Software Foundation, Inc.
+Copyright 2003, 2004 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include <stdio.h>    /* for printf */
 #include <stdlib.h>   /* for getenv */
@@ -42,10 +31,14 @@ see https://www.gnu.org/licenses/.  */
 /* Change this to "#define TRACE(x) x" for some traces. */
 #define TRACE(x)
 
+/* Change this to 1 to take the cpuid from GMP_CPU_TYPE env var. */
+#define WANT_FAKE_CPUID  0
+
 
 /* fat_entry.asm */
-long __gmpn_cpuid (char [12], int);
-int  __gmpn_cpuid_available (void);
+long __gmpn_cpuid __GMP_PROTO ((char dst[12], int id));
+int  __gmpn_cpuid_available __GMP_PROTO ((void));
+
 
 
 #if WANT_FAKE_CPUID
@@ -56,9 +49,8 @@ int  __gmpn_cpuid_available (void);
 #define __gmpn_cpuid            fake_cpuid
 #define __gmpn_cpuid_available  fake_cpuid_available
 
-#define MAKE_FMS(family, model)						\
-  ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20)			\
-   + (((model) & 0xf) << 4) + (((model)  &  0xf0) << 12))
+#define MAKE_FMS(family, model) \
+  (((family) << 8) + ((model) << 4))  /* family from bit 8, model from bit 4 */
 
 static struct {
   const char  *name;
@@ -72,29 +64,17 @@ static struct {
   { "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) },
   { "pentium2",   "GenuineIntel", MAKE_FMS (6, 2) },
   { "pentium3",   "GenuineIntel", MAKE_FMS (6, 7) },
-  { "pentium4",   "GenuineIntel", MAKE_FMS (15, 2) },
-  { "prescott",   "GenuineIntel", MAKE_FMS (15, 3) },
-  { "nocona",     "GenuineIntel", MAKE_FMS (15, 4) },
-  { "core2",      "GenuineIntel", MAKE_FMS (6, 0xf) },
-  { "coreinhm",   "GenuineIntel", MAKE_FMS (6, 0x1a) },
-  { "coreiwsm",   "GenuineIntel", MAKE_FMS (6, 0x25) },
-  { "coreisbr",   "GenuineIntel", MAKE_FMS (6, 0x2a) },
-  { "coreihwl",   "GenuineIntel", MAKE_FMS (6, 0x3c) },
-  { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
+  { "pentium4",   "GenuineIntel", MAKE_FMS (15, 0) },  /* Pentium 4 reports family 15 */
 
   { "k5",         "AuthenticAMD", MAKE_FMS (5, 0) },
   { "k6",         "AuthenticAMD", MAKE_FMS (5, 3) },
   { "k62",        "AuthenticAMD", MAKE_FMS (5, 8) },
   { "k63",        "AuthenticAMD", MAKE_FMS (5, 9) },
   { "athlon",     "AuthenticAMD", MAKE_FMS (6, 0) },
-  { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
-  { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
-  { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
-  { "bulldozer",  "AuthenticAMD", MAKE_FMS (21, 1) },
+  { "x86_64",     "AuthenticAMD", MAKE_FMS (15, 0) },
 
   { "viac3",      "CentaurHauls", MAKE_FMS (6, 0) },
   { "viac32",     "CentaurHauls", MAKE_FMS (6, 9) },
-  { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
 };
 
 static int
@@ -148,46 +128,28 @@ typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));
 
 struct cpuvec_t __gmpn_cpuvec = {
   __MPN(add_n_init),
-  0,
-  0,
   __MPN(addmul_1_init),
-  0,
-  __MPN(bdiv_dbm1c_init),
-  __MPN(cnd_add_n_init),
-  __MPN(cnd_sub_n_init),
-  __MPN(com_init),
   __MPN(copyd_init),
   __MPN(copyi_init),
   __MPN(divexact_1_init),
+  __MPN(divexact_by3c_init),
   __MPN(divrem_1_init),
   __MPN(gcd_1_init),
   __MPN(lshift_init),
-  __MPN(lshiftc_init),
   __MPN(mod_1_init),
-  __MPN(mod_1_1p_init),
-  __MPN(mod_1_1p_cps_init),
-  __MPN(mod_1s_2p_init),
-  __MPN(mod_1s_2p_cps_init),
-  __MPN(mod_1s_4p_init),
-  __MPN(mod_1s_4p_cps_init),
   __MPN(mod_34lsub1_init),
   __MPN(modexact_1c_odd_init),
   __MPN(mul_1_init),
   __MPN(mul_basecase_init),
-  __MPN(mullo_basecase_init),
   __MPN(preinv_divrem_1_init),
   __MPN(preinv_mod_1_init),
-  __MPN(redc_1_init),
-  __MPN(redc_2_init),
   __MPN(rshift_init),
   __MPN(sqr_basecase_init),
   __MPN(sub_n_init),
-  0,
   __MPN(submul_1_init),
   0
 };
 
-int __gmpn_cpuvec_initialized = 0;
 
 /* The following setups start with generic x86, then overwrite with
    specifics for a chip, and higher versions of that chip.
@@ -257,107 +219,21 @@ __gmpn_cpuvec_init (void)
             case 6:
               TRACE (printf ("  p6\n"));
               CPUVEC_SETUP_p6;
-	      switch (model)
-		{
-		case 0x00:
-		case 0x01:
-		  TRACE (printf ("  pentiumpro\n"));
-		  break;
-
-		case 0x02:
-		case 0x03:
-		case 0x04:
-		case 0x05:
-		case 0x06:
-		  TRACE (printf ("  pentium2\n"));
-                  CPUVEC_SETUP_p6_mmx;
-		  break;
-
-		case 0x07:
-		case 0x08:
-		case 0x0a:
-		case 0x0b:
-		case 0x0c:
-		  TRACE (printf ("  pentium3\n"));
-                  CPUVEC_SETUP_p6_mmx;
-                  CPUVEC_SETUP_p6_p3mmx;
-		  break;
-
-		case 0x09:		/* Banias */
-		case 0x0d:		/* Dothan */
-		case 0x0e:		/* Yonah */
-		  TRACE (printf ("  Banias/Dothan/Yonah\n"));
-                  CPUVEC_SETUP_p6_mmx;
-                  CPUVEC_SETUP_p6_p3mmx;
-                  CPUVEC_SETUP_p6_sse2;
-		  break;
-
-		case 0x0f:		/* Conroe Merom Kentsfield Allendale */
-		case 0x10:
-		case 0x11:
-		case 0x12:
-		case 0x13:
-		case 0x14:
-		case 0x15:
-		case 0x16:
-		case 0x17:		/* PNR Wolfdale Yorkfield */
-		case 0x18:
-		case 0x19:
-		case 0x1d:		/* PNR Dunnington */
-		  TRACE (printf ("  Conroe\n"));
-                  CPUVEC_SETUP_p6_mmx;
-                  CPUVEC_SETUP_p6_p3mmx;
-                  CPUVEC_SETUP_p6_sse2;
-		  CPUVEC_SETUP_core2;
-		  break;
-
-		case 0x1c:		/* Atom Silverthorne */
-		case 0x26:		/* Atom Lincroft */
-		case 0x27:		/* Atom Saltwell */
-		case 0x36:		/* Atom Cedarview/Saltwell */
-		  TRACE (printf ("  atom\n"));
-		  CPUVEC_SETUP_atom;
-		  CPUVEC_SETUP_atom_mmx;
-		  CPUVEC_SETUP_atom_sse2;
-		  break;
-
-		case 0x1a:		/* NHM Gainestown */
-		case 0x1b:
-		case 0x1e:		/* NHM Lynnfield/Jasper */
-		case 0x1f:
-		case 0x20:
-		case 0x21:
-		case 0x22:
-		case 0x23:
-		case 0x24:
-		case 0x25:		/* WSM Clarkdale/Arrandale */
-		case 0x28:
-		case 0x29:
-		case 0x2b:
-		case 0x2c:		/* WSM Gulftown */
-		case 0x2e:		/* NHM Beckton */
-		case 0x2f:		/* WSM Eagleton */
-		  TRACE (printf ("  nehalem/westmere\n"));
-                  CPUVEC_SETUP_p6_mmx;
-                  CPUVEC_SETUP_p6_p3mmx;
-                  CPUVEC_SETUP_p6_sse2;
-		  CPUVEC_SETUP_core2;
-		  CPUVEC_SETUP_coreinhm;
-		  break;
-
-		case 0x2a:		/* SBR */
-		case 0x2d:		/* SBR-EP */
-		case 0x3a:		/* IBR */
-		case 0x3c:		/* Haswell */
-		  TRACE (printf ("  sandybridge\n"));
+              if (model >= 2)
+                {
+                  TRACE (printf ("  pentium2\n"));
                   CPUVEC_SETUP_p6_mmx;
+                }
+              if (model >= 7)
+                {
+                  TRACE (printf ("  pentium3\n"));
                   CPUVEC_SETUP_p6_p3mmx;
+                }
+              if (model >= 0xD || model == 9)
+                {
+                  TRACE (printf ("  p6 with sse2\n"));
                   CPUVEC_SETUP_p6_sse2;
-		  CPUVEC_SETUP_core2;
-		  CPUVEC_SETUP_coreinhm;
-		  CPUVEC_SETUP_coreisbr;
-		  break;
-		}
+                }
               break;
 
             case 15:
@@ -395,40 +271,13 @@ __gmpn_cpuvec_init (void)
               break;
             case 6:
               TRACE (printf ("  athlon\n"));
+            athlon:
               CPUVEC_SETUP_k7;
               CPUVEC_SETUP_k7_mmx;
               break;
-
-            case 0x0f:		/* k8 */
-            case 0x11:		/* "fam 11h", mix of k8 and k10 */
-            case 0x13:		/* unknown, conservatively assume k8  */
-            case 0x16:		/* unknown, conservatively assume k8  */
-            case 0x17:		/* unknown, conservatively assume k8  */
-              TRACE (printf ("  k8\n"));
-              CPUVEC_SETUP_k7;
-              CPUVEC_SETUP_k7_mmx;
-              CPUVEC_SETUP_k8;
-	      break;
-
-            case 0x10:		/* k10 */
-            case 0x12:		/* k10 (llano) */
-              TRACE (printf ("  k10\n"));
-              CPUVEC_SETUP_k7;
-              CPUVEC_SETUP_k7_mmx;
-	      break;
-
-            case 0x14:		/* bobcat */
-              TRACE (printf ("  bobcat\n"));
-              CPUVEC_SETUP_k7;
-              CPUVEC_SETUP_k7_mmx;
-              CPUVEC_SETUP_bobcat;
-	      break;
-
-            case 0x15:		/* bulldozer */
-              TRACE (printf ("  bulldozer\n"));
-              CPUVEC_SETUP_k7;
-              CPUVEC_SETUP_k7_mmx;
-	      break;
+            case 15:
+              TRACE (printf ("  x86_64\n"));
+              goto athlon;
             }
         }
       else if (strcmp (vendor_string, "CentaurHauls") == 0)
@@ -441,11 +290,6 @@ __gmpn_cpuvec_init (void)
                 {
                   TRACE (printf ("  viac32\n"));
                 }
-	      if (model >= 15)
-		{
-                  TRACE (printf ("  nano\n"));
-		  CPUVEC_SETUP_nano;
-		}
               break;
             }
         }
@@ -469,5 +313,5 @@ __gmpn_cpuvec_init (void)
 
   /* Set this once the threshold fields are ready.
      Use volatile to prevent it getting moved.  */
-  *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
+  ((volatile struct cpuvec_t *) &__gmpn_cpuvec)->initialized = 1;
 }
diff --git a/gmp/mpn/x86/fat/fat_entry.asm b/gmp/mpn/x86/fat/fat_entry.asm
index 6e3cb44dd5..bd46e4e8bd 100644
--- a/gmp/mpn/x86/fat/fat_entry.asm
+++ b/gmp/mpn/x86/fat/fat_entry.asm
@@ -1,32 +1,21 @@
 dnl  x86 fat binary entrypoints.
 
-dnl  Copyright 2003, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2003 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -129,7 +118,7 @@ EPILOGUE()
 L(fat_init):
 	C al	__gmpn_cpuvec byte offset
 
-	movzbl	%al, %eax
+	movzbl	%al, %eax		C byte offset is unsigned, so zero-extend
 	pushl	%eax
 
 ifdef(`PIC',`
diff --git a/gmp/mpn/x86/fat/gcd_1.c b/gmp/mpn/x86/fat/gcd_1.c
index f809bd8092..5bd000618c 100644
--- a/gmp/mpn/x86/fat/gcd_1.c
+++ b/gmp/mpn/x86/fat/gcd_1.c
@@ -5,28 +5,17 @@ Copyright 2003 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 #include "mpn/generic/gcd_1.c"
diff --git a/gmp/mpn/x86/fat/gmp-mparam.h b/gmp/mpn/x86/fat/gmp-mparam.h
index 3641a6bafa..9127d1425f 100644
--- a/gmp/mpn/x86/fat/gmp-mparam.h
+++ b/gmp/mpn/x86/fat/gmp-mparam.h
@@ -1,35 +1,25 @@
 /* Fat binary x86 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 2000-2003, 2011 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
 
 /* mpn_divexact_1 is faster than mpn_divrem_1 at all sizes.  The only time
@@ -44,17 +34,15 @@ see https://www.gnu.org/licenses/.  */
    preinv.  */
 #define USE_PREINV_DIVREM_1   1
 
-#define BMOD_1_TO_MOD_1_THRESHOLD           20
-
 /* mpn_sqr_basecase is faster than mpn_mul_basecase at all sizes, no need
-   for mpn_sqr to call the latter.  */
+   for mpn_sqr_n to call the latter.  */
 #define SQR_BASECASE_THRESHOLD 0
 
 /* Sensible fallbacks for these, when not taken from a cpu-specific
    gmp-mparam.h.  */
-#define MUL_TOOM22_THRESHOLD      20
-#define MUL_TOOM33_THRESHOLD     130
-#define SQR_TOOM2_THRESHOLD       30
+#define MUL_KARATSUBA_THRESHOLD   20
+#define MUL_TOOM3_THRESHOLD      130
+#define SQR_KARATSUBA_THRESHOLD   30
 #define SQR_TOOM3_THRESHOLD      200
 
 /* These are values more or less in the middle of what the typical x86 chips
diff --git a/gmp/mpn/x86/fat/lshiftc.c b/gmp/mpn/x86/fat/lshiftc.c
deleted file mode 100644
index 9ecf48978f..0000000000
--- a/gmp/mpn/x86/fat/lshiftc.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_lshiftc.
-
-Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/lshiftc.c"
diff --git a/gmp/mpn/x86/fat/mod_1.c b/gmp/mpn/x86/fat/mod_1.c
deleted file mode 100644
index 4f149cc353..0000000000
--- a/gmp/mpn/x86/fat/mod_1.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_mod_1.
-
-Copyright 2003, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/mod_1.c"
diff --git a/gmp/mpn/x86/fat/mod_1_1.c b/gmp/mpn/x86/fat/mod_1_1.c
deleted file mode 100644
index 92eaa7a87f..0000000000
--- a/gmp/mpn/x86/fat/mod_1_1.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Fat binary fallback mpn_mod_1_1p.
-
-Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-/*
-PROLOGUE(mpn_mod_1_1p_cps)
-*/
-
-#define OPERATION_mod_1_1_cps 1
-#include "mpn/generic/mod_1_1.c"
diff --git a/gmp/mpn/x86/fat/mod_1_2.c b/gmp/mpn/x86/fat/mod_1_2.c
deleted file mode 100644
index 9095a61c93..0000000000
--- a/gmp/mpn/x86/fat/mod_1_2.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Fat binary fallback mpn_mod_1s_2p.
-
-Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-/*
-PROLOGUE(mpn_mod_1s_2p_cps)
-*/
-
-#define OPERATION_mod_1_2_cps 1
-#include "mpn/generic/mod_1_2.c"
diff --git a/gmp/mpn/x86/fat/mod_1_4.c b/gmp/mpn/x86/fat/mod_1_4.c
deleted file mode 100644
index 51c0def443..0000000000
--- a/gmp/mpn/x86/fat/mod_1_4.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Fat binary fallback mpn_mod_1s_4p.
-
-Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-/*
-PROLOGUE(mpn_mod_1s_4p_cps)
-*/
-
-#define OPERATION_mod_1_4_cps 1
-#include "mpn/generic/mod_1_4.c"
diff --git a/gmp/mpn/x86/fat/mode1o.c b/gmp/mpn/x86/fat/mode1o.c
index 870ddb899b..a5244cae44 100644
--- a/gmp/mpn/x86/fat/mode1o.c
+++ b/gmp/mpn/x86/fat/mode1o.c
@@ -5,28 +5,17 @@ Copyright 2003 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 #include "mpn/generic/mode1o.c"
diff --git a/gmp/mpn/x86/fat/mullo_basecase.c b/gmp/mpn/x86/fat/mullo_basecase.c
deleted file mode 100644
index 7f86be64c5..0000000000
--- a/gmp/mpn/x86/fat/mullo_basecase.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_mullo_basecase.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/mullo_basecase.c"
diff --git a/gmp/mpn/x86/fat/redc_1.c b/gmp/mpn/x86/fat/redc_1.c
deleted file mode 100644
index 0025403353..0000000000
--- a/gmp/mpn/x86/fat/redc_1.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_redc_1.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/redc_1.c"
diff --git a/gmp/mpn/x86/fat/redc_2.c b/gmp/mpn/x86/fat/redc_2.c
deleted file mode 100644
index 1932d58323..0000000000
--- a/gmp/mpn/x86/fat/redc_2.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_redc_2.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/redc_2.c"
diff --git a/gmp/mpn/x86/geode/gmp-mparam.h b/gmp/mpn/x86/geode/gmp-mparam.h
deleted file mode 100644
index cc9c9f1789..0000000000
--- a/gmp/mpn/x86/geode/gmp-mparam.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/* Generic x86 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2002, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* Generated by tuneup.c, 2011-01-30, gcc 3.4 */
-
-#define MOD_1_NORM_THRESHOLD                 6
-#define MOD_1_UNNORM_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         17
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          9
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        14
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  MP_SIZE_T_MAX  /* never */
-#define USE_PREINV_DIVREM_1                  0
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           42
-
-#define MUL_TOOM22_THRESHOLD                18
-#define MUL_TOOM33_THRESHOLD                66
-#define MUL_TOOM44_THRESHOLD               105
-#define MUL_TOOM6H_THRESHOLD               141
-#define MUL_TOOM8H_THRESHOLD               212
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      62
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      69
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      65
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      67
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 33
-#define SQR_TOOM3_THRESHOLD                 60
-#define SQR_TOOM4_THRESHOLD                136
-#define SQR_TOOM6_THRESHOLD                196
-#define SQR_TOOM8_THRESHOLD                292
-
-#define MULMOD_BNM1_THRESHOLD               14
-#define SQRMOD_BNM1_THRESHOLD               16
-
-#define MUL_FFT_MODF_THRESHOLD             468  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    468, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
-    {     31, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
-    {     15, 7}, {     33, 8}, {     19, 7}, {     39, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
-    {     39, 9}, {     23, 8}, {     47,10}, {     15, 9}, \
-    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
-    {     47, 8}, {     95, 9}, {     55,10}, {     31, 9}, \
-    {     63, 8}, {    127, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
-    {     79, 9}, {    159,10}, {     95, 9}, {    191,11}, \
-    {     63,10}, {    127, 9}, {    255,10}, {    143, 9}, \
-    {    287,10}, {    159,11}, {     95,10}, {    191, 9}, \
-    {    383,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 61
-#define MUL_FFT_THRESHOLD                 5504
-
-#define SQR_FFT_MODF_THRESHOLD             396  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    396, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     21, 7}, {     11, 6}, {     24, 7}, {     13, 6}, \
-    {     27, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
-    {     11, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
-    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
-    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
-    {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
-    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
-    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255, 9}, {    135,10}, {     79, 9}, {    159, 8}, \
-    {    319,10}, {     95, 9}, {    191,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    511,10}, {    143, 9}, \
-    {    287, 8}, {    575,10}, {    159,11}, {     95,10}, \
-    {    191,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 61
-#define SQR_FFT_THRESHOLD                 3712
-
-#define MULLO_BASECASE_THRESHOLD             3
-#define MULLO_DC_THRESHOLD                  37
-#define MULLO_MUL_N_THRESHOLD            10950
-
-#define DC_DIV_QR_THRESHOLD                 59
-#define DC_DIVAPPR_Q_THRESHOLD             189
-#define DC_BDIV_QR_THRESHOLD                55
-#define DC_BDIV_Q_THRESHOLD                136
-
-#define INV_MULMOD_BNM1_THRESHOLD           50
-#define INV_NEWTON_THRESHOLD               183
-#define INV_APPR_THRESHOLD                 181
-
-#define BINV_NEWTON_THRESHOLD              204
-#define REDC_1_TO_REDC_N_THRESHOLD          54
-
-#define MU_DIV_QR_THRESHOLD               1142
-#define MU_DIVAPPR_Q_THRESHOLD            1142
-#define MUPI_DIV_QR_THRESHOLD               81
-#define MU_BDIV_QR_THRESHOLD               889
-#define MU_BDIV_Q_THRESHOLD                998
-
-#define MATRIX22_STRASSEN_THRESHOLD         13
-#define HGCD_THRESHOLD                     133
-#define GCD_DC_THRESHOLD                   451
-#define GCDEXT_DC_THRESHOLD                318
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                15
-#define GET_STR_PRECOMPUTE_THRESHOLD        30
-#define SET_STR_DC_THRESHOLD               547
-#define SET_STR_PRECOMPUTE_THRESHOLD      1049
diff --git a/gmp/mpn/x86/gmp-mparam.h b/gmp/mpn/x86/gmp-mparam.h
index 2cb1984889..22ee86f7e1 100644
--- a/gmp/mpn/x86/gmp-mparam.h
+++ b/gmp/mpn/x86/gmp-mparam.h
@@ -1,35 +1,24 @@
 /* Generic x86 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 2000-2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
 
 /* Generic x86 mpn_divexact_1 is faster than generic x86 mpn_divrem_1 on all
diff --git a/gmp/mpn/x86/i486/gmp-mparam.h b/gmp/mpn/x86/i486/gmp-mparam.h
index aa7dbad45b..aaddea9f18 100644
--- a/gmp/mpn/x86/i486/gmp-mparam.h
+++ b/gmp/mpn/x86/i486/gmp-mparam.h
@@ -1,46 +1,35 @@
 /* 80486 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 2001-2003 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
 
 /* 100MHz DX4 */
 
 /* Generated by tuneup.c, 2003-02-13, gcc 2.95 */
 
-#define MUL_TOOM22_THRESHOLD             18
-#define MUL_TOOM33_THRESHOLD            228
+#define MUL_KARATSUBA_THRESHOLD          18
+#define MUL_TOOM3_THRESHOLD             228
 
 #define SQR_BASECASE_THRESHOLD           13
-#define SQR_TOOM2_THRESHOLD              49
+#define SQR_KARATSUBA_THRESHOLD          49
 #define SQR_TOOM3_THRESHOLD             238
 
 #define DIV_SB_PREINV_THRESHOLD       MP_SIZE_T_MAX  /* never */
diff --git a/gmp/mpn/x86/k10/gmp-mparam.h b/gmp/mpn/x86/k10/gmp-mparam.h
deleted file mode 100644
index 2a1ae5a6bb..0000000000
--- a/gmp/mpn/x86/k10/gmp-mparam.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/* x86/k10 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2400 MHz K10 Barcelona */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         12
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         9
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        12
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     15
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           32
-
-#define MUL_TOOM22_THRESHOLD                24
-#define MUL_TOOM33_THRESHOLD                81
-#define MUL_TOOM44_THRESHOLD               130
-#define MUL_TOOM6H_THRESHOLD               189
-#define MUL_TOOM8H_THRESHOLD               430
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      81
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      82
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      90
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     112
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 38
-#define SQR_TOOM3_THRESHOLD                 77
-#define SQR_TOOM4_THRESHOLD                184
-#define SQR_TOOM6_THRESHOLD                262
-#define SQR_TOOM8_THRESHOLD                369
-
-#define MULMID_TOOM42_THRESHOLD             56
-
-#define MULMOD_BNM1_THRESHOLD               17
-#define SQRMOD_BNM1_THRESHOLD               18
-
-#define MUL_FFT_MODF_THRESHOLD             765  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    765, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
-    {     31, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
-    {     39, 7}, {     23, 6}, {     47, 7}, {     27, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 8}, \
-    {     51, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95,10}, {     31, 9}, \
-    {     63, 8}, {    127, 9}, {     79,10}, {     47, 9}, \
-    {    103,11}, {     31,10}, {     63, 9}, {    135,10}, \
-    {     79, 9}, {    159,10}, {     95, 9}, {    199,10}, \
-    {    111,11}, {     63,10}, {    127, 9}, {    263,10}, \
-    {    175,11}, {     95,10}, {    207,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    543, 8}, {   1087, 9}, \
-    {    575,11}, {    159,10}, {    319, 9}, {    671, 8}, \
-    {   1343, 9}, {    735,11}, {    191, 9}, {    799, 8}, \
-    {   1599,10}, {    415, 9}, {    863,11}, {    223,12}, \
-    {    127,11}, {    255,10}, {    543, 9}, {   1087,10}, \
-    {    607, 9}, {   1215, 8}, {   2431,11}, {    319,10}, \
-    {    671, 9}, {   1343,10}, {    735,12}, {    191,11}, \
-    {    383,10}, {    799, 9}, {   1599,11}, {    415,10}, \
-    {    863, 9}, {   1727,13}, {    127,12}, {    255,11}, \
-    {    543,10}, {   1087,11}, {    607,10}, {   1215, 9}, \
-    {   2431,12}, {    319,11}, {    671,10}, {   1343,11}, \
-    {    735,10}, {   1471, 9}, {   2943, 8}, {   5887,12}, \
-    {    383,11}, {    799,10}, {   1599,11}, {    863,10}, \
-    {   1727,12}, {    447,11}, {    959,10}, {   1919,11}, \
-    {    991,10}, {   1983,13}, {    255,12}, {    511,11}, \
-    {   1087,12}, {    575,11}, {   1215,10}, {   2431,12}, \
-    {    639,11}, {   1343,12}, {    703,11}, {   1471,10}, \
-    {   2943, 9}, {   5887,13}, {    383,12}, {    767,11}, \
-    {   1599,12}, {    831,11}, {   1727,10}, {   3455,12}, \
-    {    959,11}, {   1983,14}, {    255,13}, {    511,12}, \
-    {   1087,11}, {   2239,12}, {   1215,11}, {   2431,13}, \
-    {    639,12}, {   1471,11}, {   2943,10}, {   5887,13}, \
-    {    767,12}, {   1727,11}, {   3455,13}, {    895,12}, \
-    {   1983,14}, {    511,13}, {   1023,12}, {   2239,13}, \
-    {   1151,12}, {   2495,13}, {   1407,12}, {   2943,11}, \
-    {   5887,14}, {    767,13}, {   1663,12}, {   3455,13}, \
-    {   1919,12}, {   3839,15}, {    511,14}, {   1023,13}, \
-    {   2175,12}, {   4351,13}, {   2431,14}, {   1279,13}, \
-    {   2943,12}, {   5887,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 172
-#define MUL_FFT_THRESHOLD                 6784
-
-#define SQR_FFT_MODF_THRESHOLD             555  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    555, 5}, {     21, 6}, {     11, 5}, {     25, 6}, \
-    {     13, 5}, {     27, 6}, {     27, 7}, {     15, 6}, \
-    {     32, 7}, {     17, 6}, {     35, 7}, {     19, 6}, \
-    {     39, 7}, {     27, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
-    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
-    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
-    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
-    {     47,10}, {     31, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    127,10}, \
-    {     79, 9}, {    167,10}, {     95, 9}, {    191,10}, \
-    {    111,11}, {     63,10}, {    143, 9}, {    287, 8}, \
-    {    575,10}, {    159,11}, {     95,10}, {    191,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    543, 8}, \
-    {   1087,10}, {    287, 9}, {    607,11}, {    159,10}, \
-    {    319, 9}, {    671, 8}, {   1343,10}, {    351, 9}, \
-    {    735, 8}, {   1471,11}, {    191,10}, {    383, 9}, \
-    {    767,10}, {    399, 9}, {    799, 8}, {   1599,10}, \
-    {    415, 9}, {    863,11}, {    223,10}, {    479,12}, \
-    {    127,11}, {    255,10}, {    543, 9}, {   1087,11}, \
-    {    287,10}, {    607, 9}, {   1215, 8}, {   2431,11}, \
-    {    319,10}, {    671, 9}, {   1343,11}, {    351,10}, \
-    {    735, 9}, {   1471,12}, {    191,11}, {    383,10}, \
-    {    799, 9}, {   1599,11}, {    415,10}, {    863, 9}, \
-    {   1727,11}, {    479,13}, {    127,12}, {    255,11}, \
-    {    511,10}, {   1023,11}, {    543,10}, {   1087,11}, \
-    {    607,10}, {   1215, 9}, {   2431,12}, {    319,11}, \
-    {    671,10}, {   1343,11}, {    735,10}, {   1471, 9}, \
-    {   2943,12}, {    383,11}, {    799,10}, {   1599,11}, \
-    {    863,10}, {   1727,12}, {    447,11}, {    959,10}, \
-    {   1919,11}, {    991,10}, {   1983,12}, {    511,11}, \
-    {   1087,12}, {    575,11}, {   1215,10}, {   2431,12}, \
-    {    639,11}, {   1343,12}, {    703,11}, {   1471,10}, \
-    {   2943,13}, {    383,12}, {    767,11}, {   1599,12}, \
-    {    831,11}, {   1727,10}, {   3455,12}, {    959,11}, \
-    {   1983,13}, {    511,12}, {   1215,11}, {   2431,13}, \
-    {    639,12}, {   1471,11}, {   2943,13}, {    767,12}, \
-    {   1727,11}, {   3455,13}, {    895,12}, {   1983,14}, \
-    {    511,13}, {   1023,12}, {   2111,13}, {   1151,12}, \
-    {   2431,13}, {   1407,12}, {   2943,14}, {    767,13}, \
-    {   1663,12}, {   3455,13}, {   1919,12}, {   3839,15}, \
-    {    511,14}, {   1023,13}, {   2431,14}, {   1279,13}, \
-    {   2943,12}, {   5887,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 172
-#define SQR_FFT_THRESHOLD                 5504
-
-#define MULLO_BASECASE_THRESHOLD             7
-#define MULLO_DC_THRESHOLD                  40
-#define MULLO_MUL_N_THRESHOLD            13463
-
-#define DC_DIV_QR_THRESHOLD                 59
-#define DC_DIVAPPR_Q_THRESHOLD             270
-#define DC_BDIV_QR_THRESHOLD                55
-#define DC_BDIV_Q_THRESHOLD                206
-
-#define INV_MULMOD_BNM1_THRESHOLD           62
-#define INV_NEWTON_THRESHOLD               254
-#define INV_APPR_THRESHOLD                 252
-
-#define BINV_NEWTON_THRESHOLD              274
-#define REDC_1_TO_REDC_N_THRESHOLD          74
-
-#define MU_DIV_QR_THRESHOLD               1589
-#define MU_DIVAPPR_Q_THRESHOLD            1589
-#define MUPI_DIV_QR_THRESHOLD              106
-#define MU_BDIV_QR_THRESHOLD              1470
-#define MU_BDIV_Q_THRESHOLD               1558
-
-#define POWM_SEC_TABLE  1,16,114,428,1240
-
-#define MATRIX22_STRASSEN_THRESHOLD         19
-#define HGCD_THRESHOLD                     136
-#define HGCD_APPR_THRESHOLD                175
-#define HGCD_REDUCE_THRESHOLD             3389
-#define GCD_DC_THRESHOLD                   595
-#define GCDEXT_DC_THRESHOLD                424
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                15
-#define GET_STR_PRECOMPUTE_THRESHOLD        28
-#define SET_STR_DC_THRESHOLD               100
-#define SET_STR_PRECOMPUTE_THRESHOLD      1360
-
-#define FAC_DSC_THRESHOLD                  224
-#define FAC_ODD_THRESHOLD                   29
diff --git a/gmp/mpn/x86/k6/README b/gmp/mpn/x86/k6/README
index 1d65af3851..f488cbd1d8 100644
--- a/gmp/mpn/x86/k6/README
+++ b/gmp/mpn/x86/k6/README
@@ -3,28 +3,17 @@ Copyright 2000, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/x86/k6/aors_n.asm b/gmp/mpn/x86/k6/aors_n.asm
index 168f9b4ae4..09afd8f688 100644
--- a/gmp/mpn/x86/k6/aors_n.asm
+++ b/gmp/mpn/x86/k6/aors_n.asm
@@ -1,32 +1,21 @@
 dnl  AMD K6 mpn_add/sub_n -- mpn addition or subtraction.
 
-dnl  Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k6/aorsmul_1.asm b/gmp/mpn/x86/k6/aorsmul_1.asm
index eaa92ebb24..c3795e3abb 100644
--- a/gmp/mpn/x86/k6/aorsmul_1.asm
+++ b/gmp/mpn/x86/k6/aorsmul_1.asm
@@ -1,52 +1,42 @@
 dnl  AMD K6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
 
-dnl  Copyright 1999-2003, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation,
+dnl  Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C			    cycles/limb
-C P5
-C P6 model 0-8,10-12		 5.94
-C P6 model 9  (Banias)		 5.51
-C P6 model 13 (Dothan)		 5.57
+C                           cycles/limb
+C P5:
+C P6 model 0-8,10-12             5.94
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)           5.57
 C P4 model 0  (Willamette)
 C P4 model 1  (?)
 C P4 model 2  (Northwood)
 C P4 model 3  (Prescott)
 C P4 model 4  (Nocona)
-C AMD K6			7.65-8.5 (data dependent)
-C AMD K7
-C AMD K8
+C K6:                           7.65-8.5 (data dependent)
+C K7:
+C K8:
 
 
-dnl  K6:           large multipliers  small multipliers
+dnl  K6:           large multipliers  small multipliers
 dnl  UNROLL_COUNT    cycles/limb       cycles/limb
 dnl        4             9.5              7.78
 dnl        8             9.0              7.78
@@ -257,7 +247,7 @@ C registers at the point of doing the mul for the initial two carry limbs.
 C
 C The add/adc for the initial carry in %esi is necessary only for the
 C mpn_addmul/submul_1c entry points.  Duplicating the startup code to
-C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good
+C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good
 C idea.
 
 dnl  overlapping with parameters already fetched
diff --git a/gmp/mpn/x86/k6/cross.pl b/gmp/mpn/x86/k6/cross.pl
index fc921a56b7..cf476d603b 100755
--- a/gmp/mpn/x86/k6/cross.pl
+++ b/gmp/mpn/x86/k6/cross.pl
@@ -2,31 +2,20 @@
 
 # Copyright 2000, 2001 Free Software Foundation, Inc.
 #
-#  This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or
-#
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 # Usage: cross.pl [filename.o]...
diff --git a/gmp/mpn/x86/k6/divrem_1.asm b/gmp/mpn/x86/k6/divrem_1.asm
index b4cea4fa2a..1c86d9bd6c 100644
--- a/gmp/mpn/x86/k6/divrem_1.asm
+++ b/gmp/mpn/x86/k6/divrem_1.asm
@@ -1,32 +1,22 @@
 dnl  AMD K6 mpn_divrem_1 -- mpn by limb division.
 
-dnl  Copyright 1999-2003, 2007 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl  Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k6/gcd_1.asm b/gmp/mpn/x86/k6/gcd_1.asm
index 0c233ff362..58aff08221 100644
--- a/gmp/mpn/x86/k6/gcd_1.asm
+++ b/gmp/mpn/x86/k6/gcd_1.asm
@@ -1,32 +1,21 @@
 dnl  AMD K6 mpn_gcd_1 -- mpn by 1 gcd.
 
-dnl  Copyright 2000-2002, 2004 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k6/gmp-mparam.h b/gmp/mpn/x86/k6/gmp-mparam.h
index f03f1b2d91..c04446a573 100644
--- a/gmp/mpn/x86/k6/gmp-mparam.h
+++ b/gmp/mpn/x86/k6/gmp-mparam.h
@@ -1,166 +1,68 @@
 /* AMD K6 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 2000-2004, 2009, 2010 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2009
+Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
-or both in parallel, as here.
 
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+/* 450MHz K6-2 */
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+/* Generated by tuneup.c, 2009-01-05, gcc 3.4 */
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define MUL_KARATSUBA_THRESHOLD          19
+#define MUL_TOOM3_THRESHOLD              73
+#define MUL_TOOM44_THRESHOLD            104
 
+#define SQR_BASECASE_THRESHOLD            0  /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD          32
+#define SQR_TOOM3_THRESHOLD             105
+#define SQR_TOOM4_THRESHOLD             143
 
-/* 450MHz K6-2 */
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD              64
+#define MULLOW_MUL_N_THRESHOLD          232
+
+#define DIV_SB_PREINV_THRESHOLD           4
+#define DIV_DC_THRESHOLD                 67
+#define POWM_THRESHOLD                  110
+
+#define MATRIX22_STRASSEN_THRESHOLD      21
+#define HGCD_THRESHOLD                  195
+#define GCD_DC_THRESHOLD                602
+#define GCDEXT_DC_THRESHOLD             662
+#define JACOBI_BASE_METHOD                2
+
+#define USE_PREINV_DIVREM_1               0
+#define USE_PREINV_MOD_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             31
+#define GET_STR_PRECOMPUTE_THRESHOLD     52
+#define SET_STR_DC_THRESHOLD           1127
+#define SET_STR_PRECOMPUTE_THRESHOLD   1795
+
+#define MUL_FFT_TABLE  { 336, 672, 1152, 3584, 10240, 24576, 163840, 393216, 0 }
+#define MUL_FFT_MODF_THRESHOLD          352
+#define MUL_FFT_THRESHOLD              7168
 
-#define MOD_1_NORM_THRESHOLD                12
-#define MOD_1_UNNORM_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         41
-#define MOD_1U_TO_MOD_1_1_THRESHOLD         32
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         3
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD    128
-#define USE_PREINV_DIVREM_1                  0
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD        MP_SIZE_T_MAX  /* never */
-
-#define MUL_TOOM22_THRESHOLD                20
-#define MUL_TOOM33_THRESHOLD                69
-#define MUL_TOOM44_THRESHOLD               106
-#define MUL_TOOM6H_THRESHOLD               157
-#define MUL_TOOM8H_THRESHOLD               199
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      69
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      65
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      64
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 32
-#define SQR_TOOM3_THRESHOLD                 97
-#define SQR_TOOM4_THRESHOLD                143
-#define SQR_TOOM6_THRESHOLD                222
-#define SQR_TOOM8_THRESHOLD                272
-
-#define MULMOD_BNM1_THRESHOLD               13
-#define SQRMOD_BNM1_THRESHOLD               17
-
-#define MUL_FFT_MODF_THRESHOLD             476  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    476, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     11, 5}, {     23, 6}, {     17, 7}, {      9, 6}, \
-    {     19, 7}, {     11, 6}, {     23, 7}, {     13, 6}, \
-    {     27, 7}, {     15, 6}, {     31, 7}, {     17, 6}, \
-    {     35, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 8}, \
-    {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
-    {     47,10}, {     31, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
-    {     79, 9}, {    167,10}, {     95, 9}, {    191,10}, \
-    {    111,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    143, 9}, {    287,10}, {    159,11}, {     95,10}, \
-    {    191, 9}, {    383,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,10}, {    271, 9}, {    543,10}, \
-    {    287,11}, {    159,10}, {    351,11}, {    191,10}, \
-    {    415, 9}, {    831,11}, {    223,12}, {    127,11}, \
-    {    255,10}, {    543,11}, {    287,10}, {    575,11}, \
-    {    351,10}, {    703,12}, {    191,11}, {    415,10}, \
-    {    831,13}, {    127,12}, {    255,11}, {    543,10}, \
-    {   1087,11}, {    575,12}, {    319,11}, {    703,12}, \
-    {    383,11}, {    831,12}, {    447,11}, {    895,13}, \
-    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
-    {   1151,12}, {    703,13}, {    383,12}, {    959,14}, \
-    {    255,13}, {    511,12}, {   1215,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 106
-#define MUL_FFT_THRESHOLD                 7424
-
-#define SQR_FFT_MODF_THRESHOLD             432  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    432, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
-    {     24, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
-    {     31, 7}, {     21, 8}, {     11, 7}, {     29, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
-    {     23, 7}, {     49, 8}, {     27, 9}, {     15, 8}, \
-    {     39, 9}, {     23, 7}, {     93, 8}, {     47, 7}, \
-    {     95, 8}, {     51,10}, {     15, 9}, {     31, 8}, \
-    {     67, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
-    {     95, 9}, {     55,10}, {     31, 9}, {     71, 8}, \
-    {    143, 9}, {     79,10}, {     47, 9}, {     95,11}, \
-    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
-    {    167,10}, {     95, 9}, {    191,11}, {     63,10}, \
-    {    127, 9}, {    255,10}, {    143, 9}, {    287, 8}, \
-    {    575,10}, {    159, 9}, {    319,11}, {     95,10}, \
-    {    191,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543,10}, {    287,11}, \
-    {    159,10}, {    319, 9}, {    639,10}, {    351, 9}, \
-    {    703,11}, {    191,10}, {    415,11}, {    223,12}, \
-    {    127,11}, {    255,10}, {    543,11}, {    287,10}, \
-    {    607,11}, {    319,10}, {    639,11}, {    351,10}, \
-    {    703,12}, {    191,11}, {    415,10}, {    831,13}, \
-    {    127,12}, {    255,11}, {    543,10}, {   1087,11}, \
-    {    607,12}, {    319,11}, {    703,12}, {    383,11}, \
-    {    831,12}, {    447,13}, {    255,12}, {    511,11}, \
-    {   1087,12}, {    575,11}, {   1215,12}, {    703,13}, \
-    {    383,12}, {    895,14}, {    255,13}, {    511,12}, \
-    {   1215,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 112
-#define SQR_FFT_THRESHOLD                 7040
-
-#define MULLO_BASECASE_THRESHOLD             3
-#define MULLO_DC_THRESHOLD                  60
-#define MULLO_MUL_N_THRESHOLD            13463
-
-#define DC_DIV_QR_THRESHOLD                 78
-#define DC_DIVAPPR_Q_THRESHOLD             252
-#define DC_BDIV_QR_THRESHOLD                84
-#define DC_BDIV_Q_THRESHOLD                171
-
-#define INV_MULMOD_BNM1_THRESHOLD           55
-#define INV_NEWTON_THRESHOLD               234
-#define INV_APPR_THRESHOLD                 236
-
-#define BINV_NEWTON_THRESHOLD              268
-#define REDC_1_TO_REDC_N_THRESHOLD          67
-
-#define MU_DIV_QR_THRESHOLD               1308
-#define MU_DIVAPPR_Q_THRESHOLD            1142
-#define MUPI_DIV_QR_THRESHOLD              134
-#define MU_BDIV_QR_THRESHOLD              1164
-#define MU_BDIV_Q_THRESHOLD               1164
-
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                     182
-#define GCD_DC_THRESHOLD                   591
-#define GCDEXT_DC_THRESHOLD                472
-#define JACOBI_BASE_METHOD                   2
-
-#define GET_STR_DC_THRESHOLD                24
-#define GET_STR_PRECOMPUTE_THRESHOLD        40
-#define SET_STR_DC_THRESHOLD               834
-#define SET_STR_PRECOMPUTE_THRESHOLD      2042
+#define SQR_FFT_TABLE  { 272, 672, 1408, 4608, 10240, 24576, 163840, 393216, 0 }
+#define SQR_FFT_MODF_THRESHOLD          336
+#define SQR_FFT_THRESHOLD              3840
diff --git a/gmp/mpn/x86/k6/k62mmx/copyd.asm b/gmp/mpn/x86/k6/k62mmx/copyd.asm
index f80a5a1cdb..227ed78783 100644
--- a/gmp/mpn/x86/k6/k62mmx/copyd.asm
+++ b/gmp/mpn/x86/k6/k62mmx/copyd.asm
@@ -1,32 +1,21 @@
 dnl  AMD K6-2 mpn_copyd -- copy limb vector, decrementing.
 
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k6/k62mmx/lshift.asm b/gmp/mpn/x86/k6/k62mmx/lshift.asm
index c86575feed..e48e73e19a 100644
--- a/gmp/mpn/x86/k6/k62mmx/lshift.asm
+++ b/gmp/mpn/x86/k6/k62mmx/lshift.asm
@@ -1,32 +1,21 @@
 dnl  AMD K6-2 mpn_lshift -- mpn left shift.
 
 dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k6/k62mmx/rshift.asm b/gmp/mpn/x86/k6/k62mmx/rshift.asm
index f604a7bd52..b3114d0e6e 100644
--- a/gmp/mpn/x86/k6/k62mmx/rshift.asm
+++ b/gmp/mpn/x86/k6/k62mmx/rshift.asm
@@ -1,32 +1,21 @@
 dnl  AMD K6-2 mpn_rshift -- mpn right shift.
 
 dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k6/mmx/com.asm b/gmp/mpn/x86/k6/mmx/com_n.asm
index b747454627..42e6ab392a 100644
--- a/gmp/mpn/x86/k6/mmx/com.asm
+++ b/gmp/mpn/x86/k6/mmx/com_n.asm
@@ -1,32 +1,21 @@
-dnl  AMD K6-2 mpn_com -- mpn bitwise one's complement.
+dnl  AMD K6-2 mpn_com_n -- mpn bitwise one's complement.
 
-dnl  Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -39,7 +28,7 @@ C K6-2  1.0   1.18  1.18  1.18  cycles/limb
 C K6    1.5   1.85  1.75  1.85
 
 
-C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
 C
 C Take the bitwise ones-complement of src,size and write it to dst,size.
 
@@ -49,7 +38,7 @@ defframe(PARAM_DST, 4)
 
 	TEXT
 	ALIGN(16)
-PROLOGUE(mpn_com)
+PROLOGUE(mpn_com_n)
 deflit(`FRAME',0)
 
 	movl	PARAM_SIZE, %ecx
diff --git a/gmp/mpn/x86/k6/mmx/dive_1.asm b/gmp/mpn/x86/k6/mmx/dive_1.asm
index b644dca8cd..9cc90d88a5 100644
--- a/gmp/mpn/x86/k6/mmx/dive_1.asm
+++ b/gmp/mpn/x86/k6/mmx/dive_1.asm
@@ -1,32 +1,21 @@
 dnl  AMD K6 mpn_divexact_1 -- mpn by limb exact division.
 
-dnl  Copyright 2000-2002, 2007 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002, 2007 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -128,7 +117,7 @@ Zdisp(	movzbl,	0,(%eax,%ebp), %eax)
 	subl	%ebp, %eax		C inv = 2*inv - inv*inv*d
 	subl	$1, %edx		C shift amount, and clear carry
 
-	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+	ASSERT(e,`	C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
 	pushl	%eax	FRAME_pushl()
 	imull	PARAM_DIVISOR, %eax
 	cmpl	$1, %eax
diff --git a/gmp/mpn/x86/k6/mmx/logops_n.asm b/gmp/mpn/x86/k6/mmx/logops_n.asm
index e17930bb2d..a6272131a2 100644
--- a/gmp/mpn/x86/k6/mmx/logops_n.asm
+++ b/gmp/mpn/x86/k6/mmx/logops_n.asm
@@ -1,33 +1,22 @@
 dnl  AMD K6-2 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
 dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
 
-dnl  Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k6/mmx/lshift.asm b/gmp/mpn/x86/k6/mmx/lshift.asm
index 45be582633..1492025171 100644
--- a/gmp/mpn/x86/k6/mmx/lshift.asm
+++ b/gmp/mpn/x86/k6/mmx/lshift.asm
@@ -1,32 +1,21 @@
 dnl  AMD K6 mpn_lshift -- mpn left shift.
 
 dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k6/mmx/popham.asm b/gmp/mpn/x86/k6/mmx/popham.asm
index 2b19d0b5ee..a0a651d39c 100644
--- a/gmp/mpn/x86/k6/mmx/popham.asm
+++ b/gmp/mpn/x86/k6/mmx/popham.asm
@@ -1,33 +1,22 @@
 dnl  AMD K6-2 mpn_popcount, mpn_hamdist -- mpn bit population count and
 dnl  hamming distance.
 
-dnl  Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k6/mmx/rshift.asm b/gmp/mpn/x86/k6/mmx/rshift.asm
index cd0382f322..80cd6fb05a 100644
--- a/gmp/mpn/x86/k6/mmx/rshift.asm
+++ b/gmp/mpn/x86/k6/mmx/rshift.asm
@@ -1,32 +1,21 @@
 dnl  AMD K6 mpn_rshift -- mpn right shift.
 
 dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k6/mod_34lsub1.asm b/gmp/mpn/x86/k6/mod_34lsub1.asm
index 7e30503e54..a5b7ee1064 100644
--- a/gmp/mpn/x86/k6/mod_34lsub1.asm
+++ b/gmp/mpn/x86/k6/mod_34lsub1.asm
@@ -1,32 +1,21 @@
 dnl  AMD K6 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
 
-dnl  Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k6/mode1o.asm b/gmp/mpn/x86/k6/mode1o.asm
index a13f647b81..f299877911 100644
--- a/gmp/mpn/x86/k6/mode1o.asm
+++ b/gmp/mpn/x86/k6/mode1o.asm
@@ -1,32 +1,21 @@
 dnl  AMD K6 mpn_modexact_1_odd -- exact division style remainder.
 
-dnl  Copyright 2000-2003, 2007 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002, 2003, 2007 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -114,7 +103,7 @@ Zdisp(	movzbl,	0,(%ecx,%edi), %edi)		C inv 8 bits
 
 	subl	%ecx, %edi		C inv = 2*inv - inv*inv*d
 
-	ASSERT(e,`	C d*inv == 1 mod 2^GMP_LIMB_BITS
+	ASSERT(e,`	C d*inv == 1 mod 2^BITS_PER_MP_LIMB
 	pushl	%eax
 	movl	%esi, %eax
 	imull	%edi, %eax
diff --git a/gmp/mpn/x86/k6/mul_1.asm b/gmp/mpn/x86/k6/mul_1.asm
index 3ef7ec24fe..e1c468fe34 100644
--- a/gmp/mpn/x86/k6/mul_1.asm
+++ b/gmp/mpn/x86/k6/mul_1.asm
@@ -1,49 +1,38 @@
 dnl  AMD K6 mpn_mul_1 -- mpn by limb multiply.
 
 dnl  Copyright 1999, 2000, 2002, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C			    cycles/limb
-C P5
-C P6 model 0-8,10-12		 5.5
+C                           cycles/limb
+C P5:
+C P6 model 0-8,10-12)            5.5
 C P6 model 9  (Banias)
-C P6 model 13 (Dothan)		 4.87
+C P6 model 13 (Dothan)           4.87
 C P4 model 0  (Willamette)
 C P4 model 1  (?)
 C P4 model 2  (Northwood)
 C P4 model 3  (Prescott)
 C P4 model 4  (Nocona)
-C AMD K6			 6.25
-C AMD K7
-C AMD K8
+C K6:                            6.25
+C K7:
+C K8:
 
 
 C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/gmp/mpn/x86/k6/mul_basecase.asm b/gmp/mpn/x86/k6/mul_basecase.asm
index 7030001c3f..dcd4d70082 100644
--- a/gmp/mpn/x86/k6/mul_basecase.asm
+++ b/gmp/mpn/x86/k6/mul_basecase.asm
@@ -1,32 +1,21 @@
 dnl  AMD K6 mpn_mul_basecase -- multiply two mpn numbers.
 
-dnl  Copyright 1999-2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k6/pre_mod_1.asm b/gmp/mpn/x86/k6/pre_mod_1.asm
index 34db20d386..3231539bfd 100644
--- a/gmp/mpn/x86/k6/pre_mod_1.asm
+++ b/gmp/mpn/x86/k6/pre_mod_1.asm
@@ -1,32 +1,21 @@
 dnl  AMD K6 mpn_preinv_mod_1 -- mpn by 1 remainder, with pre-inverted divisor.
 
 dnl  Copyright 2000, 2002, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k6/sqr_basecase.asm b/gmp/mpn/x86/k6/sqr_basecase.asm
index b7ecb5cc8a..3392d38812 100644
--- a/gmp/mpn/x86/k6/sqr_basecase.asm
+++ b/gmp/mpn/x86/k6/sqr_basecase.asm
@@ -1,32 +1,21 @@
 dnl  AMD K6 mpn_sqr_basecase -- square an mpn number.
 
-dnl  Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -36,35 +25,35 @@ C     product (measured on the speed difference between 17 and 33 limbs,
 C     which is roughly the Karatsuba recursing range).
 
 
-dnl  SQR_TOOM2_THRESHOLD_MAX is the maximum SQR_TOOM2_THRESHOLD this
+dnl  SQR_KARATSUBA_THRESHOLD_MAX is the maximum SQR_KARATSUBA_THRESHOLD this
 dnl  code supports.  This value is used only by the tune program to know
 dnl  what it can go up to.  (An attempt to compile with a bigger value will
 dnl  trigger some m4_assert()s in the code, making the build fail.)
 dnl
 dnl  The value is determined by requiring the displacements in the unrolled
 dnl  addmul to fit in single bytes.  This means a maximum UNROLL_COUNT of
-dnl  63, giving a maximum SQR_TOOM2_THRESHOLD of 66.
+dnl  63, giving a maximum SQR_KARATSUBA_THRESHOLD of 66.
 
-deflit(SQR_TOOM2_THRESHOLD_MAX, 66)
+deflit(SQR_KARATSUBA_THRESHOLD_MAX, 66)
 
 
 dnl  Allow a value from the tune program to override config.m4.
 
-ifdef(`SQR_TOOM2_THRESHOLD_OVERRIDE',
-`define(`SQR_TOOM2_THRESHOLD',SQR_TOOM2_THRESHOLD_OVERRIDE)')
+ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE',
+`define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)')
 
 
 dnl  UNROLL_COUNT is the number of code chunks in the unrolled addmul.  The
-dnl  number required is determined by SQR_TOOM2_THRESHOLD, since
-dnl  mpn_sqr_basecase only needs to handle sizes < SQR_TOOM2_THRESHOLD.
+dnl  number required is determined by SQR_KARATSUBA_THRESHOLD, since
+dnl  mpn_sqr_basecase only needs to handle sizes < SQR_KARATSUBA_THRESHOLD.
 dnl
 dnl  The first addmul is the biggest, and this takes the second least
 dnl  significant limb and multiplies it by the third least significant and
-dnl  up.  Hence for a maximum operand size of SQR_TOOM2_THRESHOLD-1
-dnl  limbs, UNROLL_COUNT needs to be SQR_TOOM2_THRESHOLD-3.
+dnl  up.  Hence for a maximum operand size of SQR_KARATSUBA_THRESHOLD-1
+dnl  limbs, UNROLL_COUNT needs to be SQR_KARATSUBA_THRESHOLD-3.
 
-m4_config_gmp_mparam(`SQR_TOOM2_THRESHOLD')
-deflit(UNROLL_COUNT, eval(SQR_TOOM2_THRESHOLD-3))
+m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD')
+deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3))
 
 
 C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
diff --git a/gmp/mpn/x86/k7/README b/gmp/mpn/x86/k7/README
index 5711b612c5..e2c5e0c18d 100644
--- a/gmp/mpn/x86/k7/README
+++ b/gmp/mpn/x86/k7/README
@@ -3,28 +3,17 @@ Copyright 2000, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/x86/k7/addlsh1_n.asm b/gmp/mpn/x86/k7/addlsh1_n.asm
deleted file mode 100644
index a957b6f78e..0000000000
--- a/gmp/mpn/x86/k7/addlsh1_n.asm
+++ /dev/null
@@ -1,196 +0,0 @@
-dnl  AMD K7 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C This is an attempt at an addlsh1_n for x86-32, not relying on sse2 insns.
-C The innerloop is 2*3-way unrolled, which is best we can do with the available
-C registers.  It seems tricky to use the same structure for rsblsh1_n, since we
-C cannot feed carry between operations there.
-
-C			    cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)		 5.4	(worse than add_n + lshift)
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C Intel Atom			 6
-C AMD K6			 ?
-C AMD K7			 2.5
-C AMD K8
-
-C This is a basic addlsh1_n for k7, atom, and perhaps some other x86-32
-C processors.  It uses 2*3-way unrolling, for good reasons.  Unfortunately,
-C that means we need an initial magic multiply.
-C
-C It is not clear how to do sublsh1_n or rsblsh1_n using the same pattern.  We
-C cannot do rsblsh1_n since we feed carry from the shift blocks to the
-C add/subtract blocks, which is right for addition but reversed for
-C subtraction.  We could perhaps do sublsh1_n, with some extra move insns,
-C without losing any time, since we're not issue limited but carry recurrency
-C latency.
-C
-C Breaking carry recurrency might be a good idea.  We would then need separate
-C registers for the shift carry and add/subtract carry, which in turn would
-C force is to 2*2-way unrolling.
-
-defframe(PARAM_SIZE,	16)
-defframe(PARAM_DBLD,	12)
-defframe(PARAM_SRC,	 8)
-defframe(PARAM_DST,	 4)
-
-dnl  re-use parameter space
-define(VAR_COUNT,`PARAM_DST')
-define(VAR_TMP,`PARAM_DBLD')
-
-ASM_START()
-	TEXT
-	ALIGN(8)
-PROLOGUE(mpn_addlsh1_n)
-deflit(`FRAME',0)
-
-define(`rp',  `%edi')
-define(`up',  `%esi')
-define(`vp',  `%ebp')
-
-	mov	$0x2aaaaaab, %eax
-
-	push	%ebx			FRAME_pushl()
-	mov	PARAM_SIZE, %ebx	C size
-
-	push	rp			FRAME_pushl()
-	mov	PARAM_DST, rp
-
-	mul	%ebx
-
-	push	up			FRAME_pushl()
-	mov	PARAM_SRC, up
-
-	not	%edx			C count = -(size\8)-1
-	mov	%edx, VAR_COUNT
-
-	push	vp			FRAME_pushl()
-	mov	PARAM_DBLD, vp
-
-	lea	3(%edx,%edx,2), %ecx	C count*3+3 = -(size\6)*3
-	xor	%edx, %edx
-	lea	(%ebx,%ecx,2), %ebx	C size + (count*3+3)*2 = size % 6
-	or	%ebx, %ebx
-	jz	L(exact)
-
-L(oop):
-ifdef(`CPU_P6',`
-	shr	%edx ')			C restore 2nd saved carry bit
-	mov	(vp), %eax
-	adc	%eax, %eax
-	rcr	%edx			C restore 1st saved carry bit
-	lea	4(vp), vp
-	adc	(up), %eax
-	lea	4(up), up
-	adc	%edx, %edx		C save a carry bit in edx
-ifdef(`CPU_P6',`
-	adc	%edx, %edx ')		C save another carry bit in edx
-	dec	%ebx
-	mov	%eax, (rp)
-	lea	4(rp), rp
-	jnz	L(oop)
-	mov	vp, VAR_TMP
-L(exact):
-	incl	VAR_COUNT
-	jz	L(end)
-
-	ALIGN(16)
-L(top):
-ifdef(`CPU_P6',`
-	shr	%edx ')			C restore 2nd saved carry bit
-	mov	(vp), %eax
-	adc	%eax, %eax
-	mov	4(vp), %ebx
-	adc	%ebx, %ebx
-	mov	8(vp), %ecx
-	adc	%ecx, %ecx
-
-	rcr	%edx			C restore 1st saved carry bit
-
-	adc	(up), %eax
-	mov	%eax, (rp)
-	adc	4(up), %ebx
-	mov	%ebx, 4(rp)
-	adc	8(up), %ecx
-	mov	%ecx, 8(rp)
-
-	mov	12(vp), %eax
-	adc	%eax, %eax
-	mov	16(vp), %ebx
-	adc	%ebx, %ebx
-	mov	20(vp), %ecx
-	adc	%ecx, %ecx
-
-	lea	24(vp), vp
-	adc	%edx, %edx		C save a carry bit in edx
-
-	adc	12(up), %eax
-	mov	%eax, 12(rp)
-	adc	16(up), %ebx
-	mov	%ebx, 16(rp)
-	adc	20(up), %ecx
-
-	lea	24(up), up
-
-ifdef(`CPU_P6',`
-	adc	%edx, %edx ')		C save another carry bit in edx
-	mov	%ecx, 20(rp)
-	incl	VAR_COUNT
-	lea	24(rp), rp
-	jne	L(top)
-
-L(end):
-	pop	vp			FRAME_popl()
-	pop	up			FRAME_popl()
-
-ifdef(`CPU_P6',`
-	xor	%eax, %eax
-	shr	$1, %edx
-	adc	%edx, %eax
-',`
-	adc	$0, %edx
-	mov	%edx, %eax
-')
-	pop	rp			FRAME_popl()
-	pop	%ebx			FRAME_popl()
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/k7/aors_n.asm b/gmp/mpn/x86/k7/aors_n.asm
index 1a08072029..d84de3ee98 100644
--- a/gmp/mpn/x86/k7/aors_n.asm
+++ b/gmp/mpn/x86/k7/aors_n.asm
@@ -1,32 +1,21 @@
 dnl  AMD K7 mpn_add_n/mpn_sub_n -- mpn add or subtract.
 
-dnl  Copyright 1999-2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k7/aorsmul_1.asm b/gmp/mpn/x86/k7/aorsmul_1.asm
index eec8df6de2..b247c29131 100644
--- a/gmp/mpn/x86/k7/aorsmul_1.asm
+++ b/gmp/mpn/x86/k7/aorsmul_1.asm
@@ -1,49 +1,39 @@
 dnl  AMD K7 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
 
-dnl  Copyright 1999-2002, 2005, 2008 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002, 2005, 2008 Free Software Foundation,
+dnl  Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C			    cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9  (Banias)		 6.5
+C                           cycles/limb
+C P5:
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
 C P6 model 13 (Dothan)
 C P4 model 0  (Willamette)
 C P4 model 1  (?)
 C P4 model 2  (Northwood)
 C P4 model 3  (Prescott)
 C P4 model 4  (Nocona)
-C AMD K6
-C AMD K7			 3.75
-C AMD K8
+C K6:
+C K7:                            3.75
+C K8:
 
 C TODO
 C  * Improve feed-in and wind-down code.  We beat the old code for all n != 1,
diff --git a/gmp/mpn/x86/k7/bdiv_q_1.asm b/gmp/mpn/x86/k7/bdiv_q_1.asm
deleted file mode 100644
index df3477f539..0000000000
--- a/gmp/mpn/x86/k7/bdiv_q_1.asm
+++ /dev/null
@@ -1,244 +0,0 @@
-dnl  AMD K7 mpn_bdiv_q_1 -- mpn by limb exact division.
-
-dnl  Rearranged from mpn/x86/k7/dive_1.asm by Marco Bodrato.
-
-dnl  Copyright 2001, 2002, 2004, 2007, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C          cycles/limb
-C Athlon:     11.0
-C Hammer:      9.0
-
-
-C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C                      mp_limb_t divisor);
-C
-C The dependent chain is mul+imul+sub for 11 cycles and that speed is
-C achieved with no special effort.  The load and shrld latencies are hidden
-C by out of order execution.
-C
-C It's a touch faster on size==1 to use the mul-by-inverse than divl.
-
-defframe(PARAM_SHIFT,  24)
-defframe(PARAM_INVERSE,20)
-defframe(PARAM_DIVISOR,16)
-defframe(PARAM_SIZE,   12)
-defframe(PARAM_SRC,    8)
-defframe(PARAM_DST,    4)
-
-defframe(SAVE_EBX,     -4)
-defframe(SAVE_ESI,     -8)
-defframe(SAVE_EDI,    -12)
-defframe(SAVE_EBP,    -16)
-defframe(VAR_INVERSE, -20)
-defframe(VAR_DST_END, -24)
-
-deflit(STACK_SPACE, 24)
-
-	TEXT
-
-C mp_limb_t
-C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
-C		    mp_limb_t inverse, int shift)
-	ALIGN(16)
-PROLOGUE(mpn_pi1_bdiv_q_1)
-deflit(`FRAME',0)
-
-	subl	$STACK_SPACE, %esp	deflit(`FRAME',STACK_SPACE)
-	movl	PARAM_SHIFT, %ecx	C shift count
-
-	movl	%ebp, SAVE_EBP
-	movl	PARAM_SIZE, %ebp
-
-	movl	%esi, SAVE_ESI
-	movl	PARAM_SRC, %esi
-
-	movl	%edi, SAVE_EDI
-	movl	PARAM_DST, %edi
-
-	movl	%ebx, SAVE_EBX
-
-	leal	(%esi,%ebp,4), %esi	C src end
-	leal	(%edi,%ebp,4), %edi	C dst end
-	negl	%ebp			C -size
-
-	movl	PARAM_INVERSE, %eax	C inv
-
-L(common):
-	movl	%eax, VAR_INVERSE
-	movl	(%esi,%ebp,4), %eax	C src[0]
-
-	incl	%ebp
-	jz	L(one)
-
-	movl	(%esi,%ebp,4), %edx	C src[1]
-
-	shrdl(	%cl, %edx, %eax)
-
-	movl	%edi, VAR_DST_END
-	xorl	%ebx, %ebx
-	jmp	L(entry)
-
-	ALIGN(8)
-L(top):
-	C eax	q
-	C ebx	carry bit, 0 or 1
-	C ecx	shift
-	C edx
-	C esi	src end
-	C edi	dst end
-	C ebp	counter, limbs, negative
-
-	mull	PARAM_DIVISOR		C carry limb in edx
-
-	movl	-4(%esi,%ebp,4), %eax
-	movl	(%esi,%ebp,4), %edi
-
-	shrdl(	%cl, %edi, %eax)
-
-	subl	%ebx, %eax		C apply carry bit
-	setc	%bl
-	movl	VAR_DST_END, %edi
-
-	subl	%edx, %eax		C apply carry limb
-	adcl	$0, %ebx
-
-L(entry):
-	imull	VAR_INVERSE, %eax
-
-	movl	%eax, -4(%edi,%ebp,4)
-	incl	%ebp
-	jnz	L(top)
-
-
-	mull	PARAM_DIVISOR		C carry limb in edx
-
-	movl	-4(%esi), %eax		C src high limb
-	shrl	%cl, %eax
-	movl	SAVE_ESI, %esi
-
-	subl	%ebx, %eax		C apply carry bit
-	movl	SAVE_EBX, %ebx
-	movl	SAVE_EBP, %ebp
-
-	subl	%edx, %eax		C apply carry limb
-
-	imull	VAR_INVERSE, %eax
-
-	movl	%eax, -4(%edi)
-	movl	SAVE_EDI, %edi
-	addl	$STACK_SPACE, %esp
-
-	ret
-
-L(one):
-	shrl	%cl, %eax
-	movl	SAVE_ESI, %esi
-	movl	SAVE_EBX, %ebx
-
-	imull	VAR_INVERSE, %eax
-
-	movl	SAVE_EBP, %ebp
-
-	movl	%eax, -4(%edi)
-	movl	SAVE_EDI, %edi
-	addl	$STACK_SPACE, %esp
-
-	ret
-EPILOGUE()
-
-C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C                           mp_limb_t divisor);
-C
-
-	ALIGN(16)
-PROLOGUE(mpn_bdiv_q_1)
-deflit(`FRAME',0)
-
-	movl	PARAM_DIVISOR, %eax
-	subl	$STACK_SPACE, %esp	deflit(`FRAME',STACK_SPACE)
-	movl	$-1, %ecx		C shift count
-
-	movl	%ebp, SAVE_EBP
-	movl	PARAM_SIZE, %ebp
-
-	movl	%esi, SAVE_ESI
-	movl	%edi, SAVE_EDI
-
-	C If there's usually only one or two trailing zero bits then this
-	C should be faster than bsfl.
-L(strip_twos):
-	incl	%ecx
-	shrl	%eax
-	jnc	L(strip_twos)
-
-	movl	%ebx, SAVE_EBX
-	leal	1(%eax,%eax), %ebx	C d without twos
-	andl	$127, %eax		C d/2, 7 bits
-
-ifdef(`PIC',`
-	LEA(	binvert_limb_table, %edx)
-	movzbl	(%eax,%edx), %eax		C inv 8 bits
-',`
-	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
-')
-
-	leal	(%eax,%eax), %edx	C 2*inv
-	movl	%ebx, PARAM_DIVISOR	C d without twos
-
-	imull	%eax, %eax		C inv*inv
-
-	movl	PARAM_SRC, %esi
-	movl	PARAM_DST, %edi
-
-	imull	%ebx, %eax		C inv*inv*d
-
-	subl	%eax, %edx		C inv = 2*inv - inv*inv*d
-	leal	(%edx,%edx), %eax	C 2*inv
-
-	imull	%edx, %edx		C inv*inv
-
-	leal	(%esi,%ebp,4), %esi	C src end
-	leal	(%edi,%ebp,4), %edi	C dst end
-	negl	%ebp			C -size
-
-	imull	%ebx, %edx		C inv*inv*d
-
-	subl	%edx, %eax		C inv = 2*inv - inv*inv*d
-
-	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
-	pushl	%eax	FRAME_pushl()
-	imull	PARAM_DIVISOR, %eax
-	cmpl	$1, %eax
-	popl	%eax	FRAME_popl()')
-
-	jmp	L(common)
-EPILOGUE()
diff --git a/gmp/mpn/x86/k7/dive_1.asm b/gmp/mpn/x86/k7/dive_1.asm
index 8eb4f45ac0..c994e0fb06 100644
--- a/gmp/mpn/x86/k7/dive_1.asm
+++ b/gmp/mpn/x86/k7/dive_1.asm
@@ -1,32 +1,21 @@
 dnl  AMD K7 mpn_divexact_1 -- mpn by limb exact division.
 
 dnl  Copyright 2001, 2002, 2004, 2007 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -116,7 +105,7 @@ ifdef(`PIC',`
 
 	subl	%edx, %eax		C inv = 2*inv - inv*inv*d
 
-	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+	ASSERT(e,`	C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
 	pushl	%eax	FRAME_pushl()
 	imull	PARAM_DIVISOR, %eax
 	cmpl	$1, %eax
diff --git a/gmp/mpn/x86/k7/gcd_1.asm b/gmp/mpn/x86/k7/gcd_1.asm
index c7d12c83c0..f912f43730 100644
--- a/gmp/mpn/x86/k7/gcd_1.asm
+++ b/gmp/mpn/x86/k7/gcd_1.asm
@@ -1,186 +1,369 @@
-dnl  x86 mpn_gcd_1 optimised for AMD K7.
+dnl  AMD K7 mpn_gcd_1 -- mpn by 1 gcd.
 
-dnl  Contributed to the GNU project by by Kevin Ryde.  Rehacked by Torbjorn
-dnl  Granlund.
-
-dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C	     cycles/bit (approx)
-C AMD K7	 5.31
-C AMD K8,K9	 5.33
-C AMD K10	 5.30
-C AMD bd1	 ?
-C AMD bobcat	 7.02
-C Intel P4-2	10.1
-C Intel P4-3/4	10.0
-C Intel P6/13	 5.88
-C Intel core2	 6.26
-C Intel NHM	 6.83
-C Intel SBR	 8.50
-C Intel atom	 8.90
-C VIA nano	 ?
-C Numbers measured with: speed -CD -s16-32 -t16 mpn_gcd_1
-
-C TODO
-C  * Tune overhead, this takes 2-3 cycles more than old code when v0 is tiny.
-C  * Stream things better through registers, avoiding some copying.
-
-C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+C K7: 6.75 cycles/bit (approx)  1x1 gcd
+C     11.0 cycles/limb          Nx1 reduction (modexact_1_odd)
+
+
+dnl  Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y,
+dnl  where x is the larger of the two.  See tune/README for more.
+dnl
+dnl  divl at 40 cycles compared to the gcd at about 7 cycles/bitpair
+dnl  suggests 40/7*2=11.4 but 7 seems to be about right.
+
+deflit(DIV_THRESHOLD, 7)
+
 
+C table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+C
+C This is mixed in with the code, but as per the k7 optimization manual it's
+C a full cache line and suitably aligned so it won't get swapped between
+C code and data.  Having it in TEXT rather than RODATA saves needing a GOT
+C entry when PIC.
+C
+C Actually, there doesn't seem to be a measurable difference between this in
+C its own cache line or plonked in the middle of the code.  Presumably
+C since TEXT is read-only there's no worries about coherency.
+
+deflit(MASK, 63)
 deflit(MAXSHIFT, 6)
-deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
 
-DEF_OBJECT(ctz_table,64)
+	TEXT
+	ALIGN(64)
+L(table):
 	.byte	MAXSHIFT
 forloop(i,1,MASK,
 `	.byte	m4_count_trailing_zeros(i)
 ')
-END_OBJECT(ctz_table)
 
-C Threshold of when to call bmod when U is one limb.  Should be about
-C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
-define(`DIV_THRES_LOG2', 7)
 
+C mp_limb_t mpn_gcd_1 (mp_srcptr src, mp_size_t size, mp_limb_t limb);
+C
+
+defframe(PARAM_LIMB,   12)
+defframe(PARAM_SIZE,    8)
+defframe(PARAM_SRC,     4)
 
-define(`up',    `%edi')
-define(`n',     `%esi')
-define(`v0',    `%edx')
+defframe(SAVE_EBX,     -4)
+defframe(SAVE_ESI,     -8)
+defframe(SAVE_EDI,    -12)
+defframe(SAVE_EBP,    -16)
+defframe(CALL_DIVISOR,-20)
+defframe(CALL_SIZE,   -24)
+defframe(CALL_SRC,    -28)
 
+deflit(STACK_SPACE, 28)
 
-ASM_START()
 	TEXT
 	ALIGN(16)
+
 PROLOGUE(mpn_gcd_1)
-	push	%edi
-	push	%esi
+deflit(`FRAME',0)
+
+	ASSERT(ne, `cmpl $0, PARAM_LIMB')	C y!=0
+	ASSERT(ae, `cmpl $1, PARAM_SIZE')	C size>=1
+
+	movl	PARAM_SRC, %eax
+	movl	PARAM_LIMB, %edx
+	subl	$STACK_SPACE, %esp	deflit(`FRAME',STACK_SPACE)
 
-	mov	12(%esp), up
-	mov	16(%esp), n
-	mov	20(%esp), v0
+	movl	%esi, SAVE_ESI
+	movl	%ebx, SAVE_EBX
 
-	mov	(up), %eax		C U low limb
-	or	v0, %eax		C x | y
-	mov	$-1, %ecx
+	movl	(%eax), %esi		C src low limb
+
+ifdef(`PIC',`
+	movl	%edi, SAVE_EDI
+	call	L(movl_eip_to_edi)
+L(here):
+	addl	$L(table)-L(here), %edi
+')
+
+	movl	%esi, %ebx
+	orl	%edx, %esi	C x|y
+	movl	$-1, %ecx
 
 L(twos):
-	inc	%ecx
-	shr	%eax
-	jnc	L(twos)
+	incl	%ecx
+	shrl	%esi
+	jnc	L(twos)		C 3/4 chance of x or y odd already
 
-	shr	%cl, v0
-	mov	%ecx, %eax		C common twos
+	shrl	%cl, %ebx
+	shrl	%cl, %edx
+	movl	%ecx, %esi	C common twos
 
-L(divide_strip_y):
-	shr	v0
-	jnc	L(divide_strip_y)
-	adc	v0, v0
-
-	push	%eax
-	push	v0
-
-	cmp	$1, n
-	jnz	L(reduce_nby1)
-
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
-	mov	(up), %ecx
-	mov	%ecx, %eax
-	shr	$DIV_THRES_LOG2, %ecx
-	cmp	%ecx, v0
-	ja	L(reduced)
-
-	mov	v0, %esi
-	xor	%edx, %edx
-	div	%esi
-	mov	%edx, %eax
-	jmp	L(reduced)
-
-L(reduce_nby1):
-ifdef(`PIC_WITH_EBX',`
-	push	%ebx
-	call	L(movl_eip_to_ebx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
+	movl	PARAM_SIZE, %ecx
+	cmpl	$1, %ecx
+	ja	L(divide)
+
+
+	C eax
+	C ebx	x
+	C ecx
+	C edx	y
+	C esi	common twos
+	C edi	[PIC] L(table)
+	C ebp
+
+	movl	%edx, %eax
+	cmpl	%ebx, %edx
+
+	cmovb(	%ebx, %eax)	C swap to make x bigger than y
+	cmovb(	%edx, %ebx)
+
+
+L(strip_y):
+	C eax	x
+	C ebx	y
+	C ecx
+	C edx
+	C esi	common twos
+	C edi	[PIC] L(table)
+	C ebp
+
+	ASSERT(nz,`orl %ebx,%ebx')
+	shrl	%ebx
+	jnc	L(strip_y)
+	rcll	%ebx
+
+
+	C eax	x
+	C ebx	y (odd)
+	C ecx
+	C edx
+	C esi	common twos
+	C edi	[PIC] L(table)
+	C ebp
+
+	movl	%eax, %ecx
+	movl	%ebx, %edx
+	shrl	$DIV_THRESHOLD, %eax
+
+	cmpl	%eax, %ebx
+	movl	%ecx, %eax
+	ja	L(strip_x_entry)	C do x%y if x much bigger than y
+
+
+	xorl	%edx, %edx
+
+	divl	%ebx
+
+	orl	%edx, %edx
+	movl	%edx, %eax		C remainder -> x
+	movl	%ebx, %edx		C y
+
+	jz	L(done_ebx)
+	jmp	L(strip_x)
+
+
+	C Offset 0x9D here for non-PIC.  About 0.4 cycles/bit is saved by
+	C ensuring the end of the jnz at the end of this loop doesn't cross
+	C into the next cache line at 0xC0.
+	C
+	C PIC on the other hand is offset 0xAC here and extends to 0xC9, so
+	C it crosses but doesn't suffer any measurable slowdown.
+
+L(top):
+	C eax	x
+	C ebx	y-x
+	C ecx	x-y
+	C edx	y
+	C esi	twos, for use at end
+	C edi	[PIC] L(table)
+
+	cmovc(	%ebx, %ecx)		C if x-y gave carry, use x and y-x
+	cmovc(	%eax, %edx)
+
+L(strip_x):
+	movl	%ecx, %eax
+L(strip_x_entry):
+	andl	$MASK, %ecx
+
+	ASSERT(nz, `orl %eax, %eax')
+
+ifdef(`PIC',`
+	movb	(%ecx,%edi), %cl
+',`
+	movb	L(table) (%ecx), %cl
 ')
-	push	v0			C param 3
-	push	n			C param 2
-	push	up			C param 1
-	cmp	$BMOD_1_TO_MOD_1_THRESHOLD, n
-	jl	L(bmod)
-	CALL(	mpn_mod_1)
-	jmp	L(called)
-L(bmod):
-	CALL(	mpn_modexact_1_odd)
-
-L(called):
-	add	$12, %esp		C deallocate params
-ifdef(`PIC_WITH_EBX',`
-	pop	%ebx
+
+	shrl	%cl, %eax
+	cmpb	$MAXSHIFT, %cl
+
+	movl	%eax, %ecx
+	movl	%edx, %ebx
+	je	L(strip_x)
+
+	ASSERT(nz, `testl $1, %eax')	C both odd
+	ASSERT(nz, `testl $1, %edx')
+
+	subl	%eax, %ebx
+	subl	%edx, %ecx
+	jnz	L(top)
+
+
+L(done):
+	movl	%esi, %ecx
+	movl	SAVE_ESI, %esi
+ifdef(`PIC',`
+	movl	SAVE_EDI, %edi
 ')
-L(reduced):
-	pop	%edx
-
-	LEA(	ctz_table, %esi)
-	test	%eax, %eax
-	mov	%eax, %ecx
-	jnz	L(mid)
-	jmp	L(end)
-
-	ALIGN(16)			C               K8    BC    P4    NHM   SBR
-L(top):	cmovc(	%ecx, %eax)		C if x-y < 0	0
-	cmovc(	%edi, %edx)		C use x,y-x	0
-L(mid):	and	$MASK, %ecx		C		0
-	movzbl	(%esi,%ecx), %ecx	C		1
-	jz	L(shift_alot)		C		1
-	shr	%cl, %eax		C		3
-	mov	%eax, %edi		C		4
-	mov	%edx, %ecx		C		3
-	sub	%eax, %ecx		C		4
-	sub	%edx, %eax		C		4
-	jnz	L(top)			C		5
-
-L(end):	pop	%ecx
-	mov	%edx, %eax
-	shl	%cl, %eax
-	pop	%esi
-	pop	%edi
-	ret
 
-L(shift_alot):
-	shr	$MAXSHIFT, %eax
-	mov	%eax, %ecx
-	jmp	L(mid)
+	shll	%cl, %eax
+	movl	SAVE_EBX, %ebx
+	addl	$FRAME, %esp
 
-ifdef(`PIC_WITH_EBX',`
-L(movl_eip_to_ebx):
-	mov	(%esp), %ebx
 	ret
+
+
+
+C -----------------------------------------------------------------------------
+C two or more limbs
+
+dnl  MODEXACT_THRESHOLD is the size at which it's better to call
+dnl  mpn_modexact_1_odd than do an inline loop.
+
+deflit(MODEXACT_THRESHOLD, ifdef(`PIC',6,5))
+
+L(divide):
+	C eax	src
+	C ebx
+	C ecx	size
+	C edx	y
+	C esi	common twos
+	C edi	[PIC] L(table)
+	C ebp
+
+L(divide_strip_y):
+	ASSERT(nz,`orl %edx,%edx')
+	shrl	%edx
+	jnc	L(divide_strip_y)
+	leal	1(%edx,%edx), %ebx		C y now odd
+
+	movl	%ebp, SAVE_EBP
+	movl	%eax, %ebp
+	movl	-4(%eax,%ecx,4), %eax		C src high limb
+
+	cmp	$MODEXACT_THRESHOLD, %ecx
+	jae	L(modexact)
+
+	cmpl	%ebx, %eax			C high cmp divisor
+	movl	$0, %edx
+
+	cmovc(	%eax, %edx)			C skip a div if high<divisor
+	sbbl	$0, %ecx
+
+
+L(divide_top):
+	C eax	scratch (quotient)
+	C ebx	y
+	C ecx	counter (size to 1, inclusive)
+	C edx	carry (remainder)
+	C esi	common twos
+	C edi	[PIC] L(table)
+	C ebp	src
+
+	movl	-4(%ebp,%ecx,4), %eax
+
+	divl	%ebx
+
+	decl	%ecx
+	jnz	L(divide_top)
+
+
+	C eax
+	C ebx	y (odd)
+	C ecx
+	C edx	x
+	C esi	common twos
+	C edi	[PIC] L(table)
+	C ebp
+
+	orl	%edx, %edx
+	movl	SAVE_EBP, %ebp
+	movl	%edx, %eax
+
+	movl	%edx, %ecx
+	movl	%ebx, %edx
+	jnz	L(strip_x_entry)
+
+
+L(done_ebx):
+	movl	%ebx, %eax
+	jmp	L(done)
+
+
+
+L(modexact):
+	C eax
+	C ebx	y
+	C ecx	size
+	C edx
+	C esi	common twos
+	C edi	[PIC] L(table)
+	C ebp	src
+
+ifdef(`PIC',`
+	movl	%ebp, CALL_SRC
+	movl	%ebx, %ebp		C y
+	movl	%edi, %ebx		C L(table)
+
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(table)], %ebx
+	movl	%ebp, CALL_DIVISOR
+	movl	%ecx, CALL_SIZE
+
+	call	GSYM_PREFIX`'mpn_modexact_1_odd@PLT
+',`
+dnl non-PIC
+	movl	%ebx, CALL_DIVISOR
+	movl	%ebp, CALL_SRC
+	movl	%ecx, CALL_SIZE
+
+	call	GSYM_PREFIX`'mpn_modexact_1_odd
 ')
+
+	C eax	x
+	C ebx	[non-PIC] y
+	C ecx
+	C edx
+	C esi	common twos
+	C edi	[PIC] L(table)
+	C ebp	[PIC] y
+
+	orl	%eax, %eax
+	movl	ifdef(`PIC',`%ebp',`%ebx'), %edx
+	movl	SAVE_EBP, %ebp
+
+	movl	%eax, %ecx
+	jnz	L(strip_x_entry)
+
+	movl	%edx, %eax
+	jmp	L(done)
+
+
+ifdef(`PIC', `
+L(movl_eip_to_edi):
+	movl	(%esp), %edi
+	ret_internal
+')
+
 EPILOGUE()
diff --git a/gmp/mpn/x86/k7/gmp-mparam.h b/gmp/mpn/x86/k7/gmp-mparam.h
index 9977a113e2..ced0c020f7 100644
--- a/gmp/mpn/x86/k7/gmp-mparam.h
+++ b/gmp/mpn/x86/k7/gmp-mparam.h
@@ -1,241 +1,73 @@
 /* AMD K7 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 2000-2005, 2008-2010, 2014 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2008 Free
+Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
-or both in parallel, as here.
 
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2083 MHz K7 Barton */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.2 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               3
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        24
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              3
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           24
-
-#define MUL_TOOM22_THRESHOLD                28
-#define MUL_TOOM33_THRESHOLD                85
-#define MUL_TOOM44_THRESHOLD               147
-#define MUL_TOOM6H_THRESHOLD               216
-#define MUL_TOOM8H_THRESHOLD               309
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      85
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      99
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      98
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     102
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     124
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 50
-#define SQR_TOOM3_THRESHOLD                 81
-#define SQR_TOOM4_THRESHOLD                216
-#define SQR_TOOM6_THRESHOLD                306
-#define SQR_TOOM8_THRESHOLD                446
-
-#define MULMID_TOOM42_THRESHOLD             56
-
-#define MULMOD_BNM1_THRESHOLD               17
-#define SQRMOD_BNM1_THRESHOLD               17
-
-#define MUL_FFT_MODF_THRESHOLD             904  /* k = 6 */
-#define MUL_FFT_TABLE3                                      \
-  { {    904, 6}, {     21, 7}, {     11, 6}, {     25, 7}, \
-    {     13, 6}, {     27, 7}, {     15, 6}, {     31, 7}, \
-    {     17, 6}, {     35, 7}, {     19, 6}, {     39, 7}, \
-    {     23, 6}, {     47, 7}, {     27, 8}, {     15, 7}, \
-    {     31, 6}, {     63, 7}, {     35, 8}, {     19, 7}, \
-    {     39, 8}, {     23, 7}, {     47, 8}, {     31, 7}, \
-    {     63, 8}, {     39, 7}, {     79, 9}, {     23, 8}, \
-    {     47, 7}, {     95, 8}, {     51, 9}, {     31, 8}, \
-    {     71, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
-    {     95, 9}, {     55,10}, {     31, 9}, {     63, 8}, \
-    {    127, 9}, {     71, 8}, {    143, 9}, {     79, 8}, \
-    {    159,10}, {     47, 9}, {     95, 8}, {    191, 9}, \
-    {    103,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255, 9}, {    143,10}, {     79, 9}, {    167,10}, \
-    {     95, 9}, {    199,10}, {    111,11}, {     63,10}, \
-    {    127, 9}, {    255,10}, {    143, 9}, {    287,10}, \
-    {    159, 9}, {    319,11}, {     95,10}, {    191, 9}, \
-    {    383,10}, {    207,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,10}, {    271, 8}, {   1087,10}, \
-    {    287,11}, {    159,10}, {    319, 9}, {    639,11}, \
-    {    191,10}, {    383, 9}, {    767, 8}, {   1535, 9}, \
-    {    799, 8}, {   1599,11}, {    223,12}, {    127,11}, \
-    {    255,10}, {    511, 9}, {   1023,10}, {    543, 9}, \
-    {   1087,11}, {    287,10}, {    575, 9}, {   1151,10}, \
-    {    607, 9}, {   1215, 8}, {   2431,11}, {    319,10}, \
-    {    639, 9}, {   1279,10}, {    671, 9}, {   1343,12}, \
-    {    191,11}, {    383,10}, {    767, 9}, {   1535,10}, \
-    {    799, 9}, {   1599,10}, {    831, 9}, {   1663,10}, \
-    {    863,13}, {    127,12}, {    255,11}, {    511,10}, \
-    {   1023,11}, {    543,10}, {   1087,11}, {    575,10}, \
-    {   1151,11}, {    607,10}, {   1215, 9}, {   2431,12}, \
-    {    319,11}, {    639,10}, {   1407,11}, {    735,10}, \
-    {   1471, 9}, {   2943,12}, {    383,11}, {    767,10}, \
-    {   1535,11}, {    799,10}, {   1599,11}, {    831,10}, \
-    {   1663,11}, {    895,10}, {   1791,11}, {    959,10}, \
-    {   1919,13}, {    255,12}, {    511,11}, {   1023,10}, \
-    {   2047,11}, {   1087,12}, {    575,11}, {   1151,10}, \
-    {   2303,11}, {   1215,10}, {   2431,12}, {    639,11}, \
-    {   1279,10}, {   2559,11}, {   1407,10}, {   2815,11}, \
-    {   1471,10}, {   2943,13}, {    383,12}, {    767,11}, \
-    {   1599,12}, {    831,11}, {   1663,12}, {    895,11}, \
-    {   1791,10}, {   3583,12}, {    959,11}, {   1919,10}, \
-    {   3839,14}, {    255,13}, {    511,12}, {   1023,11}, \
-    {   2047,12}, {   1087,11}, {   2175,12}, {   1151,11}, \
-    {   2303,12}, {   1215,11}, {   2431,13}, {    639,12}, \
-    {   1407,11}, {   2815,12}, {   1471,11}, {   2943,13}, \
-    {    767,12}, {   1663,11}, {   3327,13}, {    895,12}, \
-    {   1791,11}, {   3583,12}, {   1919,11}, {   3839,12}, \
-    {   1983,11}, {   3967,14}, {    511,13}, {   1023,12}, \
-    {   2239,13}, {   1151,12}, {   2495,13}, {   1279,12}, \
-    {   2559,13}, {   1407,12}, {   2943,11}, {   5887,14}, \
-    {    767,13}, {   1535,12}, {   3071,13}, {   1663,12}, \
-    {   3327,13}, {   1791,12}, {   3583,13}, {   1919,12}, \
-    {   3967,15}, {    511,14}, {   1023,13}, {   2047,12}, \
-    {   4095,13}, {   2175,12}, {   4351,13}, {   2431,12}, \
-    {   4863,14}, {   1279,13}, {   2559,12}, {   5119,13}, \
-    {   2943,12}, {   5887,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 228
-#define MUL_FFT_THRESHOLD                 7808
-
-#define SQR_FFT_MODF_THRESHOLD             888  /* k = 6 */
-#define SQR_FFT_TABLE3                                      \
-  { {    888, 6}, {     21, 7}, {     11, 6}, {     25, 7}, \
-    {     13, 6}, {     27, 7}, {     15, 6}, {     31, 7}, \
-    {     17, 6}, {     35, 7}, {     19, 6}, {     39, 7}, \
-    {     23, 6}, {     47, 7}, {     27, 8}, {     15, 7}, \
-    {     31, 6}, {     63, 7}, {     35, 8}, {     19, 7}, \
-    {     39, 8}, {     23, 7}, {     47, 8}, {     31, 7}, \
-    {     63, 8}, {     39, 9}, {     23, 8}, {     47, 7}, \
-    {     95, 8}, {     51, 9}, {     31, 8}, {     67, 9}, \
-    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
-    {     55,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
-    {     79,10}, {     47, 9}, {     95, 8}, {    191,11}, \
-    {     31,10}, {     63, 9}, {    127, 8}, {    255, 9}, \
-    {    143,10}, {     79, 9}, {    167,10}, {     95, 9}, \
-    {    191,10}, {    111,11}, {     63,10}, {    127, 9}, \
-    {    255, 8}, {    511,10}, {    143, 9}, {    287, 8}, \
-    {    575,10}, {    159,11}, {     95,10}, {    191, 9}, \
-    {    383,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543, 8}, {   1087,10}, \
-    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
-    {    639, 8}, {   1279, 9}, {    671,11}, {    191,10}, \
-    {    383, 9}, {    799, 8}, {   1599, 9}, {    831,11}, \
-    {    223,12}, {    127,11}, {    255,10}, {    543, 9}, \
-    {   1087,11}, {    287,10}, {    575, 9}, {   1215, 8}, \
-    {   2431,11}, {    319,10}, {    639, 9}, {   1279,10}, \
-    {    671, 9}, {   1407,12}, {    191,10}, {    799, 9}, \
-    {   1599,10}, {    831, 9}, {   1663,10}, {    863, 9}, \
-    {   1727,11}, {    447,13}, {    127,12}, {    255,11}, \
-    {    511,10}, {   1023,11}, {    543,10}, {   1087, 9}, \
-    {   2175,10}, {   1119,11}, {    575,10}, {   1151,11}, \
-    {    607,10}, {   1215, 9}, {   2431,12}, {    319,11}, \
-    {    639,10}, {   1279,11}, {    671,10}, {   1343, 9}, \
-    {   2687,11}, {    703,10}, {   1407,11}, {    735,10}, \
-    {   1471, 9}, {   2943,10}, {   1503,12}, {    383,11}, \
-    {    767,10}, {   1535,11}, {    799,10}, {   1599,11}, \
-    {    863,10}, {   1727,12}, {    447,11}, {    895,10}, \
-    {   1791,11}, {    959,10}, {   1919,13}, {    255,12}, \
-    {    511,11}, {   1023,10}, {   2047,11}, {   1087,10}, \
-    {   2175,11}, {   1119,12}, {    575,11}, {   1151,10}, \
-    {   2303,11}, {   1215,10}, {   2431,12}, {    639,11}, \
-    {   1407,10}, {   2815,11}, {   1471,10}, {   2943,12}, \
-    {    767,11}, {   1599,12}, {    831,11}, {   1663,10}, \
-    {   3327,12}, {    895,11}, {   1791,10}, {   3583,12}, \
-    {    959,11}, {   1919,10}, {   3839,11}, {   1983,14}, \
-    {    255,13}, {    511,12}, {   1023,11}, {   2047,12}, \
-    {   1087,11}, {   2175,12}, {   1151,11}, {   2303,12}, \
-    {   1215,11}, {   2431,13}, {    639,12}, {   1407,11}, \
-    {   2815,12}, {   1471,11}, {   2943,13}, {    767,12}, \
-    {   1663,11}, {   3327,12}, {   1727,13}, {    895,12}, \
-    {   1791,11}, {   3583,12}, {   1919,11}, {   3839,12}, \
-    {   1983,11}, {   3967,14}, {    511,13}, {   1023,12}, \
-    {   2175,13}, {   1151,12}, {   2495,13}, {   1279,12}, \
-    {   2559,13}, {   1407,12}, {   2943,11}, {   5887,14}, \
-    {    767,13}, {   1535,12}, {   3071,13}, {   1663,12}, \
-    {   3327,13}, {   1791,12}, {   3583,13}, {   1919,12}, \
-    {   3967,15}, {    511,14}, {   1023,13}, {   2047,12}, \
-    {   4095,13}, {   2175,12}, {   4351,13}, {   2431,14}, \
-    {   1279,13}, {   2943,12}, {   5887,14}, {  16384,15}, \
-    {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 229
-#define SQR_FFT_THRESHOLD                 7552
-
-#define MULLO_BASECASE_THRESHOLD             8
-#define MULLO_DC_THRESHOLD                  36
-#define MULLO_MUL_N_THRESHOLD            13463
-
-#define DC_DIV_QR_THRESHOLD                 45
-#define DC_DIVAPPR_Q_THRESHOLD             208
-#define DC_BDIV_QR_THRESHOLD                43
-#define DC_BDIV_Q_THRESHOLD                140
-
-#define INV_MULMOD_BNM1_THRESHOLD           62
-#define INV_NEWTON_THRESHOLD               204
-#define INV_APPR_THRESHOLD                 204
-
-#define BINV_NEWTON_THRESHOLD              230
-#define REDC_1_TO_REDC_N_THRESHOLD          59
-
-#define MU_DIV_QR_THRESHOLD               1752
-#define MU_DIVAPPR_Q_THRESHOLD            1528
-#define MUPI_DIV_QR_THRESHOLD               82
-#define MU_BDIV_QR_THRESHOLD              1360
-#define MU_BDIV_Q_THRESHOLD               1470
-
-#define POWM_SEC_TABLE  1,16,102,336,1221
-
-#define MATRIX22_STRASSEN_THRESHOLD         16
-#define HGCD_THRESHOLD                     120
-#define HGCD_APPR_THRESHOLD                143
-#define HGCD_REDUCE_THRESHOLD             4818
-#define GCD_DC_THRESHOLD                   474
-#define GCDEXT_DC_THRESHOLD                345
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                15
-#define GET_STR_PRECOMPUTE_THRESHOLD        33
-#define SET_STR_DC_THRESHOLD               298
-#define SET_STR_PRECOMPUTE_THRESHOLD      1187
-
-#define FAC_DSC_THRESHOLD                  602
-#define FAC_ODD_THRESHOLD                   29
+/* 2083 MHz Athlon */
+
+/* Generated by tuneup.c, 2008-12-23, gcc 3.4 */
+
+#define MUL_KARATSUBA_THRESHOLD          28
+#define MUL_TOOM3_THRESHOLD              89
+#define MUL_TOOM44_THRESHOLD            130
+
+#define SQR_BASECASE_THRESHOLD            0  /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD          52
+#define SQR_TOOM3_THRESHOLD              89
+#define SQR_TOOM4_THRESHOLD             196
+
+#define MULLOW_BASECASE_THRESHOLD        10
+#define MULLOW_DC_THRESHOLD              96
+#define MULLOW_MUL_N_THRESHOLD          234
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 86
+#define POWM_THRESHOLD                  134
+#define MATRIX22_STRASSEN_THRESHOLD      18
+#define HGCD_THRESHOLD                  163
+#define GCD_DC_THRESHOLD                665
+#define GCDEXT_DC_THRESHOLD             605
+#define JACOBI_BASE_METHOD                1
+
+#define USE_PREINV_DIVREM_1               1  /* native */
+#define USE_PREINV_MOD_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             19
+#define GET_STR_PRECOMPUTE_THRESHOLD     35
+#define SET_STR_DC_THRESHOLD            826
+#define SET_STR_PRECOMPUTE_THRESHOLD   1691
+
+#define MUL_FFT_TABLE  { 432, 864, 1664, 4608, 10240, 40960, 163840, 655360, 0 }
+#define MUL_FFT_MODF_THRESHOLD          496
+#define MUL_FFT_THRESHOLD              4864
+
+#define SQR_FFT_TABLE  { 432, 864, 1664, 4608, 10240, 40960, 98304, 655360, 0 }
+#define SQR_FFT_MODF_THRESHOLD          432
+#define SQR_FFT_THRESHOLD              3840
+
+/* These tables need to be updated.  */
+
+#define MUL_FFT_TABLE2 {{1, 4}, {401, 5}, {801, 6}, {817, 5}, {865, 6}, {1025, 5}, {1057, 6}, {1601, 7}, {1633, 6}, {1729, 7}, {1921, 6}, {2113, 7}, {2177, 6}, {2241, 7}, {2433, 6}, {2497, 7}, {2945, 6}, {3009, 7}, {3457, 8}, {3521, 7}, {4481, 8}, {4865, 7}, {5249, 8}, {5889, 7}, {6017, 8}, {7553, 9}, {7681, 8}, {9985, 9}, {11777, 8}, {13057, 9}, {13825, 8}, {14081, 9}, {15873, 8}, {16641, 9}, {16897, 8}, {17153, 9}, {19969, 8}, {20225, 9}, {20737, 8}, {20993, 9}, {24065, 8}, {24577, 9}, {25089, 8}, {25345, 9}, {27393, 10}, {27649, 9}, {28161, 10}, {31745, 9}, {38913, 10}, {39425, 9}, {40449, 10}, {48129, 9}, {48641, 11}, {63489, 10}, {98305, 11}, {99329, 10}, {100353, 11}, {101377, 10}, {103425, 11}, {104449, 10}, {110593, 11}, {112641, 10}, {113665, 11}, {129025, 10}, {162817, 11}, {194561, 10}, {195585, 12}, {258049, 11}, {391169, 12}, {520193, 11}, {718849, 12}, {782337, 11}, {849921, 13}, {1040385, 12}, {2879489, 13}, {3137537, 12}, {3928065, 13}, {4186113, 12}, {4976641, 13}, {5234689, 12}, {6025217, 13}, {6283265, 12}, {MP_SIZE_T_MAX,0}}
+
+#define SQR_FFT_TABLE2 {{1, 4}, {401, 5}, {417, 4}, {433, 5}, {881, 6}, {961, 5}, {993, 6}, {1857, 7}, {1921, 6}, {2049, 7}, {2177, 6}, {2241, 7}, {2433, 6}, {2497, 7}, {3457, 8}, {3841, 7}, {4481, 8}, {4609, 7}, {4737, 8}, {4865, 7}, {5249, 8}, {5889, 7}, {6273, 8}, {7041, 9}, {7681, 8}, {9985, 9}, {10241, 8}, {10497, 9}, {11777, 8}, {13057, 9}, {15873, 8}, {16385, 9}, {16897, 8}, {17153, 9}, {19969, 8}, {20225, 9}, {20737, 8}, {20993, 9}, {24065, 8}, {24321, 9}, {24577, 10}, {24833, 9}, {25601, 10}, {27137, 9}, {27649, 10}, {31745, 9}, {38401, 10}, {38913, 9}, {40449, 10}, {48129, 9}, {48641, 11}, {63489, 10}, {99329, 11}, {101377, 10}, {103425, 11}, {104449, 10}, {107521, 11}, {110593, 10}, {113665, 11}, {129025, 10}, {154625, 11}, {155649, 10}, {162817, 11}, {194561, 12}, {258049, 11}, {391169, 12}, {520193, 11}, {718849, 12}, {727041, 11}, {729089, 12}, {782337, 11}, {849921, 13}, {1040385, 12}, {2879489, 13}, {3137537, 12}, {3928065, 13}, {4186113, 12}, {4714497, 13}, {5234689, 12}, {6025217, 13}, {6283265, 12}, {7073793, 13}, {7331841, 12}, {MP_SIZE_T_MAX,0}}
diff --git a/gmp/mpn/x86/k7/invert_limb.asm b/gmp/mpn/x86/k7/invert_limb.asm
deleted file mode 100644
index 6cce455a9d..0000000000
--- a/gmp/mpn/x86/k7/invert_limb.asm
+++ /dev/null
@@ -1,193 +0,0 @@
-dnl  x86 mpn_invert_limb
-
-dnl  Contributed to the GNU project by Niels Möller
-
-dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C			    cycles (approx)	div
-C P5				 ?
-C P6 model 0-8,10-12		 ?
-C P6 model 9  (Banias)		 ?
-C P6 model 13 (Dothan)		 ?
-C P4 model 0  (Willamette)	 ?
-C P4 model 1  (?)		 ?
-C P4 model 2  (Northwood)	 ?
-C P4 model 3  (Prescott)	 ?
-C P4 model 4  (Nocona)		 ?
-C AMD K6			 ?
-C AMD K7			41		53
-C AMD K8			 ?
-
-C TODO
-C  * These c/l numbers are for a non-PIC build.  Consider falling back to using
-C    the 'div' instruction for PIC builds.
-C  * Perhaps use this file--or at least the algorithm--for more machines than k7.
-
-C Register usage:
-C   Input D in %edi
-C   Current approximation is in %eax and/or %ecx
-C   %ebx and %edx are temporaries
-C   %esi and %ebp are unused
-
-defframe(PARAM_DIVISOR,4)
-
-ASM_START()
-
-C Make approx_tab global to work around Apple relocation bug.
-ifdef(`DARWIN',`
-	deflit(`approx_tab', MPN(invert_limb_tab))
-	GLOBL	approx_tab')
-
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_invert_limb)
-deflit(`FRAME', 0)
-	mov	PARAM_DIVISOR, %eax
-	C Avoid push/pop on k7.
-	sub	$8, %esp	FRAME_subl_esp(8)
-	mov	%ebx, (%esp)
-	mov	%edi, 4(%esp)
-
-	mov	%eax, %edi
-	shr	$22, %eax
-ifdef(`PIC',`
-	LEA(	approx_tab, %ebx)
-	movzwl	-1024(%ebx, %eax, 2), %eax
-',`
-	movzwl	-1024+approx_tab(%eax, %eax), %eax	C %eax = v0
-')
-
-	C v1 = (v0 << 4) - ((v0*v0*d_21) >> 32) - 1
-	mov	%eax, %ecx
-	imul	%eax, %eax
-	mov	%edi, %ebx
-	shr	$11, %ebx
-	inc	%ebx
-	mul	%ebx
-	mov	%edi, %ebx				C Prepare
-	shr	%ebx
-	sbb	%eax, %eax
-	sub	%eax, %ebx				C %ebx = d_31, %eax = mask
-	shl	$4, %ecx
-	dec	%ecx
-	sub	%edx, %ecx				C %ecx = v1
-
-	C v_2 = (v1 << 15) + ((v1 *(2^48 - v1 * d31 + (v1 >> 1) & mask)) >> 33)
-	imul	%ecx, %ebx
-	and	%ecx, %eax
-	shr	%eax
-	sub	%ebx, %eax
-	mul	%ecx
-	mov	%edi, %eax				C Prepare for next mul
-	shl	$15, %ecx
-	shr	%edx
-	add	%edx, %ecx				C %ecx = v2
-
-	mul	%ecx
-	add	%edi, %eax
-	mov	%ecx, %eax
-	adc	%edi, %edx
-	sub	%edx, %eax				C %eax = v3
-
-	mov	(%esp), %ebx
-	mov	4(%esp), %edi
-	add	$8, %esp
-
-	ret
-
-EPILOGUE()
-
-DEF_OBJECT(approx_tab,2)
-	.value	0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
-	.value	0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
-	.value	0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
-	.value	0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
-	.value	0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
-	.value	0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
-	.value	0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
-	.value	0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
-	.value	0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
-	.value	0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
-	.value	0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
-	.value	0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
-	.value	0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
-	.value	0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
-	.value	0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
-	.value	0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
-	.value	0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
-	.value	0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
-	.value	0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
-	.value	0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
-	.value	0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
-	.value	0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
-	.value	0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
-	.value	0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
-	.value	0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
-	.value	0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
-	.value	0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
-	.value	0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
-	.value	0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
-	.value	0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
-	.value	0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
-	.value	0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
-	.value	0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
-	.value	0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
-	.value	0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
-	.value	0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
-	.value	0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
-	.value	0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
-	.value	0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
-	.value	0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
-	.value	0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
-	.value	0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
-	.value	0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
-	.value	0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
-	.value	0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
-	.value	0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
-	.value	0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
-	.value	0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
-	.value	0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
-	.value	0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
-	.value	0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
-	.value	0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
-	.value	0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
-	.value	0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
-	.value	0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
-	.value	0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
-	.value	0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
-	.value	0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
-	.value	0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
-	.value	0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
-	.value	0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
-	.value	0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
-	.value	0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
-	.value	0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
-END_OBJECT(approx_tab)
diff --git a/gmp/mpn/x86/k7/mmx/com.asm b/gmp/mpn/x86/k7/mmx/com_n.asm
index a258c224f1..068c01f076 100644
--- a/gmp/mpn/x86/k7/mmx/com.asm
+++ b/gmp/mpn/x86/k7/mmx/com_n.asm
@@ -1,32 +1,21 @@
-dnl  AMD Athlon mpn_com -- mpn bitwise one's complement.
+dnl  AMD Athlon mpn_com_n -- mpn bitwise one's complement.
 
 dnl  Copyright 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -34,7 +23,7 @@ include(`../config.m4')
 C K7: 1.0 cycles/limb
 
 
-C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
 C
 C The loop form below is necessary for the claimed speed.  It needs to be
 C aligned to a 16 byte boundary and only 16 bytes long.  Maybe that's so it
@@ -62,7 +51,7 @@ defframe(PARAM_DST, 4)
 	TEXT
 	ALIGN(16)
 
-PROLOGUE(mpn_com)
+PROLOGUE(mpn_com_n)
 deflit(`FRAME',0)
 
 	movl	PARAM_DST, %edx
diff --git a/gmp/mpn/x86/k7/mmx/copyd.asm b/gmp/mpn/x86/k7/mmx/copyd.asm
index 59ece40920..4601fcd75a 100644
--- a/gmp/mpn/x86/k7/mmx/copyd.asm
+++ b/gmp/mpn/x86/k7/mmx/copyd.asm
@@ -1,32 +1,21 @@
 dnl  AMD K7 mpn_copyd -- copy limb vector, decrementing.
 
 dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k7/mmx/copyi.asm b/gmp/mpn/x86/k7/mmx/copyi.asm
index 9a28f927ec..a17d575ff4 100644
--- a/gmp/mpn/x86/k7/mmx/copyi.asm
+++ b/gmp/mpn/x86/k7/mmx/copyi.asm
@@ -1,32 +1,21 @@
 dnl  AMD K7 mpn_copyi -- copy limb vector, incrementing.
 
 dnl  Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k7/mmx/divrem_1.asm b/gmp/mpn/x86/k7/mmx/divrem_1.asm
index cf343280bb..fa5824c7b9 100644
--- a/gmp/mpn/x86/k7/mmx/divrem_1.asm
+++ b/gmp/mpn/x86/k7/mmx/divrem_1.asm
@@ -1,33 +1,22 @@
 dnl  AMD K7 mpn_divrem_1, mpn_divrem_1c, mpn_preinv_divrem_1 -- mpn by limb
 dnl  division.
 
-dnl  Copyright 1999-2002, 2004 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -456,7 +445,7 @@ C chain, and nothing better than 18 cycles has been found when using it.
 C The jump is taken only when q1 is 0xFFFFFFFF, and on random data this will
 C be an extremely rare event.
 C
-C Branch mispredictions will hit random occurrences of q1==0xFFFFFFFF, but
+C Branch mispredictions will hit random occurrences of q1==0xFFFFFFFF, but
 C if some special data is coming out with this always, the q1_ff special
 C case actually runs at 15 c/l.  0x2FFF...FFFD divided by 3 is a good way to
 C induce the q1_ff case, for speed measurements or testing.  Note that
@@ -735,12 +724,12 @@ C q1 is the high word of m*n2+b*n2 and the following shows q1<=b-2 always.
 C rnd() means rounding down to a multiple of d.
 C
 C	m*n2 + b*n2 <= m*(d-1) + b*(d-1)
-C		     = m*d + b*d - m - b
-C		     = floor((b(b-d)-1)/d)*d + b*d - m - b
-C		     = rnd(b(b-d)-1) + b*d - m - b
-C		     = rnd(b(b-d)-1 + b*d) - m - b
-C		     = rnd(b*b-1) - m - b
-C		     <= (b-2)*b
+C	             = m*d + b*d - m - b
+C	             = floor((b(b-d)-1)/d)*d + b*d - m - b
+C	             = rnd(b(b-d)-1) + b*d - m - b
+C	             = rnd(b(b-d)-1 + b*d) - m - b
+C	             = rnd(b*b-1) - m - b
+C	             <= (b-2)*b
 C
 C Unchanged from the general case is that the final quotient limb q can be
 C either q1 or q1+1, and the q1+1 case occurs often.  This can be seen from
diff --git a/gmp/mpn/x86/k7/mmx/lshift.asm b/gmp/mpn/x86/k7/mmx/lshift.asm
index b3383cf2c3..b3bff8ffd1 100644
--- a/gmp/mpn/x86/k7/mmx/lshift.asm
+++ b/gmp/mpn/x86/k7/mmx/lshift.asm
@@ -1,32 +1,21 @@
 dnl  AMD K7 mpn_lshift -- mpn left shift.
 
-dnl  Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k7/mmx/mod_1.asm b/gmp/mpn/x86/k7/mmx/mod_1.asm
new file mode 100644
index 0000000000..2b42e55caf
--- /dev/null
+++ b/gmp/mpn/x86/k7/mmx/mod_1.asm
@@ -0,0 +1,509 @@
+dnl  AMD K7 mpn_mod_1 -- mpn by limb remainder.
+
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K7: 17.0 cycles/limb.
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                       mp_limb_t carry);
+C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                             mp_limb_t inverse);
+C
+C The code here is the same as mpn_divrem_1, but with the quotient
+C discarded.  See mpn/x86/k7/mmx/divrem_1.asm for some comments.
+
+
+dnl  MUL_THRESHOLD is the size at which the multiply by inverse method is
+dnl  used, rather than plain "divl"s.  Minimum value 2.
+dnl
+dnl  The inverse takes about 50 cycles to calculate, but after that the
+dnl  multiply is 17 c/l versus division at 41 c/l.
+dnl
+dnl  Using mul or div is about the same speed at 3 limbs, so the threshold
+dnl  is set to 4 to get the smaller div code used at 3.
+
+deflit(MUL_THRESHOLD, 4)
+
+
+defframe(PARAM_INVERSE,16)  dnl mpn_preinv_mod_1
+defframe(PARAM_CARRY,  16)  dnl mpn_mod_1c
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE,    8)
+defframe(PARAM_SRC,     4)
+
+defframe(SAVE_EBX,    -4)
+defframe(SAVE_ESI,    -8)
+defframe(SAVE_EDI,    -12)
+defframe(SAVE_EBP,    -16)
+
+defframe(VAR_NORM,    -20)
+defframe(VAR_INVERSE, -24)
+defframe(VAR_SRC_STOP,-28)
+
+deflit(STACK_SPACE, 28)
+
+	TEXT
+C mpn_preinv_mod_1 -- inverse comes from the caller, VAR_NORM is forced to 0.
+	ALIGN(32)
+PROLOGUE(mpn_preinv_mod_1)
+deflit(`FRAME',0)
+	movl	PARAM_SRC, %ecx
+	movl	PARAM_SIZE, %eax
+	subl	$STACK_SPACE, %esp	FRAME_subl_esp(STACK_SPACE)
+
+	movl	%ebp, SAVE_EBP
+	movl	PARAM_DIVISOR, %ebp
+
+	movl	%edi, SAVE_EDI
+	movl	PARAM_INVERSE, %edx		C inverse m
+
+	movl	%esi, SAVE_ESI
+	movl	-4(%ecx,%eax,4), %edi		C src high limb
+	leal	-16(%ecx,%eax,4), %ecx		C &src[size-4]
+
+	movl	%ebx, SAVE_EBX
+	movl	PARAM_INVERSE, %edx		C same load as above, edx unchanged since; NOTE(review): looks redundant, confirm
+
+	movl	$0, VAR_NORM			C l==0
+
+	movl	%edi, %esi
+	subl	%ebp, %edi			C high-divisor
+
+	cmovc(	%esi, %edi)			C restore if underflow
+	decl	%eax
+	jz	L(done_edi)			C size==1, high-divisor only
+
+	movl	8(%ecx), %esi			C src second high limb
+	movl	%edx, VAR_INVERSE		C m, read by the shared divide steps
+
+	movl	$32, %ebx			C 32-l
+	decl	%eax
+	jz	L(inverse_one_left)		C size==2, one divide
+
+	movd	%ebx, %mm7			C 32-l
+	decl	%eax
+	jz	L(inverse_two_left)		C size==3, two divides
+
+	jmp	L(inverse_top)			C size>=4
+
+C Early exit: ebx is still the caller value and no MMX register has been used.
+L(done_edi):
+	movl	SAVE_ESI, %esi
+	movl	SAVE_EBP, %ebp
+	movl	%edi, %eax			C return value
+
+	movl	SAVE_EDI, %edi
+	addl	$STACK_SPACE, %esp
+
+	ret
+
+EPILOGUE()
+
+C mpn_mod_1c -- load the carry-in and join the shared code at L(start_1c).
+	ALIGN(32)
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+	movl	PARAM_CARRY, %edx		C initial carry
+	movl	PARAM_SIZE, %ecx		C size
+	subl	$STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+	movl	%ebp, SAVE_EBP
+	movl	PARAM_DIVISOR, %ebp		C divisor
+
+	movl	%esi, SAVE_ESI
+	movl	PARAM_SRC, %esi			C src
+	jmp	L(start_1c)			C ecx, edx, esi, ebp now as listed there
+
+EPILOGUE()
+
+C mpn_mod_1 -- entry with zero carry; small sizes use the plain divl loop.
+	ALIGN(32)
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+	movl	PARAM_SIZE, %ecx
+	movl	$0, %edx		C initial carry (if can't skip a div)
+	subl	$STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+	movl	%esi, SAVE_ESI
+	movl	PARAM_SRC, %esi
+
+	movl	%ebp, SAVE_EBP
+	movl	PARAM_DIVISOR, %ebp
+
+	orl	%ecx, %ecx
+	jz	L(divide_done)		C size==0, return the zero in edx
+
+	movl	-4(%esi,%ecx,4), %eax	C src high limb
+
+	cmpl	%ebp, %eax		C carry flag if high<divisor
+
+	cmovc(	%eax, %edx)		C src high limb as initial carry
+	sbbl	$0, %ecx		C size-1 to skip one div
+	jz	L(divide_done)		C no limbs left, edx is already the remainder
+
+
+	ALIGN(16)
+L(start_1c):
+	C eax
+	C ebx
+	C ecx	size
+	C edx	carry
+	C esi	src
+	C edi
+	C ebp	divisor
+
+	cmpl	$MUL_THRESHOLD, %ecx
+	jae	L(mul_by_inverse)	C unsigned size >= threshold
+
+
+
+C With a MUL_THRESHOLD of 4, this "loop" only ever does 1 to 3 iterations,
+C but it's already fast and compact, and there's nothing to gain by
+C expanding it out.
+C
+C Using PARAM_DIVISOR in the divl is a couple of cycles faster than %ebp.
+
+	orl	%ecx, %ecx
+	jz	L(divide_done)		C size==0 (reachable from mpn_mod_1c), return carry
+
+
+L(divide_top):
+	C eax	scratch (quotient)
+	C ebx
+	C ecx	counter, limbs, decrementing
+	C edx	scratch (remainder)
+	C esi	src
+	C edi
+	C ebp
+
+	movl	-4(%esi,%ecx,4), %eax	C next limb, high to low
+
+	divl	PARAM_DIVISOR		C edx:eax / divisor, remainder stays in edx
+
+	decl	%ecx
+	jnz	L(divide_top)
+
+
+C Only esi and ebp were changed on the paths reaching here, so only they are reloaded.
+L(divide_done):
+	movl	SAVE_ESI, %esi
+	movl	SAVE_EBP, %ebp
+	addl	$STACK_SPACE, %esp
+
+	movl	%edx, %eax		C return remainder
+
+	ret
+
+
+
+C -----------------------------------------------------------------------------
+C Set up l, the normalized divisor, the inverse m, and the first n2 and n10.
+L(mul_by_inverse):
+	C eax
+	C ebx
+	C ecx	size
+	C edx	carry
+	C esi	src
+	C edi
+	C ebp	divisor
+
+	bsrl	%ebp, %eax		C 31-l
+
+	movl	%ebx, SAVE_EBX
+	movl	%ecx, %ebx		C size
+
+	movl	%edi, SAVE_EDI
+	movl	$31, %ecx		C 31, xored with 31-l below to give l
+
+	movl	%edx, %edi		C carry
+	movl	$-1, %edx		C b-1, becomes (b-d)-1 below
+
+	C
+
+	xorl	%eax, %ecx		C l
+	incl	%eax			C 32-l
+
+	shll	%cl, %ebp		C d normalized
+	movl	%ecx, VAR_NORM		C l, reloaded for the final denorm
+
+	movd	%eax, %mm7		C 32-l
+
+	movl	$-1, %eax		C low dword b-1
+	subl	%ebp, %edx		C (b-d)-1 so  edx:eax = b*(b-d)-1
+
+	divl	%ebp			C floor (b*(b-d)-1) / d
+
+	C
+
+	movl	%eax, VAR_INVERSE	C m
+	leal	-12(%esi,%ebx,4), %eax	C &src[size-3]
+
+	movl	8(%eax), %esi		C src high limb
+	movl	4(%eax), %edx		C src second highest limb
+
+	shldl(	%cl, %esi, %edi)	C n2 = carry,high << l
+
+	shldl(	%cl, %edx, %esi)	C n10 = high,second << l
+
+	movl	%eax, %ecx		C &src[size-3]
+
+
+ifelse(MUL_THRESHOLD,2,`
+	cmpl	$2, %ebx
+	je	L(inverse_two_left)
+')
+
+
+C The dependent chain here is the same as in mpn_divrem_1, but a few
+C instructions are saved by not needing to store the quotient limbs.
+C Unfortunately this doesn't get the code down to the theoretical 16 c/l.
+C
+C There are four dummy instructions in the loop, all of which are necessary
+C for the claimed 17 c/l.  It's a 1 to 3 cycle slowdown if any are removed,
+C or changed from load to store or vice versa.  They're not completely
+C random, since they correspond to what mpn_divrem_1 has, but there's no
+C obvious reason why they're necessary.  Presumably they induce something
+C good in the out of order execution, perhaps through some load/store
+C ordering and/or decoding effects.
+C
+C The q1==0xFFFFFFFF case is handled here the same as in mpn_divrem_1.  On
+C special data that comes out as q1==0xFFFFFFFF always, the loop runs at
+C about 13.5 c/l.
+C Main loop: one divide step per pass, next n10 fetched through mm0.
+	ALIGN(32)
+L(inverse_top):
+	C eax	scratch
+	C ebx	scratch (nadj, q1)
+	C ecx	src pointer, decrementing
+	C edx	scratch
+	C esi	n10
+	C edi	n2
+	C ebp	divisor
+	C
+	C mm0	scratch (src qword)
+	C mm7	rshift for normalization
+
+	cmpl	$0x80000000, %esi  C n1 as 0=c, 1=nc
+	movl	%edi, %eax         C n2
+	movl	PARAM_SIZE, %ebx   C dummy
+
+	leal	(%ebp,%esi), %ebx  C n10+d
+	cmovc(	%esi, %ebx)	   C nadj = n10 + (-n1 & d), ignoring overflow
+	sbbl	$-1, %eax          C n2+n1
+
+	mull	VAR_INVERSE        C m*(n2+n1)
+
+	movq	(%ecx), %mm0       C next src limb and the one below it
+	subl	$4, %ecx           C step src pointer down one limb
+
+	movl	%ecx, PARAM_SIZE   C dummy
+
+	C
+
+	addl	%ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
+	leal	1(%edi), %ebx      C n2+1
+	movl	%ebp, %eax	   C d
+
+	C
+
+	adcl	%edx, %ebx         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+	jz	L(q1_ff)	   C q1+1 came out as 0, see L(q1_ff)
+	nop			   C dummy
+
+	mull	%ebx		   C (q1+1)*d
+
+	psrlq	%mm7, %mm0	   C shift src qword right by 32-l
+	leal	(%ecx), %ecx	   C dummy
+
+	C
+
+	C
+
+	subl	%eax, %esi	   C low  n - (q1+1)*d
+	movl	PARAM_SRC, %eax	   C src, for the loop end test
+
+	C
+
+	sbbl	%edx, %edi	   C high n - (q1+1)*d, 0 or -1
+	movl	%esi, %edi	   C remainder -> n2
+	leal	(%ebp,%esi), %edx  C remainder+d, flags untouched
+
+	movd	%mm0, %esi	   C next n10
+
+	cmovc(	%edx, %edi)	   C n - q1*d if underflow from using q1+1
+	cmpl	%eax, %ecx
+	jae	L(inverse_top)	   C loop while src pointer >= src
+
+
+L(inverse_loop_done):
+
+
+C -----------------------------------------------------------------------------
+C Second last step: same divide step, src low limb loaded on its own.
+L(inverse_two_left):
+	C eax	scratch
+	C ebx	scratch (nadj, q1)
+	C ecx	&src[-1]
+	C edx	scratch
+	C esi	n10
+	C edi	n2
+	C ebp	divisor
+	C
+	C mm0	scratch (src dword)
+	C mm7	rshift
+
+	cmpl	$0x80000000, %esi  C n1 as 0=c, 1=nc
+	movl	%edi, %eax         C n2
+
+	leal	(%ebp,%esi), %ebx  C n10+d
+	cmovc(	%esi, %ebx)	   C nadj = n10 + (-n1 & d), ignoring overflow
+	sbbl	$-1, %eax          C n2+n1
+
+	mull	VAR_INVERSE        C m*(n2+n1)
+
+	movd	4(%ecx), %mm0	   C src low limb
+
+	C
+
+	C
+
+	addl	%ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
+	leal	1(%edi), %ebx      C n2+1
+	movl	%ebp, %eax	   C d
+
+	adcl	%edx, %ebx         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+	sbbl	$0, %ebx	   C back off by one if the adcl carried out (q1==0xFFFFFFFF case)
+
+	mull	%ebx		   C (q1+1)*d
+
+	psllq	$32, %mm0	   C src low limb up to the high dword
+
+	psrlq	%mm7, %mm0	   C then right by 32-l, low dword = limb << l
+
+	C
+
+	subl	%eax, %esi	   C low  n - (q1+1)*d
+
+	C
+
+	sbbl	%edx, %edi	   C n - (q1+1)*d
+	movl	%esi, %edi	   C remainder -> n2
+	leal	(%ebp,%esi), %edx  C remainder+d, flags untouched
+
+	movd	%mm0, %esi	   C next n10
+
+	cmovc(	%edx, %edi)	   C n - q1*d if underflow from using q1+1
+
+C Last step: final divide step, restore registers, denormalize and return.
+L(inverse_one_left):
+	C eax	scratch
+	C ebx	scratch (nadj, q1)
+	C ecx
+	C edx	scratch
+	C esi	n10
+	C edi	n2
+	C ebp	divisor
+	C
+	C mm0	src limb, shifted
+	C mm7	rshift
+
+	cmpl	$0x80000000, %esi  C n1 as 0=c, 1=nc
+	movl	%edi, %eax         C n2
+
+	leal	(%ebp,%esi), %ebx  C n10+d
+	cmovc(	%esi, %ebx)	   C nadj = n10 + (-n1 & d), ignoring overflow
+	sbbl	$-1, %eax          C n2+n1
+
+	mull	VAR_INVERSE        C m*(n2+n1)
+
+	movl	VAR_NORM, %ecx     C for final denorm
+
+	C
+
+	C
+
+	addl	%ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
+	leal	1(%edi), %ebx      C n2+1
+	movl	%ebp, %eax	   C d
+
+	C
+
+	adcl	%edx, %ebx         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+	sbbl	$0, %ebx	   C back off by one if the adcl carried out (q1==0xFFFFFFFF case)
+
+	mull	%ebx		   C (q1+1)*d
+
+	movl	SAVE_EBX, %ebx	   C ebx no longer needed, restore
+
+	C
+
+	C
+
+	subl	%eax, %esi	   C low  n - (q1+1)*d
+
+	movl	%esi, %eax	   C remainder
+	movl	SAVE_ESI, %esi	   C mov and lea keep the borrow for the sbbl and cmovc
+
+	sbbl	%edx, %edi	   C n - (q1+1)*d
+	leal	(%ebp,%eax), %edx  C remainder+d
+	movl	SAVE_EBP, %ebp
+
+	cmovc(	%edx, %eax)	   C n - q1*d if underflow from using q1+1
+	movl	SAVE_EDI, %edi
+
+	shrl	%cl, %eax	   C denorm remainder
+	addl	$STACK_SPACE, %esp
+	emms			   C clear MMX state before returning
+
+	ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d
+
+L(q1_ff):
+	C eax	(divisor)
+	C ebx	(q1+1 == 0)
+	C ecx	src pointer
+	C edx
+	C esi	n10
+	C edi	(n2)
+	C ebp	divisor
+
+	movl	PARAM_SRC, %edx		C src, for the loop end test
+	leal	(%ebp,%esi), %edi	C n-q*d remainder -> next n2
+	psrlq	%mm7, %mm0		C shift not yet done, the jz left the loop before it
+
+	movd	%mm0, %esi		C next n10
+
+	cmpl	%edx, %ecx
+	jae	L(inverse_top)		C same loop test as at the end of L(inverse_top)
+	jmp	L(inverse_loop_done)	C otherwise continue with the last two steps
+
+EPILOGUE()
diff --git a/gmp/mpn/x86/k7/mmx/popham.asm b/gmp/mpn/x86/k7/mmx/popham.asm
index 95965b74d4..5dc0a78c42 100644
--- a/gmp/mpn/x86/k7/mmx/popham.asm
+++ b/gmp/mpn/x86/k7/mmx/popham.asm
@@ -1,40 +1,29 @@
 dnl  AMD K7 mpn_popcount, mpn_hamdist -- population count and hamming
 dnl  distance.
 
-dnl  Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C			     popcount	     hamdist
 C P3 generic			6.5		7
-C P3 model 9  (Banias)          5.7		6.1
+C P3 model 9  (Banias)          ?		?
 C P3 model 13 (Dothan)		5.75		6
 C K7				5		6
 
diff --git a/gmp/mpn/x86/k7/mmx/rshift.asm b/gmp/mpn/x86/k7/mmx/rshift.asm
index 345d23a25e..3566ce85d7 100644
--- a/gmp/mpn/x86/k7/mmx/rshift.asm
+++ b/gmp/mpn/x86/k7/mmx/rshift.asm
@@ -1,32 +1,21 @@
 dnl  AMD K7 mpn_rshift -- mpn right shift.
 
-dnl  Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k7/mod_1_1.asm b/gmp/mpn/x86/k7/mod_1_1.asm
deleted file mode 100644
index 1bbe6f92d7..0000000000
--- a/gmp/mpn/x86/k7/mod_1_1.asm
+++ /dev/null
@@ -1,221 +0,0 @@
-dnl  x86-32 mpn_mod_1_1p, requiring cmov.
-
-dnl  Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
-
-dnl  Copyright 2010, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C			    cycles/limb
-C P5				 ?
-C P6 model 0-8,10-12		 ?
-C P6 model 9  (Banias)		 ?
-C P6 model 13 (Dothan)		 ?
-C P4 model 0  (Willamette)	 ?
-C P4 model 1  (?)		 ?
-C P4 model 2  (Northwood)	 ?
-C P4 model 3  (Prescott)	 ?
-C P4 model 4  (Nocona)		 ?
-C AMD K6			 ?
-C AMD K7			 7
-C AMD K8			 ?
-
-define(`B2mb', `%ebx')
-define(`r0', `%esi')
-define(`r2', `%ebp')
-define(`t0', `%edi')
-define(`ap', `%ecx')  C Also shift count
-
-C Stack frame
-C	pre	36(%esp)
-C	b	32(%esp)
-C	n	28(%esp)
-C	ap	24(%esp)
-C	return	20(%esp)
-C	%ebp	16(%esp)
-C	%edi	12(%esp)
-C	%esi	8(%esp)
-C	%ebx	4(%esp)
-C	B2mod	(%esp)
-
-define(`B2modb', `(%esp)')
-define(`n', `28(%esp)')
-define(`b', `32(%esp)')
-define(`pre', `36(%esp)')
-
-C mp_limb_t
-C mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t pre[4])
-C
-C The pre array contains bi, cnt, B1modb, B2modb
-C Note: This implementation needs B1modb only when cnt > 0
-
-ASM_START()
-	TEXT
-	ALIGN(8)
-PROLOGUE(mpn_mod_1_1p)
-	push	%ebp
-	push	%edi
-	push	%esi
-	push	%ebx
-	mov	32(%esp), %ebp		C pre[]
-
-	mov	12(%ebp), %eax		C B2modb
-	push	%eax			C Put it on stack
-
-	mov	n, %edx
-	mov	24(%esp), ap
-
-	lea	(ap, %edx, 4), ap
-	mov	-4(ap), %eax
-	cmp	$3, %edx
-	jnc	L(first)
-	mov	-8(ap), r0
-	jmp	L(reduce_two)
-
-L(first):
-	C First iteration, no r2
-	mull	B2modb
-	mov	-12(ap), r0
-	add	%eax, r0
-	mov	-8(ap), %eax
-	adc	%edx, %eax
-	sbb	r2, r2
-	subl	$3, n
-	lea	-16(ap), ap
-	jz	L(reduce_three)
-
-	mov	B2modb, B2mb
-	sub	b, B2mb
-	lea	(B2mb, r0), t0
-	jmp	L(mid)
-
-	ALIGN(16)
-L(top): C Loopmixed to 7 c/l on k7
-	add	%eax, r0
-	lea	(B2mb, r0), t0
-	mov	r2, %eax
-	adc	%edx, %eax
-	sbb	r2, r2
-L(mid):	mull	B2modb
-	and	B2modb, r2
-	add	r0, r2
-	decl	n
-	mov	(ap), r0
-	cmovc(	t0, r2)
-	lea	-4(ap), ap
-	jnz	L(top)
-
-	add	%eax, r0
-	mov	r2, %eax
-	adc	%edx, %eax
-	sbb	r2, r2
-
-L(reduce_three):
-	C Eliminate r2
-	and	b, r2
-	sub	r2, %eax
-
-L(reduce_two):
-	mov	pre, %ebp
-	movb	4(%ebp), %cl
-	test	%cl, %cl
-	jz	L(normalized)
-
-	C Unnormalized, use B1modb to reduce to size < B b
-	mull	8(%ebp)
-	xor	t0, t0
-	add	%eax, r0
-	adc	%edx, t0
-	mov	t0, %eax
-
-	C Left-shift to normalize
-	shld	%cl, r0, %eax C Always use shld?
-
-	shl	%cl, r0
-	jmp	L(udiv)
-
-L(normalized):
-	mov	%eax, t0
-	sub	b, t0
-	cmovnc(	t0, %eax)
-
-L(udiv):
-	lea	1(%eax), t0
-	mull	(%ebp)
-	mov	b, %ebx		C Needed in register for lea
-	add	r0, %eax
-	adc	t0, %edx
-	imul	%ebx, %edx
-	sub	%edx, r0
-	cmp	r0, %eax
-	lea	(%ebx, r0), %eax
-	cmovnc(	r0, %eax)
-	cmp	%ebx, %eax
-	jnc	L(fix)
-L(ok):	shr	%cl, %eax
-
-	add	$4, %esp
-	pop	%ebx
-	pop	%esi
-	pop	%edi
-	pop	%ebp
-
-	ret
-L(fix):	sub	%ebx, %eax
-	jmp	L(ok)
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1_1p_cps)
-	push	%ebp
-	mov	12(%esp), %ebp
-	push	%esi
-	bsr	%ebp, %ecx
-	push	%ebx
-	xor	$31, %ecx
-	mov	16(%esp), %esi
-	sal	%cl, %ebp
-	mov	%ebp, %edx
-	not	%edx
-	mov	$-1, %eax
-	div	%ebp			C On K7, invert_limb would be a few cycles faster.
-	mov	%eax, (%esi)		C store bi
-	mov	%ecx, 4(%esi)		C store cnt
-	neg	%ebp
-	mov	$1, %edx
-	shld	%cl, %eax, %edx
-	imul	%ebp, %edx
-	shr	%cl, %edx
-	imul	%ebp, %eax
-	mov	%edx, 8(%esi)		C store B1modb
-	mov	%eax, 12(%esi)		C store B2modb
-	pop	%ebx
-	pop	%esi
-	pop	%ebp
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/k7/mod_1_4.asm b/gmp/mpn/x86/k7/mod_1_4.asm
deleted file mode 100644
index bb7597edd2..0000000000
--- a/gmp/mpn/x86/k7/mod_1_4.asm
+++ /dev/null
@@ -1,260 +0,0 @@
-dnl  x86-32 mpn_mod_1s_4p, requiring cmov.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2009, 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C			    cycles/limb
-C P5				 ?
-C P6 model 0-8,10-12		 ?
-C P6 model 9  (Banias)		 ?
-C P6 model 13 (Dothan)		 6
-C P4 model 0  (Willamette)	 ?
-C P4 model 1  (?)		 ?
-C P4 model 2  (Northwood)	15.5
-C P4 model 3  (Prescott)	 ?
-C P4 model 4  (Nocona)		 ?
-C AMD K6			 ?
-C AMD K7			 4.75
-C AMD K8			 ?
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mod_1s_4p)
-	push	%ebp
-	push	%edi
-	push	%esi
-	push	%ebx
-	sub	$28, %esp
-	mov	60(%esp), %edi		C cps[]
-	mov	8(%edi), %eax
-	mov	12(%edi), %edx
-	mov	16(%edi), %ecx
-	mov	20(%edi), %esi
-	mov	24(%edi), %edi
-	mov	%eax, 4(%esp)
-	mov	%edx, 8(%esp)
-	mov	%ecx, 12(%esp)
-	mov	%esi, 16(%esp)
-	mov	%edi, 20(%esp)
-	mov	52(%esp), %eax		C n
-	xor	%edi, %edi
-	mov	48(%esp), %esi		C up
-	lea	-12(%esi,%eax,4), %esi
-	and	$3, %eax
-	je	L(b0)
-	cmp	$2, %eax
-	jc	L(b1)
-	je	L(b2)
-
-L(b3):	mov	4(%esi), %eax
-	mull	4(%esp)
-	mov	(%esi), %ebp
-	add	%eax, %ebp
-	adc	%edx, %edi
-	mov	8(%esi), %eax
-	mull	8(%esp)
-	lea	-12(%esi), %esi
-	jmp	L(m0)
-
-L(b0):	mov	(%esi), %eax
-	mull	4(%esp)
-	mov	-4(%esi), %ebp
-	add	%eax, %ebp
-	adc	%edx, %edi
-	mov	4(%esi), %eax
-	mull	8(%esp)
-	add	%eax, %ebp
-	adc	%edx, %edi
-	mov	8(%esi), %eax
-	mull	12(%esp)
-	lea	-16(%esi), %esi
-	jmp	L(m0)
-
-L(b1):	mov	8(%esi), %ebp
-	lea	-4(%esi), %esi
-	jmp	L(m1)
-
-L(b2):	mov	8(%esi), %edi
-	mov	4(%esi), %ebp
-	lea	-8(%esi), %esi
-	jmp	L(m1)
-
-	ALIGN(16)
-L(top):	mov	(%esi), %eax
-	mull	4(%esp)
-	mov	-4(%esi), %ebx
-	xor	%ecx, %ecx
-	add	%eax, %ebx
-	adc	%edx, %ecx
-	mov	4(%esi), %eax
-	mull	8(%esp)
-	add	%eax, %ebx
-	adc	%edx, %ecx
-	mov	8(%esi), %eax
-	mull	12(%esp)
-	add	%eax, %ebx
-	adc	%edx, %ecx
-	lea	-16(%esi), %esi
-	mov	16(%esp), %eax
-	mul	%ebp
-	add	%eax, %ebx
-	adc	%edx, %ecx
-	mov	20(%esp), %eax
-	mul	%edi
-	mov	%ebx, %ebp
-	mov	%ecx, %edi
-L(m0):	add	%eax, %ebp
-	adc	%edx, %edi
-L(m1):	subl	$4, 52(%esp)
-	ja	L(top)
-
-L(end):	mov	4(%esp), %eax
-	mul	%edi
-	mov	60(%esp), %edi
-	add	%eax, %ebp
-	adc	$0, %edx
-	mov	4(%edi), %ecx
-	mov	%edx, %esi
-	mov	%ebp, %eax
-	sal	%cl, %esi
-	mov	%ecx, %ebx
-	neg	%ecx
-	shr	%cl, %eax
-	or	%esi, %eax
-	lea	1(%eax), %esi
-	mull	(%edi)
-	mov	%ebx, %ecx
-	mov	%eax, %ebx
-	mov	%ebp, %eax
-	mov	56(%esp), %ebp
-	sal	%cl, %eax
-	add	%eax, %ebx
-	adc	%esi, %edx
-	imul	%ebp, %edx
-	sub	%edx, %eax
-	lea	(%eax,%ebp), %edx
-	cmp	%eax, %ebx
-	cmovc(	%edx, %eax)
-	mov	%eax, %edx
-	sub	%ebp, %eax
-	cmovc(	%edx, %eax)
-	add	$28, %esp
-	pop	%ebx
-	pop	%esi
-	pop	%edi
-	pop	%ebp
-	shr	%cl, %eax
-	ret
-EPILOGUE()
-
-	ALIGN(16)
-PROLOGUE(mpn_mod_1s_4p_cps)
-C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm
-	push	%ebp
-	push	%edi
-	push	%esi
-	push	%ebx
-	mov	20(%esp), %ebp		C FIXME: avoid bp for 0-idx
-	mov	24(%esp), %ebx
-	bsr	%ebx, %ecx
-	xor	$31, %ecx
-	sal	%cl, %ebx		C b << cnt
-	mov	%ebx, %edx
-	not	%edx
-	mov	$-1, %eax
-	div	%ebx
-	xor	%edi, %edi
-	sub	%ebx, %edi
-	mov	$1, %esi
-	mov	%eax, (%ebp)		C store bi
-	mov	%ecx, 4(%ebp)		C store cnt
-	shld	%cl, %eax, %esi
-	imul	%edi, %esi
-	mov	%eax, %edi
-	mul	%esi
-
-	add	%esi, %edx
-	shr	%cl, %esi
-	mov	%esi, 8(%ebp)		C store B1modb
-
-	not	%edx
-	imul	%ebx, %edx
-	lea	(%edx,%ebx), %esi
-	cmp	%edx, %eax
-	cmovnc(	%edx, %esi)
-	mov	%edi, %eax
-	mul	%esi
-
-	add	%esi, %edx
-	shr	%cl, %esi
-	mov	%esi, 12(%ebp)		C store B2modb
-
-	not	%edx
-	imul	%ebx, %edx
-	lea	(%edx,%ebx), %esi
-	cmp	%edx, %eax
-	cmovnc(	%edx, %esi)
-	mov	%edi, %eax
-	mul	%esi
-
-	add	%esi, %edx
-	shr	%cl, %esi
-	mov	%esi, 16(%ebp)		C store B3modb
-
-	not	%edx
-	imul	%ebx, %edx
-	lea	(%edx,%ebx), %esi
-	cmp	%edx, %eax
-	cmovnc(	%edx, %esi)
-	mov	%edi, %eax
-	mul	%esi
-
-	add	%esi, %edx
-	shr	%cl, %esi
-	mov	%esi, 20(%ebp)		C store B4modb
-
-	not	%edx
-	imul	%ebx, %edx
-	add	%edx, %ebx
-	cmp	%edx, %eax
-	cmovnc(	%edx, %ebx)
-
-	shr	%cl, %ebx
-	mov	%ebx, 24(%ebp)		C store B5modb
-
-	pop	%ebx
-	pop	%esi
-	pop	%edi
-	pop	%ebp
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/k7/mod_34lsub1.asm b/gmp/mpn/x86/k7/mod_34lsub1.asm
index ee3ad04099..f00e84dc42 100644
--- a/gmp/mpn/x86/k7/mod_34lsub1.asm
+++ b/gmp/mpn/x86/k7/mod_34lsub1.asm
@@ -1,32 +1,22 @@
 dnl  AMD K7 mpn_mod_34lsub1 -- remainder modulo 2^24-1.
 
-dnl  Copyright 2000-2002, 2004, 2005, 2008 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002, 2004, 2005, 2008 Free Software Foundation,
+dnl  Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k7/mode1o.asm b/gmp/mpn/x86/k7/mode1o.asm
index 6472ec5949..ef858049a6 100644
--- a/gmp/mpn/x86/k7/mode1o.asm
+++ b/gmp/mpn/x86/k7/mode1o.asm
@@ -1,32 +1,21 @@
 dnl  AMD K7 mpn_modexact_1_odd -- exact division style remainder.
 
-dnl  Copyright 2000-2002, 2004, 2007 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002, 2004, 2007 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -122,7 +111,7 @@ ifdef(`PIC',`
 
 	subl	%eax, %edi		C inv = 2*inv - inv*inv*d
 
-	ASSERT(e,`	C d*inv == 1 mod 2^GMP_LIMB_BITS
+	ASSERT(e,`	C d*inv == 1 mod 2^BITS_PER_MP_LIMB
 	movl	%esi, %eax
 	imull	%edi, %eax
 	cmpl	$1, %eax')
diff --git a/gmp/mpn/x86/k7/mul_1.asm b/gmp/mpn/x86/k7/mul_1.asm
index 755cd2ed50..016262d594 100644
--- a/gmp/mpn/x86/k7/mul_1.asm
+++ b/gmp/mpn/x86/k7/mul_1.asm
@@ -1,38 +1,28 @@
 dnl  AMD K7 mpn_mul_1.
 
-dnl  Copyright 1999-2002, 2005, 2008 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002, 2005, 2008 Free Software Foundation,
+dnl  Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C			    cycles/limb
-C P5
+C                           cycles/limb
+C P5:
 C P6 model 0-8,10-12)
 C P6 model 9  (Banias)
 C P6 model 13 (Dothan)
@@ -41,9 +31,9 @@ C P4 model 1  (?)
 C P4 model 2  (Northwood)
 C P4 model 3  (Prescott)
 C P4 model 4  (Nocona)
-C AMD K6
-C AMD K7			 3.25
-C AMD K8
+C K6:
+C K7:                            3.25
+C K8:
 
 C TODO
 C  * Improve feed-in and wind-down code.  We beat the old code for all n != 1,
diff --git a/gmp/mpn/x86/k7/mul_basecase.asm b/gmp/mpn/x86/k7/mul_basecase.asm
index 4dfb500885..7f4c0002f7 100644
--- a/gmp/mpn/x86/k7/mul_basecase.asm
+++ b/gmp/mpn/x86/k7/mul_basecase.asm
@@ -1,32 +1,21 @@
 dnl  AMD K7 mpn_mul_basecase -- multiply two mpn numbers.
 
-dnl  Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/k7/sqr_basecase.asm b/gmp/mpn/x86/k7/sqr_basecase.asm
index 7b6a97e0df..408a13dc9b 100644
--- a/gmp/mpn/x86/k7/sqr_basecase.asm
+++ b/gmp/mpn/x86/k7/sqr_basecase.asm
@@ -1,32 +1,21 @@
 dnl  AMD K7 mpn_sqr_basecase -- square an mpn number.
 
-dnl  Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -39,18 +28,18 @@ C     roughly the Karatsuba recursing range).
 dnl  These are the same as mpn/x86/k6/sqr_basecase.asm, see that code for
 dnl  some comments.
 
-deflit(SQR_TOOM2_THRESHOLD_MAX, 66)
+deflit(SQR_KARATSUBA_THRESHOLD_MAX, 66)
 
-ifdef(`SQR_TOOM2_THRESHOLD_OVERRIDE',
-`define(`SQR_TOOM2_THRESHOLD',SQR_TOOM2_THRESHOLD_OVERRIDE)')
+ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE',
+`define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)')
 
-m4_config_gmp_mparam(`SQR_TOOM2_THRESHOLD')
-deflit(UNROLL_COUNT, eval(SQR_TOOM2_THRESHOLD-3))
+m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD')
+deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3))
 
 
 C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
 C
-C With a SQR_TOOM2_THRESHOLD around 50 this code is about 1500 bytes,
+C With a SQR_KARATSUBA_THRESHOLD around 50 this code is about 1500 bytes,
 C which is quite a bit, but is considered good value since squares big
 C enough to use most of the code will be spending quite a few cycles in it.
 
diff --git a/gmp/mpn/x86/k7/sublsh1_n.asm b/gmp/mpn/x86/k7/sublsh1_n.asm
deleted file mode 100644
index 523b01218d..0000000000
--- a/gmp/mpn/x86/k7/sublsh1_n.asm
+++ /dev/null
@@ -1,173 +0,0 @@
-dnl  AMD K7 mpn_sublsh1_n_ip1 -- rp[] = rp[] - (up[] << 1)
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C This is an attempt at a sublsh1_n for x86-32, not relying on sse2 insns.  The
-C innerloop is 2*3-way unrolled, which is best we can do with the available
-C registers.  It seems tricky to use the same structure for rsblsh1_n, since we
-C cannot feed carry between operations there.
-
-C			    cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C Intel Atom			 6.75
-C AMD K6
-C AMD K7
-C AMD K8
-
-C This is a basic sublsh1_n for k7, atom, and perhaps some other x86-32
-C processors.  It uses 2*4-way unrolling, for good reasons.
-C
-C Breaking carry recurrency might be a good idea.  We would then need separate
-C registers for the shift carry and add/subtract carry, which in turn would
-C force is to 2*2-way unrolling.
-
-defframe(PARAM_SIZE,	12)
-defframe(PARAM_SRC,	 8)
-defframe(PARAM_DST,	 4)
-
-dnl  re-use parameter space
-define(VAR_COUNT,`PARAM_SIZE')
-define(SAVE_EBX,`PARAM_SRC')
-define(SAVE_EBP,`PARAM_DST')
-
-ASM_START()
-	TEXT
-	ALIGN(8)
-PROLOGUE(mpn_sublsh1_n_ip1)
-deflit(`FRAME',0)
-
-define(`rp',  `%edi')
-define(`up',  `%esi')
-
-	mov	PARAM_SIZE, %eax	C size
-	push	up			FRAME_pushl()
-	push	rp			FRAME_pushl()
-	xor	%edx, %edx
-	mov	PARAM_SRC, up
-	mov	PARAM_DST, rp
-	mov	%ebx, SAVE_EBX
-	mov	%eax, %ebx
-	shr	$3, %eax
-
-	not	%eax			C count = -(size\8)-i
-	and	$7, %ebx		C size % 8
-	jz	L(exact)
-
-L(oop):
-ifdef(`CPU_P6',`
-	shr	%edx ')			C restore 2nd saved carry bit
-	mov	(up), %ecx
-	adc	%ecx, %ecx
-	rcr	%edx			C restore 1st saved carry bit
-	lea	4(up), up
-	sbb	%ecx, (rp)
-	lea	4(rp), rp
-	adc	%edx, %edx		C save a carry bit in edx
-ifdef(`CPU_P6',`
-	adc	%edx, %edx ')		C save another carry bit in edx
-	dec	%ebx
-	jnz	L(oop)
-L(exact):
-	inc	%eax
-	jz	L(end)
-	mov	%eax, VAR_COUNT
-	mov	%ebp, SAVE_EBP
-
-	ALIGN(16)
-L(top):
-ifdef(`CPU_P6',`
-	shr	%edx ')			C restore 2nd saved carry bit
-	mov	(up), %eax
-	adc	%eax, %eax
-	mov	4(up), %ebx
-	adc	%ebx, %ebx
-	mov	8(up), %ecx
-	adc	%ecx, %ecx
-	mov	12(up), %ebp
-	adc	%ebp, %ebp
-
-	rcr	%edx			C restore 1st saved carry bit
-
-	sbb	%eax, (rp)
-	sbb	%ebx, 4(rp)
-	sbb	%ecx, 8(rp)
-	sbb	%ebp, 12(rp)
-
-	mov	16(up), %eax
-	adc	%eax, %eax
-	mov	20(up), %ebx
-	adc	%ebx, %ebx
-	mov	24(up), %ecx
-	adc	%ecx, %ecx
-	mov	28(up), %ebp
-	adc	%ebp, %ebp
-
-	lea	32(up), up
-	adc	%edx, %edx		C save a carry bit in edx
-
-	sbb	%eax, 16(rp)
-	sbb	%ebx, 20(rp)
-	sbb	%ecx, 24(rp)
-	sbb	%ebp, 28(rp)
-
-ifdef(`CPU_P6',`
-	adc	%edx, %edx ')		C save another carry bit in edx
-	incl	VAR_COUNT
-	lea	32(rp), rp
-	jne	L(top)
-
-	mov	SAVE_EBP, %ebp
-L(end):
-	mov	SAVE_EBX, %ebx
-
-ifdef(`CPU_P6',`
-	xor	%eax, %eax
-	shr	$1, %edx
-	adc	%edx, %eax
-',`
-	adc	$0, %edx
-	mov	%edx, %eax
-')
-	pop	rp			FRAME_popl()
-	pop	up			FRAME_popl()
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/k8/gmp-mparam.h b/gmp/mpn/x86/k8/gmp-mparam.h
deleted file mode 100644
index 8d95fef80b..0000000000
--- a/gmp/mpn/x86/k8/gmp-mparam.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/* x86/k8 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2500 MHz K8 Brisbane */
-/* FFT tuning limit = 10000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         11
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        12
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     16
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 1
-#define DIV_QR_1_NORM_THRESHOLD              2
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           40
-
-#define MUL_TOOM22_THRESHOLD                24
-#define MUL_TOOM33_THRESHOLD                81
-#define MUL_TOOM44_THRESHOLD               130
-#define MUL_TOOM6H_THRESHOLD               303
-#define MUL_TOOM8H_THRESHOLD               430
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      81
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      93
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      92
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     122
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 46
-#define SQR_TOOM3_THRESHOLD                 78
-#define SQR_TOOM4_THRESHOLD                202
-#define SQR_TOOM6_THRESHOLD                286
-#define SQR_TOOM8_THRESHOLD                422
-
-#define MULMID_TOOM42_THRESHOLD             56
-
-#define MULMOD_BNM1_THRESHOLD               17
-#define SQRMOD_BNM1_THRESHOLD               18
-
-#define MUL_FFT_MODF_THRESHOLD             848  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    848, 5}, {     27, 6}, {     25, 7}, {     13, 6}, \
-    {     27, 7}, {     15, 6}, {     32, 7}, {     17, 6}, \
-    {     35, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
-    {     47, 7}, {     27, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     41, 8}, {     23, 7}, {     47, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 8}, \
-    {     55, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
-    {     31, 9}, {     79,10}, {     47, 9}, {    103,11}, \
-    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
-    {    167,10}, {     95, 9}, {    199,10}, {    111,11}, \
-    {     63,10}, {    127, 9}, {    255,10}, {    143, 9}, \
-    {    287,10}, {    159,11}, {     95,10}, {    207,12}, \
-    {     63,11}, {    127,10}, {    271, 9}, {    543,10}, \
-    {    287,11}, {    159,10}, {    319, 9}, {    639,10}, \
-    {    335, 9}, {    671,11}, {    191,10}, {    383, 9}, \
-    {    799,11}, {    223,12}, {    127,11}, {    255,10}, \
-    {    543,11}, {    287,10}, {    607, 9}, {   1215,11}, \
-    {    319,10}, {    671, 9}, {   1343,12}, {    191,11}, \
-    {    383,10}, {    799, 9}, {   1599,11}, {    415,10}, \
-    {    863, 9}, {   1727,13}, {    127,12}, {    255,11}, \
-    {    543,10}, {   1119,11}, {    607,10}, {   1215,12}, \
-    {    319,11}, {    671,10}, {   1343,11}, {    735,10}, \
-    {   1471, 9}, {   2943,12}, {    383,11}, {    799,10}, \
-    {   1599,11}, {    863,10}, {   1727,12}, {    447,11}, \
-    {    991,13}, {    255,12}, {    511,11}, {   1023,10}, \
-    {   2111,11}, {   1119,12}, {    575,11}, {   1215,10}, \
-    {   2431,12}, {    639,11}, {   1343,12}, {    703,11}, \
-    {   1471,10}, {   2943,13}, {    383,12}, {    767,11}, \
-    {   1599,12}, {    831,11}, {   1727,12}, {    959,11}, \
-    {   1919,14}, {    255,13}, {    511,12}, {   1023,11}, \
-    {   2047,12}, {   1087,11}, {   2239,12}, {   1215,11}, \
-    {   2431,13}, {    639,12}, {   1471,11}, {   2943,13}, \
-    {    767,12}, {   1727,13}, {    895,12}, {   1983,14}, \
-    {    511,13}, {   1023,12}, {   2239,13}, {   1151,12}, \
-    {   4096,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 144
-#define MUL_FFT_THRESHOLD                 7552
-
-#define SQR_FFT_MODF_THRESHOLD             618  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    618, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     28, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
-    {     35, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
-    {     47, 7}, {     27, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     41, 8}, {     23, 7}, {     47, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 8}, \
-    {     51, 9}, {     31, 8}, {     67, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
-    {     31, 9}, {     79,10}, {     47, 9}, {     95,11}, \
-    {     31,10}, {     63, 9}, {    135,10}, {     79, 9}, \
-    {    167,10}, {     95, 9}, {    191,10}, {    111,11}, \
-    {     63,10}, {    159,11}, {     95,10}, {    191,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
-    {    271, 9}, {    543, 8}, {   1087,10}, {    287,11}, \
-    {    159,10}, {    319, 9}, {    639,10}, {    335, 9}, \
-    {    671, 8}, {   1343,10}, {    351,11}, {    191,10}, \
-    {    383, 9}, {    767,10}, {    399, 9}, {    799,10}, \
-    {    415,11}, {    223,12}, {    127,11}, {    255,10}, \
-    {    543, 9}, {   1087,11}, {    287,10}, {    607, 9}, \
-    {   1215,11}, {    319,10}, {    671, 9}, {   1343,11}, \
-    {    351,12}, {    191,11}, {    383,10}, {    799, 9}, \
-    {   1599,11}, {    415,10}, {    863, 9}, {   1727,13}, \
-    {    127,12}, {    255,11}, {    543,10}, {   1087,11}, \
-    {    607,10}, {   1215,12}, {    319,11}, {    671,10}, \
-    {   1343,11}, {    735,10}, {   1471,12}, {    383,11}, \
-    {    799,10}, {   1599,11}, {    863,10}, {   1727,12}, \
-    {    447,11}, {    959,10}, {   1919,11}, {    991,13}, \
-    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
-    {   1215,10}, {   2431,12}, {    639,11}, {   1343,12}, \
-    {    703,11}, {   1471,13}, {    383,12}, {    767,11}, \
-    {   1599,12}, {    831,11}, {   1727,12}, {    959,11}, \
-    {   1919,14}, {    255,13}, {    511,12}, {   1087,11}, \
-    {   2239,12}, {   1215,11}, {   2431,13}, {    639,12}, \
-    {   1471,11}, {   2943,13}, {    767,12}, {   1727,11}, \
-    {   3455,13}, {    895,12}, {   1983,14}, {    511,13}, \
-    {   1023,12}, {   2239,13}, {   1151,12}, {   4096,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 147
-#define SQR_FFT_THRESHOLD                 5760
-
-#define MULLO_BASECASE_THRESHOLD             8
-#define MULLO_DC_THRESHOLD                  31
-#define MULLO_MUL_N_THRESHOLD            14281
-
-#define DC_DIV_QR_THRESHOLD                 91
-#define DC_DIVAPPR_Q_THRESHOLD             280
-#define DC_BDIV_QR_THRESHOLD                87
-#define DC_BDIV_Q_THRESHOLD                222
-
-#define INV_MULMOD_BNM1_THRESHOLD           62
-#define INV_NEWTON_THRESHOLD               268
-#define INV_APPR_THRESHOLD                 270
-
-#define BINV_NEWTON_THRESHOLD              260
-#define REDC_1_TO_REDC_N_THRESHOLD          79
-
-#define MU_DIV_QR_THRESHOLD               1718
-#define MU_DIVAPPR_Q_THRESHOLD            1528
-#define MUPI_DIV_QR_THRESHOLD               97
-#define MU_BDIV_QR_THRESHOLD              1470
-#define MU_BDIV_Q_THRESHOLD               1470
-
-#define POWM_SEC_TABLE  1,22,114,416,1464
-
-#define MATRIX22_STRASSEN_THRESHOLD         16
-#define HGCD_THRESHOLD                     149
-#define HGCD_APPR_THRESHOLD                204
-#define HGCD_REDUCE_THRESHOLD             4455
-#define GCD_DC_THRESHOLD                   599
-#define GCDEXT_DC_THRESHOLD                403
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                13
-#define GET_STR_PRECOMPUTE_THRESHOLD        28
-#define SET_STR_DC_THRESHOLD               270
-#define SET_STR_PRECOMPUTE_THRESHOLD      1367
-
-#define FAC_DSC_THRESHOLD                  348
-#define FAC_ODD_THRESHOLD                   24
diff --git a/gmp/mpn/x86/lshift.asm b/gmp/mpn/x86/lshift.asm
index 6ee6153cc2..5598599f8b 100644
--- a/gmp/mpn/x86/lshift.asm
+++ b/gmp/mpn/x86/lshift.asm
@@ -1,43 +1,33 @@
 dnl  x86 mpn_lshift -- mpn left shift.
 
-dnl  Copyright 1992, 1994, 1996, 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl  Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C     cycles/limb
-C P54	 7.5
-C P55	 7.0
-C P6	 2.5
-C K6	 4.5
-C K7	 5.0
-C P4	14.5
+C P54:   7.5
+C P55:   7.0
+C P6:    2.5
+C K6:    4.5
+C K7:    5.0
+C P4:   14.5
 
 
 C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/gmp/mpn/x86/mmx/sec_tabselect.asm b/gmp/mpn/x86/mmx/sec_tabselect.asm
deleted file mode 100644
index aae158abf7..0000000000
--- a/gmp/mpn/x86/mmx/sec_tabselect.asm
+++ /dev/null
@@ -1,163 +0,0 @@
-dnl  X86 MMX mpn_sec_tabselect.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C			     cycles/limb     cycles/limb
-C			      ali,evn n	     unal,evn n
-C P5
-C P6 model 0-8,10-12
-C P6 model 9  (Banias)
-C P6 model 13 (Dothan)		 1.33		 1.87
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)	 2.1		 2.63
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)		 1.7		 2.57
-C Intel Atom			 1.85		 2.7
-C AMD K6
-C AMD K7			 1.33		 1.33
-C AMD K8
-C AMD K10
-
-define(`rp',     `%edi')
-define(`tp',     `%esi')
-define(`n',      `%edx')
-define(`nents',  `%ecx')
-define(`which',  `')
-
-define(`i',      `%ebp')
-define(`j',      `%ebx')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_sec_tabselect)
-	push	%ebx
-	push	%esi
-	push	%edi
-	push	%ebp
-
-	mov	20(%esp), rp
-	mov	24(%esp), tp
-	mov	28(%esp), n
-	mov	32(%esp), nents
-
-	movd	36(%esp), %mm6
-	punpckldq %mm6, %mm6		C 2 copies of `which'
-
-	mov	$1, %ebx
-	movd	%ebx, %mm7
-	punpckldq %mm7, %mm7		C 2 copies of 1
-
-	mov	n, j
-	add	$-4, j
-	js	L(outer_end)
-
-L(outer_top):
-	mov	nents, i
-	mov	tp, %eax
-	pxor	%mm1, %mm1
-	pxor	%mm4, %mm4
-	pxor	%mm5, %mm5
-	ALIGN(16)
-L(top):	movq	%mm6, %mm0
-	pcmpeqd	%mm1, %mm0
-	paddd	%mm7, %mm1
-	movq	(tp), %mm2
-	movq	8(tp), %mm3
-	pand	%mm0, %mm2
-	pand	%mm0, %mm3
-	por	%mm2, %mm4
-	por	%mm3, %mm5
-	lea	(tp,n,4), tp
-	add	$-1, i
-	jne	L(top)
-
-	movq	%mm4, (rp)
-	movq	%mm5, 8(rp)
-
-	lea	16(%eax), tp
-	lea	16(rp), rp
-	add	$-4, j
-	jns	L(outer_top)
-L(outer_end):
-
-	test	$2, %dl
-	jz	L(b0x)
-
-L(b1x):	mov	nents, i
-	mov	tp, %eax
-	pxor	%mm1, %mm1
-	pxor	%mm4, %mm4
-	ALIGN(16)
-L(tp2):	movq	%mm6, %mm0
-	pcmpeqd	%mm1, %mm0
-	paddd	%mm7, %mm1
-	movq	(tp), %mm2
-	pand	%mm0, %mm2
-	por	%mm2, %mm4
-	lea	(tp,n,4), tp
-	add	$-1, i
-	jne	L(tp2)
-
-	movq	%mm4, (rp)
-
-	lea	8(%eax), tp
-	lea	8(rp), rp
-
-L(b0x):	test	$1, %dl
-	jz	L(b00)
-
-L(b01):	mov	nents, i
-	pxor	%mm1, %mm1
-	pxor	%mm4, %mm4
-	ALIGN(16)
-L(tp1):	movq	%mm6, %mm0
-	pcmpeqd	%mm1, %mm0
-	paddd	%mm7, %mm1
-	movd	(tp), %mm2
-	pand	%mm0, %mm2
-	por	%mm2, %mm4
-	lea	(tp,n,4), tp
-	add	$-1, i
-	jne	L(tp1)
-
-	movd	%mm4, (rp)
-
-L(b00):	pop	%ebp
-	pop	%edi
-	pop	%esi
-	pop	%ebx
-	emms
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/mod_1.asm b/gmp/mpn/x86/mod_1.asm
new file mode 100644
index 0000000000..0fa3ce0def
--- /dev/null
+++ b/gmp/mpn/x86/mod_1.asm
@@ -0,0 +1,163 @@
+dnl  x86 mpn_mod_1 -- mpn by limb remainder.
+
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl  Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C      cycles/limb
+C 486     42 approx, maybe
+C P5      44
+C P6      39
+C K6      20
+C K7      41
+C P4      58
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                       mp_limb_t carry);
+C
+C Essentially this code is the same as the division based part of
+C mpn/generic/mod_1.c, but has the advantage that we get the desired divl
+C instruction even when gcc is not being used (where longlong.h only has the
+C rather slow generic C udiv_qrnnd()).
+C
+C A test is done to see if the high limb is less than the divisor, and if so
+C one less div is done.  A div is between 20 and 40 cycles on the various
+C x86s, so assuming high<divisor about half the time, then this test saves
+C half that amount.  The branch misprediction penalty on each chip is less
+C than half a div.
+C
+C
+C Notes for K6:
+C
+C Back-to-back div instructions take 20 cycles, the same as the loop here,
+C so it seems there's nothing to gain by rearranging.  Pairing the mov and
+C loop instructions was found to gain nothing.  Normally we use a loop
+C instruction rather than decl/jnz, but it gains nothing here.
+C
+C A multiply-by-inverse is used in mpn/x86/k6/pre_mod_1.asm, but it saves
+C only 2 c/l so currently we haven't bothered with the same for mpn_mod_1.
+C If an inverse takes about 40 cycles for normalized or perhaps 60 for
+C unnormalized (due to bsfl being slow on k6) then the threshold would be at
+C least 20 or 30 limbs.
+C
+
+defframe(PARAM_CARRY,  16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE,   8)
+defframe(PARAM_SRC,    4)
+
+	TEXT
+
+	ALIGN(16)
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+	movl	PARAM_SIZE, %ecx	C ecx = size, number of limbs at src
+	pushl	%ebx		FRAME_pushl()
+
+	movl	PARAM_SRC, %ebx		C ebx = src
+	pushl	%esi		FRAME_pushl()
+
+	orl	%ecx, %ecx		C ZF set when size==0
+	jz	L(done_zero)		C size==0, return 0
+
+	movl	PARAM_DIVISOR, %esi	C esi = divisor
+	movl	-4(%ebx,%ecx,4), %eax	C src high limb
+
+	cmpl	%esi, %eax		C CF set when high<divisor (unsigned)
+
+	sbbl	%edx, %edx		C -1 if high<divisor, else 0
+
+	addl	%edx, %ecx		C skip one division if high<divisor
+	jz	L(done_eax)		C size was 1 and high<divisor, remainder is high (in eax)
+
+	andl	%eax, %edx		C carry = high if high<divisor, else 0
+
+
+L(top):
+	C eax	scratch (quotient)
+	C ebx	src
+	C ecx	counter
+	C edx	carry (remainder)
+	C esi	divisor
+	C edi
+	C ebp
+
+	movl	-4(%ebx,%ecx,4), %eax	C next limb, working from high to low
+
+	divl	%esi			C edx:eax / divisor, remainder left in edx
+
+	decl	%ecx
+	jnz	L(top)
+
+
+	movl	%edx, %eax		C return the final remainder
+L(done_eax):
+	popl	%esi			C restore registers pushed at entry
+
+	popl	%ebx
+
+	ret
+
+EPILOGUE()
+
+
+	C This code is located after mpn_mod_1, so the jump to L(top) here is
+	C backward and hence will be predicted as taken.  (size==0 is
+	C considered unlikely.)
+
+	ALIGN(16)
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+
+	movl	PARAM_SIZE, %ecx	C ecx = size, number of limbs at src
+	pushl	%ebx		FRAME_pushl()
+
+	movl	PARAM_SRC, %ebx		C ebx = src
+	pushl	%esi		FRAME_pushl()
+
+	movl	PARAM_DIVISOR, %esi	C esi = divisor
+	orl	%ecx, %ecx		C ZF set when size==0
+
+	movl	PARAM_CARRY, %edx	C edx = initial carry, movl leaves the flags from orl intact
+	jnz	L(top)			C size!=0, join the division loop in mpn_mod_1
+
+	popl	%esi			C size==0, restore registers pushed at entry
+	movl	%edx, %eax		C and return the carry unchanged
+
+	popl	%ebx
+
+	ret
+
+
+	C This code is for mpn_mod_1, but is positioned here to save some
+	C space in the alignment padding.
+	C
+L(done_zero):
+	popl	%esi			C restore registers pushed by mpn_mod_1
+	xorl	%eax, %eax		C return 0 for size==0
+
+	popl	%ebx
+
+	ret
+
+EPILOGUE()
diff --git a/gmp/mpn/x86/mod_34lsub1.asm b/gmp/mpn/x86/mod_34lsub1.asm
index e09e702c6f..68b4a73dbc 100644
--- a/gmp/mpn/x86/mod_34lsub1.asm
+++ b/gmp/mpn/x86/mod_34lsub1.asm
@@ -1,42 +1,31 @@
 dnl  Generic x86 mpn_mod_34lsub1 -- mpn remainder modulo 2^24-1.
 
-dnl  Copyright 2000-2002, 2004 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C      cycles/limb
-C P5	  3.0
-C P6	  3.66
-C K6	  3.0
-C K7	  1.3
-C P4	  9
+C P5:     3.0
+C P6:     3.66
+C K6:     3.0
+C K7:     1.3
+C P4:     9
 
 
 C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
diff --git a/gmp/mpn/x86/mul_1.asm b/gmp/mpn/x86/mul_1.asm
index 421de62225..1d715ece7e 100644
--- a/gmp/mpn/x86/mul_1.asm
+++ b/gmp/mpn/x86/mul_1.asm
@@ -1,50 +1,40 @@
 dnl  x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector
 dnl  with a limb and store the result in a second limb vector.
 
-dnl  Copyright 1992, 1994, 1997-2002, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1992, 1994, 1997, 1998, 1999, 2000, 2001, 2002, 2005 Free
+dnl  Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C			    cycles/limb
-C P5				12.5
-C P6 model 0-8,10-12		 5.5
+C                           cycles/limb
+C P5:                           12.5
+C P6 model 0-8,10-12             5.5
 C P6 model 9  (Banias)
-C P6 model 13 (Dothan)		 5.25
-C P4 model 0  (Willamette)	19.0
-C P4 model 1  (?)		19.0
-C P4 model 2  (Northwood)	19.0
+C P6 model 13 (Dothan)           5.25
+C P4 model 0  (Willamette)      19.0
+C P4 model 1  (?)               19.0
+C P4 model 2  (Northwood)       19.0
 C P4 model 3  (Prescott)
 C P4 model 4  (Nocona)
-C AMD K6			10.5
-C AMD K7			 4.5
-C AMD K8
+C K6:                           10.5
+C K7:                            4.5
+C K8:
 
 
 C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/gmp/mpn/x86/mul_basecase.asm b/gmp/mpn/x86/mul_basecase.asm
index 8339732a80..7918ea07f3 100644
--- a/gmp/mpn/x86/mul_basecase.asm
+++ b/gmp/mpn/x86/mul_basecase.asm
@@ -1,43 +1,33 @@
 dnl  x86 mpn_mul_basecase -- Multiply two limb vectors and store the result
 dnl  in a third limb vector.
 
-dnl  Copyright 1996-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free Software
+dnl  Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C     cycles/crossproduct
-C P5	  15
-C P6	   7.5
-C K6	  12.5
-C K7	   5.5
-C P4	  24
+C P5:     15
+C P6:      7.5
+C K6:     12.5
+C K7:      5.5
+C P4:     24
 
 
 C void mpn_mul_basecase (mp_ptr wp,
diff --git a/gmp/mpn/x86/nano/gmp-mparam.h b/gmp/mpn/x86/nano/gmp-mparam.h
deleted file mode 100644
index cd8ac4e1d6..0000000000
--- a/gmp/mpn/x86/nano/gmp-mparam.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/* x86/nano gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */
-
-#define MOD_1_1P_METHOD                      1
-#define MOD_1_NORM_THRESHOLD                 3
-#define MOD_1_UNNORM_THRESHOLD               3
-#define MOD_1N_TO_MOD_1_1_THRESHOLD         10
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          9
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0  /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        53
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     12
-#define USE_PREINV_DIVREM_1                  1
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           32
-
-#define MUL_TOOM22_THRESHOLD                16
-#define MUL_TOOM33_THRESHOLD               132
-#define MUL_TOOM44_THRESHOLD               195
-#define MUL_TOOM6H_THRESHOLD               270
-#define MUL_TOOM8H_THRESHOLD               478
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     129
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     138
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     130
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     135
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 28
-#define SQR_TOOM3_THRESHOLD                194
-#define SQR_TOOM4_THRESHOLD                502
-#define SQR_TOOM6_THRESHOLD                746
-#define SQR_TOOM8_THRESHOLD               1005
-
-#define MULMID_TOOM42_THRESHOLD             40
-
-#define MULMOD_BNM1_THRESHOLD               14
-#define SQRMOD_BNM1_THRESHOLD               19
-
-#define POWM_SEC_TABLE  4,23,258,828,2246
-
-#define MUL_FFT_MODF_THRESHOLD             308  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    308, 5}, {     13, 6}, {      7, 5}, {     17, 6}, \
-    {      9, 5}, {     19, 6}, {     11, 5}, {     23, 6}, \
-    {     13, 7}, {      7, 6}, {     17, 7}, {      9, 6}, \
-    {     19, 7}, {     11, 6}, {     24, 7}, {     15, 6}, \
-    {     31, 7}, {     19, 8}, {     11, 7}, {     25, 8}, \
-    {     15, 7}, {     33, 8}, {     19, 7}, {     39, 8}, \
-    {     23, 7}, {     47, 9}, {     15, 8}, {     31, 7}, \
-    {     63, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
-    {     15, 9}, {     31, 8}, {     63, 9}, {     47,10}, \
-    {     31, 9}, {     71,10}, {     47, 9}, {     95,11}, \
-    {     31,10}, {     63, 9}, {    127, 8}, {    255,10}, \
-    {     79, 9}, {    159,10}, {     95, 9}, {    191,11}, \
-    {     63,10}, {    127, 9}, {    255, 8}, {    543, 9}, \
-    {    287, 8}, {    575, 7}, {   1215,10}, {    159,11}, \
-    {     95,10}, {    191,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    543, 8}, {   1087,10}, {    287, 9}, \
-    {    607, 8}, {   1215,11}, {    159,10}, {    319, 9}, \
-    {    639,10}, {    351, 9}, {    703, 8}, {   1407, 9}, \
-    {    735, 8}, {   1471,11}, {    191,10}, {    383, 9}, \
-    {    767,10}, {    415, 9}, {    831,11}, {    223,10}, \
-    {    447, 9}, {    895,10}, {    479, 9}, {    959, 8}, \
-    {   1919,12}, {   4096,13}, {   8192,14}, {  16384,15}, \
-    {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 89
-#define MUL_FFT_THRESHOLD                 1856
-
-#define SQR_FFT_MODF_THRESHOLD             396  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    396, 5}, {     13, 6}, {      7, 5}, {     21, 6}, \
-    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
-    {     25, 7}, {     15, 6}, {     31, 7}, {     19, 6}, \
-    {     39, 7}, {     21, 8}, {     11, 7}, {     23, 6}, \
-    {     47, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
-    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
-    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
-    {     39, 9}, {     23, 8}, {     47,10}, {     15, 9}, \
-    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
-    {     47,10}, {     31, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    127,10}, \
-    {     79, 9}, {    159,10}, {     95,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    543,10}, {    143, 9}, \
-    {    287, 8}, {    607, 7}, {   1215, 6}, {   2431,10}, \
-    {    159, 8}, {    639,11}, {     95,10}, {    191,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    543, 8}, \
-    {   1087,10}, {    287, 9}, {    607, 8}, {   1215,11}, \
-    {    159,10}, {    319, 9}, {    671,10}, {    351, 9}, \
-    {    703, 8}, {   1407, 9}, {    735, 8}, {   1471, 7}, \
-    {   2943,11}, {    191,10}, {    383, 9}, {    799,10}, \
-    {    415, 9}, {    895,10}, {    479,12}, {   4096,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 87
-#define SQR_FFT_THRESHOLD                 2368
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  51
-#define MULLO_MUL_N_THRESHOLD             3369
-
-#define DC_DIV_QR_THRESHOLD                 56
-#define DC_DIVAPPR_Q_THRESHOLD             183
-#define DC_BDIV_QR_THRESHOLD                55
-#define DC_BDIV_Q_THRESHOLD                118
-
-#define INV_MULMOD_BNM1_THRESHOLD           30
-#define INV_NEWTON_THRESHOLD               266
-#define INV_APPR_THRESHOLD                 218
-
-#define BINV_NEWTON_THRESHOLD              268
-#define REDC_1_TO_REDC_N_THRESHOLD          56
-
-#define MU_DIV_QR_THRESHOLD               1308
-#define MU_DIVAPPR_Q_THRESHOLD            1528
-#define MUPI_DIV_QR_THRESHOLD              124
-#define MU_BDIV_QR_THRESHOLD               855
-#define MU_BDIV_Q_THRESHOLD               1334
-
-#define MATRIX22_STRASSEN_THRESHOLD         14
-#define HGCD_THRESHOLD                     104
-#define HGCD_APPR_THRESHOLD                139
-#define HGCD_REDUCE_THRESHOLD             2121
-#define GCD_DC_THRESHOLD                   456
-#define GCDEXT_DC_THRESHOLD                321
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                11
-#define GET_STR_PRECOMPUTE_THRESHOLD        25
-#define SET_STR_DC_THRESHOLD               542
-#define SET_STR_PRECOMPUTE_THRESHOLD       840
diff --git a/gmp/mpn/x86/p6/README b/gmp/mpn/x86/p6/README
index f19d47b94f..1ded4e7177 100644
--- a/gmp/mpn/x86/p6/README
+++ b/gmp/mpn/x86/p6/README
@@ -3,28 +3,17 @@ Copyright 2000, 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
@@ -63,7 +52,7 @@ Some of these might be able to be improved.
 
 	mpn_mul_basecase          8.2 cycles/crossproduct (approx)
 	mpn_sqr_basecase          4.0 cycles/crossproduct (approx)
-				  or 7.75 cycles/triangleproduct (approx)
+	                          or 7.75 cycles/triangleproduct (approx)
 
 Pentium II and III have MMX and get the following improvements.
 
diff --git a/gmp/mpn/x86/p6/aors_n.asm b/gmp/mpn/x86/p6/aors_n.asm
index df51c2e6f7..f4652ec2cb 100644
--- a/gmp/mpn/x86/p6/aors_n.asm
+++ b/gmp/mpn/x86/p6/aors_n.asm
@@ -1,43 +1,32 @@
 dnl  Intel P6 mpn_add_n/mpn_sub_n -- mpn add or subtract.
 
 dnl  Copyright 2006 Free Software Foundation, Inc.
-
+dnl
 dnl  This file is part of the GNU MP Library.
 dnl
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
 dnl
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C TODO:
-C  * Avoid indexed addressing, it makes us stall on the two-ported register
+C  * Avoid indexed addressing, it makes us stall on the two-ported register
 C    file.
 
-C			    cycles/limb
-C P6 model 0-8,10-12		3.17
-C P6 model 9   (Banias)		2.15
-C P6 model 13  (Dothan)		2.25
+C                           cycles/limb
+C P6 model 0-8,10-12            3.17
+C P6 model 9   (Banias)         ?
+C P6 model 13  (Dothan)         2.25
 
 
 define(`rp',	`%edi')
diff --git a/gmp/mpn/x86/p6/aorsmul_1.asm b/gmp/mpn/x86/p6/aorsmul_1.asm
index bc8c49c62e..746bf05f12 100644
--- a/gmp/mpn/x86/p6/aorsmul_1.asm
+++ b/gmp/mpn/x86/p6/aorsmul_1.asm
@@ -1,49 +1,38 @@
 dnl  Intel P6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
 
-dnl  Copyright 1999-2002, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C			    cycles/limb
-C P5
-C P6 model 0-8,10-12		 6.44
-C P6 model 9  (Banias)		 6.15
-C P6 model 13 (Dothan)		 6.11
+C                           cycles/limb
+C P5:
+C P6 model 0-8,10-12             6.44
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)           6.11
 C P4 model 0  (Willamette)
 C P4 model 1  (?)
 C P4 model 2  (Northwood)
 C P4 model 3  (Prescott)
 C P4 model 4  (Nocona)
-C AMD K6
-C AMD K7
-C AMD K8
+C K6:
+C K7:
+C K8:
 
 
 dnl  P6 UNROLL_COUNT cycles/limb
@@ -181,7 +170,7 @@ C registers when doing the mul for the initial two carry limbs.
 C
 C The add/adc for the initial carry in %ebx is necessary only for the
 C mpn_add/submul_1c entry points.  Duplicating the startup code to
-C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good
+C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good
 C idea.
 
 dnl  overlapping with parameters already fetched
diff --git a/gmp/mpn/x86/p6/bdiv_q_1.asm b/gmp/mpn/x86/p6/bdiv_q_1.asm
deleted file mode 100644
index 2cc179c238..0000000000
--- a/gmp/mpn/x86/p6/bdiv_q_1.asm
+++ /dev/null
@@ -1,286 +0,0 @@
-dnl  Intel P6 mpn_modexact_1_odd -- exact division style remainder.
-
-dnl  Rearranged from mpn/x86/p6/dive_1.asm by Marco Bodrato.
-
-dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C       odd  even  divisor
-C P6:  10.0  12.0  cycles/limb
-
-C MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
-
-C The odd case is basically the same as mpn_modexact_1_odd, just with an
-C extra store, and it runs at the same 10 cycles which is the dependent
-C chain.
-C
-C The shifts for the even case aren't on the dependent chain so in principle
-C it could run the same too, but nothing running at 10 has been found.
-C Perhaps there's too many uops (an extra 4 over the odd case).
-
-defframe(PARAM_SHIFT,  24)
-defframe(PARAM_INVERSE,20)
-defframe(PARAM_DIVISOR,16)
-defframe(PARAM_SIZE,   12)
-defframe(PARAM_SRC,     8)
-defframe(PARAM_DST,     4)
-
-defframe(SAVE_EBX,     -4)
-defframe(SAVE_ESI,     -8)
-defframe(SAVE_EDI,    -12)
-defframe(SAVE_EBP,    -16)
-deflit(STACK_SPACE, 16)
-
-dnl  re-use parameter space
-define(VAR_INVERSE,`PARAM_SRC')
-
-	TEXT
-
-C mp_limb_t
-C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
-C		    mp_limb_t inverse, int shift)
-
-	ALIGN(16)
-PROLOGUE(mpn_pi1_bdiv_q_1)
-deflit(`FRAME',0)
-
-	subl	$STACK_SPACE, %esp	FRAME_subl_esp(STACK_SPACE)
-
-	movl	%esi, SAVE_ESI
-	movl	PARAM_SRC, %esi
-
-	movl	%ebx, SAVE_EBX
-	movl	PARAM_SIZE, %ebx
-
-	movl	%ebp, SAVE_EBP
-	movl	PARAM_INVERSE, %ebp
-
-	movl	PARAM_SHIFT, %ecx	C trailing twos
-
-L(common):
-	movl	%edi, SAVE_EDI
-	movl	PARAM_DST, %edi
-
-	leal	(%esi,%ebx,4), %esi	C src end
-
-	leal	(%edi,%ebx,4), %edi	C dst end
-	negl	%ebx			C -size
-
-	movl	(%esi,%ebx,4), %eax	C src[0]
-
-	orl	%ecx, %ecx
-	jz	L(odd_entry)
-
-	movl	%edi, PARAM_DST
-	movl	%ebp, VAR_INVERSE
-
-L(even):
-	C eax	src[0]
-	C ebx	counter, limbs, negative
-	C ecx	shift
-	C edx
-	C esi
-	C edi
-	C ebp
-
-	xorl	%ebp, %ebp		C initial carry bit
-	xorl	%edx, %edx		C initial carry limb (for size==1)
-
-	incl	%ebx
-	jz	L(even_one)
-
-	movl	(%esi,%ebx,4), %edi	C src[1]
-
-	shrdl(	%cl, %edi, %eax)
-
-	jmp	L(even_entry)
-
-
-L(even_top):
-	C eax	scratch
-	C ebx	counter, limbs, negative
-	C ecx	shift
-	C edx	scratch
-	C esi	&src[size]
-	C edi	&dst[size] and scratch
-	C ebp	carry bit
-
-	movl	(%esi,%ebx,4), %edi
-
-	mull	PARAM_DIVISOR
-
-	movl	-4(%esi,%ebx,4), %eax
-	shrdl(	%cl, %edi, %eax)
-
-	subl	%ebp, %eax
-
-	sbbl	%ebp, %ebp
-	subl	%edx, %eax
-
-	sbbl	$0, %ebp
-
-L(even_entry):
-	imull	VAR_INVERSE, %eax
-
-	movl	PARAM_DST, %edi
-	negl	%ebp
-
-	movl	%eax, -4(%edi,%ebx,4)
-	incl	%ebx
-	jnz	L(even_top)
-
-	mull	PARAM_DIVISOR
-
-	movl	-4(%esi), %eax
-
-L(even_one):
-	shrl	%cl, %eax
-	movl	SAVE_ESI, %esi
-
-	subl	%ebp, %eax
-	movl	SAVE_EBP, %ebp
-
-	subl	%edx, %eax
-	movl	SAVE_EBX, %ebx
-
-	imull	VAR_INVERSE, %eax
-
-	movl	%eax, -4(%edi)
-	movl	SAVE_EDI, %edi
-	addl	$STACK_SPACE, %esp
-
-	ret
-
-C The dependent chain here is
-C
-C	subl	%edx, %eax       1
-C	imull	%ebp, %eax       4
-C	mull	PARAM_DIVISOR    5
-C			       ----
-C	total			10
-C
-C and this is the measured speed.  No special scheduling is necessary, out
-C of order execution hides the load latency.
-
-L(odd_top):
-	C eax	scratch (src limb)
-	C ebx	counter, limbs, negative
-	C ecx	carry bit
-	C edx	carry limb, high of last product
-	C esi	&src[size]
-	C edi	&dst[size]
-	C ebp	inverse
-
-	mull	PARAM_DIVISOR
-
-	movl	(%esi,%ebx,4), %eax
-	subl	%ecx, %eax
-
-	sbbl	%ecx, %ecx
-	subl	%edx, %eax
-
-	sbbl	$0, %ecx
-
-L(odd_entry):
-	imull	%ebp, %eax
-
-	movl	%eax, (%edi,%ebx,4)
-	negl	%ecx
-
-	incl	%ebx
-	jnz	L(odd_top)
-
-
-	movl	SAVE_ESI, %esi
-
-	movl	SAVE_EDI, %edi
-
-	movl	SAVE_EBP, %ebp
-
-	movl	SAVE_EBX, %ebx
-	addl	$STACK_SPACE, %esp
-
-	ret
-
-EPILOGUE()
-
-C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C                           mp_limb_t divisor);
-C
-
-	ALIGN(16)
-PROLOGUE(mpn_bdiv_q_1)
-deflit(`FRAME',0)
-
-	movl	PARAM_DIVISOR, %eax
-	subl	$STACK_SPACE, %esp	FRAME_subl_esp(STACK_SPACE)
-
-	movl	%esi, SAVE_ESI
-	movl	PARAM_SRC, %esi
-
-	movl	%ebx, SAVE_EBX
-	movl	PARAM_SIZE, %ebx
-
-	bsfl	%eax, %ecx		C trailing twos
-
-	movl	%ebp, SAVE_EBP
-
-	shrl	%cl, %eax		C d without twos
-
-	movl	%eax, %edx
-	shrl	%eax			C d/2 without twos
-
-	movl	%edx, PARAM_DIVISOR
-	andl	$127, %eax
-
-ifdef(`PIC',`
-	LEA(	binvert_limb_table, %ebp)
-	movzbl	(%eax,%ebp), %ebp		C inv 8 bits
-',`
-	movzbl	binvert_limb_table(%eax), %ebp	C inv 8 bits
-')
-
-	leal	(%ebp,%ebp), %eax	C 2*inv
-
-	imull	%ebp, %ebp		C inv*inv
-	imull	%edx, %ebp	C inv*inv*d
-
-	subl	%ebp, %eax		C inv = 2*inv - inv*inv*d
-	leal	(%eax,%eax), %ebp	C 2*inv
-
-	imull	%eax, %eax		C inv*inv
-	imull	%edx, %eax	C inv*inv*d
-
-	subl	%eax, %ebp		C inv = 2*inv - inv*inv*d
-
-	jmp	L(common)
-
-EPILOGUE()
diff --git a/gmp/mpn/x86/p6/copyd.asm b/gmp/mpn/x86/p6/copyd.asm
index 1be7636835..2946f51e7a 100644
--- a/gmp/mpn/x86/p6/copyd.asm
+++ b/gmp/mpn/x86/p6/copyd.asm
@@ -1,32 +1,21 @@
 dnl  Intel P6 mpn_copyd -- copy limb vector backwards.
 
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/p6/dive_1.asm b/gmp/mpn/x86/p6/dive_1.asm
index aa7ba880c9..e8efc28eac 100644
--- a/gmp/mpn/x86/p6/dive_1.asm
+++ b/gmp/mpn/x86/p6/dive_1.asm
@@ -1,32 +1,21 @@
 dnl  Intel P6 mpn_modexact_1_odd -- exact division style remainder.
 
 dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -117,7 +106,7 @@ ifdef(`PIC',`
 
 	subl	%eax, %ebp		C inv = 2*inv - inv*inv*d
 
-	ASSERT(e,`	C d*inv == 1 mod 2^GMP_LIMB_BITS
+	ASSERT(e,`	C d*inv == 1 mod 2^BITS_PER_MP_LIMB
 	movl	PARAM_DIVISOR, %eax
 	imull	%ebp, %eax
 	cmpl	$1, %eax')
@@ -138,7 +127,7 @@ C	subl	%edx, %eax       1
 C	imull	%ebp, %eax       4
 C	mull	PARAM_DIVISOR    5
 C			       ----
-C	total			10
+C       total		        10
 C
 C and this is the measured speed.  No special scheduling is necessary, out
 C of order execution hides the load latency.
diff --git a/gmp/mpn/x86/p6/gcd_1.asm b/gmp/mpn/x86/p6/gcd_1.asm
deleted file mode 100644
index f6518f6e19..0000000000
--- a/gmp/mpn/x86/p6/gcd_1.asm
+++ /dev/null
@@ -1,156 +0,0 @@
-dnl  x86 mpn_gcd_1 optimised for processors with fast BSF.
-
-dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked by Torbjorn Granlund.
-
-dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/bit (approx)
-C AMD K7	 7.80
-C AMD K8,K9	 7.79
-C AMD K10	 4.08
-C AMD bd1	 ?
-C AMD bobcat	 7.82
-C Intel P4-2	14.9
-C Intel P4-3/4	14.0
-C Intel P6/13	 5.09
-C Intel core2	 4.22
-C Intel NHM	 5.00
-C Intel SBR	 5.00
-C Intel atom	17.1
-C VIA nano	?
-C Numbers measured with: speed -CD -s16-32 -t16 mpn_gcd_1
-
-C Threshold of when to call bmod when U is one limb.  Should be about
-C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
-define(`BMOD_THRES_LOG2', 6)
-
-
-define(`up',    `%edi')
-define(`n',     `%esi')
-define(`v0',    `%edx')
-
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_gcd_1)
-	push	%edi
-	push	%esi
-
-	mov	12(%esp), up
-	mov	16(%esp), n
-	mov	20(%esp), v0
-
-	mov	(up), %eax	C U low limb
-	or	v0, %eax
-	bsf	%eax, %eax	C min(ctz(u0),ctz(v0))
-
-	bsf	v0, %ecx
-	shr	%cl, v0
-
-	push	%eax		C preserve common twos over call
-	push	v0		C preserve v0 argument over call
-
-	cmp	$1, n
-	jnz	L(reduce_nby1)
-
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
-	mov	(up), %ecx
-	mov	%ecx, %eax
-	shr	$BMOD_THRES_LOG2, %ecx
-	cmp	%ecx, v0
-	ja	L(reduced)
-	jmp	L(bmod)
-
-L(reduce_nby1):
-	cmp	$BMOD_1_TO_MOD_1_THRESHOLD, n
-	jl	L(bmod)
-ifdef(`PIC_WITH_EBX',`
-	push	%ebx
-	call	L(movl_eip_to_ebx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-')
-	push	v0		C param 3
-	push	n		C param 2
-	push	up		C param 1
-	CALL(	mpn_mod_1)
-	jmp	L(called)
-
-L(bmod):
-ifdef(`PIC_WITH_EBX',`dnl
-	push	%ebx
-	call	L(movl_eip_to_ebx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-')
-	push	v0		C param 3
-	push	n		C param 2
-	push	up		C param 1
-	CALL(	mpn_modexact_1_odd)
-
-L(called):
-	add	$12, %esp	C deallocate params
-ifdef(`PIC_WITH_EBX',`dnl
-	pop	%ebx
-')
-L(reduced):
-	pop	%edx
-
-	bsf	%eax, %ecx
-C	test	%eax, %eax	C FIXME: does this lower latency?
-	jnz	L(mid)
-	jmp	L(end)
-
-	ALIGN(16)		C               K10   BD    C2    NHM   SBR
-L(top):	cmovc(	%esi, %eax)	C if x-y < 0    0,3   0,3   0,6   0,5   0,5
-	cmovc(	%edi, %edx)	C use x,y-x     0,3   0,3   2,8   1,7   1,7
-L(mid):	shr	%cl, %eax	C               1,7   1,6   2,8   2,8   2,8
-	mov	%edx, %esi	C               1     1     4     3     3
-	sub	%eax, %esi	C               2     2     5     4     4
-	bsf	%esi, %ecx	C               3     3     6     5     5
-	mov	%eax, %edi	C               2     2     3     3     4
-	sub	%edx, %eax	C               2     2     4     3     4
-	jnz	L(top)		C
-
-L(end):	pop	%ecx
-	mov	%edx, %eax
-	shl	%cl, %eax
-
-	pop	%esi
-	pop	%edi
-	ret
-
-ifdef(`PIC_WITH_EBX',`dnl
-L(movl_eip_to_ebx):
-	mov	(%esp), %ebx
-	ret
-')
-EPILOGUE()
diff --git a/gmp/mpn/x86/p6/gmp-mparam.h b/gmp/mpn/x86/p6/gmp-mparam.h
index 96c96fd558..a85c500275 100644
--- a/gmp/mpn/x86/p6/gmp-mparam.h
+++ b/gmp/mpn/x86/p6/gmp-mparam.h
@@ -1,194 +1,70 @@
 /* Intel P6 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2003, 2008-2010, 2012 Free Software
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003 Free Software
 Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
 
-or both in parallel, as here.
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-
-/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be more than the
-   value in mpn/x86/p6/gmp-mparam.h.  The latter is used as a hard limit in
-   mpn/x86/p6/sqr_basecase.asm.  */
-
-
-/* 1867 MHz P6 model 13 */
-
-#define MOD_1_NORM_THRESHOLD                 4
-#define MOD_1_UNNORM_THRESHOLD               4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           21
-
-#define MUL_TOOM22_THRESHOLD                20
-#define MUL_TOOM33_THRESHOLD                74
-#define MUL_TOOM44_THRESHOLD               181
-#define MUL_TOOM6H_THRESHOLD               252
-#define MUL_TOOM8H_THRESHOLD               363
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     115
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      80
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 30
-#define SQR_TOOM3_THRESHOLD                101
-#define SQR_TOOM4_THRESHOLD                154
-#define SQR_TOOM6_THRESHOLD                222
-#define SQR_TOOM8_THRESHOLD                527
-
-#define MULMID_TOOM42_THRESHOLD             58
-
-#define MULMOD_BNM1_THRESHOLD               13
-#define SQRMOD_BNM1_THRESHOLD               17
-
-#define POWM_SEC_TABLE  4,23,258,768,2388
-
-#define MUL_FFT_MODF_THRESHOLD             565  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    565, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     25, 7}, {     13, 6}, {     28, 7}, {     15, 6}, \
-    {     31, 7}, {     17, 6}, {     35, 7}, {     27, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 5}, \
-    {    383, 4}, {    991, 5}, {    511, 6}, {    267, 7}, \
-    {    157, 8}, {     91, 9}, {     47, 8}, {    111, 9}, \
-    {     63, 8}, {    127, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
-    {     79, 9}, {    159,10}, {     95,11}, {     63,10}, \
-    {    143, 9}, {    287,10}, {    159,11}, {     95,10}, \
-    {    191,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543,10}, {    287,11}, \
-    {    159,10}, {    335, 9}, {    671,11}, {    191,10}, \
-    {    383, 9}, {    767,10}, {    399, 9}, {    799,10}, \
-    {    415,11}, {    223,12}, {    127,11}, {    255,10}, \
-    {    543, 9}, {   1087,11}, {    287,10}, {    607,11}, \
-    {    319,10}, {    671,12}, {    191,11}, {    383,10}, \
-    {    799,11}, {    415,10}, {    831,13}, {    127,12}, \
-    {    255,11}, {    543,10}, {   1087,11}, {    607,10}, \
-    {   1215,12}, {    319,11}, {    671,10}, {   1343,11}, \
-    {    735,10}, {   1471,12}, {    383,11}, {    799,10}, \
-    {   1599,11}, {    863,12}, {    447,11}, {    959,13}, \
-    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
-    {   1215,12}, {    639,11}, {   1343,12}, {    703,11}, \
-    {   1471,13}, {    383,12}, {    831,11}, {   1727,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1215,13}, \
-    {    639,12}, {   1471,11}, {   2943,13}, {    767,12}, \
-    {   1727,13}, {    895,12}, {   1919,14}, {    511,13}, \
-    {   1023,12}, {   2111,13}, {   1151,12}, {   2431,13}, \
-    {   1407,12}, {   2815,14}, {    767,13}, {   1663,12}, \
-    {   3455,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 132
-#define MUL_FFT_THRESHOLD                 6784
-
-#define SQR_FFT_MODF_THRESHOLD             472  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    472, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
-    {     31, 7}, {     17, 6}, {     35, 7}, {     27, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     49, 8}, {     27, 9}, {     15, 8}, \
-    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
-    {     31, 8}, {     63, 4}, {   1023, 8}, {     67, 9}, \
-    {     39, 5}, {    639, 4}, {   1471, 6}, {    383, 7}, \
-    {    209, 8}, {    119, 9}, {     63, 7}, {    255, 8}, \
-    {    139, 9}, {     71, 8}, {    143, 9}, {     79,10}, \
-    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
-    {    135,10}, {     79, 9}, {    159, 8}, {    319, 9}, \
-    {    167,10}, {     95,11}, {     63,10}, {    143, 9}, \
-    {    287,10}, {    159,11}, {     95,10}, {    191,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    543, 8}, \
-    {   1087,10}, {    287, 9}, {    575,11}, {    159,10}, \
-    {    319, 9}, {    639,10}, {    335, 9}, {    671,10}, \
-    {    351, 9}, {    703,11}, {    191,10}, {    383, 9}, \
-    {    767,10}, {    399, 9}, {    799,10}, {    415, 9}, \
-    {    831,11}, {    223,12}, {    127,11}, {    255,10}, \
-    {    543, 9}, {   1087,11}, {    287,10}, {    607, 9}, \
-    {   1215,11}, {    319,10}, {    671, 9}, {   1343,11}, \
-    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
-    {    799,11}, {    415,10}, {    831,13}, {    127,12}, \
-    {    255,11}, {    543,10}, {   1087,11}, {    607,12}, \
-    {    319,11}, {    671,10}, {   1343,11}, {    735,12}, \
-    {    383,11}, {    799,10}, {   1599,11}, {    863,12}, \
-    {    447,11}, {    959,13}, {    255,12}, {    511,11}, \
-    {   1087,12}, {    575,11}, {   1215,12}, {    639,11}, \
-    {   1343,12}, {    703,11}, {   1471,13}, {    383,12}, \
-    {    767,11}, {   1599,12}, {    831,11}, {   1727,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1215,13}, \
-    {    639,12}, {   1471,13}, {    767,12}, {   1727,13}, \
-    {    895,12}, {   1919,14}, {    511,13}, {   1023,12}, \
-    {   2111,13}, {   1151,12}, {   2431,13}, {   1407,14}, \
-    {    767,13}, {   1663,12}, {   3455,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 146
-#define SQR_FFT_THRESHOLD                 5760
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  33
-#define MULLO_MUL_N_THRESHOLD            13463
-
-#define DC_DIV_QR_THRESHOLD                 20
-#define DC_DIVAPPR_Q_THRESHOLD              56
-#define DC_BDIV_QR_THRESHOLD                60
-#define DC_BDIV_Q_THRESHOLD                134
-
-#define INV_MULMOD_BNM1_THRESHOLD           38
-#define INV_NEWTON_THRESHOLD                66
-#define INV_APPR_THRESHOLD                  63
-
-#define BINV_NEWTON_THRESHOLD              250
-#define REDC_1_TO_REDC_N_THRESHOLD          63
-
-#define MU_DIV_QR_THRESHOLD               1164
-#define MU_DIVAPPR_Q_THRESHOLD             979
-#define MUPI_DIV_QR_THRESHOLD               38
-#define MU_BDIV_QR_THRESHOLD              1442
-#define MU_BDIV_Q_THRESHOLD               1470
-
-#define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                      64
-#define HGCD_APPR_THRESHOLD                105
-#define HGCD_REDUCE_THRESHOLD             3524
-#define GCD_DC_THRESHOLD                   386
-#define GCDEXT_DC_THRESHOLD                309
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                13
-#define GET_STR_PRECOMPUTE_THRESHOLD        26
-#define SET_STR_DC_THRESHOLD               587
-#define SET_STR_PRECOMPUTE_THRESHOLD      1104
+
+/* NOTE: In a fat binary build SQR_KARATSUBA_THRESHOLD here cannot be
+   smaller than the value in mpn/x86/p6/mmx/gmp-mparam.h.  The former is
+   used as a hard limit in mpn/x86/p6/sqr_basecase.asm, and that file will
+   be run by the p6/mmx cpus (pentium2, pentium3).  */
+
+
+/* 200MHz Pentium Pro */
+
+/* Generated by tuneup.c, 2003-02-12, gcc 2.95 */
+
+#define MUL_KARATSUBA_THRESHOLD          23
+#define MUL_TOOM3_THRESHOLD             140
+
+#define SQR_BASECASE_THRESHOLD            0  /* always */
+#define SQR_KARATSUBA_THRESHOLD          52
+#define SQR_TOOM3_THRESHOLD             189
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                116
+#define POWM_THRESHOLD                  131
+
+#define GCD_ACCEL_THRESHOLD               3
+#define JACOBI_BASE_METHOD                1
+
+#define USE_PREINV_DIVREM_1               0
+#define USE_PREINV_MOD_1                  1  /* native */
+#define DIVREM_2_THRESHOLD                0  /* always */
+#define DIVEXACT_1_THRESHOLD              0  /* always */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always */
+
+#define GET_STR_DC_THRESHOLD             18
+#define GET_STR_PRECOMPUTE_THRESHOLD     23
+#define SET_STR_THRESHOLD              6093
+
+#define MUL_FFT_TABLE  { 464, 928, 1920, 3584, 10240, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD          360
+#define MUL_FFT_THRESHOLD              2816
+
+#define SQR_FFT_TABLE  { 528, 1184, 1920, 4608, 14336, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD          440
+#define SQR_FFT_THRESHOLD              2816
+
+#define MUL_FFT_TABLE2 {{1,4}, {305,5}, {321,4}, {337,5}, {353,4}, {369,5}, {801,6}, {833,5}, {865,6}, {897,5}, {929,6}, {961,5}, {993,6}, {1345,7}, {1409,6}, {1537,7}, {1665,6}, {1729,7}, {2689,8}, {2817,7}, {3201,8}, {3329,7}, {3457,8}, {3841,7}, {3969,8}, {4097,7}, {4225,8}, {4353,7}, {4481,8}, {5889,7}, {6017,8}, {6401,7}, {6529,8}, {6913,9}, {7681,8}, {8961,9}, {9729,8}, {9985,9}, {10241,8}, {11009,9}, {11777,8}, {12289,9}, {13825,10}, {15361,9}, {15873,8}, {16129,9}, {19969,10}, {23553,9}, {24065,8}, {24321,9}, {26113,10}, {27649,11}, {28673,10}, {31745,9}, {34305,10}, {34817,9}, {35329,10}, {39937,9}, {40449,10}, {48129,11}, {55297,10}, {56321,11}, {63489,10}, {80897,11}, {96257,10}, {97281,12}, {126977,11}, {129025,10}, {130049,9}, {130561,10}, {131073,11}, {133121,10}, {134145,11}, {137217,10}, {138241,11}, {161793,10}, {162817,11}, {194561,12}, {258049,11}, {260097,10}, {261121,9}, {261633,10}, {266241,11}, {268289,10}, {277505,11}, {292865,10}, {293889,9}, {294401,10}, {310273,9}, {310785,11}, {325633,10}, {326657,12}, {389121,13}, {516097,12}, {520193,11}, {522241,10}, {523265,11}, {555009,10}, {556033,11}, {587777,10}, {588801,11}, {620545,10}, {621569,9}, {622081,11}, {622593,12}, {651265,11}, {653313,10}, {654337,11}, {655361,10}, {657409,11}, {663553,10}, {664577,11}, {686081,10}, {687105,11}, {718849,10}, {719873,11}, {720897,10}, {722945,11}, {737281,10}, {740353,11}, {745473,10}, {749569,11}, {751617,10}, {752641,9}, {753153,11}, {753665,12}, {770049,11}, {774145,12}, {782337,11}, {786433,10}, {787457,11}, {817153,10}, {818177,11}, {849921,10}, {850945,11}, {854017,10}, {855041,11}, {862209,10}, {863233,11}, {866305,10}, {867329,11}, {876545,10}, {877569,11}, {882689,10}, {883713,9}, {884225,11}, {884737,13}, {1040385,12}, {1044481,11}, {1112065,10}, {1113089,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,10}, {1375233,12}, {1437697,11}, {1505281,10}, {1506305,12}, {1515521,13}, {1523713,12}, {1527809,13}, {1540097,12}, {1544193,13}, {1548289,12}, {1568769,11}, {1636353,10}, {1637377,12}, {1699841,11}, {MP_SIZE_T_MAX,0}}
+
+#define SQR_FFT_TABLE2 {{1,4}, {273,5}, {289,4}, {305,5}, {673,6}, {705,5}, {737,6}, {769,5}, {801,6}, {1345,7}, {1409,6}, {1537,7}, {1665,6}, {1729,7}, {2689,8}, {2817,7}, {3201,8}, {3329,7}, {3713,8}, {3841,7}, {4225,8}, {4865,7}, {4993,9}, {5121,8}, {6657,9}, {7681,8}, {8961,9}, {11777,8}, {12033,10}, {12289,8}, {12545,9}, {13825,10}, {14337,9}, {14849,10}, {15361,9}, {19969,10}, {23553,9}, {24577,11}, {30721,10}, {31745,9}, {32257,10}, {37889,9}, {38401,10}, {39937,9}, {40449,10}, {48129,11}, {63489,10}, {80897,11}, {96257,12}, {126977,11}, {129025,10}, {130049,11}, {194561,12}, {208897,11}, {210945,12}, {258049,11}, {260097,9}, {269313,10}, {277505,9}, {278017,11}, {278529,10}, {280577,11}, {282625,10}, {283649,11}, {284673,10}, {285697,11}, {286721,10}, {289793,11}, {290817,10}, {293889,9}, {294401,10}, {310273,9}, {310785,8}, {311041,10}, {311297,11}, {315393,10}, {321537,12}, {323585,11}, {325633,10}, {326657,12}, {331777,10}, {332801,12}, {389121,10}, {392193,9}, {392705,10}, {413697,9}, {414209,10}, {418817,9}, {419841,10}, {424961,9}, {425473,10}, {441345,9}, {441857,10}, {449537,9}, {450561,10}, {452609,9}, {453121,10}, {454657,9}, {455169,10}, {490497,12}, {491521,13}, {516097,12}, {520193,10}, {523265,11}, {555009,10}, {556033,11}, {587777,10}, {588801,11}, {620545,10}, {621569,9}, {622081,11}, {624641,12}, {626689,11}, {653313,10}, {654337,11}, {686081,10}, {687105,11}, {718849,10}, {720897,11}, {722945,10}, {724993,11}, {729089,10}, {734209,11}, {737281,10}, {744449,11}, {745473,10}, {747521,11}, {749569,10}, {752641,11}, {784385,10}, {785409,11}, {808961,10}, {809985,11}, {817153,10}, {818177,11}, {849921,10}, {850945,11}, {851969,10}, {852993,11}, {858113,10}, {859137,11}, {860161,10}, {861185,11}, {882689,10}, {883713,11}, {980993,13}, {1040385,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,10}, {1375233,12}, {1437697,11}, {1505281,10}, {1506305,12}, {1568769,11}, {1636353,10}, {1637377,12}, {MP_SIZE_T_MAX,0}}
diff --git a/gmp/mpn/x86/p6/lshsub_n.asm b/gmp/mpn/x86/p6/lshsub_n.asm
index 7ada213644..a3086bdbc2 100644
--- a/gmp/mpn/x86/p6/lshsub_n.asm
+++ b/gmp/mpn/x86/p6/lshsub_n.asm
@@ -1,38 +1,27 @@
 dnl  Intel P6 mpn_lshsub_n -- mpn papillion support.
 
 dnl  Copyright 2006 Free Software Foundation, Inc.
-
+dnl
 dnl  This file is part of the GNU MP Library.
 dnl
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
 dnl
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C P6/13: 3.35 cycles/limb	(separate mpn_sub_n + mpn_lshift needs 4.12)
 
-C (1) The loop is not scheduled in any way, and scheduling attempts have not
+C (1) The loop is is not scheduled in any way, and scheduling attempts have not
 C     improved speed on P6/13.  Presumably, the K7 will want scheduling, if it
 C     at all wants to use MMX.
 C (2) We could save a register by not alternatingly using eax and edx in the
diff --git a/gmp/mpn/x86/p6/mmx/divrem_1.asm b/gmp/mpn/x86/p6/mmx/divrem_1.asm
index 5300616c14..8891f3a843 100644
--- a/gmp/mpn/x86/p6/mmx/divrem_1.asm
+++ b/gmp/mpn/x86/p6/mmx/divrem_1.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium-II mpn_divrem_1 -- mpn by limb division.
 
-dnl  Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/p6/mmx/gmp-mparam.h b/gmp/mpn/x86/p6/mmx/gmp-mparam.h
index 35c3aadfc1..47602f562e 100644
--- a/gmp/mpn/x86/p6/mmx/gmp-mparam.h
+++ b/gmp/mpn/x86/p6/mmx/gmp-mparam.h
@@ -1,198 +1,79 @@
 /* Intel P6/mmx gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2005, 2009, 2010 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2009
+Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
 
-or both in parallel, as here.
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-
-/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be more than the
-   value in mpn/x86/p6/gmp-mparam.h.  The latter is used as a hard limit in
-   mpn/x86/p6/sqr_basecase.asm.  */
-
-
-/* 800 MHz P6 model 8 */
-
-#define MOD_1_NORM_THRESHOLD                 4
-#define MOD_1_UNNORM_THRESHOLD               4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          9
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          7
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         8
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        10
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     17
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           49
-
-#define MUL_TOOM22_THRESHOLD                22
-#define MUL_TOOM33_THRESHOLD                73
-#define MUL_TOOM44_THRESHOLD               193
-#define MUL_TOOM6H_THRESHOLD               254
-#define MUL_TOOM8H_THRESHOLD               381
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     122
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      80
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 30
-#define SQR_TOOM3_THRESHOLD                 81
-#define SQR_TOOM4_THRESHOLD                142
-#define SQR_TOOM6_THRESHOLD                258
-#define SQR_TOOM8_THRESHOLD                399
-
-#define MULMOD_BNM1_THRESHOLD               15
-#define SQRMOD_BNM1_THRESHOLD               18
-
-#define MUL_FFT_MODF_THRESHOLD             476  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    476, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     21, 7}, {     11, 6}, {     25, 7}, {     13, 6}, \
-    {     27, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
-    {     11, 7}, {     27, 8}, {     15, 7}, {     35, 8}, \
-    {     19, 7}, {     41, 8}, {     23, 7}, {     47, 8}, \
-    {     27, 9}, {     15, 8}, {     31, 7}, {     63, 8}, \
-    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
-    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
-    {     47, 8}, {     95, 9}, {     55,10}, {     31, 9}, \
-    {     63, 8}, {    127, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
-    {     79, 9}, {    167,10}, {     95, 9}, {    199,10}, \
-    {    111,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511,10}, {    143, 9}, {    287, 8}, {    575,10}, \
-    {    159,11}, {     95,10}, {    191, 9}, {    383,10}, \
-    {    207,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543, 8}, {   1087,10}, \
-    {    287, 9}, {    575,11}, {    159,10}, {    319, 9}, \
-    {    639,10}, {    351, 9}, {    703,11}, {    191,10}, \
-    {    383, 9}, {    767,10}, {    415, 9}, {    831,11}, \
-    {    223,10}, {    447,12}, {    127,11}, {    255,10}, \
-    {    543, 9}, {   1087,11}, {    287,10}, {    607, 9}, \
-    {   1215,11}, {    319,10}, {    671,11}, {    351,10}, \
-    {    703,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,10}, {    831,11}, {    447,13}, {    127,12}, \
-    {    255,11}, {    543,10}, {   1087,11}, {    607,10}, \
-    {   1215,12}, {    319,11}, {    671,10}, {   1343,11}, \
-    {    703,10}, {   1407,11}, {    735,12}, {    383,11}, \
-    {    831,12}, {    447,11}, {    959,10}, {   1919,13}, \
-    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
-    {   1215,10}, {   2431,12}, {    639,11}, {   1343,12}, \
-    {    703,11}, {   1471,13}, {    383,12}, {    767,11}, \
-    {   1535,12}, {    831,11}, {   1727,12}, {    959,11}, \
-    {   1919,14}, {    255,13}, {    511,12}, {   1215,11}, \
-    {   2431,13}, {    639,12}, {   1471,11}, {   2943,13}, \
-    {    767,12}, {   1727,13}, {    895,12}, {   1919,11}, \
-    {   3839,14}, {    511,13}, {   1023,12}, {   2111,13}, \
-    {   1151,12}, {   2431,13}, {   1279,12}, {   2559,13}, \
-    {   1407,12}, {   2943,14}, {    767,13}, {   1663,12}, \
-    {   3327,13}, {   1919,12}, {   3839,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 160
-#define MUL_FFT_THRESHOLD                 7040
-
-#define SQR_FFT_MODF_THRESHOLD             376  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    376, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     21, 7}, {     11, 6}, {     24, 7}, {     13, 6}, \
-    {     27, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
-    {     11, 7}, {     27, 8}, {     15, 7}, {     33, 8}, \
-    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
-    {     27, 9}, {     15, 8}, {     39, 9}, {     23, 8}, \
-    {     51,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
-    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
-    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    127, 8}, \
-    {    255, 9}, {    135,10}, {     79, 9}, {    167,10}, \
-    {     95, 9}, {    191, 8}, {    383,10}, {    111,11}, \
-    {     63,10}, {    127, 9}, {    255, 8}, {    511, 9}, \
-    {    271,10}, {    143, 9}, {    287, 8}, {    575, 9}, \
-    {    303, 8}, {    607,10}, {    159, 9}, {    319,11}, \
-    {     95,10}, {    191, 9}, {    383,10}, {    207,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
-    {    271, 9}, {    543,10}, {    287, 9}, {    575,10}, \
-    {    303,11}, {    159,10}, {    319, 9}, {    639,10}, \
-    {    351, 9}, {    703,11}, {    191,10}, {    383, 9}, \
-    {    767,10}, {    415, 9}, {    831,11}, {    223,10}, \
-    {    479,12}, {    127,11}, {    255,10}, {    543, 9}, \
-    {   1087,11}, {    287,10}, {    607, 9}, {   1215,11}, \
-    {    319,10}, {    671,11}, {    351,10}, {    703,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
-    {    831,11}, {    479,13}, {    127,12}, {    255,11}, \
-    {    543,10}, {   1087,11}, {    607,10}, {   1215,12}, \
-    {    319,11}, {    671,10}, {   1343,11}, {    703,10}, \
-    {   1407,11}, {    735,12}, {    383,11}, {    831,12}, \
-    {    447,11}, {    959,10}, {   1919,13}, {    255,12}, \
-    {    511,11}, {   1087,12}, {    575,11}, {   1215,10}, \
-    {   2431,12}, {    639,11}, {   1343,12}, {    703,11}, \
-    {   1407,13}, {    383,12}, {    831,11}, {   1727,12}, \
-    {    959,11}, {   1919,14}, {    255,13}, {    511,12}, \
-    {   1215,11}, {   2431,13}, {    639,12}, {   1471,11}, \
-    {   2943,13}, {    767,12}, {   1727,13}, {    895,12}, \
-    {   1919,11}, {   3839,14}, {    511,13}, {   1023,12}, \
-    {   2111,13}, {   1151,12}, {   2431,13}, {   1407,12}, \
-    {   2943,14}, {    767,13}, {   1535,12}, {   3071,13}, \
-    {   1663,12}, {   3455,13}, {   1919,12}, {   3839,15}, \
-    {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 161
-#define SQR_FFT_THRESHOLD                 3712
-
-#define MULLO_BASECASE_THRESHOLD             8
-#define MULLO_DC_THRESHOLD                  60
-#define MULLO_MUL_N_THRESHOLD            13765
-
-#define DC_DIV_QR_THRESHOLD                 83
-#define DC_DIVAPPR_Q_THRESHOLD             246
-#define DC_BDIV_QR_THRESHOLD                76
-#define DC_BDIV_Q_THRESHOLD                175
-
-#define INV_MULMOD_BNM1_THRESHOLD           42
-#define INV_NEWTON_THRESHOLD               268
-#define INV_APPR_THRESHOLD                 250
-
-#define BINV_NEWTON_THRESHOLD              276
-#define REDC_1_TO_REDC_N_THRESHOLD          74
-
-#define MU_DIV_QR_THRESHOLD               1442
-#define MU_DIVAPPR_Q_THRESHOLD            1442
-#define MUPI_DIV_QR_THRESHOLD              132
-#define MU_BDIV_QR_THRESHOLD              1142
-#define MU_BDIV_Q_THRESHOLD               1334
-
-#define MATRIX22_STRASSEN_THRESHOLD         18
-#define HGCD_THRESHOLD                     121
-#define GCD_DC_THRESHOLD                   478
-#define GCDEXT_DC_THRESHOLD                361
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                13
-#define GET_STR_PRECOMPUTE_THRESHOLD        26
-#define SET_STR_DC_THRESHOLD               272
-#define SET_STR_PRECOMPUTE_THRESHOLD      1074
+
+/* NOTE: In a fat binary build SQR_KARATSUBA_THRESHOLD here cannot be more
+   than the value in mpn/x86/p6/gmp-mparam.h.  The latter is used as a hard
+   limit in mpn/x86/p6/sqr_basecase.asm.  */
+
+
+/* 1867 MHz P6 model 13 */
+
+/* Generated by tuneup.c, 2009-03-02, gcc 4.3 */
+
+#define MUL_KARATSUBA_THRESHOLD          20
+#define MUL_TOOM3_THRESHOLD              74
+#define MUL_TOOM44_THRESHOLD            166
+
+#define SQR_BASECASE_THRESHOLD            0  /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD          30
+#define SQR_TOOM3_THRESHOLD             101
+#define SQR_TOOM4_THRESHOLD             154
+
+#define MULLOW_BASECASE_THRESHOLD         7
+#define MULLOW_DC_THRESHOLD              39
+#define MULLOW_MUL_N_THRESHOLD          230
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 21
+#define POWM_THRESHOLD                  154
+
+#define MATRIX22_STRASSEN_THRESHOLD      23
+#define HGCD_THRESHOLD                   72
+#define GCD_DC_THRESHOLD                321
+#define GCDEXT_DC_THRESHOLD             416
+#define JACOBI_BASE_METHOD                1
+
+#define USE_PREINV_DIVREM_1               1  /* native */
+#define USE_PREINV_MOD_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             15
+#define GET_STR_PRECOMPUTE_THRESHOLD     24
+#define SET_STR_DC_THRESHOLD            587
+#define SET_STR_PRECOMPUTE_THRESHOLD   1083
+
+#define MUL_FFT_TABLE  { 400, 928, 1664, 4608, 10240, 57344, 163840, 393216, 0 }
+#define MUL_FFT_MODF_THRESHOLD          496
+#define MUL_FFT_THRESHOLD              7168
+
+#define SQR_FFT_TABLE  { 432, 928, 1664, 3584, 10240, 40960, 98304, 393216, 0 }
+#define SQR_FFT_MODF_THRESHOLD          448
+#define SQR_FFT_THRESHOLD              3840
+
+/* These tables need updating */
+#define MUL_FFT_TABLE2 {{1,4}, {305,5}, {321,4}, {337,5}, {353,4}, {369,5}, {801,6}, {833,5}, {865,6}, {897,5}, {929,6}, {961,5}, {993,6}, {1345,7}, {1409,6}, {1537,7}, {1665,6}, {1729,7}, {2689,8}, {2817,7}, {3201,8}, {3329,7}, {3457,8}, {3841,7}, {3969,8}, {4097,7}, {4225,8}, {4353,7}, {4481,8}, {5889,7}, {6017,8}, {6401,7}, {6529,8}, {6913,9}, {7681,8}, {8961,9}, {9729,8}, {9985,9}, {10241,8}, {11009,9}, {11777,8}, {12289,9}, {13825,10}, {15361,9}, {15873,8}, {16129,9}, {19969,10}, {23553,9}, {24065,8}, {24321,9}, {26113,10}, {27649,11}, {28673,10}, {31745,9}, {34305,10}, {34817,9}, {35329,10}, {39937,9}, {40449,10}, {48129,11}, {55297,10}, {56321,11}, {63489,10}, {80897,11}, {96257,10}, {97281,12}, {126977,11}, {129025,10}, {130049,9}, {130561,10}, {131073,11}, {133121,10}, {134145,11}, {137217,10}, {138241,11}, {161793,10}, {162817,11}, {194561,12}, {258049,11}, {260097,10}, {261121,9}, {261633,10}, {266241,11}, {268289,10}, {277505,11}, {292865,10}, {293889,9}, {294401,10}, {310273,9}, {310785,11}, {325633,10}, {326657,12}, {389121,13}, {516097,12}, {520193,11}, {522241,10}, {523265,11}, {555009,10}, {556033,11}, {587777,10}, {588801,11}, {620545,10}, {621569,9}, {622081,11}, {622593,12}, {651265,11}, {653313,10}, {654337,11}, {655361,10}, {657409,11}, {663553,10}, {664577,11}, {686081,10}, {687105,11}, {718849,10}, {719873,11}, {720897,10}, {722945,11}, {737281,10}, {740353,11}, {745473,10}, {749569,11}, {751617,10}, {752641,9}, {753153,11}, {753665,12}, {770049,11}, {774145,12}, {782337,11}, {786433,10}, {787457,11}, {817153,10}, {818177,11}, {849921,10}, {850945,11}, {854017,10}, {855041,11}, {862209,10}, {863233,11}, {866305,10}, {867329,11}, {876545,10}, {877569,11}, {882689,10}, {883713,9}, {884225,11}, {884737,13}, {1040385,12}, {1044481,11}, {1112065,10}, {1113089,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,10}, {1375233,12}, {1437697,11}, {1505281,10}, {1506305,12}, {1515521,13}, {1523713,12}, {1527809,13}, {1540097,12}, {1544193,13}, {1548289,12}, {1568769,11}, {1636353,10}, {1637377,12}, {1699841,11}, {MP_SIZE_T_MAX,0}}
+
+#define SQR_FFT_TABLE2 {{1,4}, {273,5}, {289,4}, {305,5}, {673,6}, {705,5}, {737,6}, {769,5}, {801,6}, {1345,7}, {1409,6}, {1537,7}, {1665,6}, {1729,7}, {2689,8}, {2817,7}, {3201,8}, {3329,7}, {3713,8}, {3841,7}, {4225,8}, {4865,7}, {4993,9}, {5121,8}, {6657,9}, {7681,8}, {8961,9}, {11777,8}, {12033,10}, {12289,8}, {12545,9}, {13825,10}, {14337,9}, {14849,10}, {15361,9}, {19969,10}, {23553,9}, {24577,11}, {30721,10}, {31745,9}, {32257,10}, {37889,9}, {38401,10}, {39937,9}, {40449,10}, {48129,11}, {63489,10}, {80897,11}, {96257,12}, {126977,11}, {129025,10}, {130049,11}, {194561,12}, {208897,11}, {210945,12}, {258049,11}, {260097,9}, {269313,10}, {277505,9}, {278017,11}, {278529,10}, {280577,11}, {282625,10}, {283649,11}, {284673,10}, {285697,11}, {286721,10}, {289793,11}, {290817,10}, {293889,9}, {294401,10}, {310273,9}, {310785,8}, {311041,10}, {311297,11}, {315393,10}, {321537,12}, {323585,11}, {325633,10}, {326657,12}, {331777,10}, {332801,12}, {389121,10}, {392193,9}, {392705,10}, {413697,9}, {414209,10}, {418817,9}, {419841,10}, {424961,9}, {425473,10}, {441345,9}, {441857,10}, {449537,9}, {450561,10}, {452609,9}, {453121,10}, {454657,9}, {455169,10}, {490497,12}, {491521,13}, {516097,12}, {520193,10}, {523265,11}, {555009,10}, {556033,11}, {587777,10}, {588801,11}, {620545,10}, {621569,9}, {622081,11}, {624641,12}, {626689,11}, {653313,10}, {654337,11}, {686081,10}, {687105,11}, {718849,10}, {720897,11}, {722945,10}, {724993,11}, {729089,10}, {734209,11}, {737281,10}, {744449,11}, {745473,10}, {747521,11}, {749569,10}, {752641,11}, {784385,10}, {785409,11}, {808961,10}, {809985,11}, {817153,10}, {818177,11}, {849921,10}, {850945,11}, {851969,10}, {852993,11}, {858113,10}, {859137,11}, {860161,10}, {861185,11}, {882689,10}, {883713,11}, {980993,13}, {1040385,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,10}, {1375233,12}, {1437697,11}, {1505281,10}, {1506305,12}, {1568769,11}, {1636353,10}, {1637377,12}, {MP_SIZE_T_MAX,0}}
diff --git a/gmp/mpn/x86/p6/mmx/lshift.asm b/gmp/mpn/x86/p6/mmx/lshift.asm
index febd1c0e6c..e325b67d64 100644
--- a/gmp/mpn/x86/p6/mmx/lshift.asm
+++ b/gmp/mpn/x86/p6/mmx/lshift.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium-II mpn_lshift -- mpn left shift.
 
 dnl  Copyright 2001 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  The P55 code runs well on P-II/III, but could stand some minor tweaks
diff --git a/gmp/mpn/x86/p6/mmx/popham.asm b/gmp/mpn/x86/p6/mmx/popham.asm
index fd340e4b45..421daa5308 100644
--- a/gmp/mpn/x86/p6/mmx/popham.asm
+++ b/gmp/mpn/x86/p6/mmx/popham.asm
@@ -2,32 +2,21 @@ dnl  Intel Pentium-II mpn_popcount, mpn_hamdist -- population count and
 dnl  hamming distance.
 
 dnl  Copyright 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/p6/mmx/rshift.asm b/gmp/mpn/x86/p6/mmx/rshift.asm
index 77aa1909fa..b1543cdf52 100644
--- a/gmp/mpn/x86/p6/mmx/rshift.asm
+++ b/gmp/mpn/x86/p6/mmx/rshift.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium-II mpn_rshift -- mpn left shift.
 
 dnl  Copyright 2001 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  The P55 code runs well on P-II/III, but could stand some minor tweaks
diff --git a/gmp/mpn/x86/p6/mod_1.asm b/gmp/mpn/x86/p6/mod_1.asm
new file mode 100644
index 0000000000..b6eacf7e82
--- /dev/null
+++ b/gmp/mpn/x86/p6/mod_1.asm
@@ -0,0 +1,472 @@
+dnl  Intel P6 mpn_mod_1 -- mpn by limb remainder.
+
+dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P6: 21.5 cycles/limb
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                       mp_limb_t carry);
+C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                             mp_limb_t inverse);
+C
+C The code here is in two parts, a simple divl loop and a mul-by-inverse.
+C The divl is used by mod_1 and mod_1c for small sizes, until the savings in
+C the mul-by-inverse can overcome the time to calculate an inverse.
+C preinv_mod_1 goes straight to the mul-by-inverse.
+C
+C The mul-by-inverse normalizes the divisor (or for preinv_mod_1 it's
+C already normalized).  The calculation done is r=a%(d*2^n) followed by a
+C final (r*2^n)%(d*2^n), where a is the dividend, d the divisor, and n is
+C the number of leading zero bits on d.  This means there are no bit shifts in
+C the main loop, at the cost of an extra divide step at the end.
+C
+C The simple divl for mod_1 is able to skip one divide step if high<divisor.
+C For mod_1c the carry parameter is the high of the first divide step, and
+C no attempt is made to skip that step since carry==0 will be very rare.
+C
+C The mul-by-inverse always skips one divide step, but then needs an extra
+C step at the end, unless the divisor was already normalized (n==0).  This
+C leads to different mul-by-inverse thresholds for normalized and
+C unnormalized divisors, in mod_1 and mod_1c.
+C
+C Alternatives:
+C
+C If n is small then the extra divide step could be done by a few shift and
+C trial subtract steps instead of a full divide.  That would probably be 3
+C or 4 cycles/bit, so say up to n=8 might benefit from that over a 21 cycle
+C divide.  However it's considered that small divisors, meaning biggish n,
+C are more likely than small n, and that it's not worth the branch
+C mispredicts of a loop.
+C
+C Past:
+C
+C There used to be some MMX based code for P-II and P-III, roughly following
+C the K7 form, but it was slower (about 24.0 c/l) than the code here.  That
+C code did have an advantage that mod_1 was able to do one less divide step
+C when high<divisor and the divisor unnormalized, but the speed advantage of
+C the current code soon overcomes that.
+C
+C Future:
+C
+C It's not clear whether what's here is optimal.  A rough count of micro-ops
+C on the dependent chain would suggest a couple of cycles could be shaved,
+C perhaps.
+
+
+dnl  The following thresholds are the sizes where the multiply by inverse
+dnl  method is used instead of plain divl's.  Minimum value 2 each.
+dnl
+dnl  MUL_NORM_THRESHOLD is for normalized divisors (high bit set),
+dnl  MUL_UNNORM_THRESHOLD for unnormalized divisors.
+dnl
+dnl  With the divl loop at 39 c/l, and the inverse loop at 21.5 c/l but
+dnl  setups for the inverse of about 50, the threshold should be around
+dnl  50/(39-21.5)==2.85.  An unnormalized divisor gets an extra divide step
+dnl  at the end, so if that's about 25 cycles then that threshold might be
+dnl  around (50+25)/(39-21.5) == 4.3.
+
+deflit(MUL_NORM_THRESHOLD,   4)
+deflit(MUL_UNNORM_THRESHOLD, 5)
+
+deflit(MUL_NORM_DELTA, eval(MUL_NORM_THRESHOLD - MUL_UNNORM_THRESHOLD))
+
+
+defframe(PARAM_INVERSE, 16)  dnl  mpn_preinv_mod_1
+defframe(PARAM_CARRY,   16)  dnl  mpn_mod_1c
+defframe(PARAM_DIVISOR, 12)
+defframe(PARAM_SIZE,     8)
+defframe(PARAM_SRC,      4)
+
+defframe(SAVE_EBX,    -4)
+defframe(SAVE_ESI,    -8)
+defframe(SAVE_EDI,    -12)
+defframe(SAVE_EBP,    -16)
+
+defframe(VAR_NORM,    -20)
+defframe(VAR_INVERSE, -24)
+
+deflit(STACK_SPACE, 24)
+
+	TEXT
+
+	ALIGN(16)
+PROLOGUE(mpn_preinv_mod_1)
+deflit(`FRAME',0)
+
+	movl	PARAM_SRC, %edx		C src pointer
+	subl	$STACK_SPACE, %esp	FRAME_subl_esp(STACK_SPACE)
+
+	movl	%ebx, SAVE_EBX
+	movl	PARAM_SIZE, %ebx	C size
+
+	movl	%ebp, SAVE_EBP
+	movl	PARAM_DIVISOR, %ebp	C divisor
+
+	movl	%esi, SAVE_ESI
+	movl	PARAM_INVERSE, %eax	C inverse, stored at L(preinv_entry)
+
+	movl	%edi, SAVE_EDI
+	movl	-4(%edx,%ebx,4), %edi	C src high limb
+
+	movl	$0, VAR_NORM		C zero shift, so no final divide step
+	leal	-8(%edx,%ebx,4), %ecx	C &src[size-2]
+
+	C
+
+	movl	%edi, %esi		C copy of high limb for the cmovc
+	subl	%ebp, %edi		C high-divisor
+
+	cmovc(	%esi, %edi)		C restore if underflow
+	decl	%ebx			C size-1, sets flags for the jnz
+	jnz	L(preinv_entry)		C more limbs: join the inverse loop
+
+	jmp	L(done_edi)		C size==1: return edi
+
+EPILOGUE()
+
+
+	ALIGN(16)
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+
+	movl	PARAM_SIZE, %ecx	C size
+	subl	$STACK_SPACE, %esp	FRAME_subl_esp(STACK_SPACE)
+
+	movl	%ebp, SAVE_EBP
+	movl	PARAM_DIVISOR, %eax	C divisor, used to pick the threshold
+
+	movl	%esi, SAVE_ESI
+	movl	PARAM_CARRY, %edx	C carry is the initial remainder
+
+	movl	PARAM_SRC, %esi		C src
+	orl	%ecx, %ecx
+	jz	L(done_edx)		C result==carry if size==0
+
+	sarl	$31, %eax		C -1 if divisor high bit set, else 0
+	movl	PARAM_DIVISOR, %ebp	C divisor
+
+	andl	$MUL_NORM_DELTA, %eax	C NORM-UNNORM delta if normalized, else 0
+
+	addl	$MUL_UNNORM_THRESHOLD, %eax	C threshold for this divisor
+
+	cmpl	%eax, %ecx
+	jb	L(divide_top)		C size below threshold: plain divl loop
+
+
+	C The carry parameter pretends to be the src high limb.
+
+	movl	%ebx, SAVE_EBX
+	leal	1(%ecx), %ebx		C size+1
+
+	movl	%edx, %eax		C carry
+	jmp	L(mul_by_inverse_1c)
+
+EPILOGUE()
+
+
+	ALIGN(16)
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+	movl	PARAM_SIZE, %ecx	C size
+	subl	$STACK_SPACE, %esp	FRAME_subl_esp(STACK_SPACE)
+	movl	$0, %edx		C initial carry (if can't skip a div)
+
+	movl	%esi, SAVE_ESI
+	movl	PARAM_SRC, %eax		C src, for the high limb fetch
+
+	movl	%ebp, SAVE_EBP
+	movl	PARAM_DIVISOR, %ebp	C divisor, turned into the threshold below
+
+	movl	PARAM_DIVISOR, %esi	C divisor, for the high<divisor compare
+	orl	%ecx, %ecx
+	jz	L(done_edx)		C size==0: return the zero in edx
+
+	movl	-4(%eax,%ecx,4), %eax	C src high limb
+
+	sarl	$31, %ebp		C -1 if divisor high bit set, else 0
+
+	andl	$MUL_NORM_DELTA, %ebp	C NORM-UNNORM delta if normalized, else 0
+
+	addl	$MUL_UNNORM_THRESHOLD, %ebp	C threshold for this divisor
+	cmpl	%esi, %eax		C carry flag if high<divisor
+
+	cmovc(	%eax, %edx)		C src high limb as initial carry
+	movl	PARAM_SRC, %esi		C src (carry flag kept for the sbbl)
+
+	sbbl	$0, %ecx		C size-1 to skip one div
+	jz	L(done_eax)		C done if had size==1
+
+	cmpl	%ebp, %ecx		C remaining size versus threshold
+	movl	PARAM_DIVISOR, %ebp	C divisor again (flags kept for the jae)
+	jae	L(mul_by_inverse)
+
+
+L(divide_top):
+	C eax	scratch (quotient)
+	C ebx
+	C ecx	counter, limbs, decrementing
+	C edx	scratch (remainder)
+	C esi	src
+	C edi
+	C ebp	divisor
+
+	movl	-4(%esi,%ecx,4), %eax	C next src limb, high to low
+
+	divl	%ebp			C edx:eax / divisor, remainder in edx
+
+	decl	%ecx
+	jnz	L(divide_top)
+
+
+L(done_edx):
+	movl	%edx, %eax		C return the remainder
+L(done_eax):
+	movl	SAVE_ESI, %esi		C only esi and ebp were saved on this path
+
+	movl	SAVE_EBP, %ebp
+	addl	$STACK_SPACE, %esp
+
+	ret
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+	C eax	src high limb
+	C ebx
+	C ecx
+	C edx
+	C esi	src
+	C edi
+	C ebp	divisor
+
+	movl	%ebx, SAVE_EBX
+	movl	PARAM_SIZE, %ebx	C size
+
+L(mul_by_inverse_1c):
+	bsrl	%ebp, %ecx		C 31-l
+
+	movl	%edi, SAVE_EDI
+	xorl	$31, %ecx		C l
+
+	movl	%ecx, VAR_NORM		C shift count, read back after the loop
+	shll	%cl, %ebp		C d normalized
+
+	movl	%eax, %edi		C src high -> n2
+	subl	%ebp, %eax		C src high - d
+
+	cmovnc(	%eax, %edi)		C n2-divisor if no underflow
+
+	movl	$-1, %eax		C low limb of the dividend, b-1
+	movl	$-1, %edx
+
+	subl	%ebp, %edx		C (b-d)-1 so  edx:eax = b*(b-d)-1
+	leal	-8(%esi,%ebx,4), %ecx	C &src[size-2]
+
+	divl	%ebp			C floor (b*(b-d)-1) / d
+
+L(preinv_entry):
+	movl	%eax, VAR_INVERSE	C inverse m, the multiplier in the loop
+
+
+
+C No special scheduling of loads is necessary in this loop, out of order
+C execution hides the latencies already.
+C
+C The way q1+1 is generated in %ebx and d is moved to %eax for the multiply
+C seems fastest.  The obvious change to generate q1+1 in %eax and then just
+C multiply by %ebp (as per mpn/x86/pentium/mod_1.asm in fact) runs 1 cycle
+C slower, for no obvious reason.
+
+
+	ALIGN(16)
+L(inverse_top):
+	C eax	n10 (then scratch)
+	C ebx	scratch (nadj, q1)
+	C ecx	src pointer, decrementing
+	C edx	scratch
+	C esi	n10
+	C edi	n2
+	C ebp	divisor
+
+	movl	(%ecx), %eax	   C next src limb
+	movl	%eax, %esi
+
+	sarl	$31, %eax	   C -n1
+	movl	%ebp, %ebx
+
+	andl	%eax, %ebx	   C -n1 & d
+	negl	%eax		   C n1
+
+	addl	%edi, %eax         C n2+n1
+
+	mull	VAR_INVERSE        C m*(n2+n1)
+
+	addl	%esi, %ebx         C nadj = n10 + (-n1 & d), ignoring overflow
+	subl	$4, %ecx
+
+	C
+
+	addl	%ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
+	leal	1(%edi), %ebx      C n2+1
+	movl	%ebp, %eax	   C d
+
+	adcl	%edx, %ebx         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+	jz	L(q1_ff)
+
+	mull	%ebx		   C (q1+1)*d
+
+	C
+
+	subl	%eax, %esi	   C low n - (q1+1)*d
+
+	sbbl	%edx, %edi	   C high n - (q1+1)*d, 0 or -1
+
+	andl	%ebp, %edi	   C d if underflow
+
+	addl	%esi, %edi	   C remainder with addback if necessary
+
+	cmpl	PARAM_SRC, %ecx
+	jae	L(inverse_top)
+
+
+C -----------------------------------------------------------------------------
+L(inverse_loop_done):
+
+	C %edi is the remainder modulo d*2^n and now must be reduced to
+	C 0<=r<d by calculating r*2^n mod d*2^n and then right shifting by
+	C n.  If d was already normalized on entry so that n==0 then nothing
+	C is needed here.  The chance of n==0 is low, but it's true of say
+	C PP from gmp-impl.h.
+	C
+	C eax
+	C ebx
+	C ecx
+	C edx
+	C esi
+	C edi	remainder
+	C ebp	divisor (normalized)
+
+	movl	VAR_NORM, %ecx
+	movl	$0, %esi
+
+	orl	%ecx, %ecx
+	jz	L(done_edi)
+
+
+	C Here use %edi=n10 and %esi=n2, opposite to the loop above.
+	C
+	C The q1=0xFFFFFFFF case is handled with an sbbl to adjust q1+1
+	C back, rather than q1_ff special case code.  This is simpler and
+	C costs only 2 uops.
+
+	shldl(	%cl, %edi, %esi)
+
+	shll	%cl, %edi
+
+	movl	%edi, %eax	   C n10
+	movl	%ebp, %ebx	   C d
+
+	sarl	$31, %eax          C -n1
+
+	andl	%eax, %ebx         C -n1 & d
+	negl	%eax		   C n1
+
+	addl	%edi, %ebx         C nadj = n10 + (-n1 & d), ignoring overflow
+	addl	%esi, %eax	   C n2+n1
+
+	mull	VAR_INVERSE        C m*(n2+n1)
+
+	C
+
+	addl	%ebx, %eax         C m*(n2+n1) + nadj, low giving carry flag
+	leal	1(%esi), %ebx      C n2+1
+
+	adcl	%edx, %ebx         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+	sbbl	$0, %ebx
+	movl	%ebp, %eax	   C d
+
+	mull	%ebx		   C (q1+1)*d
+
+	movl	SAVE_EBX, %ebx
+
+	C
+
+	subl	%eax, %edi	   C low  n - (q1+1)*d is remainder
+
+	sbbl	%edx, %esi	   C high n - (q1+1)*d, 0 or -1
+
+	andl	%ebp, %esi
+	movl	SAVE_EBP, %ebp
+
+	leal	(%esi,%edi), %eax  C remainder
+	movl	SAVE_ESI, %esi
+
+	shrl	%cl, %eax	   C denorm remainder
+	movl	SAVE_EDI, %edi
+	addl	$STACK_SPACE, %esp
+
+	ret
+
+
+L(done_edi):
+	movl	SAVE_EBX, %ebx
+	movl	%edi, %eax
+
+	movl	SAVE_ESI, %esi
+
+	movl	SAVE_EDI, %edi
+
+	movl	SAVE_EBP, %ebp
+	addl	$STACK_SPACE, %esp
+
+	ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d.
+C
+C This is reached only very rarely.
+
+L(q1_ff):
+	C eax	(divisor)
+	C ebx	(q1+1 == 0)
+	C ecx	src pointer
+	C edx
+	C esi	n10
+	C edi	(n2)
+	C ebp	divisor
+
+	leal	(%ebp,%esi), %edi	C n-q*d remainder -> next n2
+
+	cmpl	PARAM_SRC, %ecx
+	jae	L(inverse_top)
+
+	jmp	L(inverse_loop_done)
+
+
+EPILOGUE()
diff --git a/gmp/mpn/x86/p6/mod_34lsub1.asm b/gmp/mpn/x86/p6/mod_34lsub1.asm
index b88ab5d17c..5e854b7274 100644
--- a/gmp/mpn/x86/p6/mod_34lsub1.asm
+++ b/gmp/mpn/x86/p6/mod_34lsub1.asm
@@ -1,32 +1,21 @@
 dnl  Intel P6 mpn_mod_34lsub1 -- remainder modulo 2^24-1.
 
-dnl  Copyright 2000-2002, 2004 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/p6/mode1o.asm b/gmp/mpn/x86/p6/mode1o.asm
index c62b676e5a..4aff48d7e6 100644
--- a/gmp/mpn/x86/p6/mode1o.asm
+++ b/gmp/mpn/x86/p6/mode1o.asm
@@ -1,32 +1,21 @@
 dnl  Intel P6 mpn_modexact_1_odd -- exact division style remainder.
 
-dnl  Copyright 2000-2002, 2007 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002, 2007 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -112,7 +101,7 @@ ifdef(`PIC',`
 
 	subl	%eax, %edi		C inv = 2*inv - inv*inv*d
 
-	ASSERT(e,`	C d*inv == 1 mod 2^GMP_LIMB_BITS
+	ASSERT(e,`	C d*inv == 1 mod 2^BITS_PER_MP_LIMB
 	movl	PARAM_DIVISOR, %eax
 	imull	%edi, %eax
 	cmpl	$1, %eax')
@@ -124,7 +113,7 @@ C	subl	%edx, %eax       1
 C	imull	%edi, %eax       4
 C	mull	PARAM_DIVISOR    5
 C			       ----
-C	total			10
+C       total		        10
 C
 C and this is the measured speed.  No special scheduling is necessary, out
 C of order execution hides the load latency.
diff --git a/gmp/mpn/x86/p6/mul_basecase.asm b/gmp/mpn/x86/p6/mul_basecase.asm
index d87bc12b60..fc1afbdf0e 100644
--- a/gmp/mpn/x86/p6/mul_basecase.asm
+++ b/gmp/mpn/x86/p6/mul_basecase.asm
@@ -1,32 +1,21 @@
 dnl  Intel P6 mpn_mul_basecase -- multiply two mpn numbers.
 
-dnl  Copyright 1999-2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/p6/p3mmx/popham.asm b/gmp/mpn/x86/p6/p3mmx/popham.asm
index db2f2601c9..2f58968a31 100644
--- a/gmp/mpn/x86/p6/p3mmx/popham.asm
+++ b/gmp/mpn/x86/p6/p3mmx/popham.asm
@@ -2,32 +2,21 @@ dnl  Intel Pentium-III mpn_popcount, mpn_hamdist -- population count and
 dnl  hamming distance.
 
 dnl  Copyright 2000, 2002, 2004, 2007 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/p6/sqr_basecase.asm b/gmp/mpn/x86/p6/sqr_basecase.asm
index 8fc7fdf375..05a31f1a15 100644
--- a/gmp/mpn/x86/p6/sqr_basecase.asm
+++ b/gmp/mpn/x86/p6/sqr_basecase.asm
@@ -1,32 +1,21 @@
 dnl  Intel P6 mpn_sqr_basecase -- square an mpn number.
 
 dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -38,15 +27,15 @@ C     which is the Karatsuba recursing range).
 
 dnl  These are the same as in mpn/x86/k6/sqr_basecase.asm, see that file for
 dnl  a description.  The only difference here is that UNROLL_COUNT can go up
-dnl  to 64 (not 63) making SQR_TOOM2_THRESHOLD_MAX 67.
+dnl  to 64 (not 63) making SQR_KARATSUBA_THRESHOLD_MAX 67.
 
-deflit(SQR_TOOM2_THRESHOLD_MAX, 67)
+deflit(SQR_KARATSUBA_THRESHOLD_MAX, 67)
 
-ifdef(`SQR_TOOM2_THRESHOLD_OVERRIDE',
-`define(`SQR_TOOM2_THRESHOLD',SQR_TOOM2_THRESHOLD_OVERRIDE)')
+ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE',
+`define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)')
 
-m4_config_gmp_mparam(`SQR_TOOM2_THRESHOLD')
-deflit(UNROLL_COUNT, eval(SQR_TOOM2_THRESHOLD-3))
+m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD')
+deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3))
 
 
 C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
diff --git a/gmp/mpn/x86/p6/sse2/addmul_1.asm b/gmp/mpn/x86/p6/sse2/addmul_1.asm
index 144b627aa3..b601c54bcf 100644
--- a/gmp/mpn/x86/p6/sse2/addmul_1.asm
+++ b/gmp/mpn/x86/p6/sse2/addmul_1.asm
@@ -1,32 +1,21 @@
 dnl  Intel P6/SSE2 mpn_addmul_1.
 
 dnl  Copyright 2008 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/p6/sse2/gmp-mparam.h b/gmp/mpn/x86/p6/sse2/gmp-mparam.h
index 69226289a7..843227b99a 100644
--- a/gmp/mpn/x86/p6/sse2/gmp-mparam.h
+++ b/gmp/mpn/x86/p6/sse2/gmp-mparam.h
@@ -1,197 +1,74 @@
 /* Intel P6/sse2 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2003, 2008-2010 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2008, 2009
+Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
 
-or both in parallel, as here.
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+/* NOTE: In a fat binary build SQR_KARATSUBA_THRESHOLD here cannot be more
+   than the value in mpn/x86/p6/gmp-mparam.h.  The latter is used as a hard
+   limit in mpn/x86/p6/sqr_basecase.asm.  */
 
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+/* 1867 MHz P6 model 13 */
 
+/* Generated by tuneup.c, 2008-10-30, gcc 4.3 */
 
-/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be more than the
-   value in mpn/x86/p6/gmp-mparam.h.  The latter is used as a hard limit in
-   mpn/x86/p6/sqr_basecase.asm.  */
+#define MUL_KARATSUBA_THRESHOLD          20
+#define MUL_TOOM3_THRESHOLD              77
+#define MUL_TOOM44_THRESHOLD            142
 
+#define SQR_BASECASE_THRESHOLD            0  /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD          30
+#define SQR_TOOM3_THRESHOLD             101
+#define SQR_TOOM4_THRESHOLD             154
 
-/* 1867 MHz P6 model 13 */
+#define MULLOW_BASECASE_THRESHOLD         4
+#define MULLOW_DC_THRESHOLD              38
+#define MULLOW_MUL_N_THRESHOLD          234
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 24
+#define POWM_THRESHOLD                  150
+
+#define MATRIX22_STRASSEN_THRESHOLD      23
+#define HGCD_THRESHOLD                   95
+#define GCD_DC_THRESHOLD                381
+#define GCDEXT_DC_THRESHOLD             419
+#define JACOBI_BASE_METHOD                1
+
+#define USE_PREINV_DIVREM_1               1  /* native */
+#define USE_PREINV_MOD_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             14
+#define GET_STR_PRECOMPUTE_THRESHOLD     24
+#define SET_STR_DC_THRESHOLD            276
+#define SET_STR_PRECOMPUTE_THRESHOLD   1078
+
+#define MUL_FFT_TABLE  { 400, 928, 1664, 3584, 10240, 40960, 98304, 393216, 1572864, 0 }
+#define MUL_FFT_MODF_THRESHOLD          496
+#define MUL_FFT_THRESHOLD              7168
 
-#define MOD_1_NORM_THRESHOLD                 4
-#define MOD_1_UNNORM_THRESHOLD               4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           21
-
-#define MUL_TOOM22_THRESHOLD                20
-#define MUL_TOOM33_THRESHOLD                77
-#define MUL_TOOM44_THRESHOLD               169
-#define MUL_TOOM6H_THRESHOLD               246
-#define MUL_TOOM8H_THRESHOLD               381
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      97
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      80
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     106
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 30
-#define SQR_TOOM3_THRESHOLD                101
-#define SQR_TOOM4_THRESHOLD                154
-#define SQR_TOOM6_THRESHOLD                222
-#define SQR_TOOM8_THRESHOLD                527
-
-#define MULMID_TOOM42_THRESHOLD             58
-
-#define MULMOD_BNM1_THRESHOLD               13
-#define SQRMOD_BNM1_THRESHOLD               17
-
-#define MUL_FFT_MODF_THRESHOLD             690  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    565, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     25, 7}, {     13, 6}, {     28, 7}, {     15, 6}, \
-    {     31, 7}, {     17, 6}, {     35, 7}, {     27, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 5}, \
-    {    383, 4}, {    991, 5}, {    511, 6}, {    267, 7}, \
-    {    157, 8}, {     91, 9}, {     47, 8}, {    111, 9}, \
-    {     63, 8}, {    127, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
-    {     79, 9}, {    159,10}, {     95,11}, {     63,10}, \
-    {    143, 9}, {    287,10}, {    159,11}, {     95,10}, \
-    {    191,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543,10}, {    287,11}, \
-    {    159,10}, {    335, 9}, {    671,11}, {    191,10}, \
-    {    383, 9}, {    767,10}, {    399, 9}, {    799,10}, \
-    {    415,11}, {    223,12}, {    127,11}, {    255,10}, \
-    {    543, 9}, {   1087,11}, {    287,10}, {    607,11}, \
-    {    319,10}, {    671,12}, {    191,11}, {    383,10}, \
-    {    799,11}, {    415,10}, {    831,13}, {    127,12}, \
-    {    255,11}, {    543,10}, {   1087,11}, {    607,10}, \
-    {   1215,12}, {    319,11}, {    671,10}, {   1343,11}, \
-    {    735,10}, {   1471,12}, {    383,11}, {    799,10}, \
-    {   1599,11}, {    863,12}, {    447,11}, {    959,13}, \
-    {    255,12}, {    511,11}, {   1087,12}, {    575,11}, \
-    {   1215,12}, {    639,11}, {   1343,12}, {    703,11}, \
-    {   1471,13}, {    383,12}, {    831,11}, {   1727,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1215,13}, \
-    {    639,12}, {   1471,11}, {   2943,13}, {    767,12}, \
-    {   1727,13}, {    895,12}, {   1919,14}, {    511,13}, \
-    {   1023,12}, {   2111,13}, {   1151,12}, {   2431,13}, \
-    {   1407,12}, {   2815,14}, {    767,13}, {   1663,12}, \
-    {   3455,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 132
-#define MUL_FFT_THRESHOLD                 7424
-
-#define SQR_FFT_MODF_THRESHOLD             565  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    472, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
-    {     31, 7}, {     17, 6}, {     35, 7}, {     27, 8}, \
-    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
-    {     23, 7}, {     49, 8}, {     27, 9}, {     15, 8}, \
-    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
-    {     31, 8}, {     63, 4}, {   1023, 8}, {     67, 9}, \
-    {     39, 5}, {    639, 4}, {   1471, 6}, {    383, 7}, \
-    {    209, 8}, {    119, 9}, {     63, 7}, {    255, 8}, \
-    {    139, 9}, {     71, 8}, {    143, 9}, {     79,10}, \
-    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
-    {    135,10}, {     79, 9}, {    159, 8}, {    319, 9}, \
-    {    167,10}, {     95,11}, {     63,10}, {    143, 9}, \
-    {    287,10}, {    159,11}, {     95,10}, {    191,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    543, 8}, \
-    {   1087,10}, {    287, 9}, {    575,11}, {    159,10}, \
-    {    319, 9}, {    639,10}, {    335, 9}, {    671,10}, \
-    {    351, 9}, {    703,11}, {    191,10}, {    383, 9}, \
-    {    767,10}, {    399, 9}, {    799,10}, {    415, 9}, \
-    {    831,11}, {    223,12}, {    127,11}, {    255,10}, \
-    {    543, 9}, {   1087,11}, {    287,10}, {    607, 9}, \
-    {   1215,11}, {    319,10}, {    671, 9}, {   1343,11}, \
-    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
-    {    799,11}, {    415,10}, {    831,13}, {    127,12}, \
-    {    255,11}, {    543,10}, {   1087,11}, {    607,12}, \
-    {    319,11}, {    671,10}, {   1343,11}, {    735,12}, \
-    {    383,11}, {    799,10}, {   1599,11}, {    863,12}, \
-    {    447,11}, {    959,13}, {    255,12}, {    511,11}, \
-    {   1087,12}, {    575,11}, {   1215,12}, {    639,11}, \
-    {   1343,12}, {    703,11}, {   1471,13}, {    383,12}, \
-    {    767,11}, {   1599,12}, {    831,11}, {   1727,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1215,13}, \
-    {    639,12}, {   1471,13}, {    767,12}, {   1727,13}, \
-    {    895,12}, {   1919,14}, {    511,13}, {   1023,12}, \
-    {   2111,13}, {   1151,12}, {   2431,13}, {   1407,14}, \
-    {    767,13}, {   1663,12}, {   3455,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 146
-#define SQR_FFT_THRESHOLD                 5760
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  31
-#define MULLO_MUL_N_THRESHOLD            13463
-
-#define DC_DIV_QR_THRESHOLD                 25
-#define DC_DIVAPPR_Q_THRESHOLD              55
-#define DC_BDIV_QR_THRESHOLD                60
-#define DC_BDIV_Q_THRESHOLD                132
-
-#define INV_MULMOD_BNM1_THRESHOLD           38
-#define INV_NEWTON_THRESHOLD                65
-#define INV_APPR_THRESHOLD                  65
-
-#define BINV_NEWTON_THRESHOLD              252
-#define REDC_1_TO_REDC_N_THRESHOLD          62
-
-#define MU_DIV_QR_THRESHOLD               1164
-#define MU_DIVAPPR_Q_THRESHOLD             748
-#define MUPI_DIV_QR_THRESHOLD               38
-#define MU_BDIV_QR_THRESHOLD              1360
-#define MU_BDIV_Q_THRESHOLD               1470
-
-#define POWM_SEC_TABLE  2,23,258,879,2246
-
-#define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                      69
-#define HGCD_APPR_THRESHOLD                112
-#define HGCD_REDUCE_THRESHOLD             3389
-#define GCD_DC_THRESHOLD                   386
-#define GCDEXT_DC_THRESHOLD                303
-#define JACOBI_BASE_METHOD                   1
-
-#define GET_STR_DC_THRESHOLD                13
-#define GET_STR_PRECOMPUTE_THRESHOLD        25
-#define SET_STR_DC_THRESHOLD               582
-#define SET_STR_PRECOMPUTE_THRESHOLD      1118
-
-#define FAC_DSC_THRESHOLD                  178
-#define FAC_ODD_THRESHOLD                   34
+#define SQR_FFT_TABLE  { 432, 928, 1664, 3584, 10240, 40960, 98304, 393216, 1572864, 0 }
+#define SQR_FFT_MODF_THRESHOLD          448
+#define SQR_FFT_THRESHOLD              3840
diff --git a/gmp/mpn/x86/p6/sse2/mod_1_1.asm b/gmp/mpn/x86/p6/sse2/mod_1_1.asm
deleted file mode 100644
index 8b7b7adaa5..0000000000
--- a/gmp/mpn/x86/p6/sse2/mod_1_1.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl  Intel P6/SSE2 mpn_mod_1_1.
-
-dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_mod_1_1p)
-include_mpn(`x86/pentium4/sse2/mod_1_1.asm')
diff --git a/gmp/mpn/x86/p6/sse2/mod_1_4.asm b/gmp/mpn/x86/p6/sse2/mod_1_4.asm
deleted file mode 100644
index 49c96c60b9..0000000000
--- a/gmp/mpn/x86/p6/sse2/mod_1_4.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl  Intel P6/SSE2 mpn_mod_1_4.
-
-dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_mod_1s_4p)
-include_mpn(`x86/pentium4/sse2/mod_1_4.asm')
diff --git a/gmp/mpn/x86/p6/sse2/mul_1.asm b/gmp/mpn/x86/p6/sse2/mul_1.asm
index 50e5b6983a..fc3d4e6414 100644
--- a/gmp/mpn/x86/p6/sse2/mul_1.asm
+++ b/gmp/mpn/x86/p6/sse2/mul_1.asm
@@ -1,32 +1,21 @@
 dnl  Intel P6/SSE2 mpn_mul_1.
 
 dnl  Copyright 2008 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/p6/sse2/mul_basecase.asm b/gmp/mpn/x86/p6/sse2/mul_basecase.asm
index 4687625790..f52ece025f 100644
--- a/gmp/mpn/x86/p6/sse2/mul_basecase.asm
+++ b/gmp/mpn/x86/p6/sse2/mul_basecase.asm
@@ -1,32 +1,21 @@
 dnl  Intel P6/SSE2 mpn_mul_basecase.
 
 dnl  Copyright 2008 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/p6/sse2/popcount.asm b/gmp/mpn/x86/p6/sse2/popcount.asm
index 4c02b93be2..f818d6e230 100644
--- a/gmp/mpn/x86/p6/sse2/popcount.asm
+++ b/gmp/mpn/x86/p6/sse2/popcount.asm
@@ -1,32 +1,21 @@
 dnl  Intel P6/SSE2 mpn_popcount -- population count.
 
 dnl  Copyright 2008 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/p6/sse2/sqr_basecase.asm b/gmp/mpn/x86/p6/sse2/sqr_basecase.asm
index 76b574b6c7..8a7f24974d 100644
--- a/gmp/mpn/x86/p6/sse2/sqr_basecase.asm
+++ b/gmp/mpn/x86/p6/sse2/sqr_basecase.asm
@@ -1,32 +1,21 @@
 dnl  Intel P6/SSE2 mpn_sqr_basecase.
 
 dnl  Copyright 2008 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/p6/sse2/submul_1.asm b/gmp/mpn/x86/p6/sse2/submul_1.asm
index 98a603ce93..ae97fd6346 100644
--- a/gmp/mpn/x86/p6/sse2/submul_1.asm
+++ b/gmp/mpn/x86/p6/sse2/submul_1.asm
@@ -1,32 +1,21 @@
 dnl  Intel P6/SSE2 mpn_submul_1.
 
 dnl  Copyright 2008 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/README b/gmp/mpn/x86/pentium/README
index 305936bbd9..6c4d872c47 100644
--- a/gmp/mpn/x86/pentium/README
+++ b/gmp/mpn/x86/pentium/README
@@ -1,30 +1,19 @@
-Copyright 1996, 1999-2001, 2003 Free Software Foundation, Inc.
+Copyright 1996, 1999, 2000, 2001, 2003 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/x86/pentium/aors_n.asm b/gmp/mpn/x86/pentium/aors_n.asm
index 01ebfb96ae..30d0df79b0 100644
--- a/gmp/mpn/x86/pentium/aors_n.asm
+++ b/gmp/mpn/x86/pentium/aors_n.asm
@@ -1,32 +1,22 @@
 dnl  Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
 
-dnl  Copyright 1992, 1994-1996, 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
+dnl  Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -88,13 +78,13 @@ deflit(`FRAME',16)
 	pushl	%edx
 FRAME_pushl()
 	movl	PARAM_CARRY,%eax
-	shrl	%eax			C shift bit 0 into carry
+	shrl	$1,%eax			C shift bit 0 into carry
 	jmp	L(oop)
 
 L(endgo):
 deflit(`FRAME',16)
 	movl	PARAM_CARRY,%eax
-	shrl	%eax			C shift bit 0 into carry
+	shrl	$1,%eax			C shift bit 0 into carry
 	jmp	L(end)
 
 EPILOGUE()
diff --git a/gmp/mpn/x86/pentium/aorsmul_1.asm b/gmp/mpn/x86/pentium/aorsmul_1.asm
index d83cc4513b..a50299b5cf 100644
--- a/gmp/mpn/x86/pentium/aorsmul_1.asm
+++ b/gmp/mpn/x86/pentium/aorsmul_1.asm
@@ -2,32 +2,21 @@ dnl  Intel Pentium mpn_addmul_1 -- mpn by limb multiplication.
 
 dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
 dnl  Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/bdiv_q_1.asm b/gmp/mpn/x86/pentium/bdiv_q_1.asm
deleted file mode 100644
index 9fee3cb87a..0000000000
--- a/gmp/mpn/x86/pentium/bdiv_q_1.asm
+++ /dev/null
@@ -1,260 +0,0 @@
-dnl  Intel Pentium mpn_divexact_1 -- mpn by limb exact division.
-
-dnl  Rearranged from mpn/x86/pentium/dive_1.asm by Marco Bodrato.
-
-dnl  Copyright 2001, 2002, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C         divisor
-C       odd   even
-C P54:  24.5  30.5   cycles/limb
-C P55:  23.0  28.0
-
-MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
-
-C The P55 speeds noted above, 23 cycles odd or 28 cycles even, are as
-C expected.  On P54 in the even case the shrdl pairing nonsense (see
-C mpn/x86/pentium/README) costs 1 cycle, but it's not clear why there's a
-C further 1.5 slowdown for both odd and even.
-
-defframe(PARAM_SHIFT,  24)
-defframe(PARAM_INVERSE,20)
-defframe(PARAM_DIVISOR,16)
-defframe(PARAM_SIZE,   12)
-defframe(PARAM_SRC,    8)
-defframe(PARAM_DST,    4)
-
-dnl  re-use parameter space
-define(VAR_INVERSE,`PARAM_DST')
-
-	TEXT
-
-	ALIGN(32)
-C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C                           mp_limb_t divisor);
-C
-PROLOGUE(mpn_bdiv_q_1)
-deflit(`FRAME',0)
-
-	movl	$-1, %ecx
-	movl	PARAM_DIVISOR, %eax
-
-L(strip_twos):
-	ASSERT(nz, `orl %eax, %eax')
-	shrl	%eax
-	incl	%ecx			C shift count
-
-	jnc	L(strip_twos)
-
-	leal	1(%eax,%eax), %edx	C d
-	andl	$127, %eax		C d/2, 7 bits
-
-	pushl	%ebx		FRAME_pushl()
-	pushl	%ebp		FRAME_pushl()
-
-ifdef(`PIC',`
-	call	L(here)
-L(here):
-	popl	%ebp			C eip
-
-	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebp
-	C AGI
-	movl	binvert_limb_table@GOT(%ebp), %ebp
-	C AGI
-	movzbl	(%eax,%ebp), %eax
-',`
-
-dnl non-PIC
-	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
-')
-
-	movl	%eax, %ebp		C inv
-	addl	%eax, %eax		C 2*inv
-
-	imull	%ebp, %ebp		C inv*inv
-
-	imull	%edx, %ebp		C inv*inv*d
-
-	subl	%ebp, %eax		C inv = 2*inv - inv*inv*d
-	movl	PARAM_SIZE, %ebx
-
-	movl	%eax, %ebp
-	addl	%eax, %eax		C 2*inv
-
-	imull	%ebp, %ebp		C inv*inv
-
-	imull	%edx, %ebp		C inv*inv*d
-
-	subl	%ebp, %eax		C inv = 2*inv - inv*inv*d
-	movl	%edx, PARAM_DIVISOR	C d without twos
-
-	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
-	pushl	%eax	FRAME_pushl()
-	imull	PARAM_DIVISOR, %eax
-	cmpl	$1, %eax
-	popl	%eax	FRAME_popl()')
-
-	jmp	L(common)
-EPILOGUE()
-
-C mp_limb_t
-C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
-C		    mp_limb_t inverse, int shift)
-	ALIGN(32)
-PROLOGUE(mpn_pi1_bdiv_q_1)
-deflit(`FRAME',0)
-
-	movl	PARAM_SHIFT, %ecx
-
-	pushl	%ebx		FRAME_pushl()
-	pushl	%ebp		FRAME_pushl()
-
-	movl	PARAM_SIZE, %ebx
-	movl	PARAM_INVERSE, %eax
-
-L(common):
-	pushl	%esi		FRAME_pushl()
-	push	%edi		FRAME_pushl()
-
-	movl	PARAM_SRC, %esi
-	movl	PARAM_DST, %edi
-	movl	%eax, VAR_INVERSE
-
-	leal	(%esi,%ebx,4), %esi	C src end
-	leal	(%edi,%ebx,4), %edi	C dst end
-
-	negl	%ebx			C -size
-
-	xorl	%ebp, %ebp		C initial carry bit
-
-	orl	%ecx, %ecx		C shift
-	movl	(%esi,%ebx,4), %eax	C src low limb
-	jz	L(odd_entry)
-
-	xorl	%edx, %edx		C initial carry limb (for even, if one)
-	incl	%ebx
-	jz	L(one)
-
-	movl	(%esi,%ebx,4), %edx	C src second limb (for even)
-	shrdl(	%cl, %edx, %eax)
-
-	jmp	L(even_entry)
-
-
-	ALIGN(8)
-L(odd_top):
-	C eax	scratch
-	C ebx	counter, limbs, negative
-	C ecx
-	C edx
-	C esi	src end
-	C edi	dst end
-	C ebp	carry bit, 0 or -1
-
-	mull	PARAM_DIVISOR
-
-	movl	(%esi,%ebx,4), %eax
-	subl	%ebp, %edx
-
-	subl	%edx, %eax
-
-	sbbl	%ebp, %ebp
-
-L(odd_entry):
-	imull	VAR_INVERSE, %eax
-
-	movl	%eax, (%edi,%ebx,4)
-
-	incl	%ebx
-	jnz	L(odd_top)
-
-	popl	%edi
-	popl	%esi
-
-	popl	%ebp
-	popl	%ebx
-
-	ret
-
-L(even_top):
-	C eax	scratch
-	C ebx	counter, limbs, negative
-	C ecx	twos
-	C edx
-	C esi	src end
-	C edi	dst end
-	C ebp	carry bit, 0 or -1
-
-	mull	PARAM_DIVISOR
-
-	subl	%ebp, %edx		C carry bit
-	movl	-4(%esi,%ebx,4), %eax	C src limb
-
-	movl	(%esi,%ebx,4), %ebp	C and one above it
-
-	shrdl(	%cl, %ebp, %eax)
-
-	subl	%edx, %eax		C carry limb
-
-	sbbl	%ebp, %ebp
-
-L(even_entry):
-	imull	VAR_INVERSE, %eax
-
-	movl	%eax, -4(%edi,%ebx,4)
-	incl	%ebx
-
-	jnz	L(even_top)
-
-	mull	PARAM_DIVISOR
-
-	movl	-4(%esi), %eax		C src high limb
-	subl	%ebp, %edx
-
-L(one):
-	shrl	%cl, %eax
-
-	subl	%edx, %eax		C no carry if division is exact
-
-	imull	VAR_INVERSE, %eax
-
-	movl	%eax, -4(%edi)		C dst high limb
-	nop				C protect against cache bank clash
-
-	popl	%edi
-	popl	%esi
-
-	popl	%ebp
-	popl	%ebx
-
-	ret
-
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium/com.asm b/gmp/mpn/x86/pentium/com_n.asm
index b0805452a6..c6d2d72e5e 100644
--- a/gmp/mpn/x86/pentium/com.asm
+++ b/gmp/mpn/x86/pentium/com_n.asm
@@ -1,32 +1,21 @@
-dnl  Intel Pentium mpn_com -- mpn ones complement.
+dnl  Intel Pentium mpn_com_n -- mpn ones complement.
 
 dnl  Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -37,7 +26,7 @@ C P5: 1.75 cycles/limb
 NAILS_SUPPORT(0-31)
 
 
-C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
 C
 C This code is similar to mpn_copyi, basically there's just some "xorl
 C $GMP_NUMB_MASK"s inserted.
@@ -55,7 +44,7 @@ defframe(PARAM_DST, 4)
 
 	TEXT
 	ALIGN(8)
-PROLOGUE(mpn_com)
+PROLOGUE(mpn_com_n)
 deflit(`FRAME',0)
 
 	movl	PARAM_SRC, %eax
diff --git a/gmp/mpn/x86/pentium/copyd.asm b/gmp/mpn/x86/pentium/copyd.asm
index 72a543b2a3..2be8c765ac 100644
--- a/gmp/mpn/x86/pentium/copyd.asm
+++ b/gmp/mpn/x86/pentium/copyd.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium mpn_copyd -- copy limb vector, decrementing.
 
 dnl  Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/copyi.asm b/gmp/mpn/x86/pentium/copyi.asm
index d983d6b46e..9da08e2c06 100644
--- a/gmp/mpn/x86/pentium/copyi.asm
+++ b/gmp/mpn/x86/pentium/copyi.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium mpn_copyi -- copy limb vector, incrementing.
 
 dnl  Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/dive_1.asm b/gmp/mpn/x86/pentium/dive_1.asm
index f80632f479..79885244a5 100644
--- a/gmp/mpn/x86/pentium/dive_1.asm
+++ b/gmp/mpn/x86/pentium/dive_1.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium mpn_divexact_1 -- mpn by limb exact division.
 
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -157,7 +146,7 @@ dnl non-PIC
 
 	negl	%ebx			C -size
 
-	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+	ASSERT(e,`	C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
 	pushl	%eax	FRAME_pushl()
 	imull	PARAM_DIVISOR, %eax
 	cmpl	$1, %eax
diff --git a/gmp/mpn/x86/pentium/gmp-mparam.h b/gmp/mpn/x86/pentium/gmp-mparam.h
index befa6e27a9..5c49c4e3cb 100644
--- a/gmp/mpn/x86/pentium/gmp-mparam.h
+++ b/gmp/mpn/x86/pentium/gmp-mparam.h
@@ -1,36 +1,26 @@
 /* Intel P54 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2002, 2004 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
 
 /* For mpn/x86/pentium/mod_1.asm */
@@ -41,11 +31,11 @@ see https://www.gnu.org/licenses/.  */
 
 /* Generated by tuneup.c, 2004-02-10, gcc 2.95 */
 
-#define MUL_TOOM22_THRESHOLD             16
-#define MUL_TOOM33_THRESHOLD             90
+#define MUL_KARATSUBA_THRESHOLD          16
+#define MUL_TOOM3_THRESHOLD              90
 
 #define SQR_BASECASE_THRESHOLD            0  /* always */
-#define SQR_TOOM2_THRESHOLD              22
+#define SQR_KARATSUBA_THRESHOLD          22
 #define SQR_TOOM3_THRESHOLD             122
 
 #define DIV_SB_PREINV_THRESHOLD       MP_SIZE_T_MAX  /* never */
diff --git a/gmp/mpn/x86/pentium/hamdist.asm b/gmp/mpn/x86/pentium/hamdist.asm
index 2d7bc99b12..a129030f74 100644
--- a/gmp/mpn/x86/pentium/hamdist.asm
+++ b/gmp/mpn/x86/pentium/hamdist.asm
@@ -1,32 +1,21 @@
 dnl  Intel P5 mpn_hamdist -- mpn hamming distance.
 
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/logops_n.asm b/gmp/mpn/x86/pentium/logops_n.asm
index 18773172e9..0552e55809 100644
--- a/gmp/mpn/x86/pentium/logops_n.asm
+++ b/gmp/mpn/x86/pentium/logops_n.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
 
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/lshift.asm b/gmp/mpn/x86/pentium/lshift.asm
index 2a31f36c6e..ece51e06d3 100644
--- a/gmp/mpn/x86/pentium/lshift.asm
+++ b/gmp/mpn/x86/pentium/lshift.asm
@@ -1,32 +1,22 @@
 dnl  Intel Pentium mpn_lshift -- mpn left shift.
 
-dnl  Copyright 1992, 1994-1996, 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
+dnl  Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/mmx/gmp-mparam.h b/gmp/mpn/x86/pentium/mmx/gmp-mparam.h
index 02a0def127..e443c8c300 100644
--- a/gmp/mpn/x86/pentium/mmx/gmp-mparam.h
+++ b/gmp/mpn/x86/pentium/mmx/gmp-mparam.h
@@ -1,37 +1,26 @@
 /* Intel P55 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 1999-2002, 2004, 2009, 2010 Free Software
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2009 Free Software
 Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
 
 /* For mpn/x86/pentium/mod_1.asm */
@@ -40,124 +29,45 @@ see https://www.gnu.org/licenses/.  */
 
 /* 233MHz P55 */
 
-#define MOD_1_NORM_THRESHOLD                 5
-#define MOD_1_UNNORM_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD      MP_SIZE_T_MAX  /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD         12
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        11
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     63
-#define USE_PREINV_DIVREM_1                  0
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           51
-
-#define MUL_TOOM22_THRESHOLD                16
-#define MUL_TOOM33_THRESHOLD                53
-#define MUL_TOOM44_THRESHOLD               128
-#define MUL_TOOM6H_THRESHOLD               189
-#define MUL_TOOM8H_THRESHOLD               260
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      90
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      88
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 20
-#define SQR_TOOM3_THRESHOLD                 73
-#define SQR_TOOM4_THRESHOLD                178
-#define SQR_TOOM6_THRESHOLD                210
-#define SQR_TOOM8_THRESHOLD                375
-
-#define MULMOD_BNM1_THRESHOLD               11
-#define SQRMOD_BNM1_THRESHOLD               12
-
-#define MUL_FFT_MODF_THRESHOLD             364  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    364, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
-    {      9, 5}, {     19, 6}, {     17, 7}, {      9, 6}, \
-    {     21, 7}, {     11, 6}, {     23, 7}, {     15, 6}, \
-    {     31, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
-    {     15, 7}, {     33, 8}, {     19, 7}, {     39, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 9}, {     23, 8}, \
-    {     47,10}, {     15, 9}, {     31, 8}, {     67, 9}, \
-    {     39, 8}, {     79, 9}, {     47, 8}, {     95, 9}, \
-    {     55,10}, {     31, 9}, {     79,10}, {     47, 9}, \
-    {     95,11}, {     31,10}, {     63, 9}, {    135,10}, \
-    {     79, 9}, {    159, 8}, {    319, 9}, {    167,10}, \
-    {     95, 9}, {    191, 8}, {    383,11}, {     63,10}, \
-    {    127, 9}, {    255,10}, {    143, 9}, {    287,10}, \
-    {    159, 9}, {    319,11}, {     95,10}, {    191, 9}, \
-    {    383,12}, {     63,11}, {    127,10}, {    271, 9}, \
-    {    543,10}, {    287,11}, {    159,10}, {    351,11}, \
-    {    191,10}, {    415,11}, {    223,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    287,10}, {    575,11}, \
-    {    351,12}, {    191,11}, {    415,13}, {    127,12}, \
-    {    255,11}, {    575,12}, {    319,11}, {    703,12}, \
-    {    383,11}, {    831,12}, {    447,13}, {   8192,14}, \
-    {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 90
-#define MUL_FFT_THRESHOLD                 3520
-
-#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    340, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     17, 7}, {      9, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     15, 6}, {     31, 7}, {     21, 8}, \
-    {     11, 7}, {     29, 8}, {     15, 7}, {     33, 8}, \
-    {     19, 7}, {     39, 8}, {     27, 7}, {     55, 9}, \
-    {     15, 8}, {     31, 7}, {     65, 8}, {     43, 9}, \
-    {     23, 8}, {     47,10}, {     15, 9}, {     31, 8}, \
-    {     67, 9}, {     39, 8}, {     83, 9}, {     47, 8}, \
-    {     95,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
-    {     79,10}, {     47, 9}, {     95,11}, {     31,10}, \
-    {     63, 9}, {    127, 8}, {    255, 9}, {    135,10}, \
-    {     79, 9}, {    159, 8}, {    319,10}, {     95, 9}, \
-    {    191,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511, 9}, {    271,10}, {    143, 9}, {    287, 8}, \
-    {    575, 9}, {    303,10}, {    159, 9}, {    319,11}, \
-    {     95,10}, {    191, 9}, {    383,10}, {    207,12}, \
-    {     63,11}, {    127,10}, {    271, 9}, {    543,10}, \
-    {    287, 9}, {    575,10}, {    303,11}, {    159,10}, \
-    {    351,11}, {    191,10}, {    415,11}, {    223,10}, \
-    {    447,12}, {    127,11}, {    255,10}, {    543,11}, \
-    {    287,10}, {    607,11}, {    351,12}, {    191,11}, \
-    {    479,13}, {    127,12}, {    255,11}, {    575,12}, \
-    {    319,11}, {    703,12}, {    383,11}, {    767,12}, \
-    {    447,13}, {   8192,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 96
-#define SQR_FFT_THRESHOLD                 5504
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  48
-#define MULLO_MUL_N_THRESHOLD             6633
-
-#define DC_DIV_QR_THRESHOLD                 43
-#define DC_DIVAPPR_Q_THRESHOLD             170
-#define DC_BDIV_QR_THRESHOLD                43
-#define DC_BDIV_Q_THRESHOLD                110
-
-#define INV_MULMOD_BNM1_THRESHOLD           30
-#define INV_NEWTON_THRESHOLD               177
-#define INV_APPR_THRESHOLD                 171
-
-#define BINV_NEWTON_THRESHOLD              194
-#define REDC_1_TO_REDC_N_THRESHOLD          50
-
-#define MU_DIV_QR_THRESHOLD               1142
-#define MU_DIVAPPR_Q_THRESHOLD            1142
-#define MUPI_DIV_QR_THRESHOLD               90
-#define MU_BDIV_QR_THRESHOLD               942
-#define MU_BDIV_Q_THRESHOLD               1017
-
-#define MATRIX22_STRASSEN_THRESHOLD         13
-#define HGCD_THRESHOLD                      92
-#define GCD_DC_THRESHOLD                   283
-#define GCDEXT_DC_THRESHOLD                221
-#define JACOBI_BASE_METHOD                   2
-
-#define GET_STR_DC_THRESHOLD                18
-#define GET_STR_PRECOMPUTE_THRESHOLD        31
-#define SET_STR_DC_THRESHOLD               490
-#define SET_STR_PRECOMPUTE_THRESHOLD       994
+/* Generated by tuneup.c, 2009-01-06, gcc 3.4 */
+
+#define MUL_KARATSUBA_THRESHOLD          16
+#define MUL_TOOM3_THRESHOLD              89
+#define MUL_TOOM44_THRESHOLD            131
+
+#define SQR_BASECASE_THRESHOLD            0  /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD          22
+#define SQR_TOOM3_THRESHOLD              77
+#define SQR_TOOM4_THRESHOLD             168
+
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD              40
+#define MULLOW_MUL_N_THRESHOLD          266
+
+#define DIV_SB_PREINV_THRESHOLD           4
+#define DIV_DC_THRESHOLD                 43
+#define POWM_THRESHOLD                   64
+
+#define MATRIX22_STRASSEN_THRESHOLD      13
+#define HGCD_THRESHOLD                   95
+#define GCD_DC_THRESHOLD                316
+#define GCDEXT_DC_THRESHOLD             316
+#define JACOBI_BASE_METHOD                2
+
+#define USE_PREINV_DIVREM_1               0
+#define USE_PREINV_MOD_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             17
+#define GET_STR_PRECOMPUTE_THRESHOLD     27
+#define SET_STR_DC_THRESHOLD            527
+#define SET_STR_PRECOMPUTE_THRESHOLD   1069
+
+#define MUL_FFT_TABLE  { 304, 672, 1152, 3584, 10240, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD          320
+#define MUL_FFT_THRESHOLD              3840
+
+#define SQR_FFT_TABLE  { 304, 672, 1152, 4608, 10240, 24576, 0 }
+#define SQR_FFT_MODF_THRESHOLD          320
+#define SQR_FFT_THRESHOLD              3840
diff --git a/gmp/mpn/x86/pentium/mmx/hamdist.asm b/gmp/mpn/x86/pentium/mmx/hamdist.asm
index 72e3196697..185eeaee22 100644
--- a/gmp/mpn/x86/pentium/mmx/hamdist.asm
+++ b/gmp/mpn/x86/pentium/mmx/hamdist.asm
@@ -1,32 +1,21 @@
 dnl  Intel P55 mpn_hamdist -- mpn hamming distance.
 
 dnl  Copyright 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/mmx/lshift.asm b/gmp/mpn/x86/pentium/mmx/lshift.asm
index 04b0ddcc8f..012d794952 100644
--- a/gmp/mpn/x86/pentium/mmx/lshift.asm
+++ b/gmp/mpn/x86/pentium/mmx/lshift.asm
@@ -1,32 +1,21 @@
 dnl  Intel P5 mpn_lshift -- mpn left shift.
 
-dnl  Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/mmx/mul_1.asm b/gmp/mpn/x86/pentium/mmx/mul_1.asm
index 4ced577b13..b9fe77ed07 100644
--- a/gmp/mpn/x86/pentium/mmx/mul_1.asm
+++ b/gmp/mpn/x86/pentium/mmx/mul_1.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium MMX mpn_mul_1 -- mpn by limb multiplication.
 
-dnl  Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/mmx/rshift.asm b/gmp/mpn/x86/pentium/mmx/rshift.asm
index e3b274bb63..f50b8ab0e0 100644
--- a/gmp/mpn/x86/pentium/mmx/rshift.asm
+++ b/gmp/mpn/x86/pentium/mmx/rshift.asm
@@ -1,32 +1,21 @@
 dnl  Intel P5 mpn_rshift -- mpn right shift.
 
 dnl  Copyright 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/mod_1.asm b/gmp/mpn/x86/pentium/mod_1.asm
new file mode 100644
index 0000000000..408242e7a9
--- /dev/null
+++ b/gmp/mpn/x86/pentium/mod_1.asm
@@ -0,0 +1,454 @@
+dnl  Intel P5 mpn_mod_1 -- mpn by limb remainder.
+
+dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 28.0 cycles/limb
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                       mp_limb_t carry);
+C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                             mp_limb_t inverse);
+C
+C This code is not unlike mpn/x86/p6/mod_1.asm, it does the same sort of
+C multiply by inverse without on-the-fly shifts.  See that code for some
+C general comments.
+C
+C Alternatives:
+C
+C P5 shldl is 4 cycles, so shifting on the fly would be at least 5 cycles
+C slower, probably more depending what it did to register usage.  Using MMX
+C on P55 would be better, but still at least 4 or 5 instructions and so 2 or
+C 3 cycles.
+
+
+dnl  These thresholds are the sizes where the multiply by inverse method is
+dnl  used, rather than plain "divl"s.  Minimum value 2.
+dnl
+dnl  MUL_NORM_THRESHOLD is for an already normalized divisor (high bit set),
+dnl  MUL_UNNORM_THRESHOLD for an unnormalized divisor.
+dnl
+dnl  With the divl loop at 44 c/l and the inverse at 28 c/l (16 cycles saved
+dnl  per limb) with about 70 cycles to setup, the threshold should be about
+dnl  ceil(70/16)==5, which is what happens in practice.
+dnl
+dnl  An unnormalized divisor gets an extra 40 cycles at the end for the
+dnl  final (r*2^n)%(d*2^n) and shift.  This increases the threshold by about
+dnl  40/16=3.
+dnl
+dnl  PIC adds between 4 and 7 cycles (not sure why it varies), but this
+dnl  doesn't change the thresholds.
+dnl
+dnl  The entry sequence code that chooses between MUL_NORM_THRESHOLD and
+dnl  MUL_UNNORM_THRESHOLD is a bit horrible, but it adds only 2 cycles
+dnl  (branch free) and ensures the choice between div or mul is optimal.
+
+deflit(MUL_NORM_THRESHOLD,   ifdef(`PIC',5,5))   dnl in limbs; same value with or without PIC
+deflit(MUL_UNNORM_THRESHOLD, ifdef(`PIC',8,8))   dnl in limbs; same value with or without PIC
+dnl  MUL_NORM_DELTA is NORM minus UNNORM; the entry code masks it with the divisor high bit and adds MUL_UNNORM_THRESHOLD.
+deflit(MUL_NORM_DELTA, eval(MUL_NORM_THRESHOLD - MUL_UNNORM_THRESHOLD))
+
+dnl  Argument slots.  NOTE(review): the numbers are presumably byte offsets from the entry stack pointer - confirm against defframe.
+defframe(PARAM_INVERSE, 16)   dnl mpn_preinv_mod_1
+defframe(PARAM_CARRY,   16)   dnl mpn_mod_1c
+defframe(PARAM_DIVISOR, 12)   dnl third argument of all three entry points
+defframe(PARAM_SIZE,     8)   dnl second argument of all three entry points
+defframe(PARAM_SRC,      4)   dnl first argument of all three entry points
+
+dnl  re-using parameter space
+define(VAR_NORM,    `PARAM_DIVISOR')   dnl normalization shift count; written only after the divisor is in a register
+define(VAR_INVERSE, `PARAM_SIZE')   dnl inverse of the divisor; written at L(start_preinv)
+
+	TEXT
+
+	ALIGN(8)
+PROLOGUE(mpn_preinv_mod_1)
+deflit(`FRAME',0)
+C Remainder of {src,size} by divisor using the caller-supplied inverse.  NOTE(review): there is no size==0 check here, presumably size >= 1 is required.
+	pushl	%ebp	FRAME_pushl()	C save the registers used below
+	pushl	%esi	FRAME_pushl()
+
+	movl	PARAM_SRC, %esi
+	movl	PARAM_SIZE, %edx
+
+	pushl	%edi	FRAME_pushl()
+	pushl	%ebx	FRAME_pushl()
+
+	movl	PARAM_DIVISOR, %ebp	C d, read before its slot is reused as VAR_NORM
+	movl	PARAM_INVERSE, %eax	C inverse, stored to VAR_INVERSE at L(start_preinv)
+
+	movl	-4(%esi,%edx,4), %edi	C src high limb
+	leal	-8(%esi,%edx,4), %esi	C &src[size-2]
+
+	movl	$0, VAR_NORM		C shift count 0, presumably d is already normalized
+	decl	%edx			C size-1, zero when there is a single limb
+
+	jnz	L(start_preinv)		C two or more limbs, join the shared inverse code
+C Single limb: the result is src-d, or src itself when that subtraction underflows.
+	subl	%ebp, %edi		C src-divisor
+	popl	%ebx			C popl leaves the carry from subl intact
+
+	sbbl	%ecx, %ecx		C -1 if underflow
+	movl	%edi, %eax		C src-divisor
+
+	andl	%ebp, %ecx		C d if underflow
+	popl	%edi
+
+	addl	%ecx, %eax		C remainder, with possible addback
+	popl	%esi
+
+	popl	%ebp
+
+	ret				C remainder is returned in eax
+
+EPILOGUE()
+
+
+	ALIGN(8)
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+C Remainder of {src,size} by divisor with carry as the initial remainder; picks the divl loop or the multiply-by-inverse code by size.
+	movl	PARAM_DIVISOR, %eax
+	movl	PARAM_SIZE, %ecx
+
+	sarl	$31, %eax			C d highbit
+	movl	PARAM_CARRY, %edx
+
+	orl	%ecx, %ecx			C sets ZF when size==0
+	jz	L(done_edx)			C result==carry if size==0
+
+	andl	$MUL_NORM_DELTA, %eax		C delta if d is normalized, else 0
+	pushl	%ebp		FRAME_pushl()	C popped again at the end of the divl loop
+
+	addl	$MUL_UNNORM_THRESHOLD, %eax	C norm or unnorm thresh
+	pushl	%esi		FRAME_pushl()	C popped again at the end of the divl loop
+
+	movl	PARAM_SRC, %esi
+	movl	PARAM_DIVISOR, %ebp
+
+	cmpl	%eax, %ecx			C size against the chosen threshold
+	jb	L(divide_top)			C small size: divl loop, edx=carry is the running remainder (divl needs carry < d)
+
+	movl	%edx, %eax		C carry as pretend src high limb
+	leal	1(%ecx), %edx		C size+1
+
+	cmpl	$0x1000000, %ebp		C sets the carry flag consumed by sbbl at L(mul_by_inverse_1c)
+	jmp	L(mul_by_inverse_1c)		C jmp does not alter the flags
+
+EPILOGUE()
+
+
+	ALIGN(8)
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+	movl	PARAM_SIZE, %ecx
+	pushl	%ebp		FRAME_pushl()
+
+	orl	%ecx, %ecx
+	jz	L(done_zero)
+
+	movl	PARAM_SRC, %eax
+	movl	PARAM_DIVISOR, %ebp
+
+	sarl	$31, %ebp		C -1 if divisor normalized
+	movl	-4(%eax,%ecx,4), %eax	C src high limb
+
+	movl	PARAM_DIVISOR, %edx
+	pushl	%esi		FRAME_pushl()
+
+	andl	$MUL_NORM_DELTA, %ebp
+	cmpl	%edx, %eax		C carry flag if high<divisor
+
+	sbbl	%edx, %edx		C -1 if high<divisor
+	addl	$MUL_UNNORM_THRESHOLD, %ebp C norm or unnorm thresh
+
+	addl	%edx, %ecx		C size-1 if high<divisor
+	jz	L(done_eax)
+
+	cmpl	%ebp, %ecx
+	movl	PARAM_DIVISOR, %ebp
+
+	movl	PARAM_SRC, %esi
+	jae	L(mul_by_inverse)
+
+	andl	%eax, %edx		C high as initial carry if high<divisor
+
+
+L(divide_top):
+	C eax	scratch (quotient)
+	C ebx
+	C ecx	counter, limbs, decrementing
+	C edx	scratch (remainder)
+	C esi	src
+	C edi
+	C ebp	divisor
+
+	movl	-4(%esi,%ecx,4), %eax
+
+	divl	%ebp
+
+	decl	%ecx
+	jnz	L(divide_top)
+
+
+	popl	%esi
+	popl	%ebp
+
+L(done_edx):
+	movl	%edx, %eax
+
+	ret
+
+
+L(done_zero):
+	xorl	%eax, %eax
+	popl	%ebp
+
+	ret
+
+
+C -----------------------------------------------------------------------------
+C
+C The divisor is normalized using the same code as the pentium
+C count_leading_zeros in longlong.h.  Going through the GOT for PIC costs a
+C couple of cycles, but is more or less unavoidable.
+
+
+	ALIGN(8)
+L(mul_by_inverse):
+	C eax	src high limb
+	C ebx
+	C ecx	size or size-1
+	C edx
+	C esi	src
+	C edi
+	C ebp	divisor
+
+	movl	PARAM_SIZE, %edx	C full size, used for the src pointer below
+	cmpl	$0x1000000, %ebp	C carry if d < 2^24
+
+L(mul_by_inverse_1c):
+	sbbl	%ecx, %ecx		C -1 if d < 2^24, else 0
+	cmpl	$0x10000, %ebp		C carry if d < 2^16
+
+	sbbl	$0, %ecx
+	cmpl	$0x100, %ebp		C carry if d < 2^8
+
+	sbbl	$0, %ecx		C ecx = 0,-1,-2,-3 by the size of d
+	pushl	%edi		FRAME_pushl()
+
+	pushl	%ebx		FRAME_pushl()
+	movl	%ebp, %ebx		C d
+
+ifdef(`PIC',`
+	call	L(here)
+L(here):
+	popl	%edi			C edi = address of the label above
+	leal	25(,%ecx,8), %ecx	C 0,-1,-2,-3 -> 25,17,9,1
+
+	shrl	%cl, %ebx		C d >> c, the table index
+	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(here)], %edi
+
+	C AGI
+	movl	__clz_tab@GOT(%edi), %edi	C table address from the GOT
+	addl	$-34, %ecx
+
+	C AGI
+	movb	(%ebx,%edi), %bl	C bl = __clz_tab[d>>c]
+
+',`
+	leal	25(,%ecx,8), %ecx	C 0,-1,-2,-3 -> 25,17,9,1
+
+	shrl	%cl, %ebx		C d >> c, the table index
+	addl	$-34, %ecx
+
+	C AGI
+	movb	__clz_tab(%ebx), %bl	C bl = __clz_tab[d>>c]
+')
+	movl	%eax, %edi		C carry -> n1
+
+	addl	%ebx, %ecx		C -34 + c + __clz_tab[d>>c] = -clz-1
+	leal	-8(%esi,%edx,4), %esi	C &src[size-2]
+
+	xorl	$-1, %ecx		C clz
+	movl	$-1, %edx		C b-1, the high limb before d is subtracted
+
+	ASSERT(e,`pushl	%eax		C clz calculation same as bsrl
+		bsrl	%ebp, %eax
+		xorl	$31, %eax
+		cmpl	%eax, %ecx
+		popl	%eax')
+
+	shll	%cl, %ebp		C d normalized
+	movl	%ecx, VAR_NORM		C shift count kept for the final denorm
+
+	subl	%ebp, %edx		C (b-d)-1, so edx:eax = b*(b-d)-1
+	movl	$-1, %eax
+
+	divl	%ebp			C floor (b*(b-d)-1) / d
+
+L(start_preinv):
+	movl	%eax, VAR_INVERSE	C m, the inverse used by the mull in the loop
+	movl	%ebp, %eax		C d
+
+	movl	%ecx, %edx		C fake high, will cancel
+
+
+C For mpn_mod_1 and mpn_preinv_mod_1, the initial carry in %edi is the src
+C high limb, and this may be greater than the divisor and may need one copy
+C of the divisor subtracted (only one, because the divisor is normalized).
+C This is accomplished by having the initial ecx:edi act as a fake previous
+C n2:n10.  The initial edx:eax is d, acting as a fake (q1+1)*d which is
+C subtracted from ecx:edi, with the usual addback if it produces an
+C underflow.
+
+
+L(inverse_top):
+	C eax	scratch (n10, n1, q1, etc)
+	C ebx	scratch (nadj, src limit)
+	C ecx	old n2
+	C edx	scratch
+	C esi	src pointer, &src[size-2] to &src[0]
+	C edi	old n10
+	C ebp	d
+
+	subl	%eax, %edi	   C low  n - (q1+1)*d
+	movl	(%esi), %eax	   C new n10
+
+	sbbl	%edx, %ecx	   C high n - (q1+1)*d, 0 or -1
+	movl	%ebp, %ebx	   C d
+
+	sarl	$31, %eax	   C -n1
+	andl	%ebp, %ecx	   C d if underflow
+
+	addl	%edi, %ecx	   C remainder -> n2, and possible addback
+	ASSERT(b,`cmpl %ebp, %ecx')
+	andl	%eax, %ebx	   C -n1 & d
+
+	movl	(%esi), %edi	   C n10
+	andl	$1, %eax	   C n1
+
+	addl	%ecx, %eax         C n2+n1
+	addl	%edi, %ebx         C nadj = n10 + (-n1 & d), ignoring overflow
+
+	mull	VAR_INVERSE        C m*(n2+n1)
+
+	addl	%eax, %ebx         C low(m*(n2+n1) + nadj), giving carry flag
+	leal	1(%ecx), %eax      C 1+n2
+
+	adcl	%edx, %eax         C 1 + high[n2<<32 + m*(n2+n1) + nadj] = q1+1
+	movl	PARAM_SRC, %ebx	   C src limit &src[0], movl keeps the carry for sbbl
+
+	sbbl	$0, %eax	   C use q1 if q1+1 overflows
+	subl	$4, %esi	   C step src ptr
+
+	mull	%ebp		   C (q1+1)*d
+
+	cmpl	%ebx, %esi	   C loop while esi is at or above &src[0]
+	jae	L(inverse_top)
+
+
+
+	C %edi (after subtract and addback) is the remainder modulo d*2^n
+	C and must be reduced to 0<=r<d by calculating r*2^n mod d*2^n and
+	C right shifting by n.
+	C
+	C If d was already normalized on entry so that n==0 then nothing is
+	C needed here.  This is always the case for preinv_mod_1.  For mod_1
+	C or mod_1c the chance of n==0 is low, but about 40 cycles can be
+	C saved.
+
+	subl	%eax, %edi	   C low  n - (q1+1)*d
+	movl	%ecx, %ebx	   C n2
+
+	sbbl	%edx, %ebx	   C high n - (q1+1)*d, 0 or -1
+	xorl	%esi, %esi	   C next n2
+
+	andl	%ebp, %ebx	   C d if underflow
+	movl	VAR_NORM, %ecx	   C n, the normalization shift count
+
+	addl	%ebx, %edi	   C remainder, with possible addback
+	orl	%ecx, %ecx	   C n==0 if d needed no normalizing
+
+	jz	L(done_mul_edi)	   C then edi is already the remainder
+
+
+	C Here using %esi=n2 and %edi=n10, unlike the above
+
+	shldl(	%cl, %edi, %esi)   C n2
+
+	shll	%cl, %edi	   C n10
+
+	movl	%edi, %eax	   C n10
+	movl	%edi, %ebx	   C n10
+
+	sarl	$31, %ebx          C -n1
+
+	shrl	$31, %eax          C n1
+	andl	%ebp, %ebx         C -n1 & d
+
+	addl	%esi, %eax	   C n2+n1
+	addl	%edi, %ebx         C nadj = n10 + (-n1 & d), ignoring overflow
+
+	mull	VAR_INVERSE        C m*(n2+n1)
+
+	addl	%eax, %ebx         C m*(n2+n1) + nadj, low giving carry flag
+	leal	1(%esi), %eax      C 1+n2
+
+	adcl	%edx, %eax         C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+	sbbl	$0, %eax	   C use q1 if q1+1 overflows
+
+	mull	%ebp		   C (q1+1)*d
+
+	subl	%eax, %edi	   C low  n - (q1+1)*d
+	popl	%ebx		   C register restores are interleaved from here
+
+	sbbl	%edx, %esi	   C high n - (q1+1)*d, 0 or -1
+	movl	%edi, %eax	   C low n - (q1+1)*d, before any addback
+
+	andl	%ebp, %esi	   C d if underflow
+	popl	%edi
+
+	addl	%esi, %eax	   C addback if underflow
+	popl	%esi
+
+	shrl	%cl, %eax	   C denorm remainder
+	popl	%ebp
+
+	ret
+
+
+L(done_mul_edi):
+	movl	%edi, %eax	   C remainder to return value
+	popl	%ebx
+
+	popl	%edi
+L(done_eax):
+	popl	%esi		   C mod_1 early exit joins here, only esi and ebp pushed
+
+	popl	%ebp
+
+	ret
+
+EPILOGUE()
diff --git a/gmp/mpn/x86/pentium/mod_34lsub1.asm b/gmp/mpn/x86/pentium/mod_34lsub1.asm
index 2d88223b84..201081a437 100644
--- a/gmp/mpn/x86/pentium/mod_34lsub1.asm
+++ b/gmp/mpn/x86/pentium/mod_34lsub1.asm
@@ -1,32 +1,21 @@
 dnl  Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
 
-dnl  Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/mode1o.asm b/gmp/mpn/x86/pentium/mode1o.asm
index eb2790e1a0..222f64e5cb 100644
--- a/gmp/mpn/x86/pentium/mode1o.asm
+++ b/gmp/mpn/x86/pentium/mode1o.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium mpn_modexact_1_odd -- exact division style remainder.
 
-dnl  Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -133,7 +122,7 @@ dnl non-PIC
 	subl	%eax, %ecx		C inv = 2*inv - inv*inv*d
 	pushl	%esi		FRAME_pushl()
 
-	ASSERT(e,`	C d*inv == 1 mod 2^GMP_LIMB_BITS
+	ASSERT(e,`	C d*inv == 1 mod 2^BITS_PER_MP_LIMB
 	movl	%ecx, %eax
 	imull	PARAM_DIVISOR, %eax
 	cmpl	$1, %eax')
diff --git a/gmp/mpn/x86/pentium/mul_1.asm b/gmp/mpn/x86/pentium/mul_1.asm
index a0858af2b4..c6b255c322 100644
--- a/gmp/mpn/x86/pentium/mul_1.asm
+++ b/gmp/mpn/x86/pentium/mul_1.asm
@@ -2,32 +2,21 @@ dnl  Intel Pentium mpn_mul_1 -- mpn by limb multiplication.
 
 dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
 dnl  Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/mul_2.asm b/gmp/mpn/x86/pentium/mul_2.asm
index 4c7beb5df2..36a025c425 100644
--- a/gmp/mpn/x86/pentium/mul_2.asm
+++ b/gmp/mpn/x86/pentium/mul_2.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium mpn_mul_2 -- mpn by 2-limb multiplication.
 
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/mul_basecase.asm b/gmp/mpn/x86/pentium/mul_basecase.asm
index 50e15d3567..fd24fdf7fa 100644
--- a/gmp/mpn/x86/pentium/mul_basecase.asm
+++ b/gmp/mpn/x86/pentium/mul_basecase.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.
 
-dnl  Copyright 1996, 1998-2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1996, 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/popcount.asm b/gmp/mpn/x86/pentium/popcount.asm
index b8d84ad2e2..df53bb8842 100644
--- a/gmp/mpn/x86/pentium/popcount.asm
+++ b/gmp/mpn/x86/pentium/popcount.asm
@@ -1,32 +1,21 @@
 dnl  Intel P5 mpn_popcount -- mpn bit population count.
 
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/rshift.asm b/gmp/mpn/x86/pentium/rshift.asm
index 2105c4c935..949b0d2e2f 100644
--- a/gmp/mpn/x86/pentium/rshift.asm
+++ b/gmp/mpn/x86/pentium/rshift.asm
@@ -1,32 +1,22 @@
 dnl  Intel Pentium mpn_rshift -- mpn right shift.
 
-dnl  Copyright 1992, 1994-1996, 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
+dnl  Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium/sqr_basecase.asm b/gmp/mpn/x86/pentium/sqr_basecase.asm
index b11d767da2..e4fca7c546 100644
--- a/gmp/mpn/x86/pentium/sqr_basecase.asm
+++ b/gmp/mpn/x86/pentium/sqr_basecase.asm
@@ -1,32 +1,21 @@
 dnl  Intel P5 mpn_sqr_basecase -- square an mpn number.
 
-dnl  Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium4/README b/gmp/mpn/x86/pentium4/README
index 90f752e5d5..8dc0479f04 100644
--- a/gmp/mpn/x86/pentium4/README
+++ b/gmp/mpn/x86/pentium4/README
@@ -3,28 +3,17 @@ Copyright 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/x86/pentium4/copyd.asm b/gmp/mpn/x86/pentium4/copyd.asm
index 82af81c522..491ad60128 100644
--- a/gmp/mpn/x86/pentium4/copyd.asm
+++ b/gmp/mpn/x86/pentium4/copyd.asm
@@ -1,32 +1,22 @@
 dnl  Pentium-4 mpn_copyd -- copy limb vector, decrementing.
-
-dnl  Copyright 1999-2001 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
+
+dnl  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  The std/rep/movsl/cld is very slow for small blocks on pentium4.  Its
diff --git a/gmp/mpn/x86/pentium4/copyi.asm b/gmp/mpn/x86/pentium4/copyi.asm
index b6148879fa..bf812c822b 100644
--- a/gmp/mpn/x86/pentium4/copyi.asm
+++ b/gmp/mpn/x86/pentium4/copyi.asm
@@ -1,32 +1,22 @@
 dnl  Pentium-4 mpn_copyi -- copy limb vector, incrementing.
-
-dnl  Copyright 1999-2001 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
+
+dnl  Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  The rep/movsl is very slow for small blocks on pentium4.  Its startup
diff --git a/gmp/mpn/x86/pentium4/mmx/lshift.asm b/gmp/mpn/x86/pentium4/mmx/lshift.asm
index b5eca66698..5d316d5da4 100644
--- a/gmp/mpn/x86/pentium4/mmx/lshift.asm
+++ b/gmp/mpn/x86/pentium4/mmx/lshift.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium-4 mpn_lshift -- left shift.
 
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium4/mmx/popham.asm b/gmp/mpn/x86/pentium4/mmx/popham.asm
index 9563cb57e4..2e79816821 100644
--- a/gmp/mpn/x86/pentium4/mmx/popham.asm
+++ b/gmp/mpn/x86/pentium4/mmx/popham.asm
@@ -1,33 +1,22 @@
 dnl  Intel Pentium 4 mpn_popcount, mpn_hamdist -- population count and
 dnl  hamming distance.
 
-dnl  Copyright 2000-2002, 2007 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002, 2007 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium4/mmx/rshift.asm b/gmp/mpn/x86/pentium4/mmx/rshift.asm
index 3ac0094a5a..a7dec54a3a 100644
--- a/gmp/mpn/x86/pentium4/mmx/rshift.asm
+++ b/gmp/mpn/x86/pentium4/mmx/rshift.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium-4 mpn_rshift -- right shift.
 
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium4/sse2/add_n.asm b/gmp/mpn/x86/pentium4/sse2/add_n.asm
index 8e2380e493..04c0c68d0e 100644
--- a/gmp/mpn/x86/pentium4/sse2/add_n.asm
+++ b/gmp/mpn/x86/pentium4/sse2/add_n.asm
@@ -1,44 +1,36 @@
 dnl  Intel Pentium-4 mpn_add_n -- mpn addition.
 
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C					cycles/limb
-C			     dst!=src1,2  dst==src1  dst==src2
-C P6 model 0-8,10-12		-
-C P6 model 9   (Banias)		?
-C P6 model 13  (Dothan)		?
-C P4 model 0-1 (Willamette)	?
-C P4 model 2   (Northwood)	4	     6		6
-C P4 model 3-4 (Prescott)	4.25	     7.5	7.5
+C P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2
+C			    6.0 cycles/limb if dst==src1 or dst==src2
+C P4 Prescott:		    >= 5 cycles/limb
+
+C mp_limb_t mpn_add_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                      mp_size_t size);
+C mp_limb_t mpn_add_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                       mp_size_t size, mp_limb_t carry);
+C
+C The 4 c/l achieved here isn't particularly good, but is better than 9 c/l
+C for a basic adc loop.
 
 defframe(PARAM_CARRY,20)
 defframe(PARAM_SIZE, 16)
@@ -54,25 +46,29 @@ define(SAVE_EBX,`PARAM_SRC1')
 
 PROLOGUE(mpn_add_nc)
 deflit(`FRAME',0)
+
 	movd	PARAM_CARRY, %mm0
 	jmp	L(start_nc)
+
 EPILOGUE()
 
 	ALIGN(8)
 PROLOGUE(mpn_add_n)
 deflit(`FRAME',0)
+
 	pxor	%mm0, %mm0
+
 L(start_nc):
-	mov	PARAM_SRC1, %eax
-	mov	%ebx, SAVE_EBX
-	mov	PARAM_SRC2, %ebx
-	mov	PARAM_DST, %edx
-	mov	PARAM_SIZE, %ecx
+	movl	PARAM_SRC1, %eax
+	movl	%ebx, SAVE_EBX
+	movl	PARAM_SRC2, %ebx
+	movl	PARAM_DST, %edx
+	movl	PARAM_SIZE, %ecx
 
-	lea	(%eax,%ecx,4), %eax	C src1 end
-	lea	(%ebx,%ecx,4), %ebx	C src2 end
-	lea	(%edx,%ecx,4), %edx	C dst end
-	neg	%ecx			C -size
+	leal	(%eax,%ecx,4), %eax	C src1 end
+	leal	(%ebx,%ecx,4), %ebx	C src2 end
+	leal	(%edx,%ecx,4), %edx	C dst end
+	negl	%ecx			C -size
 
 L(top):
 	C eax	src1 end
@@ -90,11 +86,12 @@ L(top):
 
 	psrlq	$32, %mm0
 
-	add	$1, %ecx
+	addl	$1, %ecx
 	jnz	L(top)
 
+
 	movd	%mm0, %eax
-	mov	SAVE_EBX, %ebx
+	movl	SAVE_EBX, %ebx
 	emms
 	ret
 
diff --git a/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm b/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
index 93b63b2018..46b0903c50 100644
--- a/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
+++ b/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
@@ -1,45 +1,33 @@
 dnl  Intel Pentium-4 mpn_addlsh1_n -- mpn x+2*y.
 
-dnl  Copyright 2001-2004, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
+dnl  Copyright 2001, 2002, 2003, 2004, 2006 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C					cycles/limb
-C			     dst!=src1,2  dst==src1  dst==src2
-C P6 model 0-8,10-12		-
-C P6 model 9   (Banias)		?
-C P6 model 13  (Dothan)		?
-C P4 model 0-1 (Willamette)	?
-C P4 model 2   (Northwood)	4.25	     6		6
-C P4 model 3-4 (Prescott)	5	     8.5	8.5
+C          cycles/limb (approx)
+C          dst!=src1,2  dst==src1  dst==src2
+C P4 m2:      4.5         ?7.25      ?6.75
+C P4 m3:      5.3         ?	     ?
 
+C mp_limb_t mpn_addlsh1_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C
 C The slightly strange combination of indexing and pointer incrementing
 C that's used seems to work best.  Not sure why, but %ecx,4 with src1 and/or
 C src2 is a slowdown.
@@ -63,18 +51,18 @@ define(SAVE_EBX,`PARAM_SRC1')
 PROLOGUE(mpn_addlsh1_n)
 deflit(`FRAME',0)
 
-	mov	PARAM_SRC1, %eax
-	mov	%ebx, SAVE_EBX
+	movl	PARAM_SRC1, %eax
+	movl	%ebx, SAVE_EBX
 
-	mov	PARAM_SRC2, %ebx
+	movl	PARAM_SRC2, %ebx
 	pxor	%mm0, %mm0		C initial carry
 
-	mov	PARAM_DST, %edx
+	movl	PARAM_DST, %edx
 
-	mov	PARAM_SIZE, %ecx
+	movl	PARAM_SIZE, %ecx
 
-	lea	(%edx,%ecx,4), %edx	C dst end
-	neg	%ecx			C -size
+	leal	(%edx,%ecx,4), %edx	C dst end
+	negl	%ecx			C -size
 
 L(top):
 	C eax	src1 end
@@ -83,24 +71,24 @@ L(top):
 	C edx	dst end
 	C mm0	carry
 
-	movd	(%ebx), %mm2
 	movd	(%eax), %mm1
+	movd	(%ebx), %mm2
 	psrlq	$32, %mm0
-	lea	4(%eax), %eax
-	lea	4(%ebx), %ebx
+	leal	4(%eax), %eax
+	leal	4(%ebx), %ebx
 
-	psllq	$1, %mm2
+	paddq	%mm2, %mm1
 	paddq	%mm2, %mm1
 
 	paddq	%mm1, %mm0
 
 	movd	%mm0, (%edx,%ecx,4)
-	add	$1, %ecx
+	addl	$1, %ecx
 	jnz	L(top)
 
 
 	psrlq	$32, %mm0
-	mov	SAVE_EBX, %ebx
+	movl	SAVE_EBX, %ebx
 	movd	%mm0, %eax
 	emms
 	ret
diff --git a/gmp/mpn/x86/pentium4/sse2/addmul_1.asm b/gmp/mpn/x86/pentium4/sse2/addmul_1.asm
index 78102072bf..3a8d0bb9bd 100644
--- a/gmp/mpn/x86/pentium4/sse2/addmul_1.asm
+++ b/gmp/mpn/x86/pentium4/sse2/addmul_1.asm
@@ -1,48 +1,37 @@
 dnl  mpn_addmul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
 
-dnl  Copyright 2005, 2007, 2011 Free Software Foundation, Inc.
-
+dnl  Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl
 dnl  This file is part of the GNU MP Library.
 dnl
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
 dnl
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
 
-C			    cycles/limb
-C P6 model 0-8,10-12		-
-C P6 model 9   (Banias)		5.24
-C P6 model 13  (Dothan)		5.24
-C P4 model 0-1 (Willamette)	5
-C P4 model 2   (Northwood)	5
-C P4 model 3-4 (Prescott)	5
-
 C TODO:
 C  * Tweak eax/edx offsets in loop as to save some lea's
 C  * Perhaps software pipeline small-case code
 
+C                           cycles/limb
+C P6 model 0-8,10-12            -
+C P6 model 9   (Banias)         ?
+C P6 model 13  (Dothan)         5.24
+C P4 model 0-1 (Willamette):    5
+C P4 model 2   (Northwood):     5
+C P4 model 3-4 (Prescott):      5
+
 C INPUT PARAMETERS
 C rp		sp + 4
 C up		sp + 8
@@ -51,13 +40,22 @@ C v0		sp + 16
 
 	TEXT
 	ALIGN(16)
+PROLOGUE(mpn_addmul_1c)
+	mov	4(%esp), %edx
+	mov	8(%esp), %eax
+	mov	12(%esp), %ecx
+	movd	16(%esp), %mm7
+	movd	20(%esp), %mm6
+	jmp	L(ent)
+EPILOGUE()
+	ALIGN(16)
 PROLOGUE(mpn_addmul_1)
-	pxor	%mm6, %mm6
-L(ent):	mov	4(%esp), %edx
+	mov	4(%esp), %edx
 	mov	8(%esp), %eax
 	mov	12(%esp), %ecx
 	movd	16(%esp), %mm7
-	cmp	$4, %ecx
+	pxor	%mm6, %mm6
+L(ent):	cmp	$4, %ecx
 	jnc	L(big)
 
 L(lp0):	movd	(%eax), %mm0
@@ -183,7 +181,3 @@ L(end):	pmuludq	%mm7, %mm2
 	emms
 	ret
 EPILOGUE()
-PROLOGUE(mpn_addmul_1c)
-	movd	20(%esp), %mm6
-	jmp	L(ent)
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm b/gmp/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm
deleted file mode 100644
index 354300e4de..0000000000
--- a/gmp/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm
+++ /dev/null
@@ -1,141 +0,0 @@
-dnl  Intel Atom  mpn_bdiv_dbm1.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C			    cycles/limb
-C			    cycles/limb
-C P5				 -
-C P6 model 0-8,10-12		 -
-C P6 model 9  (Banias)		 9.75
-C P6 model 13 (Dothan)
-C P4 model 0  (Willamette)
-C P4 model 1  (?)
-C P4 model 2  (Northwood)	 8.25
-C P4 model 3  (Prescott)
-C P4 model 4  (Nocona)
-C Intel Atom			 8
-C AMD K6			 -
-C AMD K7			 -
-C AMD K8
-C AMD K10
-
-C TODO: This code was optimised for atom-32, consider moving it back to atom
-C	dir(atom currently grabs this code), and write a 4-way version(7c/l).
-
-defframe(PARAM_CARRY,20)
-defframe(PARAM_MUL,  16)
-defframe(PARAM_SIZE, 12)
-defframe(PARAM_SRC,  8)
-defframe(PARAM_DST,  4)
-
-dnl  re-use parameter space
-define(SAVE_RP,`PARAM_MUL')
-define(SAVE_UP,`PARAM_SIZE')
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`n',  `%ecx')
-define(`reg', `%edx')
-define(`cy', `%eax')	C contains the return value
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-deflit(`FRAME',0)
-
-PROLOGUE(mpn_bdiv_dbm1c)
-	mov	PARAM_SIZE, n		C size
-	mov	up, SAVE_UP
-	mov	PARAM_SRC, up
-	movd	PARAM_MUL, %mm7
-	mov	rp, SAVE_RP
-	mov	PARAM_DST, rp
-
-	movd	(up), %mm0
-	pmuludq	%mm7, %mm0
-	shr	n
-	mov	PARAM_CARRY, cy
-	jz	L(eq1)
-
-	movd	4(up), %mm1
-	jc	L(odd)
-
-	lea	4(up), up
-	pmuludq	%mm7, %mm1
-	movd	%mm0, reg
-	psrlq	$32, %mm0
-	sub	reg, cy
-	movd	%mm0, reg
-	movq	%mm1, %mm0
-	dec	n
-	mov	cy, (rp)
-	lea	4(rp), rp
-	jz	L(end)
-
-C	ALIGN(16)
-L(top):	movd	4(up), %mm1
-	sbb	reg, cy
-L(odd):	movd	%mm0, reg
-	psrlq	$32, %mm0
-	pmuludq	%mm7, %mm1
-	sub	reg, cy
-	lea	8(up), up
-	movd	%mm0, reg
-	movd	(up), %mm0
-	mov	cy, (rp)
-	sbb	reg, cy
-	movd	%mm1, reg
-	psrlq	$32, %mm1
-	sub	reg, cy
-	movd	%mm1, reg
-	pmuludq	%mm7, %mm0
-	dec	n
-	mov	cy, 4(rp)
-	lea	8(rp), rp
-	jnz	L(top)
-
-L(end):	sbb	reg, cy
-
-L(eq1):	movd	%mm0, reg
-	psrlq	$32, %mm0
-	mov	SAVE_UP, up
-	sub	reg, cy
-	movd	%mm0, reg
-	emms
-	mov	cy, (rp)
-	sbb	reg, cy
-
-	mov	SAVE_RP, rp
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/pentium4/sse2/bdiv_q_1.asm b/gmp/mpn/x86/pentium4/sse2/bdiv_q_1.asm
deleted file mode 100644
index f7f461d56f..0000000000
--- a/gmp/mpn/x86/pentium4/sse2/bdiv_q_1.asm
+++ /dev/null
@@ -1,233 +0,0 @@
-dnl  Intel Pentium-4 mpn_divexact_1 -- mpn by limb exact division.
-
-dnl  Rearranged from mpn/x86/pentium4/sse2/dive_1.asm by Marco Bodrato.
-
-dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C P4: 19.0 cycles/limb
-
-C Pairs of movd's are used to avoid unaligned loads.  Despite the loads not
-C being on the dependent chain and there being plenty of cycles available,
-C using an unaligned movq on every second iteration measured about 23 c/l.
-C
-
-defframe(PARAM_SHIFT,  24)
-defframe(PARAM_INVERSE,20)
-defframe(PARAM_DIVISOR,16)
-defframe(PARAM_SIZE,   12)
-defframe(PARAM_SRC,    8)
-defframe(PARAM_DST,    4)
-
-	TEXT
-
-C mp_limb_t
-C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
-C		    mp_limb_t inverse, int shift)
-	ALIGN(32)
-PROLOGUE(mpn_pi1_bdiv_q_1)
-deflit(`FRAME',0)
-
-	movl	PARAM_SIZE, %edx
-
-	movl	PARAM_SRC, %eax
-
-	movl	PARAM_DIVISOR, %ecx
-
-	movd	%ecx, %mm6
-	movl	PARAM_SHIFT, %ecx
-
-	movd	%ecx, %mm7		C shift
-
-	C
-
-	movl	PARAM_INVERSE, %ecx
-	movd	%ecx, %mm5		C inv
-
-	movl	PARAM_DST, %ecx
-	pxor	%mm1, %mm1		C initial carry limb
-	pxor	%mm0, %mm0		C initial carry bit
-
-	subl	$1, %edx
-	jz	L(done)
-
-	pcmpeqd	%mm4, %mm4
-	psrlq	$32, %mm4		C 0x00000000FFFFFFFF
-
-C The dependent chain here is as follows.
-C
-C					latency
-C	psubq	 s = (src-cbit) - climb	   2
-C	pmuludq	 q = s*inverse		   8
-C	pmuludq	 prod = q*divisor	   8
-C	psrlq	 climb = high(prod)	   2
-C					  --
-C					  20
-C
-C Yet the loop measures 19.0 c/l, so obviously there's something gained
-C there over a straight reading of the chip documentation.
-
-L(top):
-	C eax	src, incrementing
-	C ebx
-	C ecx	dst, incrementing
-	C edx	counter, size-1 iterations
-	C
-	C mm0	carry bit
-	C mm1	carry limb
-	C mm4	0x00000000FFFFFFFF
-	C mm5	inverse
-	C mm6	divisor
-	C mm7	shift
-
-	movd	(%eax), %mm2
-	movd	4(%eax), %mm3
-	addl	$4, %eax
-	punpckldq %mm3, %mm2
-
-	psrlq	%mm7, %mm2
-	pand	%mm4, %mm2		C src
-	psubq	%mm0, %mm2		C src - cbit
-
-	psubq	%mm1, %mm2		C src - cbit - climb
-	movq	%mm2, %mm0
-	psrlq	$63, %mm0		C new cbit
-
-	pmuludq	%mm5, %mm2		C s*inverse
-	movd	%mm2, (%ecx)		C q
-	addl	$4, %ecx
-
-	movq	%mm6, %mm1
-	pmuludq	%mm2, %mm1		C q*divisor
-	psrlq	$32, %mm1		C new climb
-
-L(entry):
-	subl	$1, %edx
-	jnz	L(top)
-
-L(done):
-	movd	(%eax), %mm2
-	psrlq	%mm7, %mm2		C src
-	psubq	%mm0, %mm2		C src - cbit
-
-	psubq	%mm1, %mm2		C src - cbit - climb
-
-	pmuludq	%mm5, %mm2		C s*inverse
-	movd	%mm2, (%ecx)		C q
-
-	emms
-	ret
-
-EPILOGUE()
-
-	ALIGN(16)
-C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C                           mp_limb_t divisor);
-C
-PROLOGUE(mpn_bdiv_q_1)
-deflit(`FRAME',0)
-
-	movl	PARAM_SIZE, %edx
-
-	movl	PARAM_DIVISOR, %ecx
-
-	C eax	src
-	C ebx
-	C ecx	divisor
-	C edx	size-1
-
-	movl	%ecx, %eax
-	bsfl	%ecx, %ecx		C trailing twos
-
-	shrl	%cl, %eax		C d = divisor without twos
-	movd	%eax, %mm6
-	movd	%ecx, %mm7		C shift
-
-	shrl	%eax			C d/2
-
-	andl	$127, %eax		C d/2, 7 bits
-
-ifdef(`PIC',`
-	LEA(	binvert_limb_table, %ecx)
-	movzbl	(%eax,%ecx), %eax		C inv 8 bits
-',`
-	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
-')
-
-	C
-
-	movd	%eax, %mm5		C inv
-
-	movd	%eax, %mm0		C inv
-
-	pmuludq	%mm5, %mm5		C inv*inv
-
-	C
-
-	pmuludq	%mm6, %mm5		C inv*inv*d
-	paddd	%mm0, %mm0		C 2*inv
-
-	C
-
-	psubd	%mm5, %mm0		C inv = 2*inv - inv*inv*d
-	pxor	%mm5, %mm5
-
-	paddd	%mm0, %mm5
-	pmuludq	%mm0, %mm0		C inv*inv
-
-	pcmpeqd	%mm4, %mm4
-	psrlq	$32, %mm4		C 0x00000000FFFFFFFF
-
-	C
-
-	pmuludq	%mm6, %mm0		C inv*inv*d
-	paddd	%mm5, %mm5		C 2*inv
-
-	movl	PARAM_SRC, %eax
-	movl	PARAM_DST, %ecx
-	pxor	%mm1, %mm1		C initial carry limb
-
-	C
-
-	psubd	%mm0, %mm5		C inv = 2*inv - inv*inv*d
-
-	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
-	pushl	%eax	FRAME_pushl()
-	movq	%mm6, %mm0
-	pmuludq	%mm5, %mm0
-	movd	%mm0, %eax
-	cmpl	$1, %eax
-	popl	%eax	FRAME_popl()')
-
-	pxor	%mm0, %mm0		C initial carry bit
-	jmp	L(entry)
-
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/cnd_add_n.asm b/gmp/mpn/x86/pentium4/sse2/cnd_add_n.asm
deleted file mode 100644
index b3f3474e67..0000000000
--- a/gmp/mpn/x86/pentium4/sse2/cnd_add_n.asm
+++ /dev/null
@@ -1,95 +0,0 @@
-dnl  Intel Pentium-4 mpn_cnd_add_n -- mpn addition.
-
-dnl  Copyright 2001, 2002, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C			    cycles/limb
-C P6 model 0-8,10-12		 -
-C P6 model 9   (Banias)		 ?
-C P6 model 13  (Dothan)		 4.67
-C P4 model 0-1 (Willamette)	 ?
-C P4 model 2   (Northwood)	 5
-C P4 model 3-4 (Prescott)	 5.25
-
-defframe(PARAM_SIZE, 20)
-defframe(PARAM_SRC2, 16)
-defframe(PARAM_SRC1, 12)
-defframe(PARAM_DST,  8)
-defframe(PARAM_CND,  4)
-
-dnl  re-use parameter space
-define(SAVE_EBX,`PARAM_SRC1')
-
-define(`cnd', `%mm3')
-
-	TEXT
-	ALIGN(8)
-
-	ALIGN(8)
-PROLOGUE(mpn_cnd_add_n)
-deflit(`FRAME',0)
-	pxor	%mm0, %mm0
-
-	mov	PARAM_CND, %eax
-	neg	%eax
-	sbb	%eax, %eax
-	movd	%eax, cnd
-
-	mov	PARAM_SRC1, %eax
-	mov	%ebx, SAVE_EBX
-	mov	PARAM_SRC2, %ebx
-	mov	PARAM_DST, %edx
-	mov	PARAM_SIZE, %ecx
-
-	lea	(%eax,%ecx,4), %eax	C src1 end
-	lea	(%ebx,%ecx,4), %ebx	C src2 end
-	lea	(%edx,%ecx,4), %edx	C dst end
-	neg	%ecx			C -size
-
-L(top):	movd	(%ebx,%ecx,4), %mm2
-	movd	(%eax,%ecx,4), %mm1
-	pand	cnd, %mm2
-	paddq	%mm2, %mm1
-
-	paddq	%mm1, %mm0
-	movd	%mm0, (%edx,%ecx,4)
-
-	psrlq	$32, %mm0
-
-	add	$1, %ecx
-	jnz	L(top)
-
-	movd	%mm0, %eax
-	mov	SAVE_EBX, %ebx
-	emms
-	ret
-
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/cnd_sub_n.asm b/gmp/mpn/x86/pentium4/sse2/cnd_sub_n.asm
deleted file mode 100644
index 339a23e0b6..0000000000
--- a/gmp/mpn/x86/pentium4/sse2/cnd_sub_n.asm
+++ /dev/null
@@ -1,114 +0,0 @@
-dnl  Intel Pentium-4 mpn_cnd_sub_n -- mpn subtraction.
-
-dnl  Copyright 2001, 2002, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C			    cycles/limb
-C P6 model 0-8,10-12		 -
-C P6 model 9   (Banias)		 ?
-C P6 model 13  (Dothan)		 4.67
-C P4 model 0-1 (Willamette)	 ?
-C P4 model 2   (Northwood)	 5
-C P4 model 3-4 (Prescott)	 5.25
-
-defframe(PARAM_SIZE, 20)
-defframe(PARAM_SRC2, 16)
-defframe(PARAM_SRC1, 12)
-defframe(PARAM_DST,  8)
-defframe(PARAM_CND,  4)
-
-dnl  re-use parameter space
-define(SAVE_EBX,`PARAM_SRC1')
-
-define(`cnd', `%mm3')
-
-	TEXT
-	ALIGN(8)
-
-	ALIGN(8)
-PROLOGUE(mpn_cnd_sub_n)
-deflit(`FRAME',0)
-	pxor	%mm0, %mm0
-
-	mov	PARAM_CND, %eax
-	neg	%eax
-	sbb	%eax, %eax
-	movd	%eax, cnd
-
-	mov	PARAM_SRC1, %eax
-	mov	%ebx, SAVE_EBX
-	mov	PARAM_SRC2, %ebx
-	mov	PARAM_DST, %edx
-	mov	PARAM_SIZE, %ecx
-
-	lea	(%eax,%ecx,4), %eax	C src1 end
-	lea	(%ebx,%ecx,4), %ebx	C src2 end
-	lea	(%edx,%ecx,4), %edx	C dst end
-	neg	%ecx			C -size
-
-L(top):	movd	(%ebx,%ecx,4), %mm2
-	movd	(%eax,%ecx,4), %mm1
-	pand	cnd, %mm2
-	psubq	%mm2, %mm1
-
-	psubq	%mm0, %mm1
-	movd	%mm1, (%edx,%ecx,4)
-
-	psrlq	$63, %mm1
-
-	add	$1, %ecx
-	jz	L(done_mm1)
-
-	movd	(%ebx,%ecx,4), %mm2
-	movd	(%eax,%ecx,4), %mm0
-	pand	cnd, %mm2
-	psubq	%mm2, %mm0
-
-	psubq	%mm1, %mm0
-	movd	%mm0, (%edx,%ecx,4)
-
-	psrlq	$63, %mm0
-
-	add	$1, %ecx
-	jnz	L(top)
-
-	movd	%mm0, %eax
-	mov	SAVE_EBX, %ebx
-	emms
-	ret
-
-L(done_mm1):
-	movd	%mm1, %eax
-	mov	SAVE_EBX, %ebx
-	emms
-	ret
-
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/dive_1.asm b/gmp/mpn/x86/pentium4/sse2/dive_1.asm
index 238f0dd8a5..c50ef7d29e 100644
--- a/gmp/mpn/x86/pentium4/sse2/dive_1.asm
+++ b/gmp/mpn/x86/pentium4/sse2/dive_1.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium-4 mpn_divexact_1 -- mpn by limb exact division.
 
 dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -137,7 +126,7 @@ ifdef(`PIC',`
 
 	psubd	%mm0, %mm5		C inv = 2*inv - inv*inv*d
 
-	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+	ASSERT(e,`	C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
 	pushl	%eax	FRAME_pushl()
 	movq	%mm6, %mm0
 	pmuludq	%mm5, %mm0
@@ -150,13 +139,13 @@ ifdef(`PIC',`
 
 C The dependent chain here is as follows.
 C
-C					latency
-C	psubq	 s = (src-cbit) - climb	   2
-C	pmuludq	 q = s*inverse		   8
-C	pmuludq	 prod = q*divisor	   8
-C	psrlq	 climb = high(prod)	   2
-C					  --
-C					  20
+C				        latency
+C	psubq	 s = (src-cbit) - climb    2
+C	pmuludq	 q = s*inverse             8
+C	pmuludq	 prod = q*divisor          8
+C	psrlq	 climb = high(prod)        2
+C	                                  --
+C	                                  20
 C
 C Yet the loop measures 19.0 c/l, so obviously there's something gained
 C there over a straight reading of the chip documentation.
diff --git a/gmp/mpn/x86/pentium4/sse2/divrem_1.asm b/gmp/mpn/x86/pentium4/sse2/divrem_1.asm
index 0146fab117..7f973dbf98 100644
--- a/gmp/mpn/x86/pentium4/sse2/divrem_1.asm
+++ b/gmp/mpn/x86/pentium4/sse2/divrem_1.asm
@@ -1,32 +1,22 @@
 dnl  Intel Pentium-4 mpn_divrem_1 -- mpn by limb division.
 
-dnl  Copyright 1999-2004 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation,
+dnl  Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium4/sse2/gmp-mparam.h b/gmp/mpn/x86/pentium4/sse2/gmp-mparam.h
index a94ae868b3..5071aae092 100644
--- a/gmp/mpn/x86/pentium4/sse2/gmp-mparam.h
+++ b/gmp/mpn/x86/pentium4/sse2/gmp-mparam.h
@@ -1,206 +1,68 @@
 /* Intel Pentium-4 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 2000-2005, 2007-2010, 2014 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008,
+2009 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
 
-or both in parallel, as here.
 
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2600 MHz P4 Northwood */
-/* FFT tuning limit = 12500000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.2 */
-
-#define MOD_1_NORM_THRESHOLD                24
-#define MOD_1_UNNORM_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          6
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        13
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      2
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1N_PI1_METHOD                 2
-#define DIV_QR_1_NORM_THRESHOLD             19
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           20
-
-#define MUL_TOOM22_THRESHOLD                29
-#define MUL_TOOM33_THRESHOLD               113
-#define MUL_TOOM44_THRESHOLD               288
-#define MUL_TOOM6H_THRESHOLD               454
-#define MUL_TOOM8H_THRESHOLD               592
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     118
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     214
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     193
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     186
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     287
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 44
-#define SQR_TOOM3_THRESHOLD                173
-#define SQR_TOOM4_THRESHOLD                390
-#define SQR_TOOM6_THRESHOLD                  0  /* always */
-#define SQR_TOOM8_THRESHOLD                915
-
-#define MULMID_TOOM42_THRESHOLD             66
-
-#define MULMOD_BNM1_THRESHOLD               19
-#define SQRMOD_BNM1_THRESHOLD               23
-
-#define MUL_FFT_MODF_THRESHOLD            1147  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {   1147, 5}, {     36, 6}, {     19, 5}, {     39, 6}, \
-    {     27, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
-    {     35, 7}, {     19, 6}, {     40, 7}, {     21, 6}, \
-    {     43, 7}, {     23, 6}, {     49, 7}, {     27, 6}, \
-    {     55, 7}, {     31, 6}, {     63, 7}, {     35, 8}, \
-    {     19, 7}, {     43, 8}, {     23, 7}, {     51, 8}, \
-    {     27, 7}, {     55, 8}, {     31, 7}, {     63, 8}, \
-    {     39, 7}, {     79, 8}, {     43, 9}, {     23, 8}, \
-    {     55, 9}, {     31, 8}, {     71, 9}, {     39, 8}, \
-    {     79, 9}, {     47, 8}, {     95, 9}, {     55,10}, \
-    {     31, 9}, {     63, 8}, {    127, 9}, {     79,10}, \
-    {     47, 9}, {    111,11}, {     31,10}, {     63, 9}, \
-    {    143,10}, {     79, 9}, {    159,10}, {    111,11}, \
-    {     63,10}, {    127, 9}, {    255,10}, {    159, 9}, \
-    {    319,11}, {     95,10}, {    207,12}, {     63,11}, \
-    {    127,10}, {    287,11}, {    159,10}, {    335,11}, \
-    {    191,10}, {    383,11}, {    223,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    319,10}, {    671,11}, \
-    {    351,12}, {    191,11}, {    383,10}, {    799,13}, \
-    {    127,12}, {    255,11}, {    511,10}, {   1055, 9}, \
-    {   2111,10}, {   1119, 9}, {   2239,11}, {    607,12}, \
-    {    319,11}, {    671,10}, {   1407,11}, {    735,10}, \
-    {   1471, 9}, {   2943,12}, {    383,11}, {    799,10}, \
-    {   1599,11}, {    863,10}, {   1727, 9}, {   3455,12}, \
-    {    447,11}, {    895,13}, {    255,12}, {    511,11}, \
-    {   1055,10}, {   2111,11}, {   1119,10}, {   2239, 9}, \
-    {   4479,12}, {    575,11}, {   1247,10}, {   2495, 9}, \
-    {   4991,12}, {    639,11}, {   1471,10}, {   2943,13}, \
-    {    383,12}, {    767,11}, {   1599,12}, {    831,11}, \
-    {   1727,10}, {   3455,12}, {    895,14}, {    255,13}, \
-    {    511,12}, {   1023,11}, {   2047,12}, {   1087,11}, \
-    {   2239,10}, {   4479,12}, {   1215,11}, {   2495,10}, \
-    {   4991,13}, {    639,12}, {   1471,11}, {   2943,10}, \
-    {   5887,11}, {   3007,13}, {    767,12}, {   1727,11}, \
-    {   3455,13}, {    895,12}, {   1791,11}, {   3711,12}, \
-    {   1983,11}, {   3967,10}, {   7935,14}, {    511,13}, \
-    {   1023,12}, {   2239,11}, {   4479,13}, {   1151,12}, \
-    {   2495,11}, {   4991,13}, {   1279,12}, {   2623,13}, \
-    {   1407,12}, {   2943,11}, {   5887,12}, {   3007,14}, \
-    {  16384,15}, {  32768,16} }
-#define MUL_FFT_TABLE3_SIZE 158
-#define MUL_FFT_THRESHOLD                 7808
-
-#define SQR_FFT_MODF_THRESHOLD             896  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    896, 5}, {     28, 6}, {     15, 5}, {     33, 6}, \
-    {     17, 5}, {     35, 6}, {     19, 5}, {     39, 6}, \
-    {     27, 7}, {     15, 6}, {     33, 7}, {     17, 6}, \
-    {     36, 7}, {     19, 6}, {     39, 7}, {     23, 6}, \
-    {     47, 7}, {     27, 6}, {     55, 7}, {     31, 6}, \
-    {     63, 7}, {     37, 8}, {     19, 7}, {     43, 8}, \
-    {     23, 7}, {     51, 8}, {     27, 7}, {     55, 8}, \
-    {     31, 7}, {     63, 8}, {     39, 7}, {     79, 8}, \
-    {     43, 9}, {     23, 8}, {     55, 9}, {     31, 8}, \
-    {     71, 9}, {     39, 8}, {     79, 9}, {     47, 8}, \
-    {     95, 9}, {     55,10}, {     31, 9}, {     79,10}, \
-    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
-    {    127,10}, {     79, 9}, {    159,10}, {     95, 9}, \
-    {    191,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    159,11}, {     95,10}, {    191,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
-    {    543,11}, {    159,10}, {    319, 9}, {    639,11}, \
-    {    191,10}, {    383, 9}, {    767,11}, {    223,12}, \
-    {    127,11}, {    255,10}, {    511, 9}, {   1055,10}, \
-    {    543,11}, {    287,10}, {    607,11}, {    319,12}, \
-    {    191,11}, {    383,10}, {    767,13}, {    127,12}, \
-    {    255,11}, {    511,10}, {   1055,11}, {    543,10}, \
-    {   1119, 9}, {   2239,11}, {    607,12}, {    319,11}, \
-    {    671,10}, {   1407,11}, {    735,10}, {   1471, 9}, \
-    {   2943,12}, {    383,11}, {    799,10}, {   1599,11}, \
-    {    863,10}, {   1727,12}, {    447,11}, {    991,13}, \
-    {    255,12}, {    511,11}, {   1055,10}, {   2111,11}, \
-    {   1119,10}, {   2239,12}, {    575,11}, {   1247,10}, \
-    {   2495,12}, {    639,11}, {   1471,10}, {   2943,13}, \
-    {    383,12}, {    767,11}, {   1599,12}, {    831,11}, \
-    {   1727,10}, {   3455,12}, {    959,14}, {    255,13}, \
-    {    511,12}, {   1023,11}, {   2111,12}, {   1087,11}, \
-    {   2239,10}, {   4479,12}, {   1215,11}, {   2495,13}, \
-    {    639,12}, {   1471,11}, {   2943,10}, {   5887,13}, \
-    {    767,12}, {   1727,11}, {   3455,13}, {    895,12}, \
-    {   1791,11}, {   3711,12}, {   1983,11}, {   3967,10}, \
-    {   7935,14}, {    511,13}, {   1023,12}, {   2239,11}, \
-    {   4479,13}, {   1151,12}, {   2495,11}, {   4991,13}, \
-    {   1279,12}, {   2623,13}, {   1407,12}, {   2943,11}, \
-    {   5887,14}, {  16384,15}, {  32768,16} }
-#define SQR_FFT_TABLE3_SIZE 159
-#define SQR_FFT_THRESHOLD                 7296
-
-#define MULLO_BASECASE_THRESHOLD            12
-#define MULLO_DC_THRESHOLD                  55
-#define MULLO_MUL_N_THRESHOLD            14709
-
-#define DC_DIV_QR_THRESHOLD                 38
-#define DC_DIVAPPR_Q_THRESHOLD              77
-#define DC_BDIV_QR_THRESHOLD                51
-#define DC_BDIV_Q_THRESHOLD                 85
-
-#define INV_MULMOD_BNM1_THRESHOLD           56
-#define INV_NEWTON_THRESHOLD               121
-#define INV_APPR_THRESHOLD                  93
-
-#define BINV_NEWTON_THRESHOLD              366
-#define REDC_1_TO_REDC_N_THRESHOLD          64
-
-#define MU_DIV_QR_THRESHOLD               2350
-#define MU_DIVAPPR_Q_THRESHOLD            2172
-#define MUPI_DIV_QR_THRESHOLD               62
-#define MU_BDIV_QR_THRESHOLD              2172
-#define MU_BDIV_Q_THRESHOLD               2304
-
-#define POWM_SEC_TABLE  1,19,102,615,2111
-
-#define MATRIX22_STRASSEN_THRESHOLD         23
-#define HGCD_THRESHOLD                      88
-#define HGCD_APPR_THRESHOLD                 93
-#define HGCD_REDUCE_THRESHOLD             5010
-#define GCD_DC_THRESHOLD                   379
-#define GCDEXT_DC_THRESHOLD                258
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        26
-#define SET_STR_DC_THRESHOLD               147
-#define SET_STR_PRECOMPUTE_THRESHOLD       894
-
-#define FAC_DSC_THRESHOLD                  906
-#define FAC_ODD_THRESHOLD                   28
+/* 2600 MHz Pentium 4 model 2 */
+
+/* Generated by tuneup.c, 2009-01-06, gcc 3.4 */
+
+#define MUL_KARATSUBA_THRESHOLD          31
+#define MUL_TOOM3_THRESHOLD             119
+#define MUL_TOOM44_THRESHOLD            178
+
+#define SQR_BASECASE_THRESHOLD            0  /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD          49
+#define SQR_TOOM3_THRESHOLD             165
+#define SQR_TOOM4_THRESHOLD             252
+
+#define MULLOW_BASECASE_THRESHOLD        15
+#define MULLOW_DC_THRESHOLD              44
+#define MULLOW_MUL_N_THRESHOLD          363
+
+#define DIV_SB_PREINV_THRESHOLD       MP_SIZE_T_MAX  /* never */
+#define DIV_DC_THRESHOLD                 33
+#define POWM_THRESHOLD                   95
+
+#define MATRIX22_STRASSEN_THRESHOLD      23
+#define HGCD_THRESHOLD                   64
+#define GCD_DC_THRESHOLD                310
+#define GCDEXT_DC_THRESHOLD             310
+#define JACOBI_BASE_METHOD                1
+
+#define USE_PREINV_DIVREM_1               1  /* native */
+#define USE_PREINV_MOD_1                  1  /* native */
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             11
+#define GET_STR_PRECOMPUTE_THRESHOLD     26
+#define SET_STR_DC_THRESHOLD            118
+#define SET_STR_PRECOMPUTE_THRESHOLD   1078
+
+#define MUL_FFT_TABLE  { 560, 928, 1920, 5632, 14336, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD          720
+#define MUL_FFT_THRESHOLD              9216
+
+#define SQR_FFT_TABLE  { 592, 928, 1920, 4608, 14336, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD          608
+#define SQR_FFT_THRESHOLD              5888
diff --git a/gmp/mpn/x86/pentium4/sse2/mod_1.asm b/gmp/mpn/x86/pentium4/sse2/mod_1.asm
new file mode 100644
index 0000000000..0e95f13913
--- /dev/null
+++ b/gmp/mpn/x86/pentium4/sse2/mod_1.asm
@@ -0,0 +1,391 @@
+dnl  Intel Pentium-4 mpn_mod_1 -- mpn by limb remainder.
+
+dnl  Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+dnl  P4: 31 cycles/limb.
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                       mp_limb_t carry);
+C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C                             mp_limb_t inverse);
+C
+C An idea was tried in the mul-by-inverse to process the last limb by a jump
+C back to the top of the loop skipping the -4(%esi) fetch.  But that seemed
+C to produce slightly strange timings, like 9 and 10 limb operations about
+C the same speed.  The jump would be successively taken and not-taken, which
+C in theory should predict ok, but perhaps isn't enjoyed by the chip.
+C Duplicating the loop for the last limb seems to be a couple of cycles
+C quicker too.
+C
+C Enhancements:
+C
+C The loop measures 31 cycles, but the dependent chain would suggest it
+C could be done with 30.  Not sure where to start looking for the extra
+C cycle.
+
+
+dnl  MUL_THRESHOLD is the size at which the multiply by inverse method is
+dnl  used, rather than plain "divl"s.  Minimum value 2.
+dnl
+dnl  The inverse takes about 80-90 cycles to calculate, but after that the
+dnl  multiply is 31 c/l versus division at about 58 c/l.
+
+deflit(MUL_THRESHOLD, 5)
+
+
+defframe(PARAM_INVERSE,16)  dnl mpn_preinv_mod_1
+defframe(PARAM_CARRY,  16)  dnl mpn_mod_1c
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE,    8)
+defframe(PARAM_SRC,     4)
+
+dnl  re-use parameter space
+define(SAVE_ESI,`PARAM_SIZE')
+define(SAVE_EBP,`PARAM_SRC')
+
+	TEXT
+
+	ALIGN(16)
+PROLOGUE(mpn_preinv_mod_1)
+deflit(`FRAME',0)
+	C {src,size} mod divisor using the caller-supplied inverse, no shift (l = 0).
+	movl	PARAM_SIZE, %ecx		C size, read before SAVE_ESI reuses its slot
+	movl	%esi, SAVE_ESI			C SAVE_ESI is the PARAM_SIZE slot
+	movl	$32, %eax
+
+	movd	%eax, %mm6			C l = 0, so 32-l = 32
+	movl	PARAM_SRC, %esi			C src, read before SAVE_EBP reuses its slot
+	movl	%ebp, SAVE_EBP			C SAVE_EBP is the PARAM_SRC slot
+
+	movd	PARAM_DIVISOR, %mm5		C d
+	pxor	%mm7, %mm7			C l = 0
+
+	movd	-4(%esi,%ecx,4), %mm0		C src high limb
+	leal	-8(%esi,%ecx,4), %esi		C &src[size-2]
+
+	movd	PARAM_INVERSE, %mm4		C m
+	subl	$2, %ecx			C size-2
+
+	psubq	%mm5, %mm0			C high-divisor
+	movq	%mm0, %mm2			C keep the 64-bit difference
+
+	psrlq	$32, %mm0			C -1 if underflow
+
+	pand	%mm5, %mm0			C divisor if underflow
+
+	paddq	%mm2, %mm0			C addback if underflow
+	jz	L(inverse_last)			C if size==2 (flags are from subl)
+	ja	L(inverse_top)			C if size>2
+
+
+	C if size==1 (ebp was saved but never changed, so only esi is restored)
+	movl	SAVE_ESI, %esi
+	movd	%mm0, %eax			C return the reduced high limb
+	emms
+	ret
+
+EPILOGUE()
+
+
+	ALIGN(16)
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+	movl	PARAM_SIZE, %ecx	C size, read before SAVE_ESI reuses its slot
+	movl	%esi, SAVE_ESI		C SAVE_ESI is the PARAM_SIZE slot
+
+	movl	PARAM_SRC, %esi		C src, read before SAVE_EBP reuses its slot
+	movl	%ebp, SAVE_EBP		C SAVE_EBP is the PARAM_SRC slot
+
+	movl	PARAM_CARRY, %edx	C initial remainder
+	orl	%ecx, %ecx		C set ZF if size==0
+	jz	L(divide_done)		C result==carry if size==0
+
+	movl	PARAM_DIVISOR, %ebp
+	jmp	L(start_1c)		C continue in mpn_mod_1 with edx = carry
+
+EPILOGUE()
+
+
+	ALIGN(16)
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+	C {src,size} mod divisor; carry starts at 0, or the high limb if high<divisor.
+	movl	PARAM_SIZE, %ecx	C size, read before SAVE_ESI reuses its slot
+	movl	%esi, SAVE_ESI		C SAVE_ESI is the PARAM_SIZE slot
+
+	movl	PARAM_SRC, %esi		C src, read before SAVE_EBP reuses its slot
+	movl	%ebp, SAVE_EBP		C SAVE_EBP is the PARAM_SRC slot
+
+	movl	PARAM_DIVISOR, %ebp
+	xorl	%edx, %edx		C result 0 if size==0
+
+	orl	%ecx, %ecx		C set ZF if size==0
+	jz	L(divide_done)
+	movl	-4(%esi,%ecx,4), %eax	C src high limb
+
+	leal	-1(%ecx), %edx		C size-1
+	cmpl	%ebp, %eax		C c if high<divisor
+
+	cmovc(	%edx, %ecx)		C size-1 if high<divisor
+
+	movl	$0, %edx		C initial carry (movl keeps the carry from cmpl)
+	cmovc(	%eax, %edx)		C src high limb if high<divisor
+
+	orl	%ecx, %ecx
+	jz	L(divide_done)		C if size==1 and skip div
+
+
+L(start_1c):
+	C eax
+	C ebx
+	C ecx	size
+	C edx	carry
+	C esi	src
+	C edi
+	C ebp	divisor
+
+	leal	-4(%esi,%ecx,4), %esi	C &src[size-1]
+	cmpl	$MUL_THRESHOLD, %ecx	C enough limbs to pay for the inverse?
+	jae	L(mul_by_inverse)	C size >= MUL_THRESHOLD
+
+
+L(divide_top):
+	C eax
+	C ebx
+	C ecx	counter, limbs, decrementing
+	C edx	remainder
+	C esi	src, decrementing
+	C edi
+	C ebp	divisor
+
+	movl	(%esi), %eax		C next limb, low half of edx:eax
+	subl	$4, %esi		C step src down one limb
+
+	divl	%ebp			C edx:eax / divisor, remainder in edx
+
+	subl	$1, %ecx
+	jnz	L(divide_top)		C until all limbs are consumed
+
+
+L(divide_done):
+	movl	SAVE_ESI, %esi		C restore caller esi
+	movl	SAVE_EBP, %ebp		C restore caller ebp
+	movl	%edx, %eax		C return the remainder
+	ret				C no MMX was used on the paths reaching here
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+	C eax
+	C ebx
+	C ecx	size
+	C edx	carry
+	C esi	&src[size-1]
+	C edi
+	C ebp	divisor
+
+	bsrl	%ebp, %eax		C 31-l
+
+	movd	%edx, %mm1		C carry
+	movl	%ecx, %edx		C size
+	movl	$31, %ecx		C 31, becomes l after the xorl below
+
+	C
+
+	xorl	%eax, %ecx		C l = leading zeros on d
+	addl	$1, %eax		C 32-l
+
+	shll	%cl, %ebp		C normalize d
+	movd	%ecx, %mm7		C l
+	leal	-1(%edx), %ecx		C size-1
+
+	movd	%eax, %mm6		C 32-l
+	movl	$-1, %edx		C high half of b*b-1
+	movl	$-1, %eax		C low half of b*b-1
+
+	C
+
+	subl	%ebp, %edx		C (b-d)-1 so edx:eax = b*(b-d)-1
+
+	divl	%ebp			C floor (b*(b-d)-1 / d)
+
+	movd	%ebp, %mm5		C d
+	movd	(%esi), %mm0		C src high limb
+	punpckldq %mm1, %mm0		C carry:srchigh
+	psrlq	%mm6, %mm0		C n2 = high (carry:srchigh << l)
+
+	C
+
+	movd	%eax, %mm4		C m
+
+
+C The dependent chain here consists of
+C
+C	2   paddd    n1+n2
+C	8   pmuludq  m*(n1+n2)
+C	2   paddq    n2:nadj + m*(n1+n2)
+C	2   psrlq    q1
+C	8   pmuludq  d*q1
+C	2   psubq    (n-d)-q1*d
+C	2   psrlq    high mask
+C	2   pand     d masked
+C	2   paddd    n2+d addback
+C	--
+C	30
+C
+C But it seems to run at 31 cycles, so presumably there's something else
+C going on.
+
+
+	ALIGN(16)
+L(inverse_top):
+	C eax
+	C ebx
+	C ecx	counter, size-1 to 1 (size-2 to 1 when entered from mpn_preinv_mod_1)
+	C edx
+	C esi	src, decrementing
+	C edi
+	C ebp
+	C
+	C mm0	n2
+	C mm4	m
+	C mm5	d
+	C mm6	32-l
+	C mm7	l
+
+	ASSERT(b,`C n2<d
+	 movd	%mm0, %eax
+	 movd	%mm5, %edx
+	 cmpl	%edx, %eax')
+
+	movd	-4(%esi), %mm1		C next src limbs
+	movd	(%esi), %mm2		C upper limb of the pair
+	leal	-4(%esi), %esi		C step src down one limb
+
+	punpckldq %mm2, %mm1		C upper:lower limb pair
+	psrlq	%mm6, %mm1		C n10
+
+	movq	%mm1, %mm2		C n10
+	movq	%mm1, %mm3		C n10
+	psrad	$31, %mm1		C -n1
+	pand	%mm5, %mm1		C -n1 & d
+	paddd	%mm2, %mm1		C nadj = n10+(-n1&d), ignore overflow
+
+	psrld	$31, %mm2		C n1
+	paddd	%mm0, %mm2		C n2+n1
+	punpckldq %mm0, %mm1		C n2:nadj
+
+	pmuludq	%mm4, %mm2		C m*(n2+n1)
+
+	paddq	%mm2, %mm1		C n2:nadj + m*(n2+n1)
+
+	psrlq	$32, %mm1		C q1 = high(n2:nadj + m*(n2+n1))
+
+	pmuludq	%mm5, %mm1		C q1*d
+	punpckldq %mm0, %mm3		C n
+	psubq	%mm5, %mm3		C n - d
+	pxor	%mm0, %mm0		C clear n2 for the por below
+
+	psubq	%mm1, %mm3		C n - (q1+1)*d
+
+	por	%mm3, %mm0		C remainder -> n2
+	psrlq	$32, %mm3		C high n - (q1+1)*d, 0 or -1
+
+	ASSERT(be,`C 0 or -1
+	 movd	%mm3, %eax
+	 addl	$1, %eax
+	 cmpl	$1, %eax')
+
+	pand	%mm5, %mm3		C mask & d
+
+	paddd	%mm3, %mm0		C addback if necessary
+
+	subl	$1, %ecx
+	jnz	L(inverse_top)		C until only src[0] is left
+
+
+	C Least significant limb.
+	C Same code as the loop, but there's no -4(%esi) limb to fetch.
+
+L(inverse_last):
+	C eax
+	C ebx
+	C ecx
+	C edx
+	C esi	&src[0]
+	C
+	C mm0	n2
+	C mm4	m
+	C mm5	d
+	C mm6	32-l
+	C mm7	l
+
+	movd	(%esi), %mm1		C src[0]
+	psllq	%mm7, %mm1		C n10 = src[0] << l, no lower limb to bring in
+
+	movq	%mm1, %mm2		C n10
+	movq	%mm1, %mm3		C n10
+	psrad	$31, %mm1		C -n1
+	pand	%mm5, %mm1		C -n1 & d
+	paddd	%mm2, %mm1		C nadj = n10+(-n1&d), ignore overflow
+
+	psrld	$31, %mm2		C n1
+	paddd	%mm0, %mm2		C n2+n1
+	punpckldq %mm0, %mm1		C n2:nadj
+
+	pmuludq	%mm4, %mm2		C m*(n2+n1)
+
+	paddq	%mm2, %mm1		C n2:nadj + m*(n2+n1)
+
+	psrlq	$32, %mm1		C q1 = high(n2:nadj + m*(n2+n1))
+
+	pmuludq	%mm5, %mm1		C q1*d
+	punpckldq %mm0, %mm3		C n
+	psubq	%mm5, %mm3		C n - d
+	pxor	%mm0, %mm0		C clear n2 for the por below
+
+	psubq	%mm1, %mm3		C n - (q1+1)*d
+
+	por	%mm3, %mm0		C remainder -> n2
+	psrlq	$32, %mm3		C high n - (q1+1)*d, 0 or -1
+
+	ASSERT(be,`C 0 or -1
+	 movd	%mm3, %eax
+	 addl	$1, %eax
+	 cmpl	$1, %eax')
+
+	movl	SAVE_EBP, %ebp		C restore caller ebp
+	pand	%mm5, %mm3		C mask & d
+
+	movl	SAVE_ESI, %esi		C restore caller esi
+	paddd	%mm3, %mm0		C addback if necessary
+
+	psrld	%mm7, %mm0		C shift the remainder back down by l
+
+	movd	%mm0, %eax		C return value
+
+	emms				C empty the MMX state before returning
+	ret
+
+EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/mod_1_1.asm b/gmp/mpn/x86/pentium4/sse2/mod_1_1.asm
deleted file mode 100644
index ee88babeee..0000000000
--- a/gmp/mpn/x86/pentium4/sse2/mod_1_1.asm
+++ /dev/null
@@ -1,166 +0,0 @@
-dnl  x86-32 mpn_mod_1_1p for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2009, 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C TODO:
-C  * Optimize.  The present code was written quite straightforwardly.
-C  * Optimize post-loop reduction code; it is from mod_1s_4p, thus overkill.
-C  * Write a cps function that uses sse2 insns.
-
-C                           cycles/limb
-C P6 model 0-8,10-12		-
-C P6 model 9   (Banias)		?
-C P6 model 13  (Dothan)		?
-C P4 model 0-1 (Willamette)	?
-C P4 model 2   (Northwood)     16
-C P4 model 3-4 (Prescott)      18
-
-C INPUT PARAMETERS
-C ap		sp + 4
-C n		sp + 8
-C b		sp + 12
-C cps		sp + 16
-
-define(`B1modb', `%mm1')
-define(`B2modb', `%mm2')
-define(`ap',     `%edx')
-define(`n',      `%eax')
-
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mod_1_1p)
-	push	%ebx
-	mov	8(%esp), ap
-	mov	12(%esp), n
-	mov	20(%esp), %ecx
-	movd	8(%ecx), B1modb
-	movd	12(%ecx), B2modb
-
-	lea	-4(ap,n,4), ap
-
-C FIXME: See comment in generic/mod_1_1.c.
-	movd	(ap), %mm7
-	movd	-4(ap), %mm4
-	pmuludq B1modb, %mm7
-	paddq	%mm4, %mm7
-	add	$-2, n
-	jz	L(end)
-
-	ALIGN(8)
-L(top):	movq	%mm7, %mm6
-	psrlq	$32, %mm7		C rh
-	movd	-8(ap), %mm0
-	add	$-4, ap
-	pmuludq	B2modb, %mm7
-	pmuludq	B1modb, %mm6
-	add	$-1, n
-	paddq	%mm0, %mm7
-	paddq	%mm6, %mm7
-	jnz	L(top)
-
-L(end):	pcmpeqd	%mm4, %mm4
-	psrlq	$32, %mm4		C 0x00000000FFFFFFFF
-	pand	%mm7, %mm4		C rl
-	psrlq	$32, %mm7		C rh
-	pmuludq	B1modb, %mm7		C rh,cl
-	paddq	%mm4, %mm7		C rh,rl
-	movd	4(%ecx), %mm4		C cnt
-	psllq	%mm4, %mm7		C rh,rl normalized
-	movq	%mm7, %mm2		C rl in low half
-	psrlq	$32, %mm7		C rh
-	movd	(%ecx), %mm1		C bi
-	pmuludq	%mm7, %mm1		C qh,ql
-	paddq	%mm2, %mm1		C qh-1,ql
-	movd	%mm1, %ecx		C ql
-	psrlq	$32, %mm1		C qh-1
-	movd	16(%esp), %mm3		C b
-	pmuludq	%mm1, %mm3		C (qh-1) * b
-	psubq	%mm3, %mm2		C r in low half (could use psubd)
-	movd	%mm2, %eax		C r
-	mov	16(%esp), %ebx
-	sub	%ebx, %eax		C r
-	cmp	%eax, %ecx
-	lea	(%eax,%ebx), %edx
-	cmovc(	%edx, %eax)
-	movd	%mm4, %ecx		C cnt
-	cmp	%ebx, %eax
-	jae	L(fix)
-	emms
-	pop	%ebx
-	shr	%cl, %eax
-	ret
-
-L(fix):	sub	%ebx, %eax
-	emms
-	pop	%ebx
-	shr	%cl, %eax
-	ret
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1_1p_cps)
-C CAUTION: This is the same code as in k7/mod_1_1.asm
-	push	%ebp
-	mov	12(%esp), %ebp
-	push	%esi
-	bsr	%ebp, %ecx
-	push	%ebx
-	xor	$31, %ecx
-	mov	16(%esp), %esi
-	sal	%cl, %ebp
-	mov	%ebp, %edx
-	not	%edx
-	mov	$-1, %eax
-	div	%ebp
-	mov	%eax, (%esi)		C store bi
-	mov	%ecx, 4(%esi)		C store cnt
-	xor	%ebx, %ebx
-	sub	%ebp, %ebx
-	mov	$1, %edx
-	shld	%cl, %eax, %edx
-	imul	%edx, %ebx
-	mul	%ebx
-	add	%ebx, %edx
-	not	%edx
-	imul	%ebp, %edx
-	add	%edx, %ebp
-	cmp	%edx, %eax
-	cmovc(	%ebp, %edx)
-	shr	%cl, %ebx
-	mov	%ebx, 8(%esi)		C store B1modb
-	shr	%cl, %edx
-	mov	%edx, 12(%esi)		C store B2modb
-	pop	%ebx
-	pop	%esi
-	pop	%ebp
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/mod_1_4.asm b/gmp/mpn/x86/pentium4/sse2/mod_1_4.asm
deleted file mode 100644
index eb2edb6297..0000000000
--- a/gmp/mpn/x86/pentium4/sse2/mod_1_4.asm
+++ /dev/null
@@ -1,269 +0,0 @@
-dnl  x86-32 mpn_mod_1s_4p for Pentium 4 and P6 models with SSE2 (i.e. 9,D,E,F).
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2009, 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C TODO:
-C  * Optimize.  The present code was written quite straightforwardly.
-C  * Optimize post-loop reduction code.
-C  * Write a cps function that uses sse2 insns.
-
-C			    cycles/limb
-C P6 model 0-8,10-12		-
-C P6 model 9   (Banias)		?
-C P6 model 13  (Dothan)		3.4
-C P4 model 0-1 (Willamette)	?
-C P4 model 2   (Northwood)	4
-C P4 model 3-4 (Prescott)	4.5
-
-C INPUT PARAMETERS
-C ap		sp + 4
-C n		sp + 8
-C b		sp + 12
-C cps		sp + 16
-
-define(`B1modb', `%mm1')
-define(`B2modb', `%mm2')
-define(`B3modb', `%mm3')
-define(`B4modb', `%mm4')
-define(`B5modb', `%mm5')
-define(`ap',     `%edx')
-define(`n',      `%eax')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mod_1s_4p)
-	push	%ebx
-	mov	8(%esp), ap
-	mov	12(%esp), n
-	mov	20(%esp), %ecx
-
-	movd	8(%ecx), B1modb
-	movd	12(%ecx), B2modb
-	movd	16(%ecx), B3modb
-	movd	20(%ecx), B4modb
-	movd	24(%ecx), B5modb
-
-	mov	n, %ebx
-	lea	-4(ap,n,4), ap
-	and	$3, %ebx
-	je	L(b0)
-	cmp	$2, %ebx
-	jc	L(b1)
-	je	L(b2)
-
-L(b3):	movd	-4(ap), %mm7
-	pmuludq	B1modb, %mm7
-	movd	-8(ap), %mm6
-	paddq	%mm6, %mm7
-	movd	(ap), %mm6
-	pmuludq	B2modb, %mm6
-	paddq	%mm6, %mm7
-	lea	-24(ap), ap
-	add	$-3, n
-	jz	L(end)
-	jmp	L(top)
-
-L(b0):	movd	-8(ap), %mm7
-	pmuludq	B1modb, %mm7
-	movd	-12(ap), %mm6
-	paddq	%mm6, %mm7
-	movd	-4(ap), %mm6
-	pmuludq	B2modb, %mm6
-	paddq	%mm6, %mm7
-	movd	(ap), %mm6
-	pmuludq	B3modb, %mm6
-	paddq	%mm6, %mm7
-	lea	-28(ap), ap
-	add	$-4, n
-	jz	L(end)
-	jmp	L(top)
-
-L(b1):	movd	(ap), %mm7
-	lea	-16(ap), ap
-	dec	n
-	jz	L(x)
-	jmp	L(top)
-
-L(b2):	movd	-4(ap), %mm7		C rl
-	punpckldq (ap), %mm7		C rh
-	lea	-20(ap), ap
-	add	$-2, n
-	jz	L(end)
-
-	ALIGN(8)
-L(top):	movd	4(ap), %mm0
-	pmuludq	B1modb, %mm0
-	movd	0(ap), %mm6
-	paddq	%mm6, %mm0
-
-	movd	8(ap), %mm6
-	pmuludq	B2modb, %mm6
-	paddq	%mm6, %mm0
-
-	movd	12(ap), %mm6
-	pmuludq	B3modb, %mm6
-	paddq	%mm6, %mm0
-
-	movq	%mm7, %mm6
-	psrlq	$32, %mm7		C rh
-	pmuludq	B5modb, %mm7
-	pmuludq	B4modb, %mm6
-
-	paddq	%mm0, %mm7
-	paddq	%mm6, %mm7
-
-	add	$-16, ap
-	add	$-4, n
-	jnz	L(top)
-
-L(end):	pcmpeqd	%mm4, %mm4
-	psrlq	$32, %mm4		C 0x00000000FFFFFFFF
-	pand	%mm7, %mm4		C rl
-	psrlq	$32, %mm7		C rh
-	pmuludq	B1modb, %mm7		C rh,cl
-	paddq	%mm4, %mm7		C rh,rl
-L(x):	movd	4(%ecx), %mm4		C cnt
-	psllq	%mm4, %mm7		C rh,rl normalized
-	movq	%mm7, %mm2		C rl in low half
-	psrlq	$32, %mm7		C rh
-	movd	(%ecx), %mm1		C bi
-	pmuludq	%mm7, %mm1		C qh,ql
-	paddq	%mm2, %mm1		C qh-1,ql
-	movd	%mm1, %ecx		C ql
-	psrlq	$32, %mm1		C qh-1
-	movd	16(%esp), %mm3		C b
-	pmuludq	%mm1, %mm3		C (qh-1) * b
-	psubq	%mm3, %mm2		C r in low half (could use psubd)
-	movd	%mm2, %eax		C r
-	mov	16(%esp), %ebx
-	sub	%ebx, %eax		C r
-	cmp	%eax, %ecx
-	lea	(%eax,%ebx), %edx
-	cmovc(	%edx, %eax)
-	movd	%mm4, %ecx		C cnt
-	cmp	%ebx, %eax
-	jae	L(fix)
-	emms
-	pop	%ebx
-	shr	%cl, %eax
-	ret
-
-L(fix):	sub	%ebx, %eax
-	emms
-	pop	%ebx
-	shr	%cl, %eax
-	ret
-EPILOGUE()
-
-	ALIGN(16)
-PROLOGUE(mpn_mod_1s_4p_cps)
-C CAUTION: This is the same code as in k7/mod_1_4.asm
-	push	%ebp
-	push	%edi
-	push	%esi
-	push	%ebx
-	mov	20(%esp), %ebp		C FIXME: avoid bp for 0-idx
-	mov	24(%esp), %ebx
-	bsr	%ebx, %ecx
-	xor	$31, %ecx
-	sal	%cl, %ebx		C b << cnt
-	mov	%ebx, %edx
-	not	%edx
-	mov	$-1, %eax
-	div	%ebx
-	xor	%edi, %edi
-	sub	%ebx, %edi
-	mov	$1, %esi
-	mov	%eax, (%ebp)		C store bi
-	mov	%ecx, 4(%ebp)		C store cnt
-	shld	%cl, %eax, %esi
-	imul	%edi, %esi
-	mov	%eax, %edi
-	mul	%esi
-
-	add	%esi, %edx
-	shr	%cl, %esi
-	mov	%esi, 8(%ebp)		C store B1modb
-
-	not	%edx
-	imul	%ebx, %edx
-	lea	(%edx,%ebx), %esi
-	cmp	%edx, %eax
-	cmovnc(	%edx, %esi)
-	mov	%edi, %eax
-	mul	%esi
-
-	add	%esi, %edx
-	shr	%cl, %esi
-	mov	%esi, 12(%ebp)		C store B2modb
-
-	not	%edx
-	imul	%ebx, %edx
-	lea	(%edx,%ebx), %esi
-	cmp	%edx, %eax
-	cmovnc(	%edx, %esi)
-	mov	%edi, %eax
-	mul	%esi
-
-	add	%esi, %edx
-	shr	%cl, %esi
-	mov	%esi, 16(%ebp)		C store B3modb
-
-	not	%edx
-	imul	%ebx, %edx
-	lea	(%edx,%ebx), %esi
-	cmp	%edx, %eax
-	cmovnc(	%edx, %esi)
-	mov	%edi, %eax
-	mul	%esi
-
-	add	%esi, %edx
-	shr	%cl, %esi
-	mov	%esi, 20(%ebp)		C store B4modb
-
-	not	%edx
-	imul	%ebx, %edx
-	add	%edx, %ebx
-	cmp	%edx, %eax
-	cmovnc(	%edx, %ebx)
-
-	shr	%cl, %ebx
-	mov	%ebx, 24(%ebp)		C store B5modb
-
-	pop	%ebx
-	pop	%esi
-	pop	%edi
-	pop	%ebp
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm b/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm
index 31e25b79bc..1598b41785 100644
--- a/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm
+++ b/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium 4 mpn_mod_34lsub1 -- remainder modulo 2^24-1.
 
-dnl  Copyright 2000-2003 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium4/sse2/mode1o.asm b/gmp/mpn/x86/pentium4/sse2/mode1o.asm
index 778c478169..2f0b177a00 100644
--- a/gmp/mpn/x86/pentium4/sse2/mode1o.asm
+++ b/gmp/mpn/x86/pentium4/sse2/mode1o.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium-4 mpn_modexact_1_odd -- mpn by limb exact remainder.
 
 dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -112,7 +101,7 @@ ifdef(`PIC',`
 
 	psubd	%mm0, %mm6		C inv = 2*inv - inv*inv*d
 
-	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+	ASSERT(e,`	C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
 	pushl	%eax	FRAME_pushl()
 	movd	%mm6, %eax
 	imul	PARAM_DIVISOR, %eax
@@ -124,13 +113,13 @@ ifdef(`PIC',`
 
 C The dependent chain here is as follows.
 C
-C					latency
-C	psubq	 s = (src-cbit) - climb	   2
-C	pmuludq	 q = s*inverse		   8
-C	pmuludq	 prod = q*divisor	   8
-C	psrlq	 climb = high(prod)	   2
-C					  --
-C					  20
+C				        latency
+C	psubq	 s = (src-cbit) - climb    2
+C	pmuludq	 q = s*inverse             8
+C	pmuludq	 prod = q*divisor          8
+C	psrlq	 climb = high(prod)        2
+C	                                  --
+C	                                  20
 C
 C Yet the loop measures 19.0 c/l, so obviously there's something gained
 C there over a straight reading of the chip documentation.
diff --git a/gmp/mpn/x86/pentium4/sse2/mul_1.asm b/gmp/mpn/x86/pentium4/sse2/mul_1.asm
index 6347b8bf62..07be951921 100644
--- a/gmp/mpn/x86/pentium4/sse2/mul_1.asm
+++ b/gmp/mpn/x86/pentium4/sse2/mul_1.asm
@@ -1,48 +1,37 @@
 dnl  mpn_mul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
 
-dnl  Copyright 2005, 2007, 2011 Free Software Foundation, Inc.
-
+dnl  Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl
 dnl  This file is part of the GNU MP Library.
 dnl
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
 dnl
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
 
-C                           cycles/limb
-C P6 model 0-8,10-12		-
-C P6 model 9   (Banias)		4.17
-C P6 model 13  (Dothan)		4.17
-C P4 model 0-1 (Willamette)	4
-C P4 model 2   (Northwood)	4
-C P4 model 3-4 (Prescott)	4.55
-
 C TODO:
 C  * Tweak eax/edx offsets in loop as to save some lea's
 C  * Perhaps software pipeline small-case code
 
+C                           cycles/limb
+C P6 model 0-8,10-12            -
+C P6 model 9   (Banias)		?
+C P6 model 13  (Dothan)         4.17
+C P4 model 0-1 (Willamette):	4
+C P4 model 2   (Northwood):     4
+C P4 model 3-4 (Prescott):      4.55
+
 C INPUT PARAMETERS
 C rp		sp + 4
 C up		sp + 8
@@ -51,13 +40,22 @@ C v0		sp + 16
 
 	TEXT
 	ALIGN(16)
+PROLOGUE(mpn_mul_1c)
+	mov	4(%esp), %edx
+	mov	8(%esp), %eax
+	mov	12(%esp), %ecx
+	movd	16(%esp), %mm7
+	movd	20(%esp), %mm6
+	jmp	L(ent)
+EPILOGUE()
+	ALIGN(16)
 PROLOGUE(mpn_mul_1)
-	pxor	%mm6, %mm6
-L(ent):	mov	4(%esp), %edx
+	mov	4(%esp), %edx
 	mov	8(%esp), %eax
 	mov	12(%esp), %ecx
 	movd	16(%esp), %mm7
-	cmp	$4, %ecx
+	pxor	%mm6, %mm6
+L(ent):	cmp	$4, %ecx
 	jnc	L(big)
 
 L(lp0):	movd	(%eax), %mm0
@@ -158,7 +156,3 @@ L(end):	pmuludq	%mm7, %mm2
 	emms
 	ret
 EPILOGUE()
-PROLOGUE(mpn_mul_1c)
-	movd	20(%esp), %mm6
-	jmp	L(ent)
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/mul_basecase.asm b/gmp/mpn/x86/pentium4/sse2/mul_basecase.asm
index 6e3775ae09..2628e5eb72 100644
--- a/gmp/mpn/x86/pentium4/sse2/mul_basecase.asm
+++ b/gmp/mpn/x86/pentium4/sse2/mul_basecase.asm
@@ -1,32 +1,21 @@
 dnl  mpn_mul_basecase for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
 
 dnl  Copyright 2001, 2002, 2005, 2007 Free Software Foundation, Inc.
-
+dnl
 dnl  This file is part of the GNU MP Library.
 dnl
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
 dnl
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium4/sse2/popcount.asm b/gmp/mpn/x86/pentium4/sse2/popcount.asm
index b8238b9b66..cb982ade46 100644
--- a/gmp/mpn/x86/pentium4/sse2/popcount.asm
+++ b/gmp/mpn/x86/pentium4/sse2/popcount.asm
@@ -1,66 +1,52 @@
 dnl  X86-32 and X86-64 mpn_popcount using SSE2.
 
-dnl  Copyright 2006, 2007, 2011 Free Software Foundation, Inc.
-
+dnl  Copyright 2006, 2007 Free Software Foundation, Inc.
+dnl
 dnl  This file is part of the GNU MP Library.
 dnl
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
 dnl
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
 
 
-C 32-bit		     popcount	     hamdist
-C			    cycles/limb	    cycles/limb
-C P5				-
-C P6 model 0-8,10-12		-
-C P6 model 9  (Banias)		?
-C P6 model 13 (Dothan)		4
-C P4 model 0  (Willamette)	?
-C P4 model 1  (?)		?
-C P4 model 2  (Northwood)	3.9
-C P4 model 3  (Prescott)	?
-C P4 model 4  (Nocona)		?
-C AMD K6			-
-C AMD K7			-
-C AMD K8			?
-
-C 64-bit		     popcount	     hamdist
-C			    cycles/limb	    cycles/limb
-C P4 model 4 (Nocona):		8
-C AMD K8,K9			7.5
-C AMD K10			3.5
-C Intel core2			3.68
-C Intel corei			3.15
-C Intel atom		       10.8
-C VIA nano			6.5
+C 32-bit                     popcount        hamdist
+C                           cycles/limb     cycles/limb
+C P5:                           -
+C P6 model 0-8,10-12            -
+C P6 model 9  (Banias)          ?
+C P6 model 13 (Dothan)          4
+C P4 model 0  (Willamette)      ?
+C P4 model 1  (?)               ?
+C P4 model 2  (Northwood)       3.9
+C P4 model 3  (Prescott)        ?
+C P4 model 4  (Nocona)          ?
+C K6:                           -
+C K7:                           -
+C K8:                           ?
+
+C 64-bit                     popcount        hamdist
+C                           cycles/limb     cycles/limb
+C P4 model 4 (Nocona):          8
+C K8:                           7.5
+C K10:				3.5
+C P6-15:			3.68
 
 C TODO
 C  * Make a mpn_hamdist based on this.  Alignment could either be handled by
 C    using movdqu for one operand and movdqa for the other, or by painfully
-C    shifting as we go.  Unfortunately, there seem to be no usable shift
+C    shifting as we go.  Unfortunately, there seem to be no usable shift
 C    instruction, except for one that takes an immediate count.
 C  * It would probably be possible to cut a few cycles/limb using software
 C    pipelining.
diff --git a/gmp/mpn/x86/pentium4/sse2/rsh1add_n.asm b/gmp/mpn/x86/pentium4/sse2/rsh1add_n.asm
index f421d1323e..bbf43245cb 100644
--- a/gmp/mpn/x86/pentium4/sse2/rsh1add_n.asm
+++ b/gmp/mpn/x86/pentium4/sse2/rsh1add_n.asm
@@ -1,32 +1,21 @@
 dnl  Intel Pentium-4 mpn_rsh1add_n -- mpn (x+y)/2
 
-dnl  Copyright 2001-2004 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/pentium4/sse2/sqr_basecase.asm b/gmp/mpn/x86/pentium4/sse2/sqr_basecase.asm
index 2dd57d25d9..fc56f164ed 100644
--- a/gmp/mpn/x86/pentium4/sse2/sqr_basecase.asm
+++ b/gmp/mpn/x86/pentium4/sse2/sqr_basecase.asm
@@ -1,32 +1,21 @@
 dnl  mpn_sqr_basecase for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
 
 dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
-
+dnl
 dnl  This file is part of the GNU MP Library.
 dnl
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
 dnl
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -41,7 +30,7 @@ C  * Look into different loop alignment, we now expand the code about 50 bytes
 C    with possibly needless alignment.
 C  * Use OSP, should solve feed-in latency problems.
 C  * Address relative slowness for un<=3 for Pentium M.  The old code is there
-C    considerably faster.  (1:20/14, 2:34:32, 3:66/57)
+C    considerably faster.  (1:20/14, 2:34:32, 3:66/57)
 
 C INPUT PARAMETERS
 C rp		sp + 4
diff --git a/gmp/mpn/x86/pentium4/sse2/sub_n.asm b/gmp/mpn/x86/pentium4/sse2/sub_n.asm
index 5ba1c018ec..02d5f01474 100644
--- a/gmp/mpn/x86/pentium4/sse2/sub_n.asm
+++ b/gmp/mpn/x86/pentium4/sse2/sub_n.asm
@@ -1,44 +1,37 @@
 dnl  Intel Pentium-4 mpn_sub_n -- mpn subtraction.
 
 dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C					cycles/limb
-C			     dst!=src1,2  dst==src1  dst==src2
-C P6 model 0-8,10-12		-
-C P6 model 9   (Banias)		?
-C P6 model 13  (Dothan)		?
-C P4 model 0-1 (Willamette)	?
-C P4 model 2   (Northwood)	4	     6		6
-C P4 model 3-4 (Prescott)	4.25	     7.5	7.5
+C P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2
+C			    6.0 cycles/limb if dst==src1 or dst==src2
+C P4 Prescott:		    >= 5 cycles/limb
+
+
+C mp_limb_t mpn_sub_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                      mp_size_t size);
+C mp_limb_t mpn_sub_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                       mp_size_t size, mp_limb_t carry);
+C
+C The main loop code is 2x unrolled so that the carry bit can alternate
+C between mm0 and mm1.
 
 defframe(PARAM_CARRY,20)
 defframe(PARAM_SIZE, 16)
@@ -54,8 +47,10 @@ define(SAVE_EBX,`PARAM_SRC1')
 
 PROLOGUE(mpn_sub_nc)
 deflit(`FRAME',0)
+
 	movd	PARAM_CARRY, %mm0
 	jmp	L(start_nc)
+
 EPILOGUE()
 
 	ALIGN(8)
@@ -63,16 +58,16 @@ PROLOGUE(mpn_sub_n)
 deflit(`FRAME',0)
 	pxor	%mm0, %mm0
 L(start_nc):
-	mov	PARAM_SRC1, %eax
-	mov	%ebx, SAVE_EBX
-	mov	PARAM_SRC2, %ebx
-	mov	PARAM_DST, %edx
-	mov	PARAM_SIZE, %ecx
+	movl	PARAM_SRC1, %eax
+	movl	%ebx, SAVE_EBX
+	movl	PARAM_SRC2, %ebx
+	movl	PARAM_DST, %edx
+	movl	PARAM_SIZE, %ecx
 
-	lea	(%eax,%ecx,4), %eax	C src1 end
-	lea	(%ebx,%ecx,4), %ebx	C src2 end
-	lea	(%edx,%ecx,4), %edx	C dst end
-	neg	%ecx			C -size
+	leal	(%eax,%ecx,4), %eax	C src1 end
+	leal	(%ebx,%ecx,4), %ebx	C src2 end
+	leal	(%edx,%ecx,4), %edx	C dst end
+	negl	%ecx			C -size
 
 L(top):
 	C eax	src1 end
@@ -90,7 +85,7 @@ L(top):
 
 	psrlq	$63, %mm1
 
-	add	$1, %ecx
+	addl	$1, %ecx
 	jz	L(done_mm1)
 
 	movd	(%eax,%ecx,4), %mm0
@@ -102,17 +97,18 @@ L(top):
 
 	psrlq	$63, %mm0
 
-	add	$1, %ecx
+	addl	$1, %ecx
 	jnz	L(top)
 
+
 	movd	%mm0, %eax
-	mov	SAVE_EBX, %ebx
+	movl	SAVE_EBX, %ebx
 	emms
 	ret
 
 L(done_mm1):
 	movd	%mm1, %eax
-	mov	SAVE_EBX, %ebx
+	movl	SAVE_EBX, %ebx
 	emms
 	ret
 
diff --git a/gmp/mpn/x86/pentium4/sse2/submul_1.asm b/gmp/mpn/x86/pentium4/sse2/submul_1.asm
index 020675bd7b..ceb41f2ac0 100644
--- a/gmp/mpn/x86/pentium4/sse2/submul_1.asm
+++ b/gmp/mpn/x86/pentium4/sse2/submul_1.asm
@@ -1,71 +1,60 @@
 dnl  Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and
 dnl  subtract the result from a second limb vector.
 
-dnl  Copyright 2001, 2002, 2008, 2010 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
+dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C			    cycles/limb
-C P6 model 0-8,10-12		-
-C P6 model 9   (Banias)		6.8
-C P6 model 13  (Dothan)		6.9
-C P4 model 0-1 (Willamette)	?
-C P4 model 2   (Northwood)	5.87
-C P4 model 3-4 (Prescott)	6.5
+C P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon
+C     (stepping 10).
 
-C This code represents a step forwards compared to the code available before
-C GMP 5.1, but it is not carefully tuned for either P6 or P4.  In fact, it is
-C not good for P6.  For P4 it saved a bit over 1 c/l for both Northwood and
-C Prescott compared to the old code.
+
+C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                         mp_limb_t mult);
+C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C                          mp_limb_t mult, mp_limb_t carry);
+C
+C This code is not particularly good at 7 c/l.  The dependent chain is only
+C 4 c/l and there are only 4 MMX unit instructions, so it's not clear why that
+C speed isn't achieved.
 C
 C The arrangements made here to get a two instruction dependent chain are
-C slightly subtle.  In the loop the carry (or borrow rather) is a negative so
-C that a paddq can be used to give a low limb ready to store, and a high limb
-C ready to become the new carry after a psrlq.
+C slightly subtle.  In the loop the carry (or borrow rather) is a negative
+C so that a paddq can be used to give a low limb ready to store, and a high
+C limb ready to become the new carry after a psrlq.
 C
-C If the carry was a simple twos complement negative then the psrlq shift would
-C need to bring in 0 bits or 1 bits according to whether the high was zero or
-C non-zero, since a non-zero value would represent a negative needing sign
-C extension.  That wouldn't be particularly easy to arrange and certainly would
-C add an instruction to the dependent chain, so instead an offset is applied so
-C that the high limb will be 0xFFFFFFFF+c.  With c in the range -0xFFFFFFFF to
-C 0, the value 0xFFFFFFFF+c is in the range 0 to 0xFFFFFFFF and is therefore
-C always positive and can always have 0 bits shifted in, which is what psrlq
-C does.
+C If the carry was a simple twos complement negative then the psrlq shift
+C would need to bring in 0 bits or 1 bits according to whether the high was
+C zero or non-zero, since a non-zero value would represent a negative
+C needing sign extension.  That wouldn't be particularly easy to arrange and
+C certainly would add an instruction to the dependent chain, so instead an
+C offset is applied so that the high limb will be 0xFFFFFFFF+c.  With c in
+C the range -0xFFFFFFFF to 0, the value 0xFFFFFFFF+c is in the range 0 to
+C 0xFFFFFFFF and is therefore always positive and can always have 0 bits
+C shifted in, which is what psrlq does.
 C
 C The extra 0xFFFFFFFF must be subtracted before c is used, but that can be
 C done off the dependent chain.  The total adjustment then is to add
-C 0xFFFFFFFF00000000 to offset the new carry, and subtract 0x00000000FFFFFFFF
-C to remove the offset from the current carry, for a net add of
-C 0xFFFFFFFE00000001.  In the code this is applied to the destination limb when
-C fetched.
+C 0xFFFFFFFF00000000 to offset the new carry, and subtract
+C 0x00000000FFFFFFFF to remove the offset from the current carry, for a net
+C add of 0xFFFFFFFE00000001.  In the code this is applied to the destination
+C limb when fetched.
 C
 C It's also possible to view the 0xFFFFFFFF adjustment as a ones-complement
 C negative, which is how it's undone for the return value, but that doesn't
@@ -91,16 +80,16 @@ deflit(`FRAME',0)
 	pxor	%mm1, %mm1		C initial borrow
 
 L(start_1c):
-	mov	PARAM_SRC, %eax
+	movl	PARAM_SRC, %eax
 	pcmpeqd	%mm0, %mm0
 
 	movd	PARAM_MULTIPLIER, %mm7
 	pcmpeqd	%mm6, %mm6
 
-	mov	PARAM_DST, %edx
+	movl	PARAM_DST, %edx
 	psrlq	$32, %mm0		C 0x00000000FFFFFFFF
 
-	mov	PARAM_SIZE, %ecx
+	movl	PARAM_SIZE, %ecx
 	psllq	$32, %mm6		C 0xFFFFFFFF00000000
 
 	psubq	%mm0, %mm6		C 0xFFFFFFFE00000001
@@ -108,75 +97,32 @@ L(start_1c):
 	psubq	%mm1, %mm0		C 0xFFFFFFFF - borrow
 
 
-	movd	(%eax), %mm3		C up
-	movd	(%edx), %mm4		C rp
-
-	add	$-1, %ecx
-	paddq	%mm6, %mm4		C add 0xFFFFFFFE00000001
-	pmuludq	%mm7, %mm3
-	jnz	L(gt1)
-	psubq	%mm3, %mm4		C prod
-	paddq	%mm4, %mm0		C borrow
-	movd	%mm0, (%edx)		C result
-	jmp	L(rt)
-
-L(gt1):	movd	4(%eax), %mm1		C up
-	movd	4(%edx), %mm2		C rp
-
-	add	$-1, %ecx
-	jz	L(eev)
-
-	ALIGN(16)
-L(top):	paddq	%mm6, %mm2		C add 0xFFFFFFFE00000001
+	C eax	src, incrementing
+	C ebx
+	C ecx	loop counter, decrementing
+	C edx	dst, incrementing
+	C
+	C mm0	0xFFFFFFFF - borrow
+	C mm6	0xFFFFFFFE00000001
+	C mm7	multiplier
+
+L(loop):
+	movd	(%eax), %mm1		C src
+	leal	4(%eax), %eax
+	movd	(%edx), %mm2		C dst
+	paddq	%mm6, %mm2		C add 0xFFFFFFFE00000001
 	pmuludq	%mm7, %mm1
-	psubq	%mm3, %mm4		C prod
-	movd	8(%eax), %mm3		C up
-	paddq	%mm4, %mm0		C borrow
-	movd	8(%edx), %mm4		C rp
-	movd	%mm0, (%edx)		C result
-	psrlq	$32, %mm0
-
-	add	$-1, %ecx
-	jz	L(eod)
-
-	paddq	%mm6, %mm4		C add 0xFFFFFFFE00000001
-	pmuludq	%mm7, %mm3
 	psubq	%mm1, %mm2		C prod
-	movd	12(%eax), %mm1		C up
 	paddq	%mm2, %mm0		C borrow
-	movd	12(%edx), %mm2		C rp
-	movd	%mm0, 4(%edx)		C result
-	psrlq	$32, %mm0
-
-	lea	8(%eax), %eax
-	lea	8(%edx), %edx
-	add	$-1, %ecx
-	jnz	L(top)
-
-
-L(eev):	paddq	%mm6, %mm2		C add 0xFFFFFFFE00000001
-	pmuludq	%mm7, %mm1
-	psubq	%mm3, %mm4		C prod
-	paddq	%mm4, %mm0		C borrow
+	subl	$1, %ecx
 	movd	%mm0, (%edx)		C result
 	psrlq	$32, %mm0
-	psubq	%mm1, %mm2		C prod
-	paddq	%mm2, %mm0		C borrow
-	movd	%mm0, 4(%edx)		C result
-L(rt):	psrlq	$32, %mm0
+	leal	4(%edx), %edx
+	jnz	L(loop)
+
 	movd	%mm0, %eax
-	not	%eax
+	notl	%eax
 	emms
 	ret
 
-L(eod):	paddq	%mm6, %mm4		C add 0xFFFFFFFE00000001
-	pmuludq	%mm7, %mm3
-	psubq	%mm1, %mm2		C prod
-	paddq	%mm2, %mm0		C borrow
-	movd	%mm0, 4(%edx)		C result
-	psrlq	$32, %mm0
-	psubq	%mm3, %mm4		C prod
-	paddq	%mm4, %mm0		C borrow
-	movd	%mm0, 8(%edx)		C result
-	jmp	L(rt)
 EPILOGUE()
diff --git a/gmp/mpn/x86/rshift.asm b/gmp/mpn/x86/rshift.asm
index a60dcaa4b2..8e33eabd61 100644
--- a/gmp/mpn/x86/rshift.asm
+++ b/gmp/mpn/x86/rshift.asm
@@ -1,43 +1,33 @@
 dnl  x86 mpn_rshift -- mpn right shift.
 
-dnl  Copyright 1992, 1994, 1996, 1999-2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl  Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C     cycles/limb
-C P54	 7.5
-C P55	 7.0
-C P6	 2.5
-C K6	 4.5
-C K7	 5.0
-C P4	16.5
+C P54:   7.5
+C P55:   7.0
+C P6:    2.5
+C K6:    4.5
+C K7:    5.0
+C P4:   16.5
 
 
 C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/gmp/mpn/x86/sec_tabselect.asm b/gmp/mpn/x86/sec_tabselect.asm
deleted file mode 100644
index c7c2e059f1..0000000000
--- a/gmp/mpn/x86/sec_tabselect.asm
+++ /dev/null
@@ -1,115 +0,0 @@
-dnl  x86 mpn_sec_tabselect.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C			    cycles/limb
-C P5				 ?
-C P6 model 0-8,10-12		 ?
-C P6 model 9  (Banias)		 ?
-C P6 model 13 (Dothan)		 ?
-C P4 model 0  (Willamette)	 ?
-C P4 model 1  (?)		 ?
-C P4 model 2  (Northwood)	 4.5
-C P4 model 3  (Prescott)	 ?
-C P4 model 4  (Nocona)		 ?
-C Intel Atom			 ?
-C AMD K6			 ?
-C AMD K7			 3.4
-C AMD K8			 ?
-C AMD K10			 ?
-
-C NOTES
-C  * This has not been tuned for any specific processor.  Its speed should not
-C    be too bad, though.
-C  * Using SSE2 could result in many-fold speedup.
-
-C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
-define(`rp',     `%edi')
-define(`tp',     `%esi')
-define(`n',      `%ebx')
-define(`nents',  `%ecx')
-define(`which',  `36(%esp)')
-
-define(`i',      `%ebp')
-define(`maskp',  `20(%esp)')
-define(`maskn',  `32(%esp)')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_sec_tabselect)
-	push	%edi
-	push	%esi
-	push	%ebx
-	push	%ebp
-	mov	20(%esp), rp
-	mov	24(%esp), tp
-	mov	28(%esp), n
-	mov	32(%esp), nents
-
-	lea	(rp,n,4), rp
-	lea	(tp,n,4), tp
-	sub	nents, which
-L(outer):
-	mov	which, %eax
-	add	nents, %eax
-	neg	%eax			C set CF iff 'which' != k
-	sbb	%eax, %eax
-	mov	%eax, maskn
-	not	%eax
-	mov	%eax, maskp
-
-	mov	n, i
-	neg	i
-
-	ALIGN(16)
-L(top):	mov	(tp,i,4), %eax
-	and	maskp, %eax
-	mov	(rp,i,4), %edx
-	and	maskn, %edx
-	or	%edx, %eax
-	mov	%eax, (rp,i,4)
-	inc	i
-	js	L(top)
-
-L(end):	mov	n, %eax
-	lea	(tp,%eax,4), tp
-	dec	nents
-	jne	L(outer)
-
-L(outer_end):
-	pop	%ebp
-	pop	%ebx
-	pop	%esi
-	pop	%edi
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/sqr_basecase.asm b/gmp/mpn/x86/sqr_basecase.asm
index 39f8a89805..9a7e13327b 100644
--- a/gmp/mpn/x86/sqr_basecase.asm
+++ b/gmp/mpn/x86/sqr_basecase.asm
@@ -1,43 +1,32 @@
 dnl  x86 generic mpn_sqr_basecase -- square an mpn number.
 
 dnl  Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
-
+dnl
 dnl  This file is part of the GNU MP Library.
 dnl
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
 dnl
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
 
 
 C     cycles/crossproduct  cycles/triangleproduct
-C P5
-C P6
-C K6
-C K7
-C P4
+C P5:
+C P6:
+C K6:
+C K7:
+C P4:
 
 
 C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
diff --git a/gmp/mpn/x86/t-zdisp.sh b/gmp/mpn/x86/t-zdisp.sh
index 61efdd6c4f..6c55067b6c 100755
--- a/gmp/mpn/x86/t-zdisp.sh
+++ b/gmp/mpn/x86/t-zdisp.sh
@@ -2,31 +2,20 @@
 #
 # Copyright 2000 Free Software Foundation, Inc.
 #
-#  This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or
-#
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 # Usage: cd $(builddir)/mpn
diff --git a/gmp/mpn/x86/t-zdisp2.pl b/gmp/mpn/x86/t-zdisp2.pl
index b441b6579a..d5e2d93dc0 100755
--- a/gmp/mpn/x86/t-zdisp2.pl
+++ b/gmp/mpn/x86/t-zdisp2.pl
@@ -2,31 +2,20 @@
 #
 # Copyright 2001, 2002 Free Software Foundation, Inc.
 #
-#  This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or
-#
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 # Usage: cd $(builddir)/mpn
@@ -82,7 +71,7 @@ sub process {
   }
 }
 
-# Ensure we're using the right SQR_TOOM2_THRESHOLD for the part of the
+# Ensure we're using the right SQR_KARATSUBA_THRESHOLD for the part of the
 # tree being processed.
 sub process_mparam {
   my $file = "$File::Find::dir/gmp-mparam.h";
@@ -90,10 +79,10 @@ sub process_mparam {
     print "$file\n" if $opt{'t'};
     open MPARAM, "<$file" or die;
     while (<MPARAM>) {
-      if (/^#define SQR_TOOM2_THRESHOLD[ \t]*([0-9][0-9]*)/) {
+      if (/^#define SQR_KARATSUBA_THRESHOLD[ \t]*([0-9][0-9]*)/) {
         open KARA, ">$tempfile" or die;
-        print KARA "define(\`SQR_TOOM2_THRESHOLD',$1)\n\n";
-        print "define(\`SQR_TOOM2_THRESHOLD',$1)\n" if $opt{'t'};
+        print KARA "define(\`SQR_KARATSUBA_THRESHOLD',$1)\n\n";
+        print "define(\`SQR_KARATSUBA_THRESHOLD',$1)\n" if $opt{'t'};
         close KARA or die;
         last;
       }
diff --git a/gmp/mpn/x86/udiv.asm b/gmp/mpn/x86/udiv.asm
index a3ee08860f..5c7d3f3533 100644
--- a/gmp/mpn/x86/udiv.asm
+++ b/gmp/mpn/x86/udiv.asm
@@ -1,32 +1,21 @@
 dnl  x86 mpn_udiv_qrnnd -- 2 by 1 limb division
 
 dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/umul.asm b/gmp/mpn/x86/umul.asm
index 34fe434400..d0116de6d9 100644
--- a/gmp/mpn/x86/umul.asm
+++ b/gmp/mpn/x86/umul.asm
@@ -1,32 +1,21 @@
 dnl  mpn_umul_ppmm -- 1x1->2 limb multiplication
 
 dnl  Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/mpn/x86/x86-defs.m4 b/gmp/mpn/x86/x86-defs.m4
index 1538b6820c..5b4a8e1fad 100644
--- a/gmp/mpn/x86/x86-defs.m4
+++ b/gmp/mpn/x86/x86-defs.m4
@@ -4,33 +4,23 @@ divert(-1)
 dnl  m4 macros for x86 assembler.
 
 
-dnl  Copyright 1999-2003, 2007, 2010, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl  Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 dnl  Notes:
@@ -51,7 +41,7 @@ dnl     This is only a problem in macro definitions, not in ordinary text,
 dnl     and not in macro parameters like text passed to forloop() or ifdef().
 
 
-deflit(GMP_LIMB_BYTES, 4)
+deflit(BYTES_PER_MP_LIMB, 4)
 
 
 dnl  Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL.  We
@@ -68,41 +58,24 @@ dnl  order they appear in that structure.
 
 define(CPUVEC_FUNCS_LIST,
 ``add_n',
-`addlsh1_n',
-`addlsh2_n',
 `addmul_1',
-`addmul_2',
-`bdiv_dbm1c',
-`cnd_add_n',
-`cnd_sub_n',
-`com',
 `copyd',
 `copyi',
 `divexact_1',
+`divexact_by3c',
 `divrem_1',
 `gcd_1',
 `lshift',
-`lshiftc',
 `mod_1',
-`mod_1_1p',
-`mod_1_1p_cps',
-`mod_1s_2p',
-`mod_1s_2p_cps',
-`mod_1s_4p',
-`mod_1s_4p_cps',
 `mod_34lsub1',
 `modexact_1c_odd',
 `mul_1',
 `mul_basecase',
-`mullo_basecase',
 `preinv_divrem_1',
 `preinv_mod_1',
-`redc_1',
-`redc_2',
 `rshift',
 `sqr_basecase',
 `sub_n',
-`sublsh1_n',
 `submul_1'')
 
 
@@ -922,7 +895,7 @@ dnl         movl_code_address(L(foo),%eax)
 dnl
 dnl  This macro is only meant for use in ASSERT()s or when testing, since
 dnl  the PIC sequence it generates will want to be done with a ret balancing
-dnl  the call on CPUs with return address branch prediction.
+dnl  the call on CPUs with return address branch prediction.
 dnl
 dnl  The addl generated here has a backward reference to the label, and so
 dnl  won't suffer from the two forwards references bug in old gas (described
@@ -955,9 +928,7 @@ m4_assert_numargs(1)
 
 dnl  Usage LEA(symbol,reg)
 
-define(`LEA',
-m4_assert_numargs(2)
-`ifdef(`PIC',`
+define(`LEA',`
 define(`EPILOGUE_cpu',
 `
 L(movl_eip_`'substr($2,1)):
@@ -965,12 +936,11 @@ L(movl_eip_`'substr($2,1)):
 	ret_internal
 	SIZE($'`1, .-$'`1)')
 
-	call	L(movl_eip_`'substr($2,1))
-	addl	$_GLOBAL_OFFSET_TABLE_, $2
-	movl	$1@GOT($2), $2
-',`
-	movl	`$'$1, $2
-')')
+        call    L(movl_eip_`'substr($2,1))
+        addl    $_GLOBAL_OFFSET_TABLE_, $2
+        movl    $1@GOT($2), $2
+')
+
 
 define(`DEF_OBJECT',
 m4_assert_numargs_range(1,2)
@@ -983,17 +953,4 @@ define(`END_OBJECT',
 m4_assert_numargs(1)
 `	SIZE(`$1',.-`$1')')
 
-dnl  Usage: CALL(funcname)
-dnl
-
-define(`CALL',
-m4_assert_numargs(1)
-`ifdef(`PIC',
-  `call	GSYM_PREFIX`'$1@PLT',
-  `call	GSYM_PREFIX`'$1')')
-
-ifdef(`PIC',
-`define(`PIC_WITH_EBX')',
-`undefine(`PIC_WITH_EBX')')
-
 divert`'dnl
diff --git a/gmp/mpn/x86_64/README b/gmp/mpn/x86_64/README
index 9c8a586622..c89f841027 100644
--- a/gmp/mpn/x86_64/README
+++ b/gmp/mpn/x86_64/README
@@ -3,28 +3,17 @@ Copyright 2003, 2004, 2006, 2008 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
diff --git a/gmp/mpn/x86_64/addaddmul_1msb0.asm b/gmp/mpn/x86_64/addaddmul_1msb0.asm
index 87c21b4aca..89e7bed980 100644
--- a/gmp/mpn/x86_64/addaddmul_1msb0.asm
+++ b/gmp/mpn/x86_64/addaddmul_1msb0.asm
@@ -3,41 +3,26 @@ dnl  AMD64 mpn_addaddmul_1msb0, R = Au + Bv, u,v < 2^63.
 dnl  Copyright 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9	 2.167
-C AMD K10	 2.167
-C Intel P4	12.0
-C Intel core2	 4.0
-C Intel corei	 ?
-C Intel atom	 ?
-C VIA nano	 ?
+C K8:		 2.167
+C P4:		12.0
+C P6-15:	 4.0
 
 C TODO
 C  * Perhaps handle various n mod 3 sizes better.  The code now is too large.
@@ -82,7 +67,7 @@ L(top):	mul	%r9
 	mul	%r8
 	add	%rax, %r10
 	mov	-16(bp,n,8), %rax
-	mov	$0, R32(%r11)
+	mov	$0, %r11d
 	adc	%rdx, %r11
 	mul	%r9
 	add	%rax, %r10
@@ -92,7 +77,7 @@ L(top):	mul	%r9
 	mul	%r8
 	add	%rax, %r11
 	mov	-8(bp,n,8), %rax
-	mov	$0, R32(%r12)
+	mov	$0, %r12d
 	adc	%rdx, %r12
 	mul	%r9
 	add	%rax, %r11
@@ -102,7 +87,7 @@ L(top):	mul	%r9
 	add	%rax, %r12
 	mov	%r11, -8(rp,n,8)
 	mov	(bp,n,8), %rax
-	mov	$0, R32(%r10)
+	mov	$0, %r10d
 	adc	%rdx, %r10
 	add	$3, n
 	js	L(top)
@@ -119,7 +104,7 @@ L(end):	cmp	$1, R32(n)
 	mul	%r8
 	add	%rax, %r10
 	mov	-16(bp), %rax
-	mov	$0, R32(%r11)
+	mov	$0, %r11d
 	adc	%rdx, %r11
 	mul	%r9
 	add	%rax, %r10
@@ -129,7 +114,7 @@ L(end):	cmp	$1, R32(n)
 	mul	%r8
 	add	%rax, %r11
 	mov	-8(bp), %rax
-	mov	$0, R32(%r12)
+	mov	$0, %r12d
 	adc	%rdx, %r12
 	mul	%r9
 	add	%rax, %r11
@@ -148,7 +133,7 @@ L(end):	cmp	$1, R32(n)
 	mul	%r8
 	add	%rax, %r10
 	mov	-8(bp), %rax
-	mov	$0, R32(%r11)
+	mov	$0, %r11d
 	adc	%rdx, %r11
 	mul	%r9
 	add	%rax, %r10
diff --git a/gmp/mpn/x86_64/aorrlsh1_n.asm b/gmp/mpn/x86_64/addlsh1_n.asm
index 6ee0872823..e142f9ef9e 100644
--- a/gmp/mpn/x86_64/aorrlsh1_n.asm
+++ b/gmp/mpn/x86_64/addlsh1_n.asm
@@ -1,55 +1,37 @@
 dnl  AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
-dnl  AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
 
-dnl  Copyright 2003, 2005-2009, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	 2
-C AMD K10	 2
-C AMD bd1	 ?
-C AMD bobcat	 ?
-C Intel P4	 13
-C Intel core2	 3.45
-C Intel NHM	 ?
-C Intel SBR	 ?
-C Intel atom	 ?
-C VIA nano	 ?
+C K8,K9:	 2
+C K10:		 2
+C P4:		13
+C P6-15:	 3.45
 
 
 C Sometimes speed degenerates, supposedly related to that some operand
 C alignments cause cache conflicts.
 
 C The speed is limited by decoding/issue bandwidth.  There are 22 instructions
+C in the loop, which corresponds to ceil(22/3)/4 = 2.0 c/l.
+C in the loop, which corresponds to ceil(26/3)/4 = 2.0 c/l.
 
 C INPUT PARAMETERS
 define(`rp',`%rdi')
@@ -57,25 +39,10 @@ define(`up',`%rsi')
 define(`vp',`%rdx')
 define(`n', `%rcx')
 
-ifdef(`OPERATION_addlsh1_n', `
-  define(ADDSUB,	add)
-  define(ADCSBB,	adc)
-  define(func,		mpn_addlsh1_n)')
-ifdef(`OPERATION_rsblsh1_n', `
-  define(ADDSUB,	sub)
-  define(ADCSBB,	sbb)
-  define(func,		mpn_rsblsh1_n)')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
 	TEXT
 	ALIGN(16)
-PROLOGUE(func)
-	FUNC_ENTRY(4)
+PROLOGUE(mpn_addlsh1_n)
 	push	%rbp
 
 	mov	(vp), %r8
@@ -97,11 +64,11 @@ L(b11):	add	%r8, %r8
 	mov	16(vp,n,8), %r10
 	adc	%r10, %r10
 	sbb	R32(%rax), R32(%rax)	C save scy
-	ADDSUB	(up,n,8), %r8
-	ADCSBB	8(up,n,8), %r9
+	add	(up,n,8), %r8
+	adc	8(up,n,8), %r9
 	mov	%r8, (rp,n,8)
 	mov	%r9, 8(rp,n,8)
-	ADCSBB	16(up,n,8), %r10
+	adc	16(up,n,8), %r10
 	mov	%r10, 16(rp,n,8)
 	sbb	R32(%rbp), R32(%rbp)	C save acy
 	add	$3, n
@@ -111,8 +78,8 @@ L(b10):	add	%r8, %r8
 	mov	8(vp,n,8), %r9
 	adc	%r9, %r9
 	sbb	R32(%rax), R32(%rax)	C save scy
-	ADDSUB	(up,n,8), %r8
-	ADCSBB	8(up,n,8), %r9
+	add	(up,n,8), %r8
+	adc	8(up,n,8), %r9
 	mov	%r8, (rp,n,8)
 	mov	%r9, 8(rp,n,8)
 	sbb	R32(%rbp), R32(%rbp)	C save acy
@@ -121,7 +88,7 @@ L(b10):	add	%r8, %r8
 
 L(b01):	add	%r8, %r8
 	sbb	R32(%rax), R32(%rax)	C save scy
-	ADDSUB	(up,n,8), %r8
+	add	(up,n,8), %r8
 	mov	%r8, (rp,n,8)
 	sbb	R32(%rbp), R32(%rbp)	C save acy
 	inc	n
@@ -142,13 +109,13 @@ L(b00):	adc	%r8, %r8
 	sbb	R32(%rax), R32(%rax)	C save scy
 	add	R32(%rbp), R32(%rbp)	C restore acy
 
-	ADCSBB	(up,n,8), %r8
+	adc	(up,n,8), %r8
 	nop				C Hammer speedup!
-	ADCSBB	8(up,n,8), %r9
+	adc	8(up,n,8), %r9
 	mov	%r8, (rp,n,8)
 	mov	%r9, 8(rp,n,8)
-	ADCSBB	16(up,n,8), %r10
-	ADCSBB	24(up,n,8), %r11
+	adc	16(up,n,8), %r10
+	adc	24(up,n,8), %r11
 	mov	%r10, 16(rp,n,8)
 	mov	%r11, 24(rp,n,8)
 
@@ -156,15 +123,9 @@ L(b00):	adc	%r8, %r8
 	add	$4, n
 	js	L(top)
 
-L(end):
-ifdef(`OPERATION_addlsh1_n',`
-	add	R32(%rbp), R32(%rax)
-	neg	R32(%rax)')
-ifdef(`OPERATION_rsblsh1_n',`
-	sub	R32(%rax), R32(%rbp)
-	movslq	R32(%rbp), %rax')
+L(end):	add	R32(%rbp), R32(%rax)
+	neg	R32(%rax)
 
 	pop	%rbp
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/addmul_2.asm b/gmp/mpn/x86_64/addmul_2.asm
index 18307d719f..8f133c3b00 100644
--- a/gmp/mpn/x86_64/addmul_2.asm
+++ b/gmp/mpn/x86_64/addmul_2.asm
@@ -1,51 +1,39 @@
 dnl  AMD64 mpn_addmul_2 -- Multiply an n-limb vector with a 2-limb vector and
 dnl  add the result to a third limb vector.
 
-dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9	 2.375
-C AMD K10	 2.375
-C Intel P4	15-16
-C Intel core2	 4.45
-C Intel NHM	 4.32
-C Intel SBR	 3.4
-C Intel atom	 ?
-C VIA nano	 4.4
+C K8,K9:	 2.375
+C K10:		 2.375
+C P4:		 ?
+C P6-15:	 4.45
 
 C This code is the result of running a code generation and optimization tool
 C suite written by David Harvey and Torbjorn Granlund.
 
 C TODO
-C  * Tune feed-in and wind-down code.
+C  * Work on feed-in and wind-down code.
+C  * Convert "mov $0" to "xor".
+C  * Adjust initial lea to save some bytes.
+C  * Perhaps adjust n from n_param&3 value?
 
 C INPUT PARAMETERS
 define(`rp',     `%rdi')
@@ -61,124 +49,119 @@ define(`w2', `%rbp')
 define(`w3', `%r10')
 define(`n',  `%r11')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_addmul_2)
-	FUNC_ENTRY(4)
-	mov	n_param, n
 	push	%rbx
 	push	%rbp
 
-	mov	0(vp), v0
+	mov	(vp), v0
 	mov	8(vp), v1
 
-	mov	R32(n_param), R32(%rbx)
-	mov	(up), %rax
-	lea	-8(up,n_param,8), up
-	lea	-8(rp,n_param,8), rp
-	mul	v0
+	mov	n_param, n
 	neg	n
-	and	$3, R32(%rbx)
-	jz	L(b0)
-	cmp	$2, R32(%rbx)
-	jc	L(b1)
-	jz	L(b2)
-
-L(b3):	mov	%rax, w1
+	lea	-32(up,n_param,8), up
+	lea	-32(rp,n_param,8), rp
+
+	and	$3, R32(n_param)
+	jz	L(am2p0)
+	cmp	$2, R32(n_param)
+	jc	L(am2p1)
+	jz	L(am2p2)
+L(am2p3):
+	mov	32(up,n,8), %rax
+	mul	v0
+	mov	%rax, w1
+	mov	32(up,n,8), %rax
 	mov	%rdx, w2
 	xor	R32(w3), R32(w3)
-	mov	8(up,n,8), %rax
-	dec	n
-	jmp	L(lo3)
-
-L(b2):	mov	%rax, w2
-	mov	8(up,n,8), %rax
-	mov	%rdx, w3
-	xor	R32(w0), R32(w0)
-	add	$-2, n
-	jmp	L(lo2)
-
-L(b1):	mov	%rax, w3
-	mov	8(up,n,8), %rax
-	mov	%rdx, w0
-	xor	R32(w1), R32(w1)
-	inc	n
-	jmp	L(lo1)
-
-L(b0):	mov	$0, R32(w3)
+	add	$2, n
+	jmp	L(am3)
+L(am2p0):
+	mov	32(up,n,8), %rax
+	mul	v0
 	mov	%rax, w0
-	mov	8(up,n,8), %rax
+	mov	32(up,n,8), %rax
 	mov	%rdx, w1
 	xor	R32(w2), R32(w2)
-	jmp	L(lo0)
+	add	$3, n
+	jmp	L(am0)
+L(am2p1):
+	mov	32(up,n,8), %rax
+	mul	v0
+	mov	%rax, w3
+	mov	32(up,n,8), %rax
+	mov	%rdx, w0
+	xor	R32(w1), R32(w1)
+	jmp	L(am1)
+L(am2p2):
+	mov	32(up,n,8), %rax
+	mul	v0
+	mov	%rax, w2
+	mov	32(up,n,8), %rax
+	mov	%rdx, w3
+	xor	R32(w0), R32(w0)
+	xor	R32(w1), R32(w1)
+	add	$1, n
+	jmp	L(am2)
 
 	ALIGN(32)
-L(top):	mov	$0, R32(w1)
-	mul	v0
-	add	%rax, w3
-	mov	(up,n,8), %rax
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-L(lo1):	mul	v1
+L(top):
 	add	w3, (rp,n,8)
-	mov	$0, R32(w3)
 	adc	%rax, w0
-	mov	$0, R32(w2)
 	mov	8(up,n,8), %rax
 	adc	%rdx, w1
+	mov	$0, R32(w2)
 	mul	v0
 	add	%rax, w0
 	mov	8(up,n,8), %rax
 	adc	%rdx, w1
 	adc	$0, R32(w2)
-L(lo0):	mul	v1
+L(am0):	mul	v1
 	add	w0, 8(rp,n,8)
 	adc	%rax, w1
 	adc	%rdx, w2
 	mov	16(up,n,8), %rax
+	mov	$0, R32(w3)
 	mul	v0
 	add	%rax, w1
+	mov	16(up,n,8), %rax
 	adc	%rdx, w2
 	adc	$0, R32(w3)
-	mov	16(up,n,8), %rax
-L(lo3):	mul	v1
+L(am3):	mul	v1
 	add	w1, 16(rp,n,8)
 	adc	%rax, w2
-	adc	%rdx, w3
-	xor	R32(w0), R32(w0)
 	mov	24(up,n,8), %rax
+	adc	%rdx, w3
 	mul	v0
+	mov	$0, R32(w0)
 	add	%rax, w2
-	mov	24(up,n,8), %rax
 	adc	%rdx, w3
+	mov	$0, R32(w1)
+	mov	24(up,n,8), %rax
 	adc	$0, R32(w0)
-L(lo2):	mul	v1
+L(am2):	mul	v1
 	add	w2, 24(rp,n,8)
 	adc	%rax, w3
 	adc	%rdx, w0
 	mov	32(up,n,8), %rax
-	add	$4, n
-	js	L(top)
-
-L(end):	xor	R32(w1), R32(w1)
 	mul	v0
 	add	%rax, w3
-	mov	(up), %rax
+	mov	32(up,n,8), %rax
 	adc	%rdx, w0
-	adc	R32(w1), R32(w1)
-	mul	v1
-	add	w3, (rp)
+	adc	$0, R32(w1)
+L(am1):	mul	v1
+	add	$4, n
+	js	L(top)
+
+	add	w3, (rp,n,8)
 	adc	%rax, w0
 	adc	%rdx, w1
-	mov	w0, 8(rp)
+	mov	w0, 8(rp,n,8)
 	mov	w1, %rax
 
 	pop	%rbp
 	pop	%rbx
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/aorrlsh2_n.asm b/gmp/mpn/x86_64/aorrlsh2_n.asm
deleted file mode 100644
index 999e972fb4..0000000000
--- a/gmp/mpn/x86_64/aorrlsh2_n.asm
+++ /dev/null
@@ -1,53 +0,0 @@
-dnl  AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
-dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2009-2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 2)
-define(RSH, 62)
-
-ifdef(`OPERATION_addlsh2_n',`
-  define(ADDSUB,	add)
-  define(ADCSBB,	adc)
-  define(func,		mpn_addlsh2_n)')
-ifdef(`OPERATION_rsblsh2_n',`
-  define(ADDSUB,	sub)
-  define(ADCSBB,	sbb)
-  define(func,		mpn_rsblsh2_n)')
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-include_mpn(`x86_64/aorrlshC_n.asm')
diff --git a/gmp/mpn/x86_64/aorrlshC_n.asm b/gmp/mpn/x86_64/aorrlshC_n.asm
deleted file mode 100644
index 5a9fd4dfb9..0000000000
--- a/gmp/mpn/x86_64/aorrlshC_n.asm
+++ /dev/null
@@ -1,160 +0,0 @@
-dnl  AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
-dnl  AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]
-
-dnl  Copyright 2009-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-C	     cycles/limb
-C AMD K8,K9	 2
-C AMD K10	 2
-C Intel P4	 ?
-C Intel core2	 3
-C Intel NHM	 2.75
-C Intel SBR	 2.55
-C Intel atom	 ?
-C VIA nano	 ?
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`vp',	`%rdx')
-define(`n',	`%rcx')
-
-define(M, eval(m4_lshift(1,LSH)))
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	FUNC_ENTRY(4)
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	mov	(vp), %r8
-	lea	(,%r8,M), %r12
-	shr	$RSH, %r8
-
-	mov	R32(n), R32(%rax)
-	lea	(rp,n,8), rp
-	lea	(up,n,8), up
-	lea	(vp,n,8), vp
-	neg	n
-	and	$3, R8(%rax)
-	je	L(b00)
-	cmp	$2, R8(%rax)
-	jc	L(b01)
-	je	L(b10)
-
-L(b11):	mov	8(vp,n,8), %r10
-	lea	(%r8,%r10,M), %r14
-	shr	$RSH, %r10
-	mov	16(vp,n,8), %r11
-	lea	(%r10,%r11,M), %r15
-	shr	$RSH, %r11
-	ADDSUB	(up,n,8), %r12
-	ADCSBB	8(up,n,8), %r14
-	ADCSBB	16(up,n,8), %r15
-	sbb	R32(%rax), R32(%rax)		  C save carry for next
-	mov	%r12, (rp,n,8)
-	mov	%r14, 8(rp,n,8)
-	mov	%r15, 16(rp,n,8)
-	add	$3, n
-	js	L(top)
-	jmp	L(end)
-
-L(b01):	mov	%r8, %r11
-	ADDSUB	(up,n,8), %r12
-	sbb	R32(%rax), R32(%rax)		  C save carry for next
-	mov	%r12, (rp,n,8)
-	add	$1, n
-	js	L(top)
-	jmp	L(end)
-
-L(b10):	mov	8(vp,n,8), %r11
-	lea	(%r8,%r11,M), %r15
-	shr	$RSH, %r11
-	ADDSUB	(up,n,8), %r12
-	ADCSBB	8(up,n,8), %r15
-	sbb	R32(%rax), R32(%rax)		  C save carry for next
-	mov	%r12, (rp,n,8)
-	mov	%r15, 8(rp,n,8)
-	add	$2, n
-	js	L(top)
-	jmp	L(end)
-
-L(b00):	mov	8(vp,n,8), %r9
-	mov	16(vp,n,8), %r10
-	jmp	L(e00)
-
-	ALIGN(16)
-L(top):	mov	16(vp,n,8), %r10
-	mov	(vp,n,8), %r8
-	mov	8(vp,n,8), %r9
-	lea	(%r11,%r8,M), %r12
-	shr	$RSH, %r8
-L(e00):	lea	(%r8,%r9,M), %r13
-	shr	$RSH, %r9
-	mov	24(vp,n,8), %r11
-	lea	(%r9,%r10,M), %r14
-	shr	$RSH, %r10
-	lea	(%r10,%r11,M), %r15
-	shr	$RSH, %r11
-	add	R32(%rax), R32(%rax)		  C restore carry
-	ADCSBB	(up,n,8), %r12
-	ADCSBB	8(up,n,8), %r13
-	ADCSBB	16(up,n,8), %r14
-	ADCSBB	24(up,n,8), %r15
-	mov	%r12, (rp,n,8)
-	mov	%r13, 8(rp,n,8)
-	mov	%r14, 16(rp,n,8)
-	sbb	R32(%rax), R32(%rax)		  C save carry for next
-	mov	%r15, 24(rp,n,8)
-	add	$4, n
-	js	L(top)
-L(end):
-
-ifelse(ADDSUB,add,`
-	sub	R32(%r11), R32(%rax)
-	neg	R32(%rax)
-',`
-	add	R32(%r11), R32(%rax)
-	movslq	R32(%rax), %rax
-')
-	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/aorrlsh_n.asm b/gmp/mpn/x86_64/aorrlsh_n.asm
index 5ca128fbf3..55176f7aa1 100644
--- a/gmp/mpn/x86_64/aorrlsh_n.asm
+++ b/gmp/mpn/x86_64/aorrlsh_n.asm
@@ -1,45 +1,32 @@
 dnl  AMD64 mpn_addlsh_n and mpn_rsblsh_n.  R = V2^k +- U.
+dnl  ("rsb" means reversed subtract, name mandated by mpn_sublsh1_n which
+dnl  subtracts the shifted operand from the unshifted operand.)
 
-dnl  Copyright 2006, 2010-2012 Free Software Foundation, Inc.
+dnl  Copyright 2006 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	 3.1	< 3.85 for lshift + add_n
-C AMD K10	 3.1	< 3.85 for lshift + add_n
-C Intel P4	14.6	> 7.33 for lshift + add_n
-C Intel core2	 3.87	> 3.27 for lshift + add_n
-C Intel NHM	 4	> 3.75 for lshift + add_n
-C Intel SBR	(5.8)	> 3.46 for lshift + add_n
-C Intel atom	(7.75)	< 8.75 for lshift + add_n
-C VIA nano	 4.7	< 6.25 for lshift + add_n
+C K8,K9:	 3.25	(mpn_lshift + mpn_add_n costs about 4.1 c/l)
+C K10:		 3.25	(mpn_lshift + mpn_add_n costs about 4.1 c/l)
+C P4:		14
+C P6-15:	 4
 
 C This was written quickly and not optimized at all.  Surely one could get
 C closer to 3 c/l or perhaps even under 3 c/l.  Ideas:
@@ -54,67 +41,65 @@ define(`rp',	`%rdi')
 define(`up',	`%rsi')
 define(`vp',	`%rdx')
 define(`n',	`%rcx')
-define(`cnt',	`%r8')
+define(`cnt',	`%r8')		C shift count
 
 ifdef(`OPERATION_addlsh_n',`
-  define(ADCSBB,       `adc')
+  define(ADDSUBC,       `adc')
   define(func, mpn_addlsh_n)
 ')
 ifdef(`OPERATION_rsblsh_n',`
-  define(ADCSBB,       `sbb')
+  define(ADDSUBC,       `sbb')
   define(func, mpn_rsblsh_n)
 ')
 
 MULFUNC_PROLOGUE(mpn_addlsh_n mpn_rsblsh_n)
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(func)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')
+
 	push	%r12
 	push	%r13
 	push	%r14
-	push	%rbp
+	push	%r15
 	push	%rbx
 
 	mov	n, %rax
-	xor	R32(%rbx), R32(%rbx)	C clear carry save register
-	mov	R32(%r8), R32(%rcx)	C shift count
-	xor	R32(%rbp), R32(%rbp)	C limb carry
+	xor	%ebx, %ebx		C clear carry save register
+	mov	%r8d, %ecx		C shift count
+	xor	%r15d, %r15d		C limb carry
 
-	mov	R32(%rax), R32(%r11)
-	and	$3, R32(%r11)
+	mov	%eax, %r11d
+	and	$3, %r11d
 	je	L(4)
-	sub	$1, R32(%r11)
+	sub	$1, %r11d
 
-L(012):	mov	(vp), %r8
+L(oopette):
+	mov	0(vp), %r8
 	mov	%r8, %r12
-	shl	R8(%rcx), %r8
-	or	%rbp, %r8
-	neg	R8(%rcx)
-	mov	%r12, %rbp
-	shr	R8(%rcx), %rbp
-	neg	R8(%rcx)
-	add	R32(%rbx), R32(%rbx)
-	ADCSBB	(up), %r8
-	mov	%r8, (rp)
-	sbb	R32(%rbx), R32(%rbx)
+	shl	%cl, %r8
+	or	%r15, %r8
+	neg	%cl
+	mov	%r12, %r15
+	shr	%cl, %r15
+	neg	%cl
+	add	%ebx, %ebx
+	ADDSUBC	0(up), %r8
+	mov	%r8, 0(rp)
+	sbb	%ebx, %ebx
 	lea	8(up), up
 	lea	8(vp), vp
 	lea	8(rp), rp
-	sub	$1, R32(%r11)
-	jnc	L(012)
+	sub	$1, %r11d
+	jnc	L(oopette)
 
-L(4):	sub	$4, %rax
+L(4):
+	sub	$4, %rax
 	jc	L(end)
 
-	ALIGN(16)
-L(top):	mov	(vp), %r8
+L(oop):
+	mov	0(vp), %r8
 	mov	%r8, %r12
 	mov	8(vp), %r9
 	mov	%r9, %r13
@@ -122,55 +107,55 @@ L(top):	mov	(vp), %r8
 	mov	%r10, %r14
 	mov	24(vp), %r11
 
-	shl	R8(%rcx), %r8
-	shl	R8(%rcx), %r9
-	shl	R8(%rcx), %r10
-	or	%rbp, %r8
-	mov	%r11, %rbp
-	shl	R8(%rcx), %r11
+	shl	%cl, %r8
+	shl	%cl, %r9
+	shl	%cl, %r10
+	or	%r15, %r8
+	mov	%r11, %r15
+	shl	%cl, %r11
 
-	neg	R8(%rcx)
+	neg	%cl
 
-	shr	R8(%rcx), %r12
-	shr	R8(%rcx), %r13
-	shr	R8(%rcx), %r14
-	shr	R8(%rcx), %rbp		C used next iteration
+	shr	%cl, %r12
+	shr	%cl, %r13
+	shr	%cl, %r14
+	shr	%cl, %r15		C used next loop
 
 	or	%r12, %r9
 	or	%r13, %r10
 	or	%r14, %r11
 
-	neg	R8(%rcx)
+	neg	%cl
 
-	add	R32(%rbx), R32(%rbx)	C restore carry flag
+	add	%ebx, %ebx		C restore carry flag
 
-	ADCSBB	(up), %r8
-	ADCSBB	8(up), %r9
-	ADCSBB	16(up), %r10
-	ADCSBB	24(up), %r11
+	ADDSUBC	0(up), %r8
+	ADDSUBC	8(up), %r9
+	ADDSUBC	16(up), %r10
+	ADDSUBC	24(up), %r11
 
-	mov	%r8, (rp)
+	mov	%r8, 0(rp)
 	mov	%r9, 8(rp)
 	mov	%r10, 16(rp)
 	mov	%r11, 24(rp)
 
-	sbb	R32(%rbx), R32(%rbx)	C save carry flag
+	sbb	%ebx, %ebx		C save carry flag
 
 	lea	32(up), up
 	lea	32(vp), vp
 	lea	32(rp), rp
 
 	sub	$4, %rax
-	jnc	L(top)
-
-L(end):	add	R32(%rbx), R32(%rbx)
-	ADCSBB	$0, %rbp
-	mov	%rbp, %rax
+	jnc	L(oop)
+L(end):
+	add	%ebx, %ebx
+	ADDSUBC	$0, %r15		C fold carry (add) or borrow (rsb) into return limb
+	mov	%r15, %rax
 	pop	%rbx
-	pop	%rbp
+	pop	%r15
 	pop	%r14
 	pop	%r13
 	pop	%r12
-	FUNC_EXIT()
+
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/aors_err1_n.asm b/gmp/mpn/x86_64/aors_err1_n.asm
deleted file mode 100644
index 54d0b3f9b7..0000000000
--- a/gmp/mpn/x86_64/aors_err1_n.asm
+++ /dev/null
@@ -1,225 +0,0 @@
-dnl  AMD64 mpn_add_err1_n, mpn_sub_err1_n
-
-dnl  Contributed by David Harvey.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 2.75 (degenerates to 3 c/l for some alignments)
-C AMD K10	 ?
-C Intel P4	 ?
-C Intel core2	 ?
-C Intel corei	 ?
-C Intel atom	 ?
-C VIA nano	 ?
-
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`vp',	`%rdx')
-define(`ep',	`%rcx')
-define(`yp',	`%r8')
-define(`n',	`%r9')
-define(`cy_param',	`8(%rsp)')
-
-define(`el',	`%rbx')
-define(`eh',	`%rbp')
-define(`t0',	`%r10')
-define(`t1',	`%r11')
-define(`t2',	`%r12')
-define(`t3',	`%r13')
-define(`w0',	`%r14')
-define(`w1',	`%r15')
-
-ifdef(`OPERATION_add_err1_n', `
-	define(ADCSBB,	      adc)
-	define(func,	      mpn_add_err1_n)')
-ifdef(`OPERATION_sub_err1_n', `
-	define(ADCSBB,	      sbb)
-	define(func,	      mpn_sub_err1_n)')
-
-MULFUNC_PROLOGUE(mpn_add_err1_n mpn_sub_err1_n)
-
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	mov	cy_param, %rax
-
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	lea	(up,n,8), up
-	lea	(vp,n,8), vp
-	lea	(rp,n,8), rp
-
-	mov	R32(n), R32(%r10)
-	and	$3, R32(%r10)
-	jz	L(0mod4)
-	cmp	$2, R32(%r10)
-	jc	L(1mod4)
-	jz	L(2mod4)
-L(3mod4):
-	xor	R32(el), R32(el)
-	xor	R32(eh), R32(eh)
-	xor	R32(t0), R32(t0)
-	xor	R32(t1), R32(t1)
-	lea	-24(yp,n,8), yp
-	neg	n
-
-	shr	$1, %al		   C restore carry
-	mov	(up,n,8), w0
-	mov	8(up,n,8), w1
-	ADCSBB	(vp,n,8), w0
-	mov	w0, (rp,n,8)
-	cmovc	16(yp), el
-	ADCSBB	8(vp,n,8), w1
-	mov	w1, 8(rp,n,8)
-	cmovc	8(yp), t0
-	mov	16(up,n,8), w0
-	ADCSBB	16(vp,n,8), w0
-	mov	w0, 16(rp,n,8)
-	cmovc	(yp), t1
-	setc	%al		   C save carry
-	add	t0, el
-	adc	$0, eh
-	add	t1, el
-	adc	$0, eh
-
-	add	$3, n
-	jnz	L(loop)
-	jmp	L(end)
-
-	ALIGN(16)
-L(0mod4):
-	xor	R32(el), R32(el)
-	xor	R32(eh), R32(eh)
-	lea	(yp,n,8), yp
-	neg	n
-	jmp	L(loop)
-
-	ALIGN(16)
-L(1mod4):
-	xor	R32(el), R32(el)
-	xor	R32(eh), R32(eh)
-	lea	-8(yp,n,8), yp
-	neg	n
-
-	shr	$1, %al		   C restore carry
-	mov	(up,n,8), w0
-	ADCSBB	(vp,n,8), w0
-	mov	w0, (rp,n,8)
-	cmovc	(yp), el
-	setc	%al		   C save carry
-
-	add	$1, n
-	jnz	L(loop)
-	jmp	L(end)
-
-	ALIGN(16)
-L(2mod4):
-	xor	R32(el), R32(el)
-	xor	R32(eh), R32(eh)
-	xor	R32(t0), R32(t0)
-	lea	-16(yp,n,8), yp
-	neg	n
-
-	shr	$1, %al		   C restore carry
-	mov	(up,n,8), w0
-	mov	8(up,n,8), w1
-	ADCSBB	(vp,n,8), w0
-	mov	w0, (rp,n,8)
-	cmovc	8(yp), el
-	ADCSBB	8(vp,n,8), w1
-	mov	w1, 8(rp,n,8)
-	cmovc	(yp), t0
-	setc	%al		   C save carry
-	add	t0, el
-	adc	$0, eh
-
-	add	$2, n
-	jnz	L(loop)
-	jmp	L(end)
-
-	ALIGN(32)
-L(loop):
-	shr	$1, %al		   C restore carry
-	mov	-8(yp), t0
-	mov	$0, R32(t3)
-	mov	(up,n,8), w0
-	mov	8(up,n,8), w1
-	ADCSBB	(vp,n,8), w0
-	cmovnc	t3, t0
-	ADCSBB	8(vp,n,8), w1
-	mov	-16(yp), t1
-	mov	w0, (rp,n,8)
-	mov	16(up,n,8), w0
-	mov	w1, 8(rp,n,8)
-	cmovnc	t3, t1
-	mov	-24(yp), t2
-	ADCSBB	16(vp,n,8), w0
-	cmovnc	t3, t2
-	mov	24(up,n,8), w1
-	ADCSBB	24(vp,n,8), w1
-	cmovc	-32(yp), t3
-	setc	%al		   C save carry
-	add	t0, el
-	adc	$0, eh
-	add	t1, el
-	adc	$0, eh
-	add	t2, el
-	adc	$0, eh
-	mov	w0, 16(rp,n,8)
-	add	t3, el
-	lea	-32(yp), yp
-	adc	$0, eh
-	mov	w1, 24(rp,n,8)
-	add	$4, n
-	jnz	L(loop)
-
-L(end):
-	mov	el, (ep)
-	mov	eh, 8(ep)
-
-	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/aors_err2_n.asm b/gmp/mpn/x86_64/aors_err2_n.asm
deleted file mode 100644
index ce5c2a49b6..0000000000
--- a/gmp/mpn/x86_64/aors_err2_n.asm
+++ /dev/null
@@ -1,172 +0,0 @@
-dnl  AMD64 mpn_add_err2_n, mpn_sub_err2_n
-
-dnl  Contributed by David Harvey.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 4.5
-C AMD K10	 ?
-C Intel P4	 ?
-C Intel core2	 6.9
-C Intel corei	 ?
-C Intel atom	 ?
-C VIA nano	 ?
-
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`vp',	`%rdx')
-define(`ep',	`%rcx')
-define(`yp1',	`%r8')
-define(`yp2',   `%r9')
-define(`n_param',     `8(%rsp)')
-define(`cy_param',    `16(%rsp)')
-
-define(`cy1',   `%r14')
-define(`cy2',   `%rax')
-
-define(`n',     `%r10')
-
-define(`w',     `%rbx')
-define(`e1l',	`%rbp')
-define(`e1h',	`%r11')
-define(`e2l',	`%r12')
-define(`e2h',	`%r13')
-
-
-ifdef(`OPERATION_add_err2_n', `
-	define(ADCSBB,	      adc)
-	define(func,	      mpn_add_err2_n)')
-ifdef(`OPERATION_sub_err2_n', `
-	define(ADCSBB,	      sbb)
-	define(func,	      mpn_sub_err2_n)')
-
-MULFUNC_PROLOGUE(mpn_add_err2_n mpn_sub_err2_n)
-
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	mov	cy_param, cy2
-	mov	n_param, n
-
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-
-	xor	R32(e1l), R32(e1l)
-	xor	R32(e1h), R32(e1h)
-	xor	R32(e2l), R32(e2l)
-	xor	R32(e2h), R32(e2h)
-
-	sub	yp1, yp2
-
-	lea	(rp,n,8), rp
-	lea	(up,n,8), up
-	lea	(vp,n,8), vp
-
-	test	$1, n
-	jnz	L(odd)
-
-	lea	-8(yp1,n,8), yp1
-	neg	n
-	jmp	L(top)
-
-	ALIGN(16)
-L(odd):
-	lea	-16(yp1,n,8), yp1
-	neg	n
-	shr	$1, cy2
-	mov	(up,n,8), w
-	ADCSBB	(vp,n,8), w
-	cmovc	8(yp1), e1l
-	cmovc	8(yp1,yp2), e2l
-	mov	w, (rp,n,8)
-	sbb	cy2, cy2
-	inc	n
-	jz	L(end)
-
-	ALIGN(16)
-L(top):
-	mov	(up,n,8), w
-	shr	$1, cy2		C restore carry
-	ADCSBB	(vp,n,8), w
-	mov	w, (rp,n,8)
-	sbb	cy1, cy1	C generate mask, preserve CF
-
-	mov	8(up,n,8), w
-	ADCSBB	8(vp,n,8), w
-	mov	w, 8(rp,n,8)
-	sbb	cy2, cy2	C generate mask, preserve CF
-
-	mov	(yp1), w	C (e1h:e1l) += cy1 * yp1 limb
-	and	cy1, w
-	add	w, e1l
-	adc	$0, e1h
-
-	and	(yp1,yp2), cy1	C (e2h:e2l) += cy1 * yp2 limb
-	add	cy1, e2l
-	adc	$0, e2h
-
-	mov	-8(yp1), w	C (e1h:e1l) += cy2 * next yp1 limb
-	and	cy2, w
-	add	w, e1l
-	adc	$0, e1h
-
-	mov	-8(yp1,yp2), w	C (e2h:e2l) += cy2 * next yp2 limb
-	and	cy2, w
-	add	w, e2l
-	adc	$0, e2h
-
-	add	$2, n
-	lea	-16(yp1), yp1
-	jnz	L(top)
-L(end):
-
-	mov	e1l, (ep)
-	mov	e1h, 8(ep)
-	mov	e2l, 16(ep)
-	mov	e2h, 24(ep)
-
-	and	$1, %eax	C return carry
-
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/aors_err3_n.asm b/gmp/mpn/x86_64/aors_err3_n.asm
deleted file mode 100644
index bb6d0c5366..0000000000
--- a/gmp/mpn/x86_64/aors_err3_n.asm
+++ /dev/null
@@ -1,156 +0,0 @@
-dnl  AMD64 mpn_add_err3_n, mpn_sub_err3_n
-
-dnl  Contributed by David Harvey.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 7.0
-C AMD K10	 ?
-C Intel P4	 ?
-C Intel core2	 ?
-C Intel corei	 ?
-C Intel atom	 ?
-C VIA nano	 ?
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`vp',	`%rdx')
-define(`ep',	`%rcx')
-define(`yp1',	`%r8')
-define(`yp2',   `%r9')
-define(`yp3_param',   `8(%rsp)')
-define(`n_param',     `16(%rsp)')
-define(`cy_param',    `24(%rsp)')
-
-define(`n',     `%r10')
-define(`yp3',   `%rcx')
-define(`t',     `%rbx')
-
-define(`e1l',	`%rbp')
-define(`e1h',	`%r11')
-define(`e2l',	`%r12')
-define(`e2h',	`%r13')
-define(`e3l',   `%r14')
-define(`e3h',   `%r15')
-
-
-
-ifdef(`OPERATION_add_err3_n', `
-	define(ADCSBB,	      adc)
-	define(func,	      mpn_add_err3_n)')
-ifdef(`OPERATION_sub_err3_n', `
-	define(ADCSBB,	      sbb)
-	define(func,	      mpn_sub_err3_n)')
-
-MULFUNC_PROLOGUE(mpn_add_err3_n mpn_sub_err3_n)
-
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	mov	cy_param, %rax
-	mov	n_param, n
-
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	push	ep
-	mov	64(%rsp), yp3       C load from yp3_param
-
-	xor	R32(e1l), R32(e1l)
-	xor	R32(e1h), R32(e1h)
-	xor	R32(e2l), R32(e2l)
-	xor	R32(e2h), R32(e2h)
-	xor	R32(e3l), R32(e3l)
-	xor	R32(e3h), R32(e3h)
-
-	sub	yp1, yp2
-	sub	yp1, yp3
-
-	lea	-8(yp1,n,8), yp1
-	lea	(rp,n,8), rp
-	lea	(up,n,8), up
-	lea	(vp,n,8), vp
-	neg	n
-
-	ALIGN(16)
-L(top):
-	shr	$1, %rax		C restore carry
-	mov	(up,n,8), %rax
-	ADCSBB	(vp,n,8), %rax
-	mov	%rax, (rp,n,8)
-	sbb	%rax, %rax		C save carry and generate mask
-
-	mov	(yp1), t
-	and	%rax, t
-	add	t, e1l
-	adc	$0, e1h
-
-	mov	(yp1,yp2), t
-	and	%rax, t
-	add	t, e2l
-	adc	$0, e2h
-
-	mov	(yp1,yp3), t
-	and	%rax, t
-	add	t, e3l
-	adc	$0, e3h
-
-	lea	-8(yp1), yp1
-	inc	n
-	jnz     L(top)
-
-L(end):
-	and	$1, %eax
-	pop	ep
-
-	mov	e1l, (ep)
-	mov	e1h, 8(ep)
-	mov	e2l, 16(ep)
-	mov	e2h, 24(ep)
-	mov	e3l, 32(ep)
-	mov	e3h, 40(ep)
-
-	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/aors_n.asm b/gmp/mpn/x86_64/aors_n.asm
index 8941f7a17b..dae5408ba4 100644
--- a/gmp/mpn/x86_64/aors_n.asm
+++ b/gmp/mpn/x86_64/aors_n.asm
@@ -1,57 +1,40 @@
 dnl  AMD64 mpn_add_n, mpn_sub_n
 
-dnl  Copyright 2003-2005, 2007, 2008, 2010-2012 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9	 1.5
-C AMD K10	 1.5
-C AMD bd1	 1.8
-C AMD bobcat	 2.5
-C Intel P4
-C Intel core2	 4.9
-C Intel NHM	 5.5
-C Intel SBR	 1.61
-C Intel IBR	 1.61
-C Intel atom	 4
-C VIA nano	 3.25
-
-C The loop of this code is the result of running a code generation and
+C K8,K9:	 1.5
+C K10:		 1.5
+C P4:		 ?
+C P6-15 (Core2): 4.9
+C P6-28 (Atom):	 4
+
+C The inner loop of this code is the result of running a code generation and
 C optimization tool suite written by David Harvey and Torbjorn Granlund.
 
 C INPUT PARAMETERS
-define(`rp',	`%rdi')	C rcx
-define(`up',	`%rsi')	C rdx
-define(`vp',	`%rdx')	C r8
-define(`n',	`%rcx')	C r9
-define(`cy',	`%r8')	C rsp+40    (mpn_add_nc and mpn_sub_nc)
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`vp',	`%rdx')
+define(`n',	`%rcx')
+define(`cy',	`%r8')		C (only for mpn_add_nc)
 
 ifdef(`OPERATION_add_n', `
 	define(ADCSBB,	      adc)
@@ -64,71 +47,29 @@ ifdef(`OPERATION_sub_n', `
 
 MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(func_nc)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
 	mov	R32(n), R32(%rax)
-	shr	$2, n
 	and	$3, R32(%rax)
+	shr	$2, n
 	bt	$0, %r8			C cy flag <- carry parameter
-	jrcxz	L(lt4)
-
-	mov	(up), %r8
-	mov	8(up), %r9
-	dec	n
-	jmp	L(mid)
-
+	jz	L(1)
+	jmp	L(ent)
 EPILOGUE()
 	ALIGN(16)
 PROLOGUE(func)
-	FUNC_ENTRY(4)
 	mov	R32(n), R32(%rax)
 	shr	$2, n
+	jz	L(0)
 	and	$3, R32(%rax)
-	jrcxz	L(lt4)
 
-	mov	(up), %r8
+L(ent):	mov	(up), %r8
 	mov	8(up), %r9
 	dec	n
 	jmp	L(mid)
 
-L(lt4):	dec	R32(%rax)
-	mov	(up), %r8
-	jnz	L(2)
-	ADCSBB	(vp), %r8
-	mov	%r8, (rp)
-	adc	R32(%rax), R32(%rax)
-	FUNC_EXIT()
-	ret
-
-L(2):	dec	R32(%rax)
-	mov	8(up), %r9
-	jnz	L(3)
-	ADCSBB	(vp), %r8
-	ADCSBB	8(vp), %r9
-	mov	%r8, (rp)
-	mov	%r9, 8(rp)
-	adc	R32(%rax), R32(%rax)
-	FUNC_EXIT()
-	ret
-
-L(3):	mov	16(up), %r10
-	ADCSBB	(vp), %r8
-	ADCSBB	8(vp), %r9
-	ADCSBB	16(vp), %r10
-	mov	%r8, (rp)
-	mov	%r9, 8(rp)
-	mov	%r10, 16(rp)
-	setc	R8(%rax)
-	FUNC_EXIT()
-	ret
-
 	ALIGN(16)
 L(top):	ADCSBB	(vp), %r8
 	ADCSBB	8(vp), %r9
@@ -162,8 +103,36 @@ L(end):	lea	32(up), up
 
 	inc	R32(%rax)
 	dec	R32(%rax)
-	jnz	L(lt4)
-	adc	R32(%rax), R32(%rax)
-	FUNC_EXIT()
+	jnz	L(1)
+	adc	%eax, %eax
+	ret
+
+L(0):	test	R32(%rax), R32(%rax)
+L(1):	dec	R32(%rax)
+	mov	(up), %r8
+	jnz	L(2)
+	ADCSBB	(vp), %r8
+	mov	%r8, (rp)
+	adc	%eax, %eax
+	ret
+
+L(2):	dec	R32(%rax)
+	mov	8(up), %r9
+	jnz	L(3)
+	ADCSBB	(vp), %r8
+	ADCSBB	8(vp), %r9
+	mov	%r8, (rp)
+	mov	%r9, 8(rp)
+	adc	%eax, %eax
+	ret
+
+L(3):	mov	16(up), %r10
+	ADCSBB	(vp), %r8
+	ADCSBB	8(vp), %r9
+	ADCSBB	16(vp), %r10
+	mov	%r8, (rp)
+	mov	%r9, 8(rp)
+	mov	%r10, 16(rp)
+	setc	%al
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/aorsmul_1.asm b/gmp/mpn/x86_64/aorsmul_1.asm
index e3fc005757..a25c74ebdc 100644
--- a/gmp/mpn/x86_64/aorsmul_1.asm
+++ b/gmp/mpn/x86_64/aorsmul_1.asm
@@ -1,60 +1,45 @@
 dnl  AMD64 mpn_addmul_1 and mpn_submul_1.
 
-dnl  Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9	 2.5
-C AMD K10	 2.5
-C AMD bd1	 5.0
-C AMD bobcat	 6.17
-C Intel P4	14.9
-C Intel core2	 5.09
-C Intel NHM	 4.9
-C Intel SBR	 4.0
-C Intel atom	21.3
-C VIA nano	 5.0
-
-C The loop of this code is the result of running a code generation and
+C K8,K9:	 2.5
+C K10:		 2.5
+C P4:		14.9
+C P6-15 (Core2): 5.09
+C P6-28 (Atom):	21.3
+
+C The inner loop of this code is the result of running a code generation and
 C optimization tool suite written by David Harvey and Torbjorn Granlund.
 
-C TODO
-C  * The loop is great, but the prologue and epilogue code was quickly written.
-C    Tune it!
+C TODO:
+C  * The inner loop is great, but the prologue and epilogue code was
+C    quickly written.  Tune it!
 
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`vl',      `%rcx')   C r9
+C INPUT PARAMETERS
+define(`rp',	 `%rdi')
+define(`up',	 `%rsi')
+define(`n_param',`%rdx')
+define(`vl',	 `%rcx')
 
-define(`n',       `%r11')
+define(`n',	`%r11')
 
 ifdef(`OPERATION_addmul_1',`
       define(`ADDSUB',        `add')
@@ -65,33 +50,17 @@ ifdef(`OPERATION_submul_1',`
       define(`func',  `mpn_submul_1')
 ')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
 
-IFDOS(`	define(`up', ``%rsi'')	') dnl
-IFDOS(`	define(`rp', ``%rcx'')	') dnl
-IFDOS(`	define(`vl', ``%r9'')	') dnl
-IFDOS(`	define(`r9', ``rdi'')	') dnl
-IFDOS(`	define(`n',  ``%r8'')	') dnl
-IFDOS(`	define(`r8', ``r11'')	') dnl
-
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(func)
-
-IFDOS(``push	%rsi		'')
-IFDOS(``push	%rdi		'')
-IFDOS(``mov	%rdx, %rsi	'')
-
 	mov	(up), %rax		C read first u limb early
 	push	%rbx
-IFSTD(`	mov	n_param, %rbx   ')	C move away n from rdx, mul uses it
-IFDOS(`	mov	n, %rbx         ')
+	mov	n_param, %rbx		C move away n from rdx, mul uses it
 	mul	vl
-IFSTD(`	mov	%rbx, n         ')
+	mov	%rbx, %r11
 
 	and	$3, R32(%rbx)
 	jz	L(b0)
@@ -143,7 +112,7 @@ L(top):	ADDSUB	%r10, (rp,n,8)
 	adc	%rax, %r9
 	mov	(up,n,8), %rax
 	adc	%rdx, %r8
-	mov	$0, R32(%r10)
+	mov	$0, %r10d
 L(L1):	mul	vl
 	ADDSUB	%r9, 8(rp,n,8)
 	adc	%rax, %r8
@@ -156,11 +125,11 @@ L(L0):	mov	8(up,n,8), %rax
 L(L3):	mov	16(up,n,8), %rax
 	mul	vl
 	ADDSUB	%rbx, 24(rp,n,8)
-	mov	$0, R32(%r8)		C zero
-	mov	%r8, %rbx		C zero
+	mov	$0, %r8d		# zero
+	mov	%r8, %rbx		# zero
 	adc	%rax, %r10
 	mov	24(up,n,8), %rax
-	mov	%r8, %r9		C zero
+	mov	%r8, %r9		# zero
 	adc	%rdx, %r9
 L(L2):	mul	vl
 	add	$4, n
@@ -174,7 +143,5 @@ L(ret):	adc	$0, %rdx
 	mov	%rdx, %rax
 
 	pop	%rbx
-IFDOS(``pop	%rdi		'')
-IFDOS(``pop	%rsi		'')
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/atom/addmul_2.asm b/gmp/mpn/x86_64/atom/addmul_2.asm
deleted file mode 100644
index c1dcdc44aa..0000000000
--- a/gmp/mpn/x86_64/atom/addmul_2.asm
+++ /dev/null
@@ -1,186 +0,0 @@
-dnl  AMD64 mpn_addmul_2 optimised for Intel Atom.
-
-dnl  Copyright 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb	best
-C AMD K8,K9
-C AMD K10
-C AMD bd1
-C AMD bd2
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel PNR
-C Intel NHM
-C Intel SBR
-C Intel IBR
-C Intel HWL
-C Intel BWL
-C Intel atom	18.8		this
-C VIA nano
-
-C The loop of this code is the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`vp',      `%rcx')   C r9
-
-define(`v0', `%r8')
-define(`v1', `%r9')
-define(`w0', `%rbx')
-define(`w1', `%rcx')
-define(`w2', `%rbp')
-define(`w3', `%r10')
-define(`n',  `%r11')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_addmul_2)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-
-	mov	(up), %rax
-
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	mov	n_param, n
-	mul	v0
-
-	test	$1, R8(n)
-	jnz	L(bx1)
-
-L(bx0):	test	$2, R8(n)
-	jnz	L(b10)
-
-L(b00):	mov	%rax, w0
-	mov	(up), %rax
-	mov	%rdx, w1
-	xor	R32(w2), R32(w2)
-	lea	-8(rp), rp
-	jmp	L(lo0)
-
-L(b10):	mov	%rax, w2
-	mov	(up), %rax
-	mov	%rdx, w3
-	xor	R32(w0), R32(w0)
-	lea	-16(up), up
-	lea	-24(rp), rp
-	jmp	L(lo2)
-
-L(bx1):	test	$2, R8(n)
-	jnz	L(b11)
-
-L(b01):	mov	%rax, w3
-	mov	%rdx, w0
-	mov	(up), %rax
-	xor	R32(w1), R32(w1)
-	lea	8(up), up
-	dec	n
-	jmp	L(lo1)
-
-L(b11):	mov	%rax, w1
-	mov	(up), %rax
-	mov	%rdx, w2
-	xor	R32(w3), R32(w3)
-	lea	-8(up), up
-	lea	-16(rp), rp
-	jmp	L(lo3)
-
-	ALIGN(16)
-L(top):
-L(lo1):	mul	v1
-	add	w3, (rp)
-	mov	$0, R32(w2)
-	adc	%rax, w0
-	mov	(up), %rax
-	adc	%rdx, w1
-	mul	v0
-	add	%rax, w0
-	mov	(up), %rax
-	adc	%rdx, w1
-	adc	$0, R32(w2)
-L(lo0):	mul	v1
-	add	w0, 8(rp)
-	adc	%rax, w1
-	mov	8(up), %rax
-	mov	$0, R32(w3)
-	adc	%rdx, w2
-	mul	v0
-	add	%rax, w1
-	mov	8(up), %rax
-	adc	%rdx, w2
-	adc	$0, R32(w3)
-L(lo3):	mul	v1
-	add	w1, 16(rp)
-	adc	%rax, w2
-	mov	16(up), %rax
-	mov	$0, R32(w0)
-	adc	%rdx, w3
-	mul	v0
-	add	%rax, w2
-	mov	16(up), %rax
-	adc	%rdx, w3
-	adc	$0, R32(w0)
-L(lo2):	mul	v1
-	add	w2, 24(rp)
-	adc	%rax, w3
-	mov	24(up), %rax
-	adc	%rdx, w0
-	mov	$0, R32(w1)
-	lea	32(rp), rp
-	mul	v0
-	lea	32(up), up
-	add	%rax, w3
-	adc	%rdx, w0
-	mov	-8(up), %rax
-	adc	$0, R32(w1)
-	sub	$4, n
-	ja	L(top)
-
-L(end):	mul	v1
-	add	w3, (rp)
-	adc	%rax, w0
-	adc	%rdx, w1
-	mov	w0, 8(rp)
-	mov	w1, %rax
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/atom/aorrlsh1_n.asm b/gmp/mpn/x86_64/atom/aorrlsh1_n.asm
deleted file mode 100644
index f44de19fef..0000000000
--- a/gmp/mpn/x86_64/atom/aorrlsh1_n.asm
+++ /dev/null
@@ -1,238 +0,0 @@
-dnl  AMD64 mpn_addlsh1_n, mpn_rsblsh1_n optimised for Intel Atom.
-dnl  Used also for AMD bd1.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C TODO
-C  * This code is slightly large at 433 bytes.
-C  * sublsh1_n.asm and this file use the same basic pattern.
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C AMD bd1	 2.3
-C AMD bobcat	 ?
-C Intel P4	 ?
-C Intel core2	 ?
-C Intel NHM	 ?
-C Intel SBR	 ?
-C Intel atom	 4.875	(4.75 is probably possible)
-C VIA nano	 ?
-
-C INPUT PARAMETERS
-define(`rp',       `%rdi')
-define(`up',       `%rsi')
-define(`vp',       `%rdx')
-define(`n',        `%rcx')
-define(`cy',       `%r8')
-
-ifdef(`OPERATION_addlsh1_n', `
-  define(ADDSUB,	add)
-  define(ADCSBB,	adc)
-  define(func_n,	mpn_addlsh1_n)
-  define(func_nc,	mpn_addlsh1_nc)')
-ifdef(`OPERATION_rsblsh1_n', `
-  define(ADDSUB,	sub)
-  define(ADCSBB,	sbb)
-  define(func_n,	mpn_rsblsh1_n)
-  define(func_nc,	mpn_rsblsh1_nc)')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func_n)
-	FUNC_ENTRY(4)
-	push	%rbp
-	xor	R32(%rbp), R32(%rbp)
-L(ent):	mov	R32(n), R32(%rax)
-	and	$3, R32(%rax)
-	jz	L(b0)
-	cmp	$2, R32(%rax)
-	jz	L(b2)
-	jg	L(b3)
-
-L(b1):	mov	(vp), %r8
-	add	%r8, %r8
-	lea	8(vp), vp
-	sbb	R32(%rax), R32(%rax)	C save scy
-	add	R32(%rbp), R32(%rbp)	C restore acy
-	ADCSBB	(up), %r8
-	mov	%r8, (rp)
-	sbb	R32(%rbp), R32(%rbp)	C save acy
-	lea	8(up), up
-	lea	8(rp), rp
-	jmp	L(b0)
-
-L(b2):	mov	(vp), %r8
-	add	%r8, %r8
-	mov	8(vp), %r9
-	adc	%r9, %r9
-	lea	16(vp), vp
-	sbb	R32(%rax), R32(%rax)	C save scy
-	add	R32(%rbp), R32(%rbp)	C restore acy
-	ADCSBB	(up), %r8
-	mov	%r8, (rp)
-	ADCSBB	8(up), %r9
-	mov	%r9, 8(rp)
-	sbb	R32(%rbp), R32(%rbp)	C save acy
-	lea	16(up), up
-	lea	16(rp), rp
-	jmp	L(b0)
-
-L(b3):	mov	(vp), %r8
-	add	%r8, %r8
-	mov	8(vp), %r9
-	adc	%r9, %r9
-	mov	16(vp), %r10
-	adc	%r10, %r10
-	lea	24(vp), vp
-	sbb	R32(%rax), R32(%rax)	C save scy
-	add	R32(%rbp), R32(%rbp)	C restore acy
-	ADCSBB	(up), %r8
-	mov	%r8, (rp)
-	ADCSBB	8(up), %r9
-	mov	%r9, 8(rp)
-	ADCSBB	16(up), %r10
-	mov	%r10, 16(rp)
-	sbb	R32(%rbp), R32(%rbp)	C save acy
-	lea	24(up), up
-	lea	24(rp), rp
-
-L(b0):	test	$4, R8(n)
-	jz	L(skp)
-	add	R32(%rax), R32(%rax)	C restore scy
-	mov	(vp), %r8
-	adc	%r8, %r8
-	mov	8(vp), %r9
-	adc	%r9, %r9
-	mov	16(vp), %r10
-	adc	%r10, %r10
-	mov	24(vp), %r11
-	adc	%r11, %r11
-	lea	32(vp), vp
-	sbb	R32(%rax), R32(%rax)	C save scy
-	add	R32(%rbp), R32(%rbp)	C restore acy
-	ADCSBB	(up), %r8
-	mov	%r8, (rp)
-	ADCSBB	8(up), %r9
-	mov	%r9, 8(rp)
-	ADCSBB	16(up), %r10
-	mov	%r10, 16(rp)
-	ADCSBB	24(up), %r11
-	mov	%r11, 24(rp)
-	lea	32(up), up
-	lea	32(rp), rp
-	sbb	R32(%rbp), R32(%rbp)	C save acy
-
-L(skp):	cmp	$8, n
-	jl	L(rtn)
-
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%rbx
-	lea	-64(rp), rp
-	jmp	L(x)
-
-	ALIGN(16)
-L(top):	add	R32(%rax), R32(%rax)	C restore scy
-	lea	64(rp), rp
-	mov	(vp), %r8
-	adc	%r8, %r8
-	mov	8(vp), %r9
-	adc	%r9, %r9
-	mov	16(vp), %r10
-	adc	%r10, %r10
-	mov	24(vp), %r11
-	adc	%r11, %r11
-	mov	32(vp), %r12
-	adc	%r12, %r12
-	mov	40(vp), %r13
-	adc	%r13, %r13
-	mov	48(vp), %r14
-	adc	%r14, %r14
-	mov	56(vp), %rbx
-	adc	%rbx, %rbx
-	lea	64(vp), vp
-	sbb	R32(%rax), R32(%rax)	C save scy
-	add	R32(%rbp), R32(%rbp)	C restore acy
-	ADCSBB	(up), %r8
-	mov	%r8, (rp)
-	ADCSBB	8(up), %r9
-	mov	%r9, 8(rp)
-	ADCSBB	16(up), %r10
-	mov	%r10, 16(rp)
-	ADCSBB	24(up), %r11
-	mov	%r11, 24(rp)
-	ADCSBB	32(up), %r12
-	mov	%r12, 32(rp)
-	ADCSBB	40(up), %r13
-	mov	%r13, 40(rp)
-	ADCSBB	48(up), %r14
-	mov	%r14, 48(rp)
-	ADCSBB	56(up), %rbx
-	mov	%rbx, 56(rp)
-	sbb	R32(%rbp), R32(%rbp)	C save acy
-	lea	64(up), up
-L(x):	sub	$8, n
-	jge	L(top)
-
-L(end):	pop	%rbx
-	pop	%r14
-	pop	%r13
-	pop	%r12
-L(rtn):
-ifdef(`OPERATION_addlsh1_n',`
-	add	R32(%rbp), R32(%rax)
-	neg	R32(%rax)')
-ifdef(`OPERATION_rsblsh1_n',`
-	sub	R32(%rax), R32(%rbp)
-	movslq	R32(%rbp), %rax')
-
-	pop	%rbp
-	FUNC_EXIT()
-	ret
-EPILOGUE()
-PROLOGUE(func_nc)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	push	%rbp
-	neg	%r8			C set CF
-	sbb	R32(%rbp), R32(%rbp)	C save acy
-	jmp	L(ent)
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/atom/aorrlsh2_n.asm b/gmp/mpn/x86_64/atom/aorrlsh2_n.asm
deleted file mode 100644
index 02fb29dd74..0000000000
--- a/gmp/mpn/x86_64/atom/aorrlsh2_n.asm
+++ /dev/null
@@ -1,191 +0,0 @@
-dnl  AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
-dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
-dnl  Optimised for Intel Atom.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C Intel P4	 ?
-C Intel core2	 ?
-C Intel NHM	 ?
-C Intel SBR	 ?
-C Intel atom	 5.75
-C VIA nano	 ?
-
-C INPUT PARAMETERS
-define(`rp',       `%rdi')
-define(`up',       `%rsi')
-define(`vp',       `%rdx')
-define(`n',        `%rcx')
-
-define(`LSH', 2)
-define(`RSH', 62)
-define(M, eval(m4_lshift(1,LSH)))
-
-ifdef(`OPERATION_addlsh2_n', `
-  define(ADDSUB,	add)
-  define(ADCSBB,	adc)
-  define(func_n,	mpn_addlsh2_n)
-  define(func_nc,	mpn_addlsh2_nc)')
-ifdef(`OPERATION_rsblsh2_n', `
-  define(ADDSUB,	sub)
-  define(ADCSBB,	sbb)
-  define(func_n,	mpn_rsblsh2_n)
-  define(func_nc,	mpn_rsblsh2_nc)')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func_n)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-
-	mov	R32(n), R32(%rax)
-	and	$3, R32(%rax)
-	jz	L(b0)			C we rely on rax = 0 at target
-	cmp	$2, R32(%rax)
-	mov	$0, R32(%rax)
-	jz	L(b2)
-	jg	L(b3)
-
-L(b1):	mov	(vp), %r9
-	lea	(%rax,%r9,M), %rbp
-	shr	$RSH, %r9
-	sub	$1, n
-	lea	-8(up), up
-	lea	-8(rp), rp
-	jz	L(cj1)
-	mov	8(vp), %r10
-	lea	(%r9,%r10,M), %r9
-	shr	$RSH, %r10
-	mov	16(vp), %r11
-	lea	24(vp), vp
-	mov	(vp), %r8
-	lea	(%r10,%r11,M), %r10
-	shr	$RSH, %r11
-	add	R32(%rax), R32(%rax)
-	jmp	L(L1)
-
-L(b2):	lea	-32(rp), rp
-	mov	(vp), %r8
-	lea	-32(up), up
-	lea	(%rax,%r8,M), %rbx
-	shr	$RSH, %r8
-	mov	8(vp), %r9
-	sub	$2, n
-	jle	L(end)
-	jmp	L(top)
-
-L(b3):	lea	-24(up), up
-	mov	(vp), %r11
-	lea	-24(rp), rp
-	mov	8(vp), %r8
-	lea	(%rax,%r11,M), %r10
-	shr	$RSH, %r11
-	lea	8(vp), vp
-	lea	(%r11,%r8,M), %rbx
-	add	$1, n
-	jmp	L(L3)
-
-L(b0):	lea	-16(up), up
-	mov	(vp), %r10
-	lea	(%rax,%r10,M), %r9
-	shr	$RSH, %r10
-	mov	8(vp), %r11
-	lea	-16(rp), rp
-	mov	16(vp), %r8
-	lea	(%r10,%r11,M), %r10
-	shr	$RSH, %r11
-	add	R32(%rax), R32(%rax)
-	lea	16(vp), vp
-	jmp	L(L0)
-
-	ALIGN(16)
-L(top):	lea	(%r8,%r9,M), %rbp
-	shr	$RSH, %r9
-	lea	32(up), up
-	mov	16(vp), %r10
-	lea	(%r9,%r10,M), %r9
-	shr	$RSH, %r10
-	mov	24(vp), %r11
-	lea	32(rp), rp
-	lea	32(vp), vp
-	mov	(vp), %r8
-	lea	(%r10,%r11,M), %r10
-	shr	$RSH, %r11
-	add	R32(%rax), R32(%rax)
-	ADCSBB	(up), %rbx
-	mov	%rbx, (rp)
-L(L1):	ADCSBB	8(up), %rbp
-	mov	%rbp, 8(rp)
-L(L0):	ADCSBB	16(up), %r9
-	lea	(%r11,%r8,M), %rbx
-	mov	%r9, 16(rp)
-L(L3):	ADCSBB	24(up), %r10
-	sbb	R32(%rax), R32(%rax)
-L(L2):	shr	$RSH, %r8
-	mov	8(vp), %r9
-	mov	%r10, 24(rp)
-	sub	$4, n
-	jg	L(top)
-
-L(end):	lea	(%r8,%r9,M), %rbp
-	shr	$RSH, %r9
-	lea	32(up), up
-	lea	32(rp), rp
-	add	R32(%rax), R32(%rax)
-	ADCSBB	(up), %rbx
-	mov	%rbx, (rp)
-L(cj1):	ADCSBB	8(up), %rbp
-	mov	%rbp, 8(rp)
-
-ifdef(`OPERATION_addlsh2_n',`
-	mov	R32(n), R32(%rax)	C zero rax
-	adc	%r9, %rax')
-ifdef(`OPERATION_rsblsh2_n',`
-	sbb	n, %r9			C subtract 0
-	mov	%r9, %rax')
-
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/atom/aors_n.asm b/gmp/mpn/x86_64/atom/aors_n.asm
index 2c0b7b31a8..32c19424f0 100644
--- a/gmp/mpn/x86_64/atom/aors_n.asm
+++ b/gmp/mpn/x86_64/atom/aors_n.asm
@@ -1,37 +1,142 @@
 dnl  X86-64 mpn_add_n, mpn_sub_n, optimized for Intel Atom.
 
-dnl  Copyright 2003-2005, 2007, 2008, 2010-2012 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
+
+C	     cycles/limb
+C K8,K9:	 1.85
+C K10:		 ?
+C P4:		 ?
+C P6-15 (Core2): ?
+C P6-28 (Atom):	 3
+
+C INPUT PARAMETERS
+define(`rp',	`%rdi')
+define(`up',	`%rsi')
+define(`vp',	`%rdx')
+define(`n',	`%rcx')
+define(`cy',	`%r8')		C (only for mpn_add_nc)
+
+ifdef(`OPERATION_add_n', `
+	define(ADCSBB,	      adc)
+	define(func,	      mpn_add_n)
+	define(func_nc,	      mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+	define(ADCSBB,	      sbb)
+	define(func,	      mpn_sub_n)
+	define(func_nc,	      mpn_sub_nc)')
 
 MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-include_mpn(`x86_64/coreisbr/aors_n.asm')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func_nc)
+	jmp	L(ent)
+EPILOGUE()
+PROLOGUE(func)
+	xor	%r8, %r8
+L(ent):
+	mov	R32(%rcx), R32(%rax)
+	shr	$2, %rcx
+	and	$3, R32(%rax)
+	jz	L(b0)
+	cmp	$2, R32(%rax)
+	jz	L(b2)
+	jg	L(b3)
+
+L(b1):	mov	(%rsi), %r10
+	test	R32(%rcx), R32(%rcx)
+	bt	$0, R32(%r8)
+	jnz	L(gt1)
+	ADCSBB	(%rdx), %r10
+	mov	%r10, (%rdi)
+	mov	R32(%rcx), R32(%rax)		C zero rax
+	adc	R32(%rax), R32(%rax)
+	ret
+L(gt1):	ADCSBB	(%rdx), %r10
+	mov	8(%rsi), %r11
+	lea	16(%rsi), %rsi
+	lea	-16(%rdx), %rdx
+	lea	-16(%rdi), %rdi
+	jmp	L(m1)
+
+L(b2):	mov	(%rsi), %r9
+	mov	8(%rsi), %r10
+	lea	-8(%rdx), %rdx
+	test	R32(%rcx), R32(%rcx)
+	bt	$0, R32(%r8)
+	jnz	L(gt2)
+	lea	-40(%rdi), %rdi
+	jmp	L(e2)
+L(gt2):	ADCSBB	8(%rdx), %r9
+	mov	16(%rsi), %r11
+	lea	-8(%rsi), %rsi
+	lea	-8(%rdi), %rdi
+	jmp	L(m2)
+
+L(b3):	mov	(%rsi), %rax
+	mov	8(%rsi), %r9
+	mov	16(%rsi), %r10
+	test	R32(%rcx), R32(%rcx)
+	bt	$0, %r8
+	jnz	L(gt3)
+	lea	-32(%rdi), %rdi
+	jmp	L(e3)
+L(gt3):	ADCSBB	(%rdx), %rax
+	jmp	L(m3)
+
+L(b0):	mov	(%rsi), %r11
+	neg	R32(%r8)
+	lea	-24(%rdx), %rdx
+	lea	-24(%rdi), %rdi
+	lea	8(%rsi), %rsi
+	jmp	L(m0)
+
+	ALIGN(8)
+L(top):	mov	%r11, 24(%rdi)
+	ADCSBB	(%rdx), %rax
+	lea	32(%rdi), %rdi
+L(m3):	mov	%rax, (%rdi)
+	ADCSBB	8(%rdx), %r9
+	mov	24(%rsi), %r11
+L(m2):	mov	%r9, 8(%rdi)
+	ADCSBB	16(%rdx), %r10
+	lea	32(%rsi), %rsi
+L(m1):	mov	%r10, 16(%rdi)
+L(m0):	ADCSBB	24(%rdx), %r11
+	mov	(%rsi), %rax
+	mov	8(%rsi), %r9
+	lea	32(%rdx), %rdx
+	dec	%rcx
+	mov	16(%rsi), %r10
+	jnz	L(top)
+
+	mov	%r11, 24(%rdi)
+L(e3):	ADCSBB	(%rdx), %rax
+	mov	%rax, 32(%rdi)
+L(e2):	ADCSBB	8(%rdx), %r9
+	mov	%r9, 40(%rdi)
+L(e1):	ADCSBB	16(%rdx), %r10
+	mov	%r10, 48(%rdi)
+	mov	R32(%rcx), R32(%rax)		C zero rax
+	adc	R32(%rax), R32(%rax)
+	ret
+EPILOGUE()
diff --git a/gmp/mpn/x86_64/atom/aorsmul_1.asm b/gmp/mpn/x86_64/atom/aorsmul_1.asm
deleted file mode 100644
index e95315347c..0000000000
--- a/gmp/mpn/x86_64/atom/aorsmul_1.asm
+++ /dev/null
@@ -1,190 +0,0 @@
-dnl  AMD64 mpn_addmul_1/mpn_submul_1 optimised for Intel Atom.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb	best
-C AMD K8,K9
-C AMD K10
-C AMD bd1
-C AMD bd2
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel PNR
-C Intel NHM
-C Intel SBR
-C Intel IBR
-C Intel HWL
-C Intel BWL
-C Intel atom	19.37		this
-C VIA nano
-
-C The loop of this code is the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`v0',      `%rcx')   C r9
-
-define(`n',       `%rbx')
-
-ifdef(`OPERATION_addmul_1',`
-  define(`ADDSUB', `add')
-  define(`func',   `mpn_addmul_1')
-')
-ifdef(`OPERATION_submul_1',`
-  define(`ADDSUB', `sub')
-  define(`func',   `mpn_submul_1')
-')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	FUNC_ENTRY(4)
-	push	%rbx
-
-	mov	(up), %rax
-	lea	-8(up,n_param,8), up
-	lea	-16(rp,n_param,8), rp
-
-	test	$1, R8(n_param)
-	jnz	L(bx1)
-
-L(bx0):	test	$2, R8(n_param)
-	jnz	L(b10)
-
-L(b00):	mov	$1, R32(n)
-	sub	n_param, n
-	mul	v0
-	mov	%rax, %r11
-	mov	8(up,n,8), %rax
-	mov	%rdx, %r10
-	mul	v0
-	mov	%rax, %r8
-	mov	16(up,n,8), %rax
-	jmp	L(lo0)
-
-L(b10):	mov	$3, R32(n)
-	sub	n_param, n
-	mul	v0
-	mov	%rax, %r11
-	mov	-8(up,n,8), %rax
-	mov	%rdx, %r10
-	mul	v0
-	test	n, n
-	jns	L(cj2)
-	mov	%rax, %r8
-	mov	(up,n,8), %rax
-	mov	%rdx, %r9
-	jmp	L(lo2)
-
-L(bx1):	test	$2, R8(n_param)
-	jnz	L(b11)
-
-L(b01):	mov	$2, R32(n)
-	sub	n_param, n
-	mul	v0
-	test	n, n
-	jns	L(cj1)
-	mov	%rax, %r8
-	mov	(up,n,8), %rax
-	mov	%rdx, %r9
-	mul	v0
-	mov	%rax, %r11
-	mov	8(up,n,8), %rax
-	mov	%rdx, %r10
-	jmp	L(lo1)
-
-L(b11):	xor	R32(n), R32(n)
-	sub	n_param, n
-	mul	v0
-	mov	%rax, %r8
-	mov	16(up,n,8), %rax
-	mov	%rdx, %r9
-	mul	v0
-	mov	%rax, %r11
-	mov	24(up,n,8), %rax
-	jmp	L(lo3)
-
-	ALIGN(16)
-L(top):	mul	v0
-	ADDSUB	%r8, -16(rp,n,8)
-	mov	%rax, %r8
-	mov	(up,n,8), %rax
-	adc	%r9, %r11
-	mov	%rdx, %r9
-	adc	$0, %r10
-L(lo2):	mul	v0
-	ADDSUB	%r11, -8(rp,n,8)
-	mov	%rax, %r11
-	mov	8(up,n,8), %rax
-	adc	%r10, %r8
-	mov	%rdx, %r10
-	adc	$0, %r9
-L(lo1):	mul	v0
-	ADDSUB	%r8, (rp,n,8)
-	mov	%rax, %r8
-	adc	%r9, %r11
-	mov	16(up,n,8), %rax
-	adc	$0, %r10
-L(lo0):	mov	%rdx, %r9
-	mul	v0
-	ADDSUB	%r11, 8(rp,n,8)
-	mov	%rax, %r11
-	adc	%r10, %r8
-	mov	24(up,n,8), %rax
-	adc	$0, %r9
-L(lo3):	add	$4, n
-	mov	%rdx, %r10
-	js	L(top)
-
-L(end):	mul	v0
-	ADDSUB	%r8, -16(rp,n,8)
-	adc	%r9, %r11
-	adc	$0, %r10
-L(cj2):	ADDSUB	%r11, -8(rp,n,8)
-	adc	%r10, %rax
-	adc	$0, %rdx
-L(cj1):	ADDSUB	%rax, (rp,n,8)
-	mov	$0, R32(%rax)
-	adc	%rdx, %rax
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/atom/com.asm b/gmp/mpn/x86_64/atom/com.asm
deleted file mode 100644
index 6b6460fffe..0000000000
--- a/gmp/mpn/x86_64/atom/com.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_com optimised for Intel Atom.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_com)
-include_mpn(`x86_64/fastsse/com-palignr.asm')
diff --git a/gmp/mpn/x86_64/atom/copyd.asm b/gmp/mpn/x86_64/atom/copyd.asm
deleted file mode 100644
index e3092794c0..0000000000
--- a/gmp/mpn/x86_64/atom/copyd.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_copyd optimised for Intel Atom.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_copyd)
-include_mpn(`x86_64/fastsse/copyd-palignr.asm')
diff --git a/gmp/mpn/x86_64/atom/copyi.asm b/gmp/mpn/x86_64/atom/copyi.asm
deleted file mode 100644
index 00ec3c23c6..0000000000
--- a/gmp/mpn/x86_64/atom/copyi.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_copyi optimised for Intel Atom.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_copyi)
-include_mpn(`x86_64/fastsse/copyi-palignr.asm')
diff --git a/gmp/mpn/x86_64/atom/dive_1.asm b/gmp/mpn/x86_64/atom/dive_1.asm
deleted file mode 100644
index d9ba5fe6f0..0000000000
--- a/gmp/mpn/x86_64/atom/dive_1.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  AMD64 mpn_divexact_1 -- mpn by limb exact division.
-
-dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_divexact_1)
-include_mpn(`x86_64/nano/dive_1.asm')
diff --git a/gmp/mpn/x86_64/atom/gmp-mparam.h b/gmp/mpn/x86_64/atom/gmp-mparam.h
index 6816dfc362..f06dab4556 100644
--- a/gmp/mpn/x86_64/atom/gmp-mparam.h
+++ b/gmp/mpn/x86_64/atom/gmp-mparam.h
@@ -1,220 +1,76 @@
-/* Intel Atom/64 gmp-mparam.h -- Compiler/machine parameter header file.
+/* Intel Atom gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 2000-2010, 2012, 2014 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
 
-or both in parallel, as here.
+/* Generated by tuneup.c, 2009-01-14, gcc 4.2 */
 
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-#define SHLD_SLOW 1
-#define SHRD_SLOW 1
-
-/* 1667 MHz Pineview (Atom D510) */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD     MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     12
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           15
-
-#define MUL_TOOM22_THRESHOLD                12
-#define MUL_TOOM33_THRESHOLD                74
-#define MUL_TOOM44_THRESHOLD               118
-#define MUL_TOOM6H_THRESHOLD               157
-#define MUL_TOOM8H_THRESHOLD               212
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      84
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      80
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD      58
-
-#define SQR_BASECASE_THRESHOLD               6
-#define SQR_TOOM2_THRESHOLD                 23
-#define SQR_TOOM3_THRESHOLD                 49
-#define SQR_TOOM4_THRESHOLD                130
-#define SQR_TOOM6_THRESHOLD                173
-#define SQR_TOOM8_THRESHOLD                238
-
-#define MULMID_TOOM42_THRESHOLD             16
-
-#define MULMOD_BNM1_THRESHOLD               10
-#define SQRMOD_BNM1_THRESHOLD               12
-
-#define MUL_FFT_MODF_THRESHOLD             252  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    252, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
-    {      7, 5}, {     15, 6}, {     13, 7}, {      7, 6}, \
-    {     15, 7}, {      8, 6}, {     17, 7}, {      9, 6}, \
-    {     19, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
-    {      9, 7}, {     19, 8}, {     11, 7}, {     23, 8}, \
-    {     13, 9}, {      7, 8}, {     15, 7}, {     31, 8}, \
-    {     19, 9}, {     11, 8}, {     25, 9}, {     15, 8}, \
-    {     33, 9}, {     19, 8}, {     39, 9}, {     23,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     47,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255,10}, {     71, 9}, {    143, 8}, \
-    {    287,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511, 9}, {    287, 8}, {    575,11}, {     79,10}, \
-    {    159, 9}, {    319,10}, {    175, 9}, {    351, 8}, \
-    {    703,11}, {     95,10}, {    191, 9}, {    383, 8}, \
-    {    767,10}, {    207, 9}, {    415,10}, {    223,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,11}, \
-    {    143,10}, {    287, 9}, {    575, 8}, {   1151,10}, \
-    {    319, 9}, {    639,11}, {    175,10}, {    351, 9}, \
-    {    703, 8}, {   1407, 7}, {   2815,10}, {    383,11}, \
-    {    207,10}, {    415,11}, {    223,10}, {    447,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    287,10}, {    575, 9}, {   1151,12}, {    159,11}, \
-    {    319,10}, {    639,11}, {    351,10}, {    703, 9}, \
-    {   1407,12}, {    191,11}, {    415,12}, {    223,11}, \
-    {    447,10}, {    895,11}, {    479,13}, {    127,12}, \
-    {    255,11}, {    511,12}, {    287,11}, {    575,10}, \
-    {   1151,12}, {    319,11}, {    639,12}, {    351,11}, \
-    {    703,10}, {   1407,13}, {    191,12}, {    383,11}, \
-    {    767,12}, {    415,11}, {    831,12}, {    447,11}, \
-    {    895,12}, {    479,14}, {    127,13}, {    255,12}, \
-    {    511,11}, {   1023,12}, {    575,11}, {   1151,13}, \
-    {    319,12}, {    703,11}, {   1407,13}, {    383,12}, \
-    {    831,13}, {    447,12}, {    895,11}, {   1791,14}, \
-    {    255,13}, {    511,12}, {   1023,13}, {    575,12}, \
-    {   1151,13}, {    703,12}, {   1407,14}, {    383,13}, \
-    {    831,12}, {   1663,13}, {    895,12}, {   1791,15}, \
-    {    255,14}, {    511,13}, {   1087,12}, {   2175,13}, \
-    {   1151,14}, {    639,13}, {   1407,12}, {   2815,14}, \
-    {    767,13}, {   1663,14}, {    895,13}, {   1919,12}, \
-    {   3839,15}, {    511,14}, {   1023,13}, {   2175,14}, \
-    {   1151,13}, {   2431,14}, {   1407,13}, {   2815,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 185
-#define MUL_FFT_THRESHOLD                 2240
-
-#define SQR_FFT_MODF_THRESHOLD             208  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    208, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
-    {     13, 7}, {      7, 6}, {     15, 7}, {     13, 8}, \
-    {      7, 7}, {     17, 8}, {      9, 7}, {     19, 8}, \
-    {     11, 7}, {     23, 8}, {     13, 9}, {      7, 8}, \
-    {     19, 9}, {     11, 8}, {     23,10}, {      7, 9}, \
-    {     15, 8}, {     31, 9}, {     23,10}, {     15, 9}, \
-    {     39,10}, {     23,11}, {     15,10}, {     31, 9}, \
-    {     63, 8}, {    127,10}, {     39, 9}, {     79, 8}, \
-    {    159,10}, {     47, 8}, {    191,10}, {     55,11}, \
-    {     31,10}, {     63, 9}, {    127, 8}, {    255,10}, \
-    {     71, 9}, {    143, 8}, {    287, 7}, {    575,10}, \
-    {     79, 9}, {    159,11}, {     47, 9}, {    191,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511,10}, {    143, 9}, {    287, 8}, {    575,10}, \
-    {    159, 9}, {    319, 8}, {    639,10}, {    175, 9}, \
-    {    351, 8}, {    703,10}, {    191, 9}, {    383,10}, \
-    {    207,11}, {    111,10}, {    223, 9}, {    447,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,11}, \
-    {    143,10}, {    287, 9}, {    575,11}, {    159,10}, \
-    {    319, 9}, {    639,11}, {    175,10}, {    351, 9}, \
-    {    703,11}, {    191,10}, {    383,11}, {    207,10}, \
-    {    415,11}, {    223,10}, {    447,13}, {     63,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    287,10}, \
-    {    575,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
-    {    767,11}, {    415,12}, {    223,11}, {    447,10}, \
-    {    895,13}, {    127,12}, {    255,11}, {    511,12}, \
-    {    287,11}, {    575,12}, {    319,11}, {    639,12}, \
-    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
-    {    767,12}, {    415,11}, {    831,12}, {    447,11}, \
-    {    895,14}, {    127,13}, {    255,12}, {    511,11}, \
-    {   1023,12}, {    575,11}, {   1151,13}, {    319,12}, \
-    {    703,11}, {   1407,13}, {    383,12}, {    831,13}, \
-    {    447,12}, {    895,14}, {    255,13}, {    511,12}, \
-    {   1023,13}, {    575,12}, {   1151,13}, {    703,12}, \
-    {   1407,14}, {    383,13}, {    831,12}, {   1663,13}, \
-    {    895,15}, {    255,14}, {    511,13}, {   1087,12}, \
-    {   2175,13}, {   1151,14}, {    639,13}, {   1407,12}, \
-    {   2815,14}, {    767,13}, {   1663,14}, {    895,13}, \
-    {   1791,12}, {   3583,15}, {    511,14}, {   1023,13}, \
-    {   2047,14}, {   1151,13}, {   2431,12}, {   4863,14}, \
-    {   1407,13}, {   2815,15}, {  32768,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 175
-#define SQR_FFT_THRESHOLD                 1600
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  34
-#define MULLO_MUL_N_THRESHOLD             4392
-
-#define DC_DIV_QR_THRESHOLD                 32
-#define DC_DIVAPPR_Q_THRESHOLD             122
-#define DC_BDIV_QR_THRESHOLD                35
-#define DC_BDIV_Q_THRESHOLD                 76
-
-#define INV_MULMOD_BNM1_THRESHOLD           22
-#define INV_NEWTON_THRESHOLD               163
-#define INV_APPR_THRESHOLD                 134
-
-#define BINV_NEWTON_THRESHOLD              179
-#define REDC_1_TO_REDC_2_THRESHOLD          17
-#define REDC_2_TO_REDC_N_THRESHOLD          43
-
-#define MU_DIV_QR_THRESHOLD                855
-#define MU_DIVAPPR_Q_THRESHOLD             872
-#define MUPI_DIV_QR_THRESHOLD               83
-#define MU_BDIV_QR_THRESHOLD               748
-#define MU_BDIV_Q_THRESHOLD                807
-
-#define POWM_SEC_TABLE  1,16,114,452,1603
-
-#define MATRIX22_STRASSEN_THRESHOLD         13
-#define HGCD_THRESHOLD                     102
-#define HGCD_APPR_THRESHOLD                 95
-#define HGCD_REDUCE_THRESHOLD             1329
-#define GCD_DC_THRESHOLD                   268
-#define GCDEXT_DC_THRESHOLD                221
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        26
-#define SET_STR_DC_THRESHOLD               418
-#define SET_STR_PRECOMPUTE_THRESHOLD      1420
-
-#define FAC_DSC_THRESHOLD                 1065
-#define FAC_ODD_THRESHOLD                    0  /* always */
+#define MUL_KARATSUBA_THRESHOLD          10
+#define MUL_TOOM3_THRESHOLD              66
+#define MUL_TOOM44_THRESHOLD            118
+
+#define SQR_BASECASE_THRESHOLD            0  /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD          18
+#define SQR_TOOM3_THRESHOLD              98
+#define SQR_TOOM4_THRESHOLD             166
+
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD              24
+#define MULLOW_MUL_N_THRESHOLD          170
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 30
+#define POWM_THRESHOLD                   48
+
+#define MATRIX22_STRASSEN_THRESHOLD      17
+#define HGCD_THRESHOLD                   86
+#define GCD_DC_THRESHOLD                196
+#define GCDEXT_DC_THRESHOLD             236
+#define JACOBI_BASE_METHOD                3
+
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1_THRESHOLD                 8
+#define MOD_1_2_THRESHOLD                 9
+#define MOD_1_4_THRESHOLD                24
+#define USE_PREINV_DIVREM_1               1  /* native */
+#define USE_PREINV_MOD_1                  1
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             19
+#define GET_STR_PRECOMPUTE_THRESHOLD     35
+#define SET_STR_DC_THRESHOLD            268
+#define SET_STR_PRECOMPUTE_THRESHOLD   1142
+
+#define MUL_FFT_TABLE  { 272, 544, 1088, 1792, 5120, 12288, 49152, 196608, 786432, 0 }
+#define MUL_FFT_MODF_THRESHOLD          240
+#define MUL_FFT_THRESHOLD              1408
+
+#define SQR_FFT_TABLE  { 240, 544, 1216, 2304, 5120, 12288, 49152, 196608, 786432, 0 }
+#define SQR_FFT_MODF_THRESHOLD          240
+#define SQR_FFT_THRESHOLD              1408
+
+/* These tables need to be updated.  */
+
+#define MUL_FFT_TABLE2 {{1,4}, {209,5}, {417,6}, {961,7}, {2177,8}, {4865,9}, {5633,8}, {6401,9}, {7681,8}, {8449,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {24065,11}, {30721,10}, {48129,11}, {63489,10}, {81409,11}, {96257,9}, {106497,10}, {107521,12}, {126977,11}, {129025,10}, {130049,9}, {130561,10}, {140289,9}, {140801,10}, {147201,11}, {161793,10}, {212481,11}, {228865,12}, {258049,11}, {457729,13}, {516097,12}, {520193,11}, {588801,12}, {651265,11}, {719873,12}, {782337,11}, {849921,12}, {916481,13}, {1040385,12}, {1439745,13}, {1564673,12}, {1830913,11}, {1832961,13}, {1835009,14}, {MP_SIZE_T_MAX, 0}}
+
+#define SQR_FFT_TABLE2 {{1,4}, {177,5}, {353,6}, {833,7}, {2177,8}, {4865,9}, {5633,8}, {6401,10}, {7169,9}, {11777,10}, {15361,9}, {19969,10}, {23553,9}, {24065,11}, {30721,10}, {48129,11}, {63489,10}, {65537,9}, {73217,8}, {73601,9}, {80129,10}, {80897,9}, {81665,11}, {96257,9}, {97793,8}, {98817,10}, {99329,12}, {126977,10}, {130049,9}, {131073,10}, {143361,9}, {144385,10}, {151041,9}, {151553,10}, {154113,9}, {154625,10}, {157697,9}, {159745,10}, {195585,9}, {196609,11}, {206849,10}, {207873,11}, {220161,10}, {222209,11}, {228865,12}, {258049,11}, {272385,10}, {274433,11}, {276481,10}, {277505,11}, {280577,10}, {282625,11}, {391169,10}, {397313,11}, {401409,10}, {423937,11}, {457729,13}, {516097,12}, {520193,11}, {588801,12}, {651265,11}, {718849,12}, {782337,11}, {845825,12}, {915457,13}, {1040385,12}, {1437697,13}, {1564673,12}, {1830913,14}, {MP_SIZE_T_MAX, 0}}
diff --git a/gmp/mpn/x86_64/atom/lshift.asm b/gmp/mpn/x86_64/atom/lshift.asm
deleted file mode 100644
index 1b37d5dccf..0000000000
--- a/gmp/mpn/x86_64/atom/lshift.asm
+++ /dev/null
@@ -1,123 +0,0 @@
-dnl  AMD64 mpn_lshift -- mpn left shift, optimised for Atom.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C Intel P4	 ?
-C Intel core2	 ?
-C Intel NHM	 ?
-C Intel SBR	 ?
-C Intel atom	 4.5
-C VIA nano	 ?
-
-C TODO
-C  * Consider using 4-way unrolling.  We reach 4 c/l, but the code is 2.5 times
-C    larger.
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`n',	`%rdx')
-define(`cnt',	`%rcx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_lshift)
-	FUNC_ENTRY(4)
-	lea	-8(up,n,8), up
-	lea	-8(rp,n,8), rp
-	shr	R32(n)
-	mov	(up), %rax
-	jnc	L(evn)
-
-	mov	%rax, %r11
-	shl	R8(%rcx), %r11
-	neg	R8(%rcx)
-	shr	R8(%rcx), %rax
-	test	n, n
-	jnz	L(gt1)
-	mov	%r11, (rp)
-	FUNC_EXIT()
-	ret
-
-L(gt1):	mov	-8(up), %r8
-	mov	%r8, %r10
-	shr	R8(%rcx), %r8
-	jmp	L(lo1)
-
-L(evn):	mov	%rax, %r10
-	neg	R8(%rcx)
-	shr	R8(%rcx), %rax
-	mov	-8(up), %r9
-	mov	%r9, %r11
-	shr	R8(%rcx), %r9
-	neg	R8(%rcx)
-	dec	n
-	lea	8(rp), rp
-	lea	-8(up), up
-	jz	L(end)
-
-	ALIGN(8)
-L(top):	shl	R8(%rcx), %r10
-	or	%r10, %r9
-	shl	R8(%rcx), %r11
-	neg	R8(%rcx)
-	mov	-8(up), %r8
-	mov	%r8, %r10
-	mov	%r9, -8(rp)
-	shr	R8(%rcx), %r8
-	lea	-16(rp), rp
-L(lo1):	mov	-16(up), %r9
-	or	%r11, %r8
-	mov	%r9, %r11
-	shr	R8(%rcx), %r9
-	lea	-16(up), up
-	neg	R8(%rcx)
-	mov	%r8, (rp)
-	dec	n
-	jg	L(top)
-
-L(end):	shl	R8(%rcx), %r10
-	or	%r10, %r9
-	shl	R8(%rcx), %r11
-	mov	%r9, -8(rp)
-	mov	%r11, -16(rp)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/atom/lshiftc.asm b/gmp/mpn/x86_64/atom/lshiftc.asm
deleted file mode 100644
index 7385f8fd44..0000000000
--- a/gmp/mpn/x86_64/atom/lshiftc.asm
+++ /dev/null
@@ -1,127 +0,0 @@
-dnl  AMD64 mpn_lshiftc -- mpn left shift with complement, optimised for Atom.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C Intel P4	 ?
-C Intel core2	 ?
-C Intel NHM	 ?
-C Intel SBR	 ?
-C Intel atom	 5
-C VIA nano	 ?
-
-C TODO
-C  * Consider using 4-way unrolling.  We reach 4.5 c/l, but the code is 2.5
-C    times larger.
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`n',	`%rdx')
-define(`cnt',	`%rcx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_lshiftc)
-	FUNC_ENTRY(4)
-	lea	-8(up,n,8), up
-	lea	-8(rp,n,8), rp
-	shr	R32(n)
-	mov	(up), %rax
-	jnc	L(evn)
-
-	mov	%rax, %r11
-	shl	R8(%rcx), %r11
-	neg	R8(%rcx)
-	shr	R8(%rcx), %rax
-	test	n, n
-	jnz	L(gt1)
-	not	%r11
-	mov	%r11, (rp)
-	FUNC_EXIT()
-	ret
-
-L(gt1):	mov	-8(up), %r8
-	mov	%r8, %r10
-	shr	R8(%rcx), %r8
-	jmp	L(lo1)
-
-L(evn):	mov	%rax, %r10
-	neg	R8(%rcx)
-	shr	R8(%rcx), %rax
-	mov	-8(up), %r9
-	mov	%r9, %r11
-	shr	R8(%rcx), %r9
-	neg	R8(%rcx)
-	lea	8(rp), rp
-	lea	-8(up), up
-	jmp	L(lo0)
-
-C	ALIGN(16)
-L(top):	shl	R8(%rcx), %r10
-	or	%r10, %r9
-	shl	R8(%rcx), %r11
-	not	%r9
-	neg	R8(%rcx)
-	mov	-8(up), %r8
-	lea	-16(rp), rp
-	mov	%r8, %r10
-	shr	R8(%rcx), %r8
-	mov	%r9, 8(rp)
-L(lo1):	or	%r11, %r8
-	mov	-16(up), %r9
-	mov	%r9, %r11
-	shr	R8(%rcx), %r9
-	lea	-16(up), up
-	neg	R8(%rcx)
-	not	%r8
-	mov	%r8, (rp)
-L(lo0):	dec	n
-	jg	L(top)
-
-L(end):	shl	R8(%rcx), %r10
-	or	%r10, %r9
-	not	%r9
-	shl	R8(%rcx), %r11
-	not	%r11
-	mov	%r9, -8(rp)
-	mov	%r11, -16(rp)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/atom/mul_1.asm b/gmp/mpn/x86_64/atom/mul_1.asm
deleted file mode 100644
index d76a3d3b8c..0000000000
--- a/gmp/mpn/x86_64/atom/mul_1.asm
+++ /dev/null
@@ -1,143 +0,0 @@
-dnl  AMD64 mpn_mul_1 optimised for Intel Atom.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb	best
-C AMD K8,K9
-C AMD K10
-C AMD bd1
-C AMD bd2
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel PNR
-C Intel NHM
-C Intel SBR
-C Intel IBR
-C Intel HWL
-C Intel BWL
-C Intel atom	17.3		this
-C VIA nano
-
-C The loop of this code is the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`v0',      `%rcx')   C r9
-
-define(`n',       `%r11')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mul_1)
-	FUNC_ENTRY(4)
-	xor	%r8, %r8
-L(com):	mov	(up), %rax
-	lea	-16(up,n_param,8), up
-	lea	-8(rp,n_param,8), rp
-	test	$1, R8(n_param)
-	jnz	L(bx1)
-
-L(bx0):	mov	%r8, %r9
-	test	$2, R8(n_param)
-	jnz	L(b10)
-
-L(b00):	mov	$2, R32(n)
-	sub	n_param, n
-	jmp	L(lo0)
-
-L(bx1):	test	$2, R8(n_param)
-	jnz	L(b11)
-
-L(b01):	mov	$3, R32(n)
-	sub	n_param, n
-	mul	v0
-	cmp	$2, n
-	jnz	L(lo1)
-	jmp	L(cj1)
-
-L(b11):	mov	$1, R32(n)
-	sub	n_param, n
-	jmp	L(lo3)
-
-L(b10):	xor	R32(n), R32(n)
-	sub	n_param, n
-	jmp	L(lo2)
-
-L(top):	mul	v0
-	mov	%r9, -24(rp,n,8)
-L(lo1):	xor	%r9d, %r9d
-	add	%rax, %r8
-	mov	(up,n,8), %rax
-	adc	%rdx, %r9
-	mov	%r8, -16(rp,n,8)
-L(lo0):	xor	%r8d, %r8d
-	mul	v0
-	add	%rax, %r9
-	mov	8(up,n,8), %rax
-	adc	%rdx, %r8
-	mov	%r9, -8(rp,n,8)
-L(lo3):	xor	%r9d, %r9d
-	mul	v0
-	add	%rax, %r8
-	mov	16(up,n,8), %rax
-	adc	%rdx, %r9
-	mov	%r8, (rp,n,8)
-L(lo2):	xor	%r8d, %r8d
-	mul	v0
-	add	%rax, %r9
-	mov	24(up,n,8), %rax
-	adc	%rdx, %r8
-	add	$4, n
-	js	L(top)
-
-L(end):	mul	v0
-	mov	%r9, -8(rp)
-L(cj1):	add	%rax, %r8
-	mov	$0, R32(%rax)
-	adc	%rdx, %rax
-	mov	%r8, (rp)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
-
-PROLOGUE(mpn_mul_1c)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	jmp	L(com)
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/atom/mul_2.asm b/gmp/mpn/x86_64/atom/mul_2.asm
deleted file mode 100644
index f3fc3afdd1..0000000000
--- a/gmp/mpn/x86_64/atom/mul_2.asm
+++ /dev/null
@@ -1,186 +0,0 @@
-dnl  AMD64 mpn_mul_2 optimised for Intel Atom.
-
-dnl  Copyright 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb	best
-C AMD K8,K9
-C AMD K10
-C AMD bd1
-C AMD bd2
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel PNR
-C Intel NHM
-C Intel SBR
-C Intel IBR
-C Intel HWL
-C Intel BWL
-C Intel atom	17.75		this
-C VIA nano
-
-C The loop of this code is the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`vp',      `%rcx')   C r9
-
-define(`v0', `%r8')
-define(`v1', `%r9')
-define(`w0', `%rbx')
-define(`w1', `%rcx')
-define(`w2', `%rbp')
-define(`w3', `%r10')
-define(`n',  `%r11')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mul_2)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-
-	mov	(up), %rax
-
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	mov	n_param, n
-	mul	v0
-
-	test	$1, R8(n)
-	jnz	L(bx1)
-
-L(bx0):	test	$2, R8(n)
-	jnz	L(b10)
-
-L(b00):	mov	%rax, w0
-	mov	(up), %rax
-	mov	%rdx, w1
-	xor	R32(w2), R32(w2)
-	lea	-8(rp), rp
-	jmp	L(lo0)
-
-L(b10):	mov	%rax, w2
-	mov	(up), %rax
-	mov	%rdx, w3
-	xor	R32(w0), R32(w0)
-	lea	-16(up), up
-	lea	-24(rp), rp
-	jmp	L(lo2)
-
-L(bx1):	test	$2, R8(n)
-	jnz	L(b11)
-
-L(b01):	mov	%rax, w3
-	mov	%rdx, w0
-	mov	(up), %rax
-	xor	R32(w1), R32(w1)
-	lea	8(up), up
-	dec	n
-	jmp	L(lo1)
-
-L(b11):	mov	%rax, w1
-	mov	(up), %rax
-	mov	%rdx, w2
-	xor	R32(w3), R32(w3)
-	lea	-8(up), up
-	lea	-16(rp), rp
-	jmp	L(lo3)
-
-	ALIGN(16)
-L(top):
-L(lo1):	mul	v1
-	add	%rax, w0
-	mov	(up), %rax
-	mov	$0, R32(w2)
-	mov	w3, (rp)
-	adc	%rdx, w1
-	mul	v0
-	add	%rax, w0
-	mov	(up), %rax
-	adc	%rdx, w1
-	adc	$0, R32(w2)
-L(lo0):	mul	v1
-	add	%rax, w1
-	mov	8(up), %rax
-	mov	w0, 8(rp)
-	adc	%rdx, w2
-	mul	v0
-	add	%rax, w1
-	mov	8(up), %rax
-	adc	%rdx, w2
-	mov	$0, R32(w3)
-	adc	$0, R32(w3)
-L(lo3):	mul	v1
-	add	%rax, w2
-	mov	16(up), %rax
-	mov	w1, 16(rp)
-	mov	$0, R32(w0)
-	adc	%rdx, w3
-	mul	v0
-	add	%rax, w2
-	mov	16(up), %rax
-	adc	%rdx, w3
-L(lo2):	mov	$0, R32(w1)
-	mov	w2, 24(rp)
-	adc	$0, R32(w0)
-	mul	v1
-	add	%rax, w3
-	mov	24(up), %rax
-	lea	32(up), up
-	adc	%rdx, w0
-	mul	v0
-	lea	32(rp), rp
-	add	%rax, w3
-	adc	%rdx, w0
-	mov	-8(up), %rax
-	adc	$0, R32(w1)
-	sub	$4, n
-	ja	L(top)
-
-L(end):	mul	v1
-	mov	w3, (rp)
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	w0, 8(rp)
-	mov	w1, %rax
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/atom/popcount.asm b/gmp/mpn/x86_64/atom/popcount.asm
deleted file mode 100644
index fb14dd3d31..0000000000
--- a/gmp/mpn/x86_64/atom/popcount.asm
+++ /dev/null
@@ -1,35 +0,0 @@
-dnl  x86-64 mpn_popcount.
-
-dnl  Copyright 2007, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_popcount)
-include_mpn(`x86/pentium4/sse2/popcount.asm')
diff --git a/gmp/mpn/x86_64/atom/redc_1.asm b/gmp/mpn/x86_64/atom/redc_1.asm
deleted file mode 100644
index d93c19fdc0..0000000000
--- a/gmp/mpn/x86_64/atom/redc_1.asm
+++ /dev/null
@@ -1,574 +0,0 @@
-dnl  X86-64 mpn_redc_1 optimised for Intel Atom.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C AMD bull	 ?
-C AMD pile	 ?
-C AMD steam	 ?
-C AMD bobcat	 5.0
-C AMD jaguar	 ?
-C Intel P4	 ?
-C Intel core	 ?
-C Intel NHM	 ?
-C Intel SBR	 ?
-C Intel IBR	 ?
-C Intel HWL	 ?
-C Intel BWL	 ?
-C Intel atom	 ?
-C VIA nano	 ?
-
-C TODO
-C  * Micro-optimise, none performed thus far.
-C  * Consider inlining mpn_add_n.
-C  * Single basecases out before the pushes.
-C  * Make lead-in code for the inner loops be more similar.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-define(`rp',          `%rdi')   C rcx
-define(`up',          `%rsi')   C rdx
-define(`mp_param',    `%rdx')   C r8
-define(`n',           `%rcx')   C r9
-define(`u0inv',       `%r8')    C stack
-
-define(`i',           `%r14')
-define(`j',           `%r15')
-define(`mp',          `%r12')
-define(`q0',          `%r13')
-define(`w0',          `%rbp')
-define(`w1',          `%r9')
-define(`w2',          `%r10')
-define(`w3',          `%r11')
-
-C rax rbx rcx rdx rdi rsi rbp r8 r9 r10 r11 r12 r13 r14 r15
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-define(`ALIGNx', `ALIGN(16)')
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_redc_1)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	mov	(up), q0
-	mov	n, j			C outer loop induction var
-	lea	(mp_param,n,8), mp
-	lea	(up,n,8), up
-	neg	n
-	imul	u0inv, q0		C first iteration q0
-
-	test	$1, R8(n)
-	jz	L(bx0)
-
-L(bx1):	test	$2, R8(n)
-	jz	L(b3)
-
-L(b1):	cmp	$-1, R32(n)
-	jz	L(n1)
-
-L(otp1):lea	1(n), i
-	mov	(mp,n,8), %rax
-	mul	q0
-	mov	%rax, %rbp
-	mov	8(mp,n,8), %rax
-	mov	%rdx, %r9
-	mul	q0
-	mov	%rax, %rbx
-	mov	16(mp,n,8), %rax
-	mov	%rdx, %r10
-	mul	q0
-	add	(up,n,8), %rbp
-	mov	%rax, %rbp
-	adc	%r9, %rbx
-	mov	24(mp,n,8), %rax
-	adc	$0, %r10
-	mov	%rdx, %r9
-	mul	q0
-	add	8(up,n,8), %rbx
-	mov	%rbx, 8(up,n,8)
-	mov	%rax, %r11
-	adc	%r10, %rbp
-	mov	32(mp,n,8), %rax
-	adc	$0, %r9
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e1)
-
-	ALIGNx
-L(tp1):	mul	q0
-	add	%rbp, -24(up,i,8)
-	mov	%rax, %rbp
-	mov	(mp,i,8), %rax
-	adc	%r9, %r11
-	mov	%rdx, %r9
-	adc	$0, %r10
-	mul	q0
-	add	%r11, -16(up,i,8)
-	mov	%rax, %r11
-	mov	8(mp,i,8), %rax
-	adc	%r10, %rbp
-	mov	%rdx, %r10
-	adc	$0, %r9
-	mul	q0
-	add	%rbp, -8(up,i,8)
-	mov	%rax, %rbp
-	adc	%r9, %r11
-	mov	16(mp,i,8), %rax
-	adc	$0, %r10
-	mov	%rdx, %r9
-	mul	q0
-	add	%r11, (up,i,8)
-	mov	%rax, %r11
-	adc	%r10, %rbp
-	mov	24(mp,i,8), %rax
-	adc	$0, %r9
-L(e1):	add	$4, i
-	mov	%rdx, %r10
-	js	L(tp1)
-
-L(ed1):	mul	q0
-	add	%rbp, I(-24(up),-24(up,i,8))
-	adc	%r9, %r11
-	adc	$0, %r10
-	add	%r11, I(-16(up),-16(up,i,8))
-	adc	%r10, %rax
-	adc	$0, %rdx
-	add	%rax, I(-8(up),-8(up,i,8))
-	adc	$0, %rdx
-	mov	%rdx, (up,n,8)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp1)
-	jmp	L(cj)
-
-L(b3):	cmp	$-3, R32(n)
-	jz	L(n3)
-
-L(otp3):lea	3(n), i
-	mov	(mp,n,8), %rax
-	mul	q0
-	mov	%rax, %rbp
-	mov	8(mp,n,8), %rax
-	mov	%rdx, %r9
-	mul	q0
-	mov	%rax, %rbx
-	mov	16(mp,n,8), %rax
-	mov	%rdx, %r10
-	mul	q0
-	add	(up,n,8), %rbp
-	mov	%rax, %rbp
-	mov	24(mp,n,8), %rax
-	adc	%r9, %rbx
-	mov	%rdx, %r9
-	adc	$0, %r10
-	mul	q0
-	add	8(up,n,8), %rbx
-	mov	%rbx, 8(up,n,8)
-	mov	%rax, %r11
-	mov	32(mp,n,8), %rax
-	adc	%r10, %rbp
-	mov	%rdx, %r10
-	adc	$0, %r9
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e3)
-
-	ALIGNx
-L(tp3):	mul	q0
-	add	%rbp, -24(up,i,8)
-	mov	%rax, %rbp
-	mov	(mp,i,8), %rax
-	adc	%r9, %r11
-	mov	%rdx, %r9
-	adc	$0, %r10
-	mul	q0
-	add	%r11, -16(up,i,8)
-	mov	%rax, %r11
-	mov	8(mp,i,8), %rax
-	adc	%r10, %rbp
-	mov	%rdx, %r10
-	adc	$0, %r9
-L(e3):	mul	q0
-	add	%rbp, -8(up,i,8)
-	mov	%rax, %rbp
-	adc	%r9, %r11
-	mov	16(mp,i,8), %rax
-	adc	$0, %r10
-	mov	%rdx, %r9
-	mul	q0
-	add	%r11, (up,i,8)
-	mov	%rax, %r11
-	adc	%r10, %rbp
-	mov	24(mp,i,8), %rax
-	adc	$0, %r9
-	add	$4, i
-	mov	%rdx, %r10
-	js	L(tp3)
-
-L(ed3):	mul	q0
-	add	%rbp, I(-24(up),-24(up,i,8))
-	adc	%r9, %r11
-	adc	$0, %r10
-	add	%r11, I(-16(up),-16(up,i,8))
-	adc	%r10, %rax
-	adc	$0, %rdx
-	add	%rax, I(-8(up),-8(up,i,8))
-	adc	$0, %rdx
-	mov	%rdx, (up,n,8)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp3)
-C	jmp	L(cj)
-
-L(cj):
-IFSTD(`	lea	(up,n,8), up		C param 2: up
-	lea	(up,n,8), %rdx		C param 3: up - n
-	neg	R32(n)		')	C param 4: n
-
-IFDOS(`	lea	(up,n,8), %rdx		C param 2: up
-	lea	(%rdx,n,8), %r8		C param 3: up - n
-	neg	R32(n)
-	mov	n, %r9			C param 4: n
-	mov	rp, %rcx	')	C param 1: rp
-
-	CALL(	mpn_add_n)
-
-L(ret):	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-L(bx0):	test	$2, R8(n)
-	jnz	L(b2)
-
-L(b0):	cmp	$-4, R32(n)
-	jz	L(n4)
-
-L(otp0):lea	4(n), i
-	mov	(mp,n,8), %rax
-	mul	q0
-	mov	%rax, %r11
-	mov	8(mp,n,8), %rax
-	mov	%rdx, %r10
-	mul	q0
-	mov	%rax, %rbx
-	mov	16(mp,n,8), %rax
-	mov	%rdx, %r9
-	mul	q0
-	add	(up,n,8), %r11
-	mov	%rax, %r11
-	adc	%r10, %rbx
-	mov	24(mp,n,8), %rax
-	adc	$0, %r9
-	mov	%rdx, %r10
-	mul	q0
-	add	8(up,n,8), %rbx
-	mov	%rbx, 8(up,n,8)
-	mov	%rax, %rbp
-	mov	32(mp,n,8), %rax
-	adc	%r9, %r11
-	mov	%rdx, %r9
-	adc	$0, %r10
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e0)
-
-	ALIGNx
-L(tp0):	mul	q0
-	add	%rbp, -24(up,i,8)
-	mov	%rax, %rbp
-	mov	(mp,i,8), %rax
-	adc	%r9, %r11
-	mov	%rdx, %r9
-	adc	$0, %r10
-L(e0):	mul	q0
-	add	%r11, -16(up,i,8)
-	mov	%rax, %r11
-	mov	8(mp,i,8), %rax
-	adc	%r10, %rbp
-	mov	%rdx, %r10
-	adc	$0, %r9
-	mul	q0
-	add	%rbp, -8(up,i,8)
-	mov	%rax, %rbp
-	adc	%r9, %r11
-	mov	16(mp,i,8), %rax
-	adc	$0, %r10
-	mov	%rdx, %r9
-	mul	q0
-	add	%r11, (up,i,8)
-	mov	%rax, %r11
-	adc	%r10, %rbp
-	mov	24(mp,i,8), %rax
-	adc	$0, %r9
-	add	$4, i
-	mov	%rdx, %r10
-	js	L(tp0)
-
-L(ed0):	mul	q0
-	add	%rbp, I(-24(up),-24(up,i,8))
-	adc	%r9, %r11
-	adc	$0, %r10
-	add	%r11, I(-16(up),-16(up,i,8))
-	adc	%r10, %rax
-	adc	$0, %rdx
-	add	%rax, I(-8(up),-8(up,i,8))
-	adc	$0, %rdx
-	mov	%rdx, (up,n,8)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp0)
-	jmp	L(cj)
-
-L(b2):	cmp	$-2, R32(n)
-	jz	L(n2)
-
-L(otp2):lea	2(n), i
-	mov	(mp,n,8), %rax
-	mul	q0
-	mov	%rax, %r11
-	mov	8(mp,n,8), %rax
-	mov	%rdx, %r10
-	mul	q0
-	mov	%rax, %rbx
-	mov	16(mp,n,8), %rax
-	mov	%rdx, %r9
-	mul	q0
-	add	(up,n,8), %r11
-	mov	%rax, %r11
-	adc	%r10, %rbx
-	mov	24(mp,n,8), %rax
-	adc	$0, %r9
-	mov	%rdx, %r10
-	mul	q0
-	add	8(up,n,8), %rbx
-	mov	%rbx, 8(up,n,8)
-	mov	%rax, %rbp
-	mov	32(mp,n,8), %rax
-	adc	%r9, %r11
-	mov	%rdx, %r9
-	adc	$0, %r10
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e2)
-
-	ALIGNx
-L(tp2):	mul	q0
-	add	%rbp, -24(up,i,8)
-	mov	%rax, %rbp
-	mov	(mp,i,8), %rax
-	adc	%r9, %r11
-	mov	%rdx, %r9
-	adc	$0, %r10
-	mul	q0
-	add	%r11, -16(up,i,8)
-	mov	%rax, %r11
-	mov	8(mp,i,8), %rax
-	adc	%r10, %rbp
-	mov	%rdx, %r10
-	adc	$0, %r9
-	mul	q0
-	add	%rbp, -8(up,i,8)
-	mov	%rax, %rbp
-	adc	%r9, %r11
-	mov	16(mp,i,8), %rax
-	adc	$0, %r10
-	mov	%rdx, %r9
-L(e2):	mul	q0
-	add	%r11, (up,i,8)
-	mov	%rax, %r11
-	adc	%r10, %rbp
-	mov	24(mp,i,8), %rax
-	adc	$0, %r9
-	add	$4, i
-	mov	%rdx, %r10
-	js	L(tp2)
-
-L(ed2):	mul	q0
-	add	%rbp, I(-24(up),-24(up,i,8))
-	adc	%r9, %r11
-	adc	$0, %r10
-	add	%r11, I(-16(up),-16(up,i,8))
-	adc	%r10, %rax
-	adc	$0, %rdx
-	add	%rax, I(-8(up),-8(up,i,8))
-	adc	$0, %rdx
-	mov	%rdx, (up,n,8)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp2)
-	jmp	L(cj)
-
-L(n1):	mov	(mp_param), %rax
-	mul	q0
-	add	-8(up), %rax
-	adc	(up), %rdx
-	mov	%rdx, (rp)
-	mov	$0, R32(%rax)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-
-L(n2):	mov	(mp_param), %rax
-	mov	-16(up), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-8(mp), %rax
-	mov	-8(up), %r10
-	mul	q0
-	add	%rax, %r10
-	mov	%rdx, %r11
-	adc	$0, %r11
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	%r10, q0
-	imul	u0inv, q0		C next q0
-	mov	-16(mp), %rax
-	mul	q0
-	add	%rax, %r10
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-8(mp), %rax
-	mov	(up), %r14
-	mul	q0
-	add	%rax, %r14
-	adc	$0, %rdx
-	add	%r9, %r14
-	adc	$0, %rdx
-	xor	R32(%rax), R32(%rax)
-	add	%r11, %r14
-	adc	8(up), %rdx
-	mov	%r14, (rp)
-	mov	%rdx, 8(rp)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-
-	ALIGNx
-L(n3):	mov	-24(mp), %rax
-	mov	-24(up), %r10
-	mul	q0
-	add	%rax, %r10
-	mov	-16(mp), %rax
-	mov	%rdx, %r11
-	adc	$0, %r11
-	mov	-16(up), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-8(mp), %rax
-	add	%r11, %rbp
-	mov	-8(up), %r10
-	adc	$0, %r9
-	mul	q0
-	mov	%rbp, q0
-	imul	u0inv, q0		C next q0
-	add	%rax, %r10
-	mov	%rdx, %r11
-	adc	$0, %r11
-	mov	%rbp, -16(up)
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	%r10, -8(up)
-	mov	%r11, -24(up)		C up[0]
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(n3)
-
-	mov	-48(up), %rdx
-	mov	-40(up), %rbx
-	xor	R32(%rax), R32(%rax)
-	add	%rbp, %rdx
-	adc	%r10, %rbx
-	adc	-8(up), %r11
-	mov	%rdx, (rp)
-	mov	%rbx, 8(rp)
-	mov	%r11, 16(rp)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-
-L(n4):	mov	-32(mp), %rax
-	mul	q0
-	mov	%rax, %r11
-	mov	-24(mp), %rax
-	mov	%rdx, %r10
-	mul	q0
-	mov	%rax, %rbx
-	mov	-16(mp), %rax
-	mov	%rdx, %r9
-	mul	q0
-	add	-32(up), %r11
-	mov	%rax, %r11
-	adc	%r10, %rbx
-	mov	-8(mp), %rax
-	adc	$0, %r9
-	mov	%rdx, %r10
-	mul	q0
-	add	-24(up), %rbx
-	mov	%rbx, -24(up)
-	adc	%r9, %r11
-	adc	$0, %r10
-	imul	u0inv, %rbx		C next q limb
-	add	%r11, -16(up)
-	adc	%r10, %rax
-	adc	$0, %rdx
-	add	%rax, -8(up)
-	adc	$0, %rdx
-	mov	%rdx, -32(up)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	dec	j
-	lea	8(up), up		C up++
-	jnz	L(n4)
-	jmp	L(cj)
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/atom/rsh1aors_n.asm b/gmp/mpn/x86_64/atom/rsh1aors_n.asm
deleted file mode 100644
index 6f5f6384a7..0000000000
--- a/gmp/mpn/x86_64/atom/rsh1aors_n.asm
+++ /dev/null
@@ -1,287 +0,0 @@
-dnl  x86-64 mpn_rsh1add_n/mpn_rsh1sub_n.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C TODO
-C  * Schedule loop less.  It is now almost surely overscheduled, resulting in
-C    large feed-in and wind-down code.
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C Intel P4	 ?
-C Intel core2	 ?
-C Intel NMH	 ?
-C Intel SBR	 ?
-C Intel atom	 5.25
-C VIA nano	 ?
-
-C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`vp',`%rdx')
-define(`n',`%rcx')
-
-ifdef(`OPERATION_rsh1add_n', `
-	define(ADDSUB,	      add)
-	define(ADCSBB,	      adc)
-	define(func_n,	      mpn_rsh1add_n)
-	define(func_nc,	      mpn_rsh1add_nc)')
-ifdef(`OPERATION_rsh1sub_n', `
-	define(ADDSUB,	      sub)
-	define(ADCSBB,	      sbb)
-	define(func_n,	      mpn_rsh1sub_n)
-	define(func_nc,	      mpn_rsh1sub_nc)')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func_n)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	mov	(up), %r15
-	ADDSUB	(vp), %r15
-	sbb	R32(%rbx), R32(%rbx)
-	xor	R32(%rax), R32(%rax)
-	shr	%r15
-	adc	R32(%rax), R32(%rax)	C return value
-
-	mov	R32(n), R32(%rbp)
-	and	$3, R32(%rbp)
-	jz	L(b0)
-	cmp	$2, R32(%rbp)
-	jae	L(b23)
-
-L(b1):	dec	n
-	jnz	L(gt1)
-	shl	$63, %rbx
-	add	%rbx, %r15
-	mov	%r15, (rp)
-	jmp	L(cj1)
-L(gt1):	lea	24(up), up
-	lea	24(vp), vp
-	mov	-16(up), %r9
-	add	R32(%rbx), R32(%rbx)
-	mov	-8(up), %r10
-	lea	24(rp), rp
-	mov	(up), %r11
-	ADCSBB	-16(vp), %r9
-	ADCSBB	-8(vp), %r10
-	mov	%r15, %r12
-	ADCSBB	(vp), %r11
-	mov	%r9, %r13
-	sbb	R32(%rbx), R32(%rbx)
-	mov	%r11, %r15
-	mov	%r10, %r14
-	shl	$63, %r11
-	shl	$63, %r10
-	shl	$63, %r9
-	or	%r9, %r12
-	shr	%r13
-	mov	8(up), %r8
-	shr	%r14
-	or	%r10, %r13
-	shr	%r15
-	or	%r11, %r14
-	sub	$4, n
-	jz	L(cj5)
-L(gt5):	mov	16(up), %r9
-	add	R32(%rbx), R32(%rbx)
-	mov	24(up), %r10
-	ADCSBB	8(vp), %r8
-	mov	%r15, %rbp
-	mov	32(up), %r11
-	jmp	L(lo1)
-
-L(b23):	jnz	L(b3)
-	mov	8(up), %r8
-	sub	$2, n
-	jnz	L(gt2)
-	add	R32(%rbx), R32(%rbx)
-	ADCSBB	8(vp), %r8
-	mov	%r8, %r12
-	jmp	L(cj2)
-L(gt2):	mov	16(up), %r9
-	add	R32(%rbx), R32(%rbx)
-	mov	24(up), %r10
-	ADCSBB	8(vp), %r8
-	mov	%r15, %rbp
-	mov	32(up), %r11
-	ADCSBB	16(vp), %r9
-	lea	32(up), up
-	ADCSBB	24(vp), %r10
-	mov	%r9, %r13
-	ADCSBB	32(vp), %r11
-	mov	%r8, %r12
-	jmp	L(lo2)
-
-L(b3):	lea	40(up), up
-	lea	8(vp), vp
-	mov	%r15, %r14
-	add	R32(%rbx), R32(%rbx)
-	mov	-32(up), %r11
-	ADCSBB	0(vp), %r11
-	lea	8(rp), rp
-	sbb	R32(%rbx), R32(%rbx)
-	mov	%r11, %r15
-	shl	$63, %r11
-	mov	-24(up), %r8
-	shr	%r15
-	or	%r11, %r14
-	sub	$3, n
-	jnz	L(gt3)
-	add	R32(%rbx), R32(%rbx)
-	ADCSBB	8(vp), %r8
-	jmp	L(cj3)
-L(gt3):	mov	-16(up), %r9
-	add	R32(%rbx), R32(%rbx)
-	mov	-8(up), %r10
-	ADCSBB	8(vp), %r8
-	mov	%r15, %rbp
-	mov	(up), %r11
-	ADCSBB	16(vp), %r9
-	ADCSBB	24(vp), %r10
-	mov	%r8, %r12
-	jmp	L(lo3)
-
-L(b0):	lea	48(up), up
-	lea	16(vp), vp
-	add	R32(%rbx), R32(%rbx)
-	mov	-40(up), %r10
-	lea	16(rp), rp
-	mov	-32(up), %r11
-	ADCSBB	-8(vp), %r10
-	mov	%r15, %r13
-	ADCSBB	(vp), %r11
-	sbb	R32(%rbx), R32(%rbx)
-	mov	%r11, %r15
-	mov	%r10, %r14
-	shl	$63, %r11
-	shl	$63, %r10
-	mov	-24(up), %r8
-	shr	%r14
-	or	%r10, %r13
-	shr	%r15
-	or	%r11, %r14
-	sub	$4, n
-	jnz	L(gt4)
-	add	R32(%rbx), R32(%rbx)
-	ADCSBB	8(vp), %r8
-	jmp	L(cj4)
-L(gt4):	mov	-16(up), %r9
-	add	R32(%rbx), R32(%rbx)
-	mov	-8(up), %r10
-	ADCSBB	8(vp), %r8
-	mov	%r15, %rbp
-	mov	(up), %r11
-	ADCSBB	16(vp), %r9
-	jmp	L(lo0)
-
-	ALIGN(8)
-L(top):	mov	16(up), %r9
-	shr	%r14
-	or	%r10, %r13
-	shr	%r15
-	or	%r11, %r14
-	add	R32(%rbx), R32(%rbx)
-	mov	24(up), %r10
-	mov	%rbp, (rp)
-	ADCSBB	8(vp), %r8
-	mov	%r15, %rbp
-	lea	32(rp), rp
-	mov	32(up), %r11
-L(lo1):	ADCSBB	16(vp), %r9
-	lea	32(up), up
-	mov	%r12, -24(rp)
-L(lo0):	ADCSBB	24(vp), %r10
-	mov	%r8, %r12
-	mov	%r13, -16(rp)
-L(lo3):	ADCSBB	32(vp), %r11
-	mov	%r9, %r13
-	mov	%r14, -8(rp)
-L(lo2):	sbb	R32(%rbx), R32(%rbx)
-	shl	$63, %r8
-	mov	%r11, %r15
-	shr	%r12
-	mov	%r10, %r14
-	shl	$63, %r9
-	lea	32(vp), vp
-	shl	$63, %r10
-	or	%r8, %rbp
-	shl	$63, %r11
-	or	%r9, %r12
-	shr	%r13
-	mov	8(up), %r8
-	sub	$4, n
-	jg	L(top)
-
-L(end):	shr	%r14
-	or	%r10, %r13
-	shr	%r15
-	or	%r11, %r14
-	mov	%rbp, (rp)
-	lea	32(rp), rp
-L(cj5):	add	R32(%rbx), R32(%rbx)
-	ADCSBB	8(vp), %r8
-	mov	%r12, -24(rp)
-L(cj4):	mov	%r13, -16(rp)
-L(cj3):	mov	%r8, %r12
-	mov	%r14, -8(rp)
-L(cj2):	sbb	R32(%rbx), R32(%rbx)
-	shl	$63, %r8
-	shr	%r12
-	or	%r8, %r15
-	shl	$63, %rbx
-	add	%rbx, %r12
-	mov	%r15, (rp)
-	mov	%r12, 8(rp)
-L(cj1):	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/atom/rshift.asm b/gmp/mpn/x86_64/atom/rshift.asm
deleted file mode 100644
index 29c027de49..0000000000
--- a/gmp/mpn/x86_64/atom/rshift.asm
+++ /dev/null
@@ -1,121 +0,0 @@
-dnl  AMD64 mpn_rshift -- mpn right shift, optimised for Atom.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C Intel P4	 ?
-C Intel core2	 ?
-C Intel NHM	 ?
-C Intel SBR	 ?
-C Intel atom	 4.5
-C VIA nano	 ?
-
-C TODO
-C  * Consider using 4-way unrolling.  We reach 4 c/l, but the code is 2.5 times
-C    larger.
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`n',	`%rdx')
-define(`cnt',	`%rcx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_rshift)
-	FUNC_ENTRY(4)
-	shr	R32(n)
-	mov	(up), %rax
-	jnc	L(evn)
-
-	mov	%rax, %r11
-	shr	R8(cnt), %r11
-	neg	R8(cnt)
-	shl	R8(cnt), %rax
-	test	n, n
-	jnz	L(gt1)
-	mov	%r11, (rp)
-	FUNC_EXIT()
-	ret
-
-L(gt1):	mov	8(up), %r8
-	mov	%r8, %r10
-	shl	R8(cnt), %r8
-	jmp	L(lo1)
-
-L(evn):	mov	%rax, %r10
-	neg	R8(cnt)
-	shl	R8(cnt), %rax
-	mov	8(up), %r9
-	mov	%r9, %r11
-	shl	R8(cnt), %r9
-	neg	R8(cnt)
-	dec	n
-	lea	-8(rp), rp
-	lea	8(up), up
-	jz	L(end)
-
-	ALIGN(8)
-L(top):	shr	R8(cnt), %r10
-	or	%r10, %r9
-	shr	R8(cnt), %r11
-	neg	R8(cnt)
-	mov	8(up), %r8
-	mov	%r8, %r10
-	mov	%r9, 8(rp)
-	shl	R8(cnt), %r8
-	lea	16(rp), rp
-L(lo1):	mov	16(up), %r9
-	or	%r11, %r8
-	mov	%r9, %r11
-	shl	R8(cnt), %r9
-	lea	16(up), up
-	neg	R8(cnt)
-	mov	%r8, (rp)
-	dec	n
-	jg	L(top)
-
-L(end):	shr	R8(cnt), %r10
-	or	%r10, %r9
-	shr	R8(cnt), %r11
-	mov	%r9, 8(rp)
-	mov	%r11, 16(rp)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/atom/sublsh1_n.asm b/gmp/mpn/x86_64/atom/sublsh1_n.asm
deleted file mode 100644
index 1306acde2b..0000000000
--- a/gmp/mpn/x86_64/atom/sublsh1_n.asm
+++ /dev/null
@@ -1,242 +0,0 @@
-dnl  AMD64 mpn_sublsh1_n optimised for Intel Atom.
-dnl  Used also for AMD bd1.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C TODO
-C  * This code is slightly large at 501 bytes.
-C  * aorrlsh1_n.asm and this file use the same basic pattern.
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C AMD bd1	 2.3
-C AMD bobcat	 ?
-C Intel P4	 ?
-C Intel core2	 ?
-C Intel NHM	 ?
-C Intel SBR	 ?
-C Intel atom	 5	(4.875 is probably possible)
-C VIA nano	 ?
-
-C INPUT PARAMETERS
-define(`rp',       `%rdi')
-define(`up',       `%rsi')
-define(`vp',       `%rdx')
-define(`n',        `%rcx')
-define(`cy',       `%r8')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_sublsh1_n)
-	FUNC_ENTRY(4)
-	push	%rbp
-	push	%r15
-	xor	R32(%rbp), R32(%rbp)
-L(ent):	mov	R32(n), R32(%rax)
-	and	$3, R32(%rax)
-	jz	L(b0)
-	cmp	$2, R32(%rax)
-	jz	L(b2)
-	jg	L(b3)
-
-L(b1):	mov	(vp), %r8
-	add	%r8, %r8
-	lea	8(vp), vp
-	sbb	R32(%rax), R32(%rax)	C save scy
-	add	R32(%rbp), R32(%rbp)	C restore acy
-	mov	(up), %r15
-	sbb	%r8, %r15
-	mov	%r15, (rp)
-	sbb	R32(%rbp), R32(%rbp)	C save acy
-	lea	8(up), up
-	lea	8(rp), rp
-	jmp	L(b0)
-
-L(b2):	mov	(vp), %r8
-	add	%r8, %r8
-	mov	8(vp), %r9
-	adc	%r9, %r9
-	lea	16(vp), vp
-	sbb	R32(%rax), R32(%rax)	C save scy
-	add	R32(%rbp), R32(%rbp)	C restore acy
-	mov	(up), %r15
-	sbb	%r8, %r15
-	mov	%r15, (rp)
-	mov	8(up), %r15
-	sbb	%r9, %r15
-	mov	%r15, 8(rp)
-	sbb	R32(%rbp), R32(%rbp)	C save acy
-	lea	16(up), up
-	lea	16(rp), rp
-	jmp	L(b0)
-
-L(b3):	mov	(vp), %r8
-	add	%r8, %r8
-	mov	8(vp), %r9
-	adc	%r9, %r9
-	mov	16(vp), %r10
-	adc	%r10, %r10
-	lea	24(vp), vp
-	sbb	R32(%rax), R32(%rax)	C save scy
-	add	R32(%rbp), R32(%rbp)	C restore acy
-	mov	(up), %r15
-	sbb	%r8, %r15
-	mov	%r15, (rp)
-	mov	8(up), %r15
-	sbb	%r9, %r15
-	mov	%r15, 8(rp)
-	mov	16(up), %r15
-	sbb	%r10, %r15
-	mov	%r15, 16(rp)
-	sbb	R32(%rbp), R32(%rbp)	C save acy
-	lea	24(up), up
-	lea	24(rp), rp
-
-L(b0):	test	$4, R8(n)
-	jz	L(skp)
-	add	R32(%rax), R32(%rax)	C restore scy
-	mov	(vp), %r8
-	adc	%r8, %r8
-	mov	8(vp), %r9
-	adc	%r9, %r9
-	mov	16(vp), %r10
-	adc	%r10, %r10
-	mov	24(vp), %r11
-	adc	%r11, %r11
-	lea	32(vp), vp
-	sbb	R32(%rax), R32(%rax)	C save scy
-	add	R32(%rbp), R32(%rbp)	C restore acy
-	mov	(up), %r15
-	sbb	%r8, %r15
-	mov	%r15, (rp)
-	mov	8(up), %r15
-	sbb	%r9, %r15
-	mov	%r15, 8(rp)
-	mov	16(up), %r15
-	sbb	%r10, %r15
-	mov	%r15, 16(rp)
-	mov	24(up), %r15
-	sbb	%r11, %r15
-	mov	%r15, 24(rp)
-	lea	32(up), up
-	lea	32(rp), rp
-	sbb	R32(%rbp), R32(%rbp)	C save acy
-
-L(skp):	cmp	$8, n
-	jl	L(rtn)
-
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%rbx
-	lea	-64(rp), rp
-	jmp	L(x)
-
-	ALIGN(16)
-L(top):	mov	(vp), %r8
-	add	R32(%rax), R32(%rax)
-	lea	64(vp), vp
-	adc	%r8, %r8
-	mov	-56(vp), %r9
-	adc	%r9, %r9
-	mov	-48(vp), %r10
-	adc	%r10, %r10
-	mov	-40(vp), %r11
-	adc	%r11, %r11
-	mov	-32(vp), %r12
-	adc	%r12, %r12
-	mov	-24(vp), %r13
-	adc	%r13, %r13
-	mov	-16(vp), %r14
-	adc	%r14, %r14
-	mov	-8(vp), %r15
-	adc	%r15, %r15
-	sbb	R32(%rax), R32(%rax)
-	add	R32(%rbp), R32(%rbp)
-	mov	(up), %rbp
-	lea	64(rp), rp
-	mov	8(up), %rbx
-	sbb	%r8, %rbp
-	mov	32(up), %r8
-	mov	%rbp, (rp)
-	sbb	%r9, %rbx
-	mov	16(up), %rbp
-	mov	%rbx, 8(rp)
-	sbb	%r10, %rbp
-	mov	24(up), %rbx
-	mov	%rbp, 16(rp)
-	sbb	%r11, %rbx
-	mov	%rbx, 24(rp)
-	sbb	%r12, %r8
-	mov	40(up), %r9
-	mov	%r8, 32(rp)
-	sbb	%r13, %r9
-	mov	48(up), %rbp
-	mov	%r9, 40(rp)
-	sbb	%r14, %rbp
-	mov	56(up), %rbx
-	mov	%rbp, 48(rp)
-	sbb	%r15, %rbx
-	lea	64(up), up
-	mov	%rbx, 56(rp)
-	sbb	R32(%rbp), R32(%rbp)
-L(x):	sub	$8, n
-	jge	L(top)
-
-L(end):	pop	%rbx
-	pop	%r14
-	pop	%r13
-	pop	%r12
-L(rtn):
-	add	R32(%rbp), R32(%rax)
-	neg	R32(%rax)
-
-	pop	%r15
-	pop	%rbp
-	FUNC_EXIT()
-	ret
-EPILOGUE()
-PROLOGUE(mpn_sublsh1_nc)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	push	%rbp
-	push	%r15
-	neg	%r8			C set CF
-	sbb	R32(%rbp), R32(%rbp)	C save acy
-	jmp	L(ent)
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/bd1/README b/gmp/mpn/x86_64/bd1/README
deleted file mode 100644
index ccd210e0d6..0000000000
--- a/gmp/mpn/x86_64/bd1/README
+++ /dev/null
@@ -1,11 +0,0 @@
-This directory contains code for AMD bulldozer including its piledriver update.
-
-We currently make limited use of SIMD instructions, both via the MPN_PATH and
-via inclusion of x86_64/fastsse files.
-
-The bd1 cores share one SIMD/FPU pipeline for two integer units.  This probably
-means that an all-core GMP load (such as a HPC load) might run slower if there
-is significant SIMD dependency.
-
-We should perhaps allow a special 'bd1nosimd' pseudo cpu-name excluding any
-SIMD code.
diff --git a/gmp/mpn/x86_64/bd1/aorrlsh1_n.asm b/gmp/mpn/x86_64/bd1/aorrlsh1_n.asm
deleted file mode 100644
index c34a5fa134..0000000000
--- a/gmp/mpn/x86_64/bd1/aorrlsh1_n.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  AMD64 mpn_addlsh1_n and mpn_rsblsh1_n
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
-include_mpn(`x86_64/atom/aorrlsh1_n.asm')
diff --git a/gmp/mpn/x86_64/bd1/aorsmul_1.asm b/gmp/mpn/x86_64/bd1/aorsmul_1.asm
deleted file mode 100644
index 96fec9f5ac..0000000000
--- a/gmp/mpn/x86_64/bd1/aorsmul_1.asm
+++ /dev/null
@@ -1,181 +0,0 @@
-dnl  AMD64 mpn_addmul_1 and mpn_submul_1 optimised for AMD Bulldozer.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9
-C AMD K10
-C AMD bd1	 4.5-4.7
-C AMD bobcat
-C Intel P4
-C Intel core2
-C Intel NHM
-C Intel SBR
-C Intel atom
-C VIA nano
-
-C The loop of this code is the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-
-C TODO
-C  * Try to make loop run closer to 4 c/l.
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`v0',      `%rcx')   C r9
-
-define(`n',       `%r11')
-
-ifdef(`OPERATION_addmul_1',`
-      define(`ADDSUB',        `add')
-      define(`func',  `mpn_addmul_1')
-')
-ifdef(`OPERATION_submul_1',`
-      define(`ADDSUB',        `sub')
-      define(`func',  `mpn_submul_1')
-')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-IFDOS(`	define(`up', ``%rsi'')	') dnl
-IFDOS(`	define(`rp', ``%rcx'')	') dnl
-IFDOS(`	define(`v0', ``%r9'')	') dnl
-IFDOS(`	define(`r9', ``rdi'')	') dnl
-IFDOS(`	define(`n',  ``%r8'')	') dnl
-IFDOS(`	define(`r8', ``r11'')	') dnl
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-IFDOS(``push	%rsi		'')
-IFDOS(``push	%rdi		'')
-IFDOS(``mov	%rdx, %rsi	'')
-
-	mov	(up), %rax		C read first u limb early
-	push	%rbx
-IFSTD(`	mov	n_param, %rbx	')	C move away n from rdx, mul uses it
-IFDOS(`	mov	n, %rbx		')
-	mul	v0
-
-IFSTD(`	mov	%rbx, n		')
-
-	and	$3, R32(%rbx)
-	lea	-16(rp,n,8), rp
-	jz	L(b0)
-	cmp	$2, R32(%rbx)
-	jb	L(b1)
-	jz	L(b2)
-
-L(b3):	mov	$0, R32(%r8)
-	mov	%rax, %rbx
-	mov	$0, R32(%r9)
-	mov	8(up), %rax
-	mov	%rdx, %r10
-	lea	(up,n,8), up
-	not	n
-	jmp	L(L3)
-
-L(b0):	mov	$0, R32(%r10)
-	mov	%rax, %r8
-	mov	%rdx, %rbx
-	mov	8(up), %rax
-	lea	(up,n,8), up
-	neg	n
-	jmp	L(L0)
-
-L(b1):	cmp	$1, n
-	jz	L(n1)
-	mov	%rax, %r9
-	mov	8(up), %rax
-	mov	%rdx, %r8
-	mov	$0, R32(%rbx)
-	lea	(up,n,8), up
-	neg	n
-	inc	n
-	jmp	L(L1)
-
-L(b2):	mov	$0, R32(%rbx)
-	mov	%rax, %r10
-	mov	%rdx, %r9
-	mov	8(up), %rax
-	mov	$0, R32(%r8)
-	lea	(up,n,8), up
-	neg	n
-	add	$2, n
-	jns	L(end)
-
-	ALIGN(32)
-L(top):	mul	v0
-	ADDSUB	%r10, (rp,n,8)
-	adc	%rax, %r9
-	mov	(up,n,8), %rax
-	adc	%rdx, %r8
-L(L1):	mul	v0
-	mov	$0, R32(%r10)
-	ADDSUB	%r9, 8(rp,n,8)
-	adc	%rax, %r8
-	adc	%rdx, %rbx
-	mov	8(up,n,8), %rax
-L(L0):	mul	v0
-	ADDSUB	%r8, 16(rp,n,8)
-	mov	$0, R32(%r8)
-	adc	%rax, %rbx
-	mov	$0, R32(%r9)
-	mov	16(up,n,8), %rax
-	adc	%rdx, %r10
-L(L3):	mul	v0
-	ADDSUB	%rbx, 24(rp,n,8)
-	mov	$0, R32(%rbx)
-	adc	%rax, %r10
-	adc	%rdx, %r9
-	mov	24(up,n,8), %rax
-	add	$4, n
-	js	L(top)
-
-L(end):	mul	v0
-	ADDSUB	%r10, (rp)
-	adc	%r9, %rax
-	adc	%r8, %rdx
-L(n1):	ADDSUB	%rax, 8(rp)
-	adc	$0, %rdx
-	mov	%rdx, %rax
-
-	pop	%rbx
-IFDOS(``pop	%rdi		'')
-IFDOS(``pop	%rsi		'')
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/bd1/com.asm b/gmp/mpn/x86_64/bd1/com.asm
deleted file mode 100644
index 43f356117a..0000000000
--- a/gmp/mpn/x86_64/bd1/com.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_com optimised for AMD bd1.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_com)
-include_mpn(`x86_64/fastsse/com-palignr.asm')
diff --git a/gmp/mpn/x86_64/bd1/copyd.asm b/gmp/mpn/x86_64/bd1/copyd.asm
deleted file mode 100644
index 675cdc3f6b..0000000000
--- a/gmp/mpn/x86_64/bd1/copyd.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_copyd optimised for AMD bd1.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_copyd)
-include_mpn(`x86_64/fastsse/copyd-palignr.asm')
diff --git a/gmp/mpn/x86_64/bd1/copyi.asm b/gmp/mpn/x86_64/bd1/copyi.asm
deleted file mode 100644
index ceef036585..0000000000
--- a/gmp/mpn/x86_64/bd1/copyi.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_copyi optimised for AMD bd1.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_copyi)
-include_mpn(`x86_64/fastsse/copyi-palignr.asm')
diff --git a/gmp/mpn/x86_64/bd1/gcd_1.asm b/gmp/mpn/x86_64/bd1/gcd_1.asm
deleted file mode 100644
index 3d8e5c7ab1..0000000000
--- a/gmp/mpn/x86_64/bd1/gcd_1.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  AMD64 mpn_gcd_1.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_gcd_1)
-include_mpn(`x86_64/core2/gcd_1.asm')
diff --git a/gmp/mpn/x86_64/bd1/gmp-mparam.h b/gmp/mpn/x86_64/bd1/gmp-mparam.h
deleted file mode 100644
index 5014f9f469..0000000000
--- a/gmp/mpn/x86_64/bd1/gmp-mparam.h
+++ /dev/null
@@ -1,236 +0,0 @@
-/* AMD bd1 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2010, 2012, 2014 Free Software Foundation,
-Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 3600 MHz Bulldozer Zambezi */
-/* FFT tuning limit = 40000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          5
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        13
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        28
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           22
-
-#define MUL_TOOM22_THRESHOLD                20
-#define MUL_TOOM33_THRESHOLD                59
-#define MUL_TOOM44_THRESHOLD               166
-#define MUL_TOOM6H_THRESHOLD               274
-#define MUL_TOOM8H_THRESHOLD               333
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     107
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     115
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     150
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 22
-#define SQR_TOOM3_THRESHOLD                 85
-#define SQR_TOOM4_THRESHOLD                242
-#define SQR_TOOM6_THRESHOLD                318
-#define SQR_TOOM8_THRESHOLD                478
-
-#define MULMID_TOOM42_THRESHOLD             22
-
-#define MULMOD_BNM1_THRESHOLD               11
-#define SQRMOD_BNM1_THRESHOLD               14
-
-#define MUL_FFT_MODF_THRESHOLD             404  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    404, 5}, {     19, 6}, {     11, 5}, {     23, 6}, \
-    {     19, 7}, {     10, 6}, {     25, 7}, {     15, 6}, \
-    {     31, 7}, {     25, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
-    {     19, 7}, {     39, 8}, {     27, 9}, {     15, 8}, \
-    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     55,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     83,10}, {     47, 9}, \
-    {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
-    {     47,10}, {    103,12}, {     31,11}, {     63,10}, \
-    {    135,11}, {     79,10}, {    167,11}, {     95,10}, \
-    {    191,11}, {    111,12}, {     63,11}, {    127,10}, \
-    {    255,11}, {    143,10}, {    287, 9}, {    575,10}, \
-    {    303,11}, {    159,12}, {     95,11}, {    191,10}, \
-    {    383,11}, {    207,13}, {     63,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    271,10}, {    543,11}, \
-    {    287,10}, {    575,11}, {    303,12}, {    159,11}, \
-    {    319, 9}, {   1279,11}, {    367,12}, {    191,11}, \
-    {    383,10}, {    767,11}, {    415,12}, {    223,11}, \
-    {    447,10}, {    895,13}, {    127,12}, {    255,11}, \
-    {    511,10}, {   1023,11}, {    543,12}, {    287,11}, \
-    {    575,10}, {   1151,11}, {    607,10}, {   1215,12}, \
-    {    319,10}, {   1279,11}, {    671,12}, {    351,11}, \
-    {    703,13}, {    191,12}, {    383,11}, {    767,12}, \
-    {    415,11}, {    831,10}, {   1663,12}, {    447,11}, \
-    {    895,14}, {    127,13}, {    255,12}, {    511,11}, \
-    {   1023,12}, {    543,11}, {   1087,10}, {   2175,12}, \
-    {    575,11}, {   1151,12}, {    607,11}, {   1215,13}, \
-    {    319,11}, {   1279,12}, {    671,11}, {   1343,10}, \
-    {   2687,12}, {    703,11}, {   1407,13}, {    383,12}, \
-    {    767,11}, {   1535,12}, {    799,11}, {   1599,12}, \
-    {    831,11}, {   1663,13}, {    447,12}, {    895,11}, \
-    {   1791,12}, {    959,14}, {    255,13}, {    511,12}, \
-    {   1087,11}, {   2175,13}, {    575,12}, {   1215,11}, \
-    {   2431,10}, {   4863,12}, {   1343,13}, {    703,12}, \
-    {   1407,14}, {    383,13}, {    767,12}, {   1599,13}, \
-    {    831,12}, {   1727,11}, {   3455,13}, {    895,12}, \
-    {   1791,13}, {    959,15}, {    255,14}, {    511,13}, \
-    {   1087,12}, {   2175,13}, {   1151,12}, {   2303,13}, \
-    {   1215,12}, {   2431,11}, {   4863,13}, {   1343,12}, \
-    {   2687,13}, {   1471,12}, {   2943,11}, {   5887,14}, \
-    {    767,13}, {   1599,12}, {   3199,13}, {   1727,14}, \
-    {    895,13}, {   1791,12}, {   3583,13}, {   1919,12}, \
-    {   3839,15}, {    511,14}, {   1023,13}, {   2175,14}, \
-    {   1151,13}, {   2303,12}, {   4607,13}, {   2431,12}, \
-    {   4863,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
-    {   2943,12}, {   5887,15}, {    767,14}, {   1535,13}, \
-    {   3199,14}, {   1663,13}, {   3455,12}, {   6911,14}, \
-    {   1791,13}, {   3583,14}, {   1919,13}, {   3839,16}, \
-    {    511,15}, {   1023,14}, {   2175,13}, {   4351,14}, \
-    {   2303,13}, {   4607,14}, {   2431,13}, {   4863,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 217
-#define MUL_FFT_THRESHOLD                 3712
-
-#define SQR_FFT_MODF_THRESHOLD             380  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    380, 5}, {     17, 6}, {      9, 5}, {     23, 6}, \
-    {     21, 7}, {     11, 6}, {     25, 7}, {     13, 6}, \
-    {     27, 7}, {     15, 6}, {     31, 7}, {     25, 8}, \
-    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
-    {     19, 7}, {     39, 8}, {     21, 9}, {     11, 8}, \
-    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
-    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     83,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    135,11}, {     79,10}, {    159,11}, {     95,10}, \
-    {    191,11}, {    111,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,10}, {    271,11}, {    143,10}, \
-    {    287, 9}, {    575,10}, {    303,11}, {    159,10}, \
-    {    319,12}, {     95,11}, {    191,10}, {    383,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    271,10}, {    543, 9}, {   1087,11}, {    303,10}, \
-    {    607,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    335,10}, {    671, 9}, {   1343,11}, {    351,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,12}, \
-    {    223,11}, {    447,13}, {    127,12}, {    255,11}, \
-    {    511,10}, {   1023,11}, {    543,10}, {   1087,12}, \
-    {    287,11}, {    575,10}, {   1151,11}, {    607,12}, \
-    {    319,11}, {    639,10}, {   1279,11}, {    671,12}, \
-    {    351,13}, {    191,12}, {    383,11}, {    767,12}, \
-    {    415,11}, {    831,12}, {    447,14}, {    127,13}, \
-    {    255,12}, {    511,11}, {   1023,12}, {    543,11}, \
-    {   1087,10}, {   2175,12}, {    575,11}, {   1151,12}, \
-    {    607,13}, {    319,12}, {    639,11}, {   1279,12}, \
-    {    671,11}, {   1343,10}, {   2687,12}, {    703,11}, \
-    {   1407,13}, {    383,12}, {    767,11}, {   1599,10}, \
-    {   3199,12}, {    831,13}, {    447,12}, {    895,14}, \
-    {    255,13}, {    511,12}, {   1087,11}, {   2175,13}, \
-    {    575,12}, {   1215,11}, {   2431,10}, {   4863,13}, \
-    {    639,12}, {   1343,11}, {   2687,13}, {    703,12}, \
-    {   1407,14}, {    383,13}, {    767,12}, {   1599,11}, \
-    {   3199,13}, {    831,12}, {   1727,13}, {    895,15}, \
-    {    255,14}, {    511,13}, {   1087,12}, {   2175,13}, \
-    {   1215,12}, {   2431,11}, {   4863,14}, {    639,13}, \
-    {   1343,12}, {   2687,13}, {   1471,12}, {   2943,11}, \
-    {   5887,14}, {    767,13}, {   1599,12}, {   3199,13}, \
-    {   1727,14}, {    895,13}, {   1791,12}, {   3583,13}, \
-    {   1919,12}, {   3839,15}, {    511,14}, {   1023,13}, \
-    {   2175,14}, {   1151,13}, {   2303,12}, {   4607,13}, \
-    {   2431,12}, {   4863,14}, {   1279,13}, {   2687,14}, \
-    {   1407,13}, {   2943,12}, {   5887,15}, {    767,14}, \
-    {   1535,13}, {   3199,14}, {   1663,13}, {   3327,12}, \
-    {   6655,13}, {   3455,12}, {   6911,14}, {   1791,13}, \
-    {   3583,14}, {   1919,13}, {   3839,16}, {    511,15}, \
-    {   1023,14}, {   2175,13}, {   4351,14}, {   2303,13}, \
-    {   4607,14}, {   2431,13}, {   4863,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 220
-#define SQR_FFT_THRESHOLD                 3264
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  39
-#define MULLO_MUL_N_THRESHOLD             7246
-
-#define DC_DIV_QR_THRESHOLD                 54
-#define DC_DIVAPPR_Q_THRESHOLD             180
-#define DC_BDIV_QR_THRESHOLD                47
-#define DC_BDIV_Q_THRESHOLD                 80
-
-#define INV_MULMOD_BNM1_THRESHOLD           38
-#define INV_NEWTON_THRESHOLD               226
-#define INV_APPR_THRESHOLD                 188
-
-#define BINV_NEWTON_THRESHOLD              248
-#define REDC_1_TO_REDC_2_THRESHOLD          52
-#define REDC_2_TO_REDC_N_THRESHOLD           0  /* always */
-
-#define MU_DIV_QR_THRESHOLD               1334
-#define MU_DIVAPPR_Q_THRESHOLD            1360
-#define MUPI_DIV_QR_THRESHOLD              108
-#define MU_BDIV_QR_THRESHOLD              1142
-#define MU_BDIV_Q_THRESHOLD               1360
-
-#define POWM_SEC_TABLE  1,16,194,386,452,2245
-
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                     108
-#define HGCD_APPR_THRESHOLD                 51
-#define HGCD_REDUCE_THRESHOLD             2681
-#define GCD_DC_THRESHOLD                   474
-#define GCDEXT_DC_THRESHOLD                298
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                13
-#define GET_STR_PRECOMPUTE_THRESHOLD        21
-#define SET_STR_DC_THRESHOLD               418
-#define SET_STR_PRECOMPUTE_THRESHOLD      1289
-
-#define FAC_DSC_THRESHOLD                  252
-#define FAC_ODD_THRESHOLD                   23
diff --git a/gmp/mpn/x86_64/bd1/hamdist.asm b/gmp/mpn/x86_64/bd1/hamdist.asm
deleted file mode 100644
index 93e1e5632b..0000000000
--- a/gmp/mpn/x86_64/bd1/hamdist.asm
+++ /dev/null
@@ -1,38 +0,0 @@
-dnl  AMD64 mpn_hamdist -- hamming distance.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_hamdist)
-include_mpn(`x86_64/k10/hamdist.asm')
diff --git a/gmp/mpn/x86_64/bd1/mul_1.asm b/gmp/mpn/x86_64/bd1/mul_1.asm
deleted file mode 100644
index e59667c085..0000000000
--- a/gmp/mpn/x86_64/bd1/mul_1.asm
+++ /dev/null
@@ -1,184 +0,0 @@
-dnl  AMD64 mpn_mul_1 optimised for AMD Bulldozer.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9
-C AMD K10
-C AMD bd1	 4
-C AMD bobcat
-C Intel P4
-C Intel core2
-C Intel NHM
-C Intel SBR
-C Intel atom
-C VIA nano
-
-C The loop of this code is the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-
-C TODO
-C  * Move loop code into feed-in blocks, to save insn for zeroing regs.
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`v0',      `%rcx')   C r9
-
-define(`n',       `%rbx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-IFDOS(`	define(`up', ``%rsi'')	') dnl
-IFDOS(`	define(`rp', ``%rcx'')	') dnl
-IFDOS(`	define(`v0', ``%r9'')	') dnl
-IFDOS(`	define(`r9', ``rdi'')	') dnl
-IFDOS(`	define(`n',  ``%r8'')	') dnl
-IFDOS(`	define(`r8', ``rbx'')	') dnl
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mul_1c)
-IFDOS(``push	%rsi		'')
-IFDOS(``push	%rdi		'')
-IFDOS(``mov	%rdx, %rsi	'')
-
-	mov	(up), %rax		C read first u limb early
-	push	%rbx
-IFSTD(`	mov	n_param, %r11	')	C move away n from rdx, mul uses it
-IFDOS(`	mov	n, %r11		')
-	mul	v0
-
-IFSTD(` add	%r8, %rax	')
-IFDOS(` add	64(%rsp), %rax	')	C 40 + 3*8  (3 push insns)
-	adc	$0, %rdx
-	jmp	L(common)
-
-EPILOGUE()
-
-	ALIGN(16)
-PROLOGUE(mpn_mul_1)
-IFDOS(``push	%rsi		'')
-IFDOS(``push	%rdi		'')
-IFDOS(``mov	%rdx, %rsi	'')
-
-	mov	(up), %rax		C read first u limb early
-	push	%rbx
-IFSTD(`	mov	n_param, %r11	')	C move away n from rdx, mul uses it
-IFDOS(`	mov	n, %r11		')
-	mul	v0
-
-L(common):
-IFSTD(`	mov	%r11, n		')
-
-	and	$3, R32(%r11)
-	lea	-16(rp,n,8), rp
-	jz	L(b0)
-	cmp	$2, R32(%r11)
-	jb	L(b1)
-	jz	L(b2)
-
-L(b3):	mov	%rax, %r10
-	mov	%rdx, %r11
-	mov	8(up), %rax
-	mul	v0
-	lea	(up,n,8), up
-	not	n
-	jmp	L(L3)
-
-L(b0):	mov	%rax, %r9
-	mov	%rdx, %r10
-	mov	8(up), %rax
-	lea	(up,n,8), up
-	neg	n
-	jmp	L(L0)
-
-L(b1):	mov	%rax, %r8
-	cmp	$1, n
-	jz	L(n1)
-	mov	%rdx, %r9
-	lea	(up,n,8), up
-	neg	n
-	mov	%r8, 16(rp,n,8)
-	inc	n
-	jmp	L(L1)
-
-L(b2):	mov	%rax, %r11
-	mov	%rdx, %r8
-	mov	8(up), %rax
-	lea	(up,n,8), up
-	neg	n
-	add	$2, n
-	jns	L(end)
-
-	ALIGN(16)
-L(top):	mul	v0
-	mov	%rdx, %r9
-	add	%rax, %r8
-	adc	$0, %r9
-	mov	%r8, 8(rp,n,8)
-	mov	%r11, (rp,n,8)
-L(L1):	mov	(up,n,8), %rax
-	mul	v0
-	add	%rax, %r9
-	mov	%rdx, %r10
-	mov	8(up,n,8), %rax
-	adc	$0, %r10
-L(L0):	mul	v0
-	add	%rax, %r10
-	mov	%rdx, %r11
-	mov	16(up,n,8), %rax
-	adc	$0, %r11
-	mul	v0
-	mov	%r9, 16(rp,n,8)
-L(L3):	add	%rax, %r11
-	mov	%r10, 24(rp,n,8)
-	mov	%rdx, %r8
-	adc	$0, %r8
-	add	$4, n
-	mov	-8(up,n,8), %rax
-	js	L(top)
-
-L(end):	mul	v0
-	add	%rax, %r8
-	adc	$0, %rdx
-	mov	%r11, (rp)
-L(n1):	mov	%r8, 8(rp)
-	mov	%rdx, %rax
-
-	pop	%rbx
-IFDOS(``pop	%rdi		'')
-IFDOS(``pop	%rsi		'')
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/bd1/mul_2.asm b/gmp/mpn/x86_64/bd1/mul_2.asm
deleted file mode 100644
index 4ed5f30561..0000000000
--- a/gmp/mpn/x86_64/bd1/mul_2.asm
+++ /dev/null
@@ -1,192 +0,0 @@
-dnl  AMD64 mpn_mul_2 optimised for AMD Bulldozer.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9
-C AMD K10
-C AMD bull	4.36		average, quite fluctuating
-C AMD pile	4.38		slighty fluctuating
-C AMD steam
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel core
-C Intel NHM
-C Intel SBR
-C Intel IBR
-C Intel HWL
-C Intel BWL
-C Intel atom
-C VIA nano
-
-C The loop of this code is the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-C Scheme: genxmul --mul
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`vp',      `%rcx')   C r9
-
-define(`v0', `%r8')
-define(`v1', `%r9')
-define(`w0', `%rbx')
-define(`w1', `%rcx')
-define(`w2', `%rbp')
-define(`w3', `%r10')
-define(`n',  `%r11')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_mul_2)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-
-	mov	(up), %rax
-
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	lea	(up,n_param,8), up
-	lea	(rp,n_param,8), rp
-
-	mov	n_param, n
-	mul	v0
-	neg	n
-
-	test	$1, R8(n)
-	jnz	L(bx1)
-
-L(bx0):	test	$2, R8(n)
-	jnz	L(b10)
-
-L(b00):	mov	%rax, w0
-	mov	%rdx, w1
-	xor	R32(w2), R32(w2)
-	mov	(up,n,8), %rax
-	jmp	L(lo0)
-
-L(b10):	mov	%rax, w2
-	mov	%rdx, w3
-	mov	(up,n,8), %rax
-	xor	R32(w0), R32(w0)
-	mul	v1
-	add	$-2, n
-	jmp	L(lo2)
-
-L(bx1):	test	$2, R8(n)
-	jz	L(b11)
-
-L(b01):	mov	%rax, w3
-	mov	%rdx, w0
-	mov	(up,n,8), %rax
-	mul	v1
-	xor	R32(w1), R32(w1)
-	inc	n
-	jmp	L(lo1)
-
-L(b11):	mov	%rax, w1
-	mov	%rdx, w2
-	mov	(up,n,8), %rax
-	xor	R32(w3), R32(w3)
-	dec	n
-	jmp	L(lo3)
-
-	ALIGN(32)
-L(top):	mov	-8(up,n,8), %rax
-	mul	v1
-	mov	w2, -16(rp,n,8)
-L(lo1):	add	%rax, w0
-	mov	w3, -8(rp,n,8)
-	adc	%rdx, w1
-	mov	(up,n,8), %rax
-	mul	v0
-	mov	$0, R32(w2)
-	add	%rax, w0
-	adc	%rdx, w1
-	adc	$0, R32(w2)
-	mov	(up,n,8), %rax
-L(lo0):	mul	v1
-	add	%rax, w1
-	adc	%rdx, w2
-	mov	8(up,n,8), %rax
-	mul	v0
-	add	%rax, w1
-	mov	w0, (rp,n,8)
-	mov	$0, R32(w3)
-	mov	8(up,n,8), %rax
-	adc	%rdx, w2
-	adc	$0, R32(w3)
-L(lo3):	mul	v1
-	add	%rax, w2
-	mov	16(up,n,8), %rax
-	adc	%rdx, w3
-	mul	v0
-	add	%rax, w2
-	mov	16(up,n,8), %rax
-	mov	$0, R32(w0)
-	adc	%rdx, w3
-	adc	$0, R32(w0)
-	mul	v1
-	mov	w1, 8(rp,n,8)
-L(lo2):	add	%rax, w3
-	adc	%rdx, w0
-	mov	24(up,n,8), %rax
-	mul	v0
-	add	%rax, w3
-	adc	%rdx, w0
-	mov	$0, R32(w1)
-	adc	$0, R32(w1)
-	add	$4, n
-	jnc	L(top)
-
-L(end):	mov	-8(up,n,8), %rax
-	mul	v1
-	mov	w2, -16(rp,n,8)
-	add	%rax, w0
-	mov	w3, -8(rp,n,8)
-	adc	%rdx, w1
-	mov	w0, (rp,n,8)
-	mov	w1, %rax
-
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/bd1/mul_basecase.asm b/gmp/mpn/x86_64/bd1/mul_basecase.asm
deleted file mode 100644
index e47ba587cd..0000000000
--- a/gmp/mpn/x86_64/bd1/mul_basecase.asm
+++ /dev/null
@@ -1,416 +0,0 @@
-dnl  AMD64 mpn_mul_basecase optimised for AMD Bulldozer and Piledriver.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb	mul_1		mul_2		mul_3		addmul_2
-C AMD K8,K9
-C AMD K10
-C AMD bull	~4.8		~4.55		-		~4.3
-C AMD pile	~4.6		~4.55		-		~4.55
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel core
-C Intel NHM
-C Intel SBR
-C Intel IBR
-C Intel HWL
-C Intel BWL
-C Intel atom
-C VIA nano
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-
-C TODO
-C  * Merge bull-specific mul_1, if it is not slower the TOOM22 range.
-C    Alternatively, we could tweak the present code (which was loopmixed for a
-C    different CPU).
-C  * Merge faster mul_2, such as the one in the same directory as this file.
-C  * Further micro-optimise.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-
-define(`rp',      `%rdi')
-define(`up',      `%rsi')
-define(`un_param',`%rdx')
-define(`vp',      `%rcx')
-define(`vn',      `%r8')
-
-define(`un',      `%rbx')
-
-define(`w0',	`%r10')
-define(`w1',	`%r11')
-define(`w2',	`%r12')
-define(`w3',	`%r13')
-define(`n',	`%rbp')
-define(`v0',	`%r9')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mul_basecase)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')
-	push	%rbx
-	push	%rbp
-	mov	un_param, un		C free up rdx
-	neg	un
-
-	mov	(up), %rax		C shared for mul_1 and mul_2
-	lea	(up,un_param,8), up	C point at operand end
-	lea	(rp,un_param,8), rp	C point at rp[un-1]
-
-	mov	(vp), v0		C shared for mul_1 and mul_2
-	mul	v0			C shared for mul_1 and mul_2
-
-	test	$1, R8(vn)
-	jz	L(do_mul_2)
-
-L(do_mul_1):
-	test	$1, R8(un)
-	jnz	L(m1x1)
-
-L(m1x0):mov	%rax, w0		C un = 2, 4, 6, 8, ...
-	mov	%rdx, w1
-	mov	8(up,un,8), %rax
-	test	$2, R8(un)
-	jnz	L(m110)
-
-L(m100):lea	2(un), n		C un = 4, 8, 12, ...
-	jmp	L(m1l0)
-
-L(m110):lea	(un), n			C un = 2, 6, 10, ...
-	jmp	L(m1l2)
-
-L(m1x1):mov	%rax, w1		C un = 1, 3, 5, 7, ...
-	mov	%rdx, w0
-	test	$2, R8(un)
-	jz	L(m111)
-
-L(m101):lea	3(un), n		C un = 1, 5, 9, ...
-	test	n, n
-	js	L(m1l1)
-	mov	%rax, -8(rp)
-	mov	%rdx, (rp)
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-L(m111):lea	1(un), n		C un = 3, 7, 11, ...
-	mov	8(up,un,8), %rax
-	jmp	L(m1l3)
-
-	ALIGN(16)
-L(m1tp):mov	%rdx, w0
-	add	%rax, w1
-L(m1l1):mov	-16(up,n,8), %rax
-	adc	$0, w0
-	mul	v0
-	add	%rax, w0
-	mov	w1, -24(rp,n,8)
-	mov	-8(up,n,8), %rax
-	mov	%rdx, w1
-	adc	$0, w1
-L(m1l0):mul	v0
-	mov	w0, -16(rp,n,8)
-	add	%rax, w1
-	mov	%rdx, w0
-	mov	(up,n,8), %rax
-	adc	$0, w0
-L(m1l3):mul	v0
-	mov	w1, -8(rp,n,8)
-	mov	%rdx, w1
-	add	%rax, w0
-	mov	8(up,n,8), %rax
-	adc	$0, w1
-L(m1l2):mul	v0
-	mov	w0, (rp,n,8)
-	add	$4, n
-	jnc	L(m1tp)
-
-L(m1ed):add	%rax, w1
-	adc	$0, %rdx
-	mov	w1, I(-8(rp),-24(rp,n,8))
-	mov	%rdx, I((rp),-16(rp,n,8))
-
-	dec	R32(vn)
-	jz	L(ret2)
-
-	lea	8(vp), vp
-	lea	8(rp), rp
-	push	%r12
-	push	%r13
-	push	%r14
-	jmp	L(do_addmul)
-
-L(do_mul_2):
-define(`v1',	`%r14')
-	push	%r12
-	push	%r13
-	push	%r14
-
-	mov	8(vp), v1
-
-	test	$1, R8(un)
-	jnz	L(m2b1)
-
-L(m2b0):lea	(un), n
-	mov	%rax, w2		C 0
-	mov	(up,un,8), %rax
-	mov	%rdx, w1		C 1
-	mul	v1
-	mov	%rax, w0		C 1
-	mov	w2, (rp,un,8)		C 0
-	mov	8(up,un,8), %rax
-	mov	%rdx, w2		C 2
-	jmp	L(m2l0)
-
-L(m2b1):lea	1(un), n
-	mov	%rax, w0		C 1
-	mov	%rdx, w3		C 2
-	mov	(up,un,8), %rax
-	mul	v1
-	mov	w0, (rp,un,8)		C 1
-	mov	%rdx, w0		C 3
-	mov	%rax, w2		C 0
-	mov	8(up,un,8), %rax
-	jmp	L(m2l1)
-
-	ALIGN(32)
-L(m2tp):add	%rax, w2		C 0
-	mov	(up,n,8), %rax
-	adc	$0, w0			C 1
-L(m2l1):mul	v0
-	add	%rax, w2		C 0
-	mov	(up,n,8), %rax
-	mov	%rdx, w1		C 1
-	adc	$0, w1			C 1
-	mul	v1
-	add	w3, w2			C 0
-	adc	$0, w1			C 1
-	add	%rax, w0		C 1
-	mov	w2, (rp,n,8)		C 0
-	mov	8(up,n,8), %rax
-	mov	%rdx, w2		C 2
-	adc	$0, w2			C 2
-L(m2l0):mul	v0
-	add	%rax, w0		C 1
-	mov	%rdx, w3		C 2
-	adc	$0, w3			C 2
-	add	w1, w0			C 1
-	adc	$0, w3			C 2
-	mov	8(up,n,8), %rax
-	mul	v1
-	add	$2, n
-	mov	w0, -8(rp,n,8)		C 1
-	mov	%rdx, w0		C 3
-	jnc	L(m2tp)
-
-L(m2ed):add	%rax, w2
-	adc	$0, %rdx
-	add	w3, w2
-	adc	$0, %rdx
-	mov	w2, I((rp),(rp,n,8))
-	mov	%rdx, I(8(rp),8(rp,n,8))
-
-	add	$-2, R32(vn)
-	jz	L(ret5)
-
-	lea	16(vp), vp
-	lea	16(rp), rp
-
-
-L(do_addmul):
-	push	%r15
-	push	vn			C save vn in new stack slot
-define(`vn',	`(%rsp)')
-define(`X0',	`%r14')
-define(`X1',	`%r15')
-define(`v1',	`%r8')
-
-L(outer):
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	mov	(up,un,8), %rax
-	mul	v0
-
-	test	$1, R8(un)
-	jnz	L(bx1)
-
-L(bx0):	mov	%rax, X1
-	mov	(up,un,8), %rax
-	mov	%rdx, X0
-	mul	v1
-	test	$2, R8(un)
-	jnz	L(b10)
-
-L(b00):	lea	(un), n			C un = 4, 8, 12, ...
-	mov	(rp,un,8), w3
-	mov	%rax, w0
-	mov	8(up,un,8), %rax
-	mov	%rdx, w1
-	jmp	L(lo0)
-
-L(b10):	lea	2(un), n		C un = 2, 6, 10, ...
-	mov	(rp,un,8), w1
-	mov	%rdx, w3
-	mov	%rax, w2
-	mov	8(up,un,8), %rax
-	jmp	L(lo2)
-
-L(bx1):	mov	%rax, X0
-	mov	(up,un,8), %rax
-	mov	%rdx, X1
-	mul	v1
-	test	$2, R8(un)
-	jz	L(b11)
-
-L(b01):	lea	1(un), n		C un = 1, 5, 9, ...
-	mov	(rp,un,8), w2
-	mov	%rdx, w0
-	mov	%rax, w3
-	jmp	L(lo1)
-
-L(b11):	lea	-1(un), n		C un = 3, 7, 11, ...
-	mov	(rp,un,8), w0
-	mov	%rax, w1
-	mov	8(up,un,8), %rax
-	mov	%rdx, w2
-	jmp	L(lo3)
-
-	ALIGN(32)
-L(top):
-L(lo2):	mul	v0
-	add	w1, X1
-	mov	X1, -16(rp,n,8)
-	mov	%rdx, X1
-	adc	%rax, X0
-	adc	$0, X1
-	mov	-8(up,n,8), %rax
-	mul	v1
-	mov	-8(rp,n,8), w1
-	mov	%rdx, w0
-	add	w1, w2
-	adc	%rax, w3
-	adc	$0, w0
-L(lo1):	mov	(up,n,8), %rax
-	mul	v0
-	add	w2, X0
-	mov	X0, -8(rp,n,8)
-	mov	%rdx, X0
-	adc	%rax, X1
-	mov	(up,n,8), %rax
-	adc	$0, X0
-	mov	(rp,n,8), w2
-	mul	v1
-	add	w2, w3
-	adc	%rax, w0
-	mov	8(up,n,8), %rax
-	mov	%rdx, w1
-	adc	$0, w1
-L(lo0):	mul	v0
-	add	w3, X1
-	mov	X1, (rp,n,8)
-	adc	%rax, X0
-	mov	8(up,n,8), %rax
-	mov	%rdx, X1
-	adc	$0, X1
-	mov	8(rp,n,8), w3
-	mul	v1
-	add	w3, w0
-	adc	%rax, w1
-	mov	16(up,n,8), %rax
-	mov	%rdx, w2
-	adc	$0, w2
-L(lo3):	mul	v0
-	add	w0, X0
-	mov	X0, 8(rp,n,8)
-	mov	%rdx, X0
-	adc	%rax, X1
-	adc	$0, X0
-	mov	16(up,n,8), %rax
-	mov	16(rp,n,8), w0
-	mul	v1
-	mov	%rdx, w3
-	add	w0, w1
-	adc	%rax, w2
-	adc	$0, w3
-	mov	24(up,n,8), %rax
-	add	$4, n
-	jnc	L(top)
-
-L(end):	mul	v0
-	add	w1, X1
-	mov	X1, I(-16(rp),-16(rp,n,8))
-	mov	%rdx, X1
-	adc	%rax, X0
-	adc	$0, X1
-	mov	I(-8(up),-8(up,n,8)), %rax
-	mul	v1
-	mov	I(-8(rp),-8(rp,n,8)), w1
-	add	w1, w2
-	adc	%rax, w3
-	adc	$0, %rdx
-	add	w2, X0
-	adc	$0, X1
-	mov	X0, I(-8(rp),-8(rp,n,8))
-	add	w3, X1
-	mov	X1, I((rp),(rp,n,8))
-	adc	$0, %rdx
-	mov	%rdx, I(8(rp),8(rp,n,8))
-
-
-	addl	$-2, vn
-	lea	16(vp), vp
-	lea	16(rp), rp
-	jnz	L(outer)
-
-	pop	%rax		C deallocate vn slot
-	pop	%r15
-L(ret5):pop	%r14
-	pop	%r13
-	pop	%r12
-L(ret2):pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/bd1/popcount.asm b/gmp/mpn/x86_64/bd1/popcount.asm
deleted file mode 100644
index 8f22a715b6..0000000000
--- a/gmp/mpn/x86_64/bd1/popcount.asm
+++ /dev/null
@@ -1,38 +0,0 @@
-dnl  AMD64 mpn_popcount -- population count.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_popcount)
-include_mpn(`x86_64/k10/popcount.asm')
diff --git a/gmp/mpn/x86_64/bd1/sec_tabselect.asm b/gmp/mpn/x86_64/bd1/sec_tabselect.asm
deleted file mode 100644
index e4360341d9..0000000000
--- a/gmp/mpn/x86_64/bd1/sec_tabselect.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_sec_tabselect.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_sec_tabselect)
-include_mpn(`x86_64/fastsse/sec_tabselect.asm')
diff --git a/gmp/mpn/x86_64/bd1/sublsh1_n.asm b/gmp/mpn/x86_64/bd1/sublsh1_n.asm
deleted file mode 100644
index 4ba673d15a..0000000000
--- a/gmp/mpn/x86_64/bd1/sublsh1_n.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  AMD64 mpn_sublsh1_n
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_sublsh1_n mpn_sublsh1_nc)
-include_mpn(`x86_64/atom/sublsh1_n.asm')
diff --git a/gmp/mpn/x86_64/bd2/gmp-mparam.h b/gmp/mpn/x86_64/bd2/gmp-mparam.h
deleted file mode 100644
index 16f25c4c7b..0000000000
--- a/gmp/mpn/x86_64/bd2/gmp-mparam.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/* AMD bd2 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2010, 2012, 2014 Free Software Foundation,
-Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 3200 MHz Piledriver Vishera  */
-/* FFT tuning limit = 40000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.8 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        17
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        34
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1_NORM_THRESHOLD              2
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           21
-
-#define MUL_TOOM22_THRESHOLD                16
-#define MUL_TOOM33_THRESHOLD                54
-#define MUL_TOOM44_THRESHOLD               154
-#define MUL_TOOM6H_THRESHOLD               274
-#define MUL_TOOM8H_THRESHOLD               454
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     105
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     147
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 22
-#define SQR_TOOM3_THRESHOLD                 81
-#define SQR_TOOM4_THRESHOLD                218
-#define SQR_TOOM6_THRESHOLD                303
-#define SQR_TOOM8_THRESHOLD                430
-
-#define MULMID_TOOM42_THRESHOLD             20
-
-#define MULMOD_BNM1_THRESHOLD               12
-#define SQRMOD_BNM1_THRESHOLD               13
-
-#define MUL_FFT_MODF_THRESHOLD             376  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    376, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     13, 6}, {     27, 7}, {     21, 8}, \
-    {     11, 7}, {     25, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     32, 8}, {     17, 7}, {     35, 8}, \
-    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
-    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     49, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
-    {     71,10}, {     39, 9}, {     83,10}, {     47, 9}, \
-    {     99,10}, {     55,11}, {     31,10}, {     79,11}, \
-    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
-    {    135, 6}, {   2175, 7}, {   1151, 9}, {    303,10}, \
-    {    159, 9}, {    319, 8}, {    639, 9}, {    335,11}, \
-    {     95,10}, {    191,11}, {    111,12}, {     63,11}, \
-    {    127,10}, {    255,11}, {    143,10}, {    303,11}, \
-    {    159,12}, {     95,11}, {    191,13}, {     63,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
-    {    543,11}, {    287,10}, {    575,11}, {    303,12}, \
-    {    159,11}, {    319,10}, {    639,11}, {    351,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,12}, \
-    {    223,11}, {    447,13}, {    127,12}, {    255,11}, \
-    {    511,12}, {    287,11}, {    575,10}, {   1151,12}, \
-    {    319,11}, {    639,10}, {   1279,12}, {    351,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,10}, \
-    {   1663,12}, {    447,14}, {    127,13}, {    255,12}, \
-    {    511,11}, {   1023,12}, {    543,11}, {   1087,10}, \
-    {   2175,12}, {    575,11}, {   1151,12}, {    607,13}, \
-    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
-    {   1343,10}, {   2687,12}, {    703,11}, {   1407,10}, \
-    {   2815,12}, {    735,13}, {    383,12}, {    767,11}, \
-    {   1599,12}, {    831,11}, {   1663,13}, {    447,12}, \
-    {    895,11}, {   1791,14}, {    255,13}, {    511,12}, \
-    {   1023,11}, {   2047,12}, {   1087,11}, {   2175,13}, \
-    {    575,12}, {   1151,11}, {   2303,12}, {   1215,11}, \
-    {   2431,10}, {   4863,13}, {    639,12}, {   1279,11}, \
-    {   2559,12}, {   1343,11}, {   2687,13}, {    703,12}, \
-    {   1407,11}, {   2815,14}, {    383,13}, {    767,12}, \
-    {   1599,13}, {    831,12}, {   1663,13}, {    895,12}, \
-    {   1791,15}, {    255,14}, {    511,13}, {   1023,12}, \
-    {   2047,13}, {   1087,12}, {   2175,13}, {   1151,12}, \
-    {   2303,13}, {   1215,12}, {   2431,11}, {   4863,14}, \
-    {    639,13}, {   1343,12}, {   2687,13}, {   1407,12}, \
-    {   2815,13}, {   1471,12}, {   2943,11}, {   5887,14}, \
-    {    767,13}, {   1599,12}, {   3199,13}, {   1727,14}, \
-    {    895,13}, {   1791,12}, {   3583,13}, {   1919,12}, \
-    {   3839,11}, {   7679,15}, {    511,14}, {   1023,13}, \
-    {   2175,14}, {   1151,13}, {   2431,12}, {   4863,14}, \
-    {   1279,13}, {   2687,14}, {   1407,13}, {   2943,12}, \
-    {   5887,15}, {    767,14}, {   1535,13}, {   3199,14}, \
-    {   1663,13}, {   3455,12}, {   6911,14}, {   1791,13}, \
-    {   3583,14}, {   1919,13}, {   3839,12}, {   7679,16}, \
-    {    511,15}, {   1023,14}, {   2175,13}, {   4479,14}, \
-    {   2303,13}, {   4607,14}, {   2431,13}, {   4863,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 225
-#define MUL_FFT_THRESHOLD                 3712
-
-#define SQR_FFT_MODF_THRESHOLD             344  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    344, 5}, {     11, 4}, {     23, 5}, {     19, 6}, \
-    {     10, 5}, {     21, 6}, {     21, 7}, {     11, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     17, 6}, \
-    {     35, 8}, {      9, 7}, {     21, 8}, {     11, 7}, \
-    {     25, 8}, {     13, 7}, {     27, 8}, {     15, 7}, \
-    {     31, 8}, {     17, 7}, {     35, 8}, {     21, 9}, \
-    {     11, 8}, {     27, 9}, {     15, 8}, {     35, 9}, \
-    {     19, 8}, {     39, 9}, {     23, 8}, {     47, 9}, \
-    {     27,10}, {     15, 9}, {     39,10}, {     23, 9}, \
-    {     51,11}, {     15,10}, {     31, 9}, {     63,10}, \
-    {     39, 9}, {     79,10}, {     47, 9}, {     95,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    135,11}, {     95,10}, {    191, 6}, {   3199, 7}, \
-    {   1727, 9}, {    447,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271,11}, {    143,10}, {    287, 9}, \
-    {    575,10}, {    303, 9}, {    607,10}, {    319,12}, \
-    {     95,11}, {    191,10}, {    383,11}, {    207,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    271,10}, {    543,11}, {    287,10}, {    575,11}, \
-    {    351,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,12}, {    223,11}, {    447,10}, {    895,13}, \
-    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
-    {    543,12}, {    287,11}, {    575,12}, {    319,10}, \
-    {   1279,12}, {    351,13}, {    191,12}, {    383,11}, \
-    {    767,12}, {    415,11}, {    831,12}, {    447,11}, \
-    {    895,14}, {    127,13}, {    255,12}, {    511,11}, \
-    {   1023,12}, {    543,11}, {   1087,10}, {   2175,12}, \
-    {    575,13}, {    319,11}, {   1279,12}, {    671,11}, \
-    {   1343,12}, {    703,11}, {   1407,13}, {    383,12}, \
-    {    767,11}, {   1535,12}, {    799,11}, {   1599,12}, \
-    {    831,11}, {   1663,13}, {    447,12}, {    895,14}, \
-    {    255,13}, {    511,12}, {   1087,11}, {   2175,13}, \
-    {    575,11}, {   2303,12}, {   1215,11}, {   2431,10}, \
-    {   4863,13}, {    639,12}, {   1343,11}, {   2687,13}, \
-    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
-    {   1599,13}, {    831,12}, {   1727,11}, {   3455,13}, \
-    {    895,15}, {    255,14}, {    511,13}, {   1087,12}, \
-    {   2239,11}, {   4479,12}, {   2303,13}, {   1215,12}, \
-    {   2431,11}, {   4863,14}, {    639,13}, {   1279,12}, \
-    {   2559,13}, {   1343,12}, {   2687,13}, {   1407,12}, \
-    {   2815,13}, {   1471,12}, {   2943,11}, {   5887,14}, \
-    {    767,13}, {   1599,12}, {   3199,13}, {   1727,14}, \
-    {    895,13}, {   1791,12}, {   3583,13}, {   1919,12}, \
-    {   3839,15}, {    511,14}, {   1023,13}, {   2431,12}, \
-    {   4863,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
-    {   2943,12}, {   5887,11}, {  11775,15}, {    767,14}, \
-    {   1535,13}, {   3199,14}, {   1663,13}, {   3455,12}, \
-    {   6911,14}, {   1791,13}, {   3583,14}, {   1919,13}, \
-    {   3839,16}, {    511,15}, {   1023,14}, {   2175,13}, \
-    {   4479,14}, {   2303,13}, {   4607,14}, {   2431,13}, \
-    {   4863,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 214
-#define SQR_FFT_THRESHOLD                 3264
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  37
-#define MULLO_MUL_N_THRESHOLD             8397
-
-#define DC_DIV_QR_THRESHOLD                 42
-#define DC_DIVAPPR_Q_THRESHOLD             173
-#define DC_BDIV_QR_THRESHOLD                42
-#define DC_BDIV_Q_THRESHOLD                 77
-
-#define INV_MULMOD_BNM1_THRESHOLD           30
-#define INV_NEWTON_THRESHOLD               202
-#define INV_APPR_THRESHOLD                 172
-
-#define BINV_NEWTON_THRESHOLD              238
-#define REDC_1_TO_REDC_2_THRESHOLD          44
-#define REDC_2_TO_REDC_N_THRESHOLD           0  /* always */
-
-#define MU_DIV_QR_THRESHOLD               1308
-#define MU_DIVAPPR_Q_THRESHOLD            1334
-#define MUPI_DIV_QR_THRESHOLD               85
-#define MU_BDIV_QR_THRESHOLD              1142
-#define MU_BDIV_Q_THRESHOLD               1308
-
-#define POWM_SEC_TABLE  1,16,257,452,1099,2079
-
-#define MATRIX22_STRASSEN_THRESHOLD         14
-#define HGCD_THRESHOLD                     110
-#define HGCD_APPR_THRESHOLD                 96
-#define HGCD_REDUCE_THRESHOLD             2479
-#define GCD_DC_THRESHOLD                   372
-#define GCDEXT_DC_THRESHOLD                293
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        20
-#define SET_STR_DC_THRESHOLD               204
-#define SET_STR_PRECOMPUTE_THRESHOLD      1160
-
-#define FAC_DSC_THRESHOLD                  166
-#define FAC_ODD_THRESHOLD                   24
diff --git a/gmp/mpn/x86_64/bdiv_dbm1c.asm b/gmp/mpn/x86_64/bdiv_dbm1c.asm
index a53bd52beb..8d73b9fe00 100644
--- a/gmp/mpn/x86_64/bdiv_dbm1c.asm
+++ b/gmp/mpn/x86_64/bdiv_dbm1c.asm
@@ -1,106 +1,98 @@
 dnl  x86_64 mpn_bdiv_dbm1.
 
-dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9	 2.25
-C AMD K10	 2.25
-C Intel P4	12.5
-C Intel core2	 4
-C Intel NHM	 3.75
-C Intel SBR	 3.6
-C Intel atom	20
-C VIA nano	 4
+C K8,K9:	 2.25
+C K10:		  ?
+C P4:		12.5
+C P6-15 (Core2): 4.0
+C P6-28 (Atom): 20
 
 C TODO
-C  * Optimise feed-in code.
-
-C INPUT PARAMETERS
-define(`qp',	  `%rdi')
-define(`up',	  `%rsi')
-define(`n_param', `%rdx')
-define(`bd',	  `%rcx')
-define(`cy',	  `%r8')
+C  * Do proper 4-way feed-in instead of the current epilogue
 
-define(`n',       `%r9')
+C INPUT PARAMETERS shared
+define(`qp',	`%rdi')
+define(`up',	`%rsi')
+define(`n',	`%rdx')
+define(`bd',	`%rcx')
+define(`cy',	`%r8')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
 
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_bdiv_dbm1c)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	mov	(up), %rax
-	mov	n_param, n
-	mov	R32(n_param), R32(%r11)
-	mul	bd
-	lea	(up,n,8), up
-	lea	(qp,n,8), qp
-	neg	n
-	and	$3, R32(%r11)
-	jz	L(lo0)
-	lea	-4(n,%r11), n
-	cmp	$2, R32(%r11)
-	jc	L(lo1)
-	jz	L(lo2)
-	jmp	L(lo3)
+	mov	(%rsi), %rax
+	mov	%rdx, %r9		C n
 
+	mul	%rcx
+	sub	%rax, %r8
+	mov	%r8, (%rdi)
+	sbb	%rdx, %r8
+
+	lea	(%rsi,%r9,8), %rsi
+	lea	(%rdi,%r9,8), %rdi
+	neg	%r9
+	add	$4, %r9
+	jns	L(end)
 	ALIGN(16)
-L(top):	mov	(up,n,8), %rax
-	mul	bd
-L(lo0):	sub	%rax, %r8
-	mov	%r8, (qp,n,8)
+L(top):
+	mov	-24(%rsi,%r9,8), %rax
+	mul	%rcx
+	sub	%rax, %r8
+	mov	%r8, -24(%rdi,%r9,8)
 	sbb	%rdx, %r8
-	mov	8(up,n,8), %rax
-	mul	bd
-L(lo3):	sub	%rax, %r8
-	mov	%r8, 8(qp,n,8)
+L(3):
+	mov	-16(%rsi,%r9,8), %rax
+	mul	%rcx
+	sub	%rax, %r8
+	mov	%r8, -16(%rdi,%r9,8)
 	sbb	%rdx, %r8
-	mov	16(up,n,8), %rax
-	mul	bd
-L(lo2):	sub	%rax, %r8
-	mov	%r8, 16(qp,n,8)
+L(2):
+	mov	-8(%rsi,%r9,8), %rax
+	mul	%rcx
+	sub	%rax, %r8
+	mov	%r8, -8(%rdi,%r9,8)
 	sbb	%rdx, %r8
-	mov	24(up,n,8), %rax
-	mul	bd
-L(lo1):	sub	%rax, %r8
-	mov	%r8, 24(qp,n,8)
+L(1):
+	mov	(%rsi,%r9,8), %rax
+	mul	%rcx
+	sub	%rax, %r8
+	mov	%r8, (%rdi,%r9,8)
 	sbb	%rdx, %r8
-	add	$4, n
-	jnz	L(top)
 
-	mov	%r8, %rax
-	FUNC_EXIT()
+	add	$4, %r9
+	js	L(top)
+L(end):
+	je	L(3x)
+	cmp	$2, %r9
+	jg	L(ret)
+	mov	$-1, %r9
+	je	L(1)
+	jmp	L(2)
+L(3x):
+	dec	%r9
+	jmp	L(3)
+
+L(ret):	mov	%r8, %rax
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/bdiv_q_1.asm b/gmp/mpn/x86_64/bdiv_q_1.asm
deleted file mode 100644
index 02eacbe6a8..0000000000
--- a/gmp/mpn/x86_64/bdiv_q_1.asm
+++ /dev/null
@@ -1,167 +0,0 @@
-dnl  AMD64 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by
-dnl  1-limb divisor, returning quotient only.
-
-dnl  Copyright 2001, 2002, 2004-2006, 2009, 2011, 2012 Free Software
-dnl  Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb
-C AMD K8,K9	10
-C AMD K10	10
-C Intel P4	33
-C Intel core2	13.25
-C Intel corei	14
-C Intel atom	42
-C VIA nano	 ?
-
-
-C INPUT PARAMETERS
-define(`rp',		`%rdi')
-define(`up',		`%rsi')
-define(`n',		`%rdx')
-define(`d',		`%rcx')
-define(`di',		`%r8')		C	just mpn_pi1_bdiv_q_1
-define(`ncnt',		`%r9')		C	just mpn_pi1_bdiv_q_1
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_bdiv_q_1)
-	FUNC_ENTRY(4)
-	push	%rbx
-
-	mov	%rcx, %rax
-	xor	R32(%rcx), R32(%rcx)	C ncnt count
-	mov	%rdx, %r10
-
-	bt	$0, R32(%rax)
-	jnc	L(evn)			C skip bsfq unless divisor is even
-
-L(odd):	mov	%rax, %rbx
-	shr	R32(%rax)
-	and	$127, R32(%rax)		C d/2, 7 bits
-
-	LEA(	binvert_limb_table, %rdx)
-
-	movzbl	(%rdx,%rax), R32(%rax)	C inv 8 bits
-
-	mov	%rbx, %r11		C d without twos
-
-	lea	(%rax,%rax), R32(%rdx)	C 2*inv
-	imul	R32(%rax), R32(%rax)	C inv*inv
-	imul	R32(%rbx), R32(%rax)	C inv*inv*d
-	sub	R32(%rax), R32(%rdx)	C inv = 2*inv - inv*inv*d, 16 bits
-
-	lea	(%rdx,%rdx), R32(%rax)	C 2*inv
-	imul	R32(%rdx), R32(%rdx)	C inv*inv
-	imul	R32(%rbx), R32(%rdx)	C inv*inv*d
-	sub	R32(%rdx), R32(%rax)	C inv = 2*inv - inv*inv*d, 32 bits
-
-	lea	(%rax,%rax), %r8	C 2*inv
-	imul	%rax, %rax		C inv*inv
-	imul	%rbx, %rax		C inv*inv*d
-	sub	%rax, %r8		C inv = 2*inv - inv*inv*d, 64 bits
-
-	jmp	L(com)
-
-L(evn):	bsf	%rax, %rcx
-	shr	R8(%rcx), %rax
-	jmp	L(odd)
-EPILOGUE()
-
-PROLOGUE(mpn_pi1_bdiv_q_1)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-IFDOS(`	mov	64(%rsp), %r9	')
-	push	%rbx
-
-	mov	%rcx, %r11		C d
-	mov	%rdx, %r10		C n
-	mov	%r9, %rcx		C ncnt
-
-L(com):	mov	(up), %rax		C up[0]
-
-	dec	%r10
-	jz	L(one)
-
-	mov	8(up), %rdx		C up[1]
-	lea	(up,%r10,8), up		C up end
-	lea	(rp,%r10,8), rp		C rp end
-	neg	%r10			C -n
-
-	shrd	R8(%rcx), %rdx, %rax
-
-	xor	R32(%rbx), R32(%rbx)
-	jmp	L(ent)
-
-	ALIGN(8)
-L(top):
-	C rax	q
-	C rbx	carry bit, 0 or 1
-	C rcx	ncnt
-	C rdx
-	C r10	counter, limbs, negative
-
-	mul	%r11			C carry limb in rdx
-	mov	(up,%r10,8), %rax
-	mov	8(up,%r10,8), %r9
-	shrd	R8(%rcx), %r9, %rax
-	nop
-	sub	%rbx, %rax		C apply carry bit
-	setc	R8(%rbx)
-	sub	%rdx, %rax		C apply carry limb
-	adc	$0, %rbx
-L(ent):	imul	%r8, %rax
-	mov	%rax, (rp,%r10,8)
-	inc	%r10
-	jnz	L(top)
-
-	mul	%r11			C carry limb in rdx
-	mov	(up), %rax		C up high limb
-	shr	R8(%rcx), %rax
-	sub	%rbx, %rax		C apply carry bit
-	sub	%rdx, %rax		C apply carry limb
-	imul	%r8, %rax
-	mov	%rax, (rp)
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-L(one):	shr	R8(%rcx), %rax
-	imul	%r8, %rax
-	mov	%rax, (rp)
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/bobcat/aors_n.asm b/gmp/mpn/x86_64/bobcat/aors_n.asm
deleted file mode 100644
index 22287b8558..0000000000
--- a/gmp/mpn/x86_64/bobcat/aors_n.asm
+++ /dev/null
@@ -1,150 +0,0 @@
-dnl  AMD64 mpn_add_n, mpn_sub_n optimised for bobcat.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2010-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9
-C AMD K10
-C AMD bd1
-C AMD bobcat	 2.28
-C Intel P4
-C Intel core2
-C Intel NHM
-C Intel SBR
-C Intel IBR
-C Intel atom
-C VIA nano
-
-C The loop of this code is the result of running a code generation and
-C optimization tool suite written by David Harvey and Torbjorn Granlund.
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')	C rcx
-define(`up',	`%rsi')	C rdx
-define(`vp',	`%rdx')	C r8
-define(`n',	`%rcx')	C r9
-define(`cy',	`%r8')	C rsp+40    (mpn_add_nc and mpn_sub_nc)
-
-ifdef(`OPERATION_add_n', `
-	define(ADCSBB,	      adc)
-	define(func,	      mpn_add_n)
-	define(func_nc,	      mpn_add_nc)')
-ifdef(`OPERATION_sub_n', `
-	define(ADCSBB,	      sbb)
-	define(func,	      mpn_sub_n)
-	define(func_nc,	      mpn_sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	FUNC_ENTRY(4)
-	xor	%r8, %r8
-L(ent):	test	$1, R8(n)
-	jnz	L(bx1)
-
-L(bx0):	test	$2, R8(n)
-	jnz	L(b10)
-
-L(b00):	shr	$2, n
-	neg	%r8
-	mov	$3, R32(%rax)
-	mov	(up), %r10
-	mov	8(up), %r11
-	jmp	L(lo0)
-
-L(b10):	shr	$2, n
-	neg	%r8
-	mov	$1, R32(%rax)
-	mov	(up), %r8
-	mov	8(up), %r9
-	jrcxz	L(cj2)
-	jmp	L(top)
-
-L(bx1):	test	$2, R8(n)
-	jnz	L(b11)
-
-L(b01):	shr	$2, n
-	neg	%r8
-	mov	$0, R32(%rax)
-	mov	(up), %r9
-	jrcxz	L(cj1)
-	mov	8(up), %r10
-	jmp	L(lo1)
-
-	ALIGN(8)
-L(b11):	inc	n
-	shr	$2, n
-	neg	%r8
-	mov	$2, R32(%rax)
-	mov	(up), %r11
-	jmp	L(lo3)
-
-	ALIGN(4)
-L(top):	mov	8(up,%rax,8), %r10
-	ADCSBB	-8(vp,%rax,8), %r8
-	mov	%r8, -8(rp,%rax,8)
-L(lo1):	mov	16(up,%rax,8), %r11
-	ADCSBB	(vp,%rax,8), %r9
-	lea	4(%rax), %rax
-	mov	%r9, -32(rp,%rax,8)
-L(lo0):	ADCSBB	-24(vp,%rax,8), %r10
-	mov	%r10, -24(rp,%rax,8)
-L(lo3):	ADCSBB	-16(vp,%rax,8), %r11
-	dec	n
-	mov	-8(up,%rax,8), %r8
-	mov	%r11, -16(rp,%rax,8)
-L(lo2):	mov	(up,%rax,8), %r9
-	jnz	L(top)
-
-L(cj2):	ADCSBB	-8(vp,%rax,8), %r8
-	mov	%r8, -8(rp,%rax,8)
-L(cj1):	ADCSBB	(vp,%rax,8), %r9
-	mov	%r9, (rp,%rax,8)
-
-	mov	$0, R32(%rax)
-	adc	$0, R32(%rax)
-
-	FUNC_EXIT()
-	ret
-EPILOGUE()
-
-	ALIGN(16)
-PROLOGUE(func_nc)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	jmp	L(ent)
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/bobcat/aorsmul_1.asm b/gmp/mpn/x86_64/bobcat/aorsmul_1.asm
deleted file mode 100644
index 415a17cb7f..0000000000
--- a/gmp/mpn/x86_64/bobcat/aorsmul_1.asm
+++ /dev/null
@@ -1,183 +0,0 @@
-dnl  AMD64 mpn_addmul_1 and mpn_submul_1 optimised for AMD bobcat.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 4.5
-C AMD K10	 4.5
-C AMD bd1	 4.75
-C AMD bobcat	 5
-C Intel P4	17.7
-C Intel core2	 5.5
-C Intel NHM	 5.43
-C Intel SBR	 3.92
-C Intel atom	23
-C VIA nano	 5.63
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ifdef(`OPERATION_addmul_1',`
-      define(`ADDSUB',        `add')
-      define(`func',  `mpn_addmul_1')
-')
-ifdef(`OPERATION_submul_1',`
-      define(`ADDSUB',        `sub')
-      define(`func',  `mpn_submul_1')
-')
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-C Standard parameters
-define(`rp',              `%rdi')
-define(`up',              `%rsi')
-define(`n_param',         `%rdx')
-define(`v0',              `%rcx')
-C Standard allocations
-define(`n',               `%rbx')
-define(`w0',              `%r8')
-define(`w1',              `%r9')
-define(`w2',              `%r10')
-define(`w3',              `%r11')
-
-C DOS64 parameters
-IFDOS(` define(`rp',      `%rcx')    ') dnl
-IFDOS(` define(`up',      `%rsi')    ') dnl
-IFDOS(` define(`n_param', `%r8')     ') dnl
-IFDOS(` define(`v0',      `%r9')     ') dnl
-C DOS64 allocations
-IFDOS(` define(`n',       `%rbx')    ') dnl
-IFDOS(` define(`w0',      `%r8')     ') dnl
-IFDOS(` define(`w1',      `%rdi')    ') dnl
-IFDOS(` define(`w2',      `%r10')    ') dnl
-IFDOS(` define(`w3',      `%r11')    ') dnl
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-IFDOS(`	push	%rsi		')
-IFDOS(`	push	%rdi		')
-IFDOS(`	mov	%rdx, %rsi	')
-
-	push	%rbx
-	mov	(up), %rax
-
-	lea	-16(rp,n_param,8), rp
-	lea	-16(up,n_param,8), up
-
-	mov	n_param, n
-	and	$3, R32(n_param)
-	jz	L(b0)
-	cmp	$2, R32(n_param)
-	ja	L(b3)
-	jz	L(b2)
-
-L(b1):	mul	v0
-	cmp	$1, n
-	jz	L(n1)
-	mov	%rax, w2
-	mov	%rdx, w3
-	neg	n
-	add	$3, n
-	jmp	L(L1)
-L(n1):	ADDSUB	%rax, 8(rp)
-	adc	$0, %rdx
-	mov	%rdx, %rax
-	pop	%rbx
-IFDOS(`	pop	%rdi		')
-IFDOS(`	pop	%rsi		')
-	ret
-
-L(b3):	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	neg	n
-	inc	n
-	jmp	L(L3)
-
-L(b0):	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	neg	n
-	add	$2, n
-	jmp	L(L0)
-
-L(b2):	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	neg	n
-	jmp	L(L2)
-
-	ALIGN(16)
-L(top):	ADDSUB	w0, -16(rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-L(L1):	mov	0(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	ADDSUB	w2, -8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-L(L0):	mov	8(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	ADDSUB	w0, 0(rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-L(L3):	mov	16(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	ADDSUB	w2, 8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-L(L2):	mov	24(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, n
-	js	L(top)
-
-L(end):	ADDSUB	w0, (rp)
-	adc	w1, w2
-	adc	$0, w3
-	ADDSUB	w2, 8(rp)
-	adc	$0, w3
-	mov	w3, %rax
-
-	pop	%rbx
-IFDOS(`	pop	%rdi		')
-IFDOS(`	pop	%rsi		')
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/bobcat/copyd.asm b/gmp/mpn/x86_64/bobcat/copyd.asm
deleted file mode 100644
index 877714e903..0000000000
--- a/gmp/mpn/x86_64/bobcat/copyd.asm
+++ /dev/null
@@ -1,91 +0,0 @@
-dnl  AMD64 mpn_copyd optimised for AMD bobcat.
-
-dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 1
-C AMD K10	 1-2  (alignment fluctuations)
-C AMD bd1	 ?
-C AMD bobcat	 1.5
-C Intel P4	 2.8
-C Intel core2	 1
-C Intel NHM	 1-1.25
-C Intel SBR	 1
-C Intel atom	 2.87
-C VIA nano	 2
-
-C INPUT PARAMETERS
-C rp	rdi
-C up	rsi
-C n	rdx
-
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_copyd)
-	FUNC_ENTRY(3)
-	sub	$4, n
-	jl	L(end)
-	ALIGN(16)
-L(top):	mov	24(up,n,8), %r8
-	mov	%r8, 24(rp,n,8)
-	mov	16(up,n,8), %r8
-	mov	%r8, 16(rp,n,8)
-	mov	8(up,n,8), %r8
-	mov	%r8, 8(rp,n,8)
-	mov	(up,n,8), %r8
-	mov	%r8, (rp,n,8)
-L(ent):	sub	$4, n
-	jge	L(top)
-
-L(end):	cmp	$-4, R32(n)
-	jz	L(ret)
-	mov	24(up,n,8), %r8
-	mov	%r8, 24(rp,n,8)
-	cmp	$-3, R32(n)
-	jz	L(ret)
-	mov	16(up,n,8), %r8
-	mov	%r8, 16(rp,n,8)
-	cmp	$-2, R32(n)
-	jz	L(ret)
-	mov	8(up,n,8), %r8
-	mov	%r8, 8(rp,n,8)
-
-L(ret):	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/bobcat/copyi.asm b/gmp/mpn/x86_64/bobcat/copyi.asm
deleted file mode 100644
index ee0f578652..0000000000
--- a/gmp/mpn/x86_64/bobcat/copyi.asm
+++ /dev/null
@@ -1,94 +0,0 @@
-dnl  AMD64 mpn_copyi optimised for AMD bobcat.
-
-dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 1
-C AMD K10	 1-2  (alignment fluctuations)
-C AMD bd1	 ?
-C AMD bobcat	 1.5
-C Intel P4	 2.8
-C Intel core2	 1
-C Intel NHM	 1-1.25
-C Intel SBR	 1
-C Intel atom	 2.87
-C VIA nano	 2
-
-C INPUT PARAMETERS
-C rp	rdi
-C up	rsi
-C n	rdx
-
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_copyi)
-	FUNC_ENTRY(3)
-	lea	-32(up,n,8), up
-	lea	-32(rp,n,8), rp
-	neg	n
-	add	$4, n
-	jg	L(end)
-	ALIGN(16)
-L(top):	mov	(up,n,8), %r8
-	mov	%r8, (rp,n,8)
-	mov	8(up,n,8), %r8
-	mov	%r8, 8(rp,n,8)
-	mov	16(up,n,8), %r8
-	mov	%r8, 16(rp,n,8)
-	mov	24(up,n,8), %r8
-	mov	%r8, 24(rp,n,8)
-L(ent):	add	$4, n
-	jle	L(top)
-
-L(end):	cmp	$4, R32(n)
-	jz	L(ret)
-	mov	(up,n,8), %r8
-	mov	%r8, (rp,n,8)
-	cmp	$3, R32(n)
-	jz	L(ret)
-	mov	8(up,n,8), %r8
-	mov	%r8, 8(rp,n,8)
-	cmp	$2, R32(n)
-	jz	L(ret)
-	mov	16(up,n,8), %r8
-	mov	%r8, 16(rp,n,8)
-
-L(ret):	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/bobcat/gmp-mparam.h b/gmp/mpn/x86_64/bobcat/gmp-mparam.h
deleted file mode 100644
index de4c4e4528..0000000000
--- a/gmp/mpn/x86_64/bobcat/gmp-mparam.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/* AMD Bobcat gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2012, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-#undef HAVE_NATIVE_mpn_mul_2
-#undef HAVE_NATIVE_mpn_addmul_2
-
-/* 1600 MHz AMD Bobcat Zacate E-350 */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        32
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        43
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           17
-
-#define MUL_TOOM22_THRESHOLD                24
-#define MUL_TOOM33_THRESHOLD                36
-#define MUL_TOOM44_THRESHOLD               268
-#define MUL_TOOM6H_THRESHOLD               396
-#define MUL_TOOM8H_THRESHOLD               517
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      69
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     195
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     181
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      72
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     103
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 26
-#define SQR_TOOM3_THRESHOLD                 93
-#define SQR_TOOM4_THRESHOLD                375
-#define SQR_TOOM6_THRESHOLD                  0  /* always */
-#define SQR_TOOM8_THRESHOLD                478
-
-#define MULMID_TOOM42_THRESHOLD             22
-
-#define MULMOD_BNM1_THRESHOLD               11
-#define SQRMOD_BNM1_THRESHOLD               13
-
-#define MUL_FFT_MODF_THRESHOLD             400  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    400, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     10, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     20, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
-    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     31, 8}, {     17, 7}, {     36, 8}, \
-    {     19, 7}, {     39, 8}, {     21, 9}, {     11, 8}, \
-    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
-    {     37, 9}, {     19, 8}, {     43, 9}, {     23, 8}, \
-    {     49, 9}, {     27,10}, {     15, 9}, {     43,10}, \
-    {     23, 9}, {     55,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     83,10}, {     47, 9}, \
-    {     99,10}, {     55,11}, {     31,10}, {     63, 9}, \
-    {    127,10}, {     71, 9}, {    143,10}, {     79,11}, \
-    {     47,10}, {    103,12}, {     31,11}, {     63,10}, \
-    {    143,11}, {     79,10}, {    167,11}, {     95,10}, \
-    {    191, 9}, {    383,11}, {    111,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,11}, {    143,10}, \
-    {    287,11}, {    159,10}, {    319,12}, {     95,11}, \
-    {    191,10}, {    383,11}, {    207,10}, {    415,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    271,10}, {    543,11}, {    287,12}, {    159,11}, \
-    {    319,10}, {    639,11}, {    351,10}, {    703,11}, \
-    {    367,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,12}, {    223,11}, {    447,13}, {    127,12}, \
-    {    255,11}, {    543,12}, {    287,11}, {    607,12}, \
-    {    319,11}, {    639,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,12}, {    447,14}, {    127,13}, {    255,12}, \
-    {    607,13}, {    319,12}, {    703,13}, {    383,12}, \
-    {    831,13}, {    447,12}, {    895,14}, {    255,13}, \
-    {    511,12}, {   1023,13}, {    575,12}, {   1151,13}, \
-    {    703,14}, {    383,13}, {    831,12}, {   1663,13}, \
-    {    895,15}, {    255,14}, {    511,13}, {   1087,12}, \
-    {   2175,13}, {   1151,14}, {    639,13}, {   1407,14}, \
-    {    767,13}, {   1663,14}, {    895,15}, {    511,14}, \
-    {   1023,13}, {   2175,14}, {   1151,13}, {   2431,14}, \
-    {   1279,13}, {   2559,14}, {   1407,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 156
-#define MUL_FFT_THRESHOLD                 5504
-
-#define SQR_FFT_MODF_THRESHOLD             380  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    380, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     10, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     12, 5}, {     25, 6}, {     21, 7}, {     11, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     25, 8}, \
-    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
-    {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
-    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
-    {     41, 9}, {     23, 8}, {     49, 9}, {     27,10}, \
-    {     15, 9}, {     31, 8}, {     63, 9}, {     43,10}, \
-    {     23, 9}, {     55,10}, {     31, 9}, {     71,10}, \
-    {     39, 9}, {     83,10}, {     47, 9}, {     99,10}, \
-    {     55,11}, {     31,10}, {     79,11}, {     47,10}, \
-    {    103,12}, {     31,11}, {     63,10}, {    127, 9}, \
-    {    255,10}, {    135, 9}, {    271,11}, {     79,10}, \
-    {    159, 9}, {    319,11}, {     95,10}, {    191, 9}, \
-    {    383,11}, {    111,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,10}, {    271,11}, {    143,10}, \
-    {    287, 9}, {    575,10}, {    303,11}, {    159,10}, \
-    {    319,12}, {     95,11}, {    191,10}, {    383,11}, \
-    {    207,10}, {    415,13}, {     63,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    271,10}, {    575,11}, \
-    {    303,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    335,10}, {    671,11}, {    351,10}, {    703,11}, \
-    {    367,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,12}, {    223,11}, {    447,13}, {    127,12}, \
-    {    255,11}, {    543,12}, {    287,11}, {    607,12}, \
-    {    319,11}, {    671,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,12}, {    479,14}, {    127,13}, {    255,12}, \
-    {    607,13}, {    319,12}, {    703,13}, {    383,12}, \
-    {    831,13}, {    447,12}, {    895,14}, {    255,13}, \
-    {    511,12}, {   1023,13}, {    575,12}, {   1151,13}, \
-    {    703,14}, {    383,13}, {    831,12}, {   1663,13}, \
-    {    895,15}, {    255,14}, {    511,13}, {   1087,12}, \
-    {   2175,13}, {   1151,14}, {    639,13}, {   1343,12}, \
-    {   2687,14}, {    767,13}, {   1599,12}, {   3199,13}, \
-    {   1663,14}, {    895,15}, {    511,14}, {   1023,13}, \
-    {   2175,14}, {   1151,13}, {   2431,14}, {   1279,13}, \
-    {   2687,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 162
-#define SQR_FFT_THRESHOLD                 3712
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  26
-#define MULLO_MUL_N_THRESHOLD            10950
-
-#define DC_DIV_QR_THRESHOLD                 63
-#define DC_DIVAPPR_Q_THRESHOLD             198
-#define DC_BDIV_QR_THRESHOLD                56
-#define DC_BDIV_Q_THRESHOLD                127
-
-#define INV_MULMOD_BNM1_THRESHOLD           46
-#define INV_NEWTON_THRESHOLD               236
-#define INV_APPR_THRESHOLD                 204
-
-#define BINV_NEWTON_THRESHOLD              286
-#define REDC_1_TO_REDC_2_THRESHOLD          63
-#define REDC_2_TO_REDC_N_THRESHOLD           0  /* always */
-
-#define MU_DIV_QR_THRESHOLD               1499
-#define MU_DIVAPPR_Q_THRESHOLD            1499
-#define MUPI_DIV_QR_THRESHOLD               84
-#define MU_BDIV_QR_THRESHOLD              1334
-#define MU_BDIV_Q_THRESHOLD               1470
-
-#define POWM_SEC_TABLE  1,16,194,904,1167
-
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                     102
-#define HGCD_APPR_THRESHOLD                 50
-#define HGCD_REDUCE_THRESHOLD             2681
-#define GCD_DC_THRESHOLD                   416
-#define GCDEXT_DC_THRESHOLD                293
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                15
-#define GET_STR_PRECOMPUTE_THRESHOLD        26
-#define SET_STR_DC_THRESHOLD               248
-#define SET_STR_PRECOMPUTE_THRESHOLD      1160
-
-#define FAC_DSC_THRESHOLD                  746
-#define FAC_ODD_THRESHOLD                   44
diff --git a/gmp/mpn/x86_64/bobcat/mul_1.asm b/gmp/mpn/x86_64/bobcat/mul_1.asm
deleted file mode 100644
index cb58bef0b3..0000000000
--- a/gmp/mpn/x86_64/bobcat/mul_1.asm
+++ /dev/null
@@ -1,187 +0,0 @@
-dnl  AMD64 mpn_mul_1 optimised for AMD bobcat.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 4.5
-C AMD K10	 4.5
-C AMD bd1	 4.62
-C AMD bobcat	 5
-C Intel P4	14
-C Intel core2	 4.5
-C Intel NHM	 4.23
-C Intel SBR	 3.0
-C Intel atom	21
-C VIA nano	 4.94
-
-C The loop of this code is the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-C Standard parameters
-define(`rp',              `%rdi')
-define(`up',              `%rsi')
-define(`n_param',         `%rdx')
-define(`v0',              `%rcx')
-define(`cy',              `%r8')
-C Standard allocations
-define(`n',               `%rbx')
-define(`w0',              `%r8')
-define(`w1',              `%r9')
-define(`w2',              `%r10')
-define(`w3',              `%r11')
-
-C DOS64 parameters
-IFDOS(` define(`rp',      `%rcx')    ') dnl
-IFDOS(` define(`up',      `%rsi')    ') dnl
-IFDOS(` define(`n_param', `%r8')     ') dnl
-IFDOS(` define(`v0',      `%r9')     ') dnl
-IFDOS(` define(`cy',      `64(%rsp)')') dnl
-C DOS64 allocations
-IFDOS(` define(`n',       `%rbx')    ') dnl
-IFDOS(` define(`w0',      `%r8')     ') dnl
-IFDOS(` define(`w1',      `%rdi')    ') dnl
-IFDOS(` define(`w2',      `%r10')    ') dnl
-IFDOS(` define(`w3',      `%r11')    ') dnl
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mul_1c)
-IFDOS(`	push	%rsi		')
-IFDOS(`	push	%rdi		')
-IFDOS(`	mov	%rdx, %rsi	')
-	mov	cy, w2
-	jmp	L(com)
-EPILOGUE()
-
-PROLOGUE(mpn_mul_1)
-IFDOS(`	push	%rsi		')
-IFDOS(`	push	%rdi		')
-IFDOS(`	mov	%rdx, %rsi	')
-	xor	w2, w2
-L(com):	push	%rbx
-	mov	(up), %rax
-
-	lea	-16(rp,n_param,8), rp
-	lea	-16(up,n_param,8), up
-
-	mov	n_param, n
-	and	$3, R32(n_param)
-	jz	L(b0)
-	cmp	$2, R32(n_param)
-	ja	L(b3)
-	jz	L(b2)
-
-L(b1):	mul	v0
-	cmp	$1, n
-	jz	L(n1)
-	neg	n
-	add	$3, n
-	add	%rax, w2
-	mov	%rdx, w3
-	jmp	L(L1)
-L(n1):	add	%rax, w2
-	mov	%rdx, %rax
-	mov	w2, 8(rp)
-	adc	$0, %rax
-	pop	%rbx
-IFDOS(`	pop	%rdi		')
-IFDOS(`	pop	%rsi		')
-	ret
-
-L(b3):	mul	v0
-	neg	n
-	inc	n
-	add	%rax, w2
-	mov	%rdx, w3
-	jmp	L(L3)
-
-L(b0):	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	neg	n
-	add	$2, n
-	add	w2, w0
-	jmp	L(L0)
-
-L(b2):	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	neg	n
-	add	w2, w0
-	jmp	L(L2)
-
-	ALIGN(16)
-L(top):	mov	w0, -16(rp,n,8)
-	add	w1, w2
-L(L1):	adc	$0, w3
-	mov	0(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, -8(rp,n,8)
-	add	w3, w0
-L(L0):	adc	$0, w1
-	mov	8(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	mov	w0, 0(rp,n,8)
-	add	w1, w2
-L(L3):	adc	$0, w3
-	mov	16(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, 8(rp,n,8)
-	add	w3, w0
-L(L2):	adc	$0, w1
-	mov	24(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, n
-	js	L(top)
-
-L(end):	mov	w0, (rp)
-	add	w1, w2
-	adc	$0, w3
-	mov	w2, 8(rp)
-	mov	w3, %rax
-
-	pop	%rbx
-IFDOS(`	pop	%rdi		')
-IFDOS(`	pop	%rsi		')
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/bobcat/mul_basecase.asm b/gmp/mpn/x86_64/bobcat/mul_basecase.asm
deleted file mode 100644
index e7d46bfcff..0000000000
--- a/gmp/mpn/x86_64/bobcat/mul_basecase.asm
+++ /dev/null
@@ -1,486 +0,0 @@
-dnl  AMD64 mpn_mul_basecase optimised for AMD bobcat.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 4.5
-C AMD K10	 4.5
-C AMD bd1	 4.75
-C AMD bobcat	 5
-C Intel P4	17.7
-C Intel core2	 5.5
-C Intel NHM	 5.43
-C Intel SBR	 3.92
-C Intel atom	23
-C VIA nano	 5.63
-
-C This mul_basecase is based on mul_1 and addmul_1, since these both run at the
-C multiply insn bandwidth, without any apparent loop branch exit pipeline
-C replays experienced on K8.  The structure is unusual: it falls into mul_1 in
-C the same way for all n, then it splits into 4 different wind-down blocks and
-C 4 separate addmul_1 loops.
-C
-C We have not tried using the same addmul_1 loops with a switch into feed-in
-C code, as we do in other basecase implementations.  Doing that could save
-C substantial code volume, but would also probably add some overhead.
-
-C TODO
-C  * Tune un < 3 code.
-C  * Fix slowdown for un=vn=3 (67->71) compared to default code.
-C  * This is 1263 bytes, compared to 1099 bytes for default code.  Consider
-C    combining addmul loops like that code.  Tolerable slowdown?
-C  * Lots of space could be saved by replacing the "switch" code by gradual
-C    jumps out from mul_1 winddown code, perhaps with no added overhead.
-C  * Are the ALIGN(16) really necessary?  They add about 25 bytes of padding.
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-C Standard parameters
-define(`rp',              `%rdi')
-define(`up',              `%rsi')
-define(`un_param',        `%rdx')
-define(`vp',              `%rcx')
-define(`vn',              `%r8')
-C Standard allocations
-define(`un',              `%rbx')
-define(`w0',              `%r10')
-define(`w1',              `%r11')
-define(`w2',              `%r12')
-define(`w3',              `%r13')
-define(`n',               `%rbp')
-define(`v0',              `%r9')
-
-C Temp macro for allowing control over indexing.
-C Define to return $1 for more conservative ptr handling.
-define(`X',`$2')
-
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mul_basecase)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')
-
-	mov	(up), %rax
-	mov	(vp), v0
-
-	cmp	$2, un_param
-	ja	L(ge3)
-	jz	L(u2)
-
-	mul	v0			C u0 x v0
-	mov	%rax, (rp)
-	mov	%rdx, 8(rp)
-	FUNC_EXIT()
-	ret
-
-L(u2):	mul	v0			C u0 x v0
-	mov	%rax, (rp)
-	mov	8(up), %rax
-	mov	%rdx, w0
-	mul	v0
-	add	%rax, w0
-	mov	%rdx, w1
-	adc	$0, w1
-	cmp	$1, R32(vn)
-	jnz	L(u2v2)
-	mov	w0, 8(rp)
-	mov	w1, 16(rp)
-	FUNC_EXIT()
-	ret
-
-L(u2v2):mov	8(vp), v0
-	mov	(up), %rax
-	mul	v0
-	add	%rax, w0
-	mov	w0, 8(rp)
-	mov	%rdx, %r8		C CAUTION: r8 realloc
-	adc	$0, %r8
-	mov	8(up), %rax
-	mul	v0
-	add	w1, %r8
-	adc	$0, %rdx
-	add	%r8, %rax
-	adc	$0, %rdx
-	mov	%rax, 16(rp)
-	mov	%rdx, 24(rp)
-	FUNC_EXIT()
-	ret
-
-
-L(ge3):	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-
-	lea	8(vp), vp
-
-	lea	-24(rp,un_param,8), rp
-	lea	-24(up,un_param,8), up
-	xor	R32(un), R32(un)
-	mov	$2, R32(n)
-	sub	un_param, un
-	sub	un_param, n
-
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	jmp	L(L3)
-
-	ALIGN(16)
-L(top):	mov	w0, -16(rp,n,8)
-	add	w1, w2
-	adc	$0, w3
-	mov	(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, -8(rp,n,8)
-	add	w3, w0
-	adc	$0, w1
-	mov	8(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	mov	w0, (rp,n,8)
-	add	w1, w2
-	adc	$0, w3
-L(L3):	mov	16(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, 8(rp,n,8)
-	add	w3, w0
-	adc	$0, w1
-	mov	24(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, n
-	js	L(top)
-
-	mov	w0, -16(rp,n,8)
-	add	w1, w2
-	adc	$0, w3
-
-C Switch on n into right addmul_l loop
-	test	n, n
-	jz	L(r2)
-	cmp	$2, R32(n)
-	ja	L(r3)
-	jz	L(r0)
-	jmp	L(r1)
-
-
-L(r3):	mov	w2, X(-8(rp,n,8),16(rp))
-	mov	w3, X((rp,n,8),24(rp))
-	add	$2, un
-
-C outer loop(3)
-L(to3):	dec	vn
-	jz	L(ret)
-	mov	(vp), v0
-	mov	8(up,un,8), %rax
-	lea	8(vp), vp
-	lea	8(rp), rp
-	mov	un, n
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	jmp	L(al3)
-
-	ALIGN(16)
-L(ta3):	add	w0, -16(rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, -8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	8(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	w0, (rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-L(al3):	mov	16(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, 8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	24(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, n
-	js	L(ta3)
-
-	add	w0, X(-16(rp,n,8),8(rp))
-	adc	w1, w2
-	adc	$0, w3
-	add	w2, X(-8(rp,n,8),16(rp))
-	adc	$0, w3
-	mov	w3, X((rp,n,8),24(rp))
-	jmp	L(to3)
-
-
-L(r2):	mov	X(0(up,n,8),(up)), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, X(-8(rp,n,8),-8(rp))
-	add	w3, w0
-	adc	$0, w1
-	mov	X(8(up,n,8),8(up)), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	mov	w0, X((rp,n,8),(rp))
-	add	w1, w2
-	adc	$0, w3
-	mov	X(16(up,n,8),16(up)), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, X(8(rp,n,8),8(rp))
-	add	w3, w0
-	adc	$0, w1
-	mov	w0, X(16(rp,n,8),16(rp))
-	adc	$0, w3
-	mov	w1, X(24(rp,n,8),24(rp))
-	inc	un
-
-C outer loop(2)
-L(to2):	dec	vn
-	jz	L(ret)
-	mov	(vp), v0
-	mov	16(up,un,8), %rax
-	lea	8(vp), vp
-	lea	8(rp), rp
-	mov	un, n
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	jmp	L(al2)
-
-	ALIGN(16)
-L(ta2):	add	w0, -16(rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, -8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	8(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	w0, (rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	16(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, 8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-L(al2):	mov	24(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, n
-	js	L(ta2)
-
-	add	w0, X(-16(rp,n,8),8(rp))
-	adc	w1, w2
-	adc	$0, w3
-	add	w2, X(-8(rp,n,8),16(rp))
-	adc	$0, w3
-	mov	w3, X((rp,n,8),24(rp))
-	jmp	L(to2)
-
-
-L(r1):	mov	X(0(up,n,8),8(up)), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, X(-8(rp,n,8),(rp))
-	add	w3, w0
-	adc	$0, w1
-	mov	X(8(up,n,8),16(up)), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	mov	w0, X((rp,n,8),8(rp))
-	add	w1, w2
-	adc	$0, w3
-	mov	w2, X(8(rp,n,8),16(rp))
-	mov	w3, X(16(rp,n,8),24(rp))
-	add	$4, un
-
-C outer loop(1)
-L(to1):	dec	vn
-	jz	L(ret)
-	mov	(vp), v0
-	mov	-8(up,un,8), %rax
-	lea	8(vp), vp
-	lea	8(rp), rp
-	mov	un, n
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	jmp	L(al1)
-
-	ALIGN(16)
-L(ta1):	add	w0, -16(rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-L(al1):	mov	(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, -8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	8(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	w0, (rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	16(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, 8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	24(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, n
-	js	L(ta1)
-
-	add	w0, X(-16(rp,n,8),8(rp))
-	adc	w1, w2
-	adc	$0, w3
-	add	w2, X(-8(rp,n,8),16(rp))
-	adc	$0, w3
-	mov	w3, X((rp,n,8),24(rp))
-	jmp	L(to1)
-
-
-L(r0):	mov	X((up,n,8),16(up)), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, X(-8(rp,n,8),8(rp))
-	add	w3, w0
-	adc	$0, w1
-	mov	w0, X((rp,n,8),16(rp))
-	mov	w1, X(8(rp,n,8),24(rp))
-	add	$3, un
-
-C outer loop(0)
-L(to0):	dec	vn
-	jz	L(ret)
-	mov	(vp), v0
-	mov	(up,un,8), %rax
-	lea	8(vp), vp
-	lea	8(rp), rp
-	mov	un, n
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	jmp	L(al0)
-
-	ALIGN(16)
-L(ta0):	add	w0, -16(rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, -8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-L(al0):	mov	8(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	w0, (rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	16(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, 8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	24(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, n
-	js	L(ta0)
-
-	add	w0, X(-16(rp,n,8),8(rp))
-	adc	w1, w2
-	adc	$0, w3
-	add	w2, X(-8(rp,n,8),16(rp))
-	adc	$0, w3
-	mov	w3, X((rp,n,8),24(rp))
-	jmp	L(to0)
-
-
-L(ret):	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/bobcat/redc_1.asm b/gmp/mpn/x86_64/bobcat/redc_1.asm
deleted file mode 100644
index c3798021f7..0000000000
--- a/gmp/mpn/x86_64/bobcat/redc_1.asm
+++ /dev/null
@@ -1,502 +0,0 @@
-dnl  X86-64 mpn_redc_1 optimised for AMD bobcat.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C AMD bull	 ?
-C AMD pile	 ?
-C AMD steam	 ?
-C AMD bobcat	 5.0
-C AMD jaguar	 ?
-C Intel P4	 ?
-C Intel core	 ?
-C Intel NHM	 ?
-C Intel SBR	 ?
-C Intel IBR	 ?
-C Intel HWL	 ?
-C Intel BWL	 ?
-C Intel atom	 ?
-C VIA nano	 ?
-
-C TODO
-C  * Micro-optimise, none performed thus far.
-C  * Consider inlining mpn_add_n.
-C  * Single basecases out before the pushes.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-define(`rp',          `%rdi')   C rcx
-define(`up',          `%rsi')   C rdx
-define(`mp_param',    `%rdx')   C r8
-define(`n',           `%rcx')   C r9
-define(`u0inv',       `%r8')    C stack
-
-define(`i',           `%r14')
-define(`j',           `%r15')
-define(`mp',          `%r12')
-define(`q0',          `%r13')
-define(`w0',          `%rbp')
-define(`w1',          `%r9')
-define(`w2',          `%r10')
-define(`w3',          `%r11')
-
-C rax rbx rcx rdx rdi rsi rbp r8 r9 r10 r11 r12 r13 r14 r15
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-define(`ALIGNx', `ALIGN(16)')
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_redc_1)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	mov	(up), q0
-	mov	n, j			C outer loop induction var
-	lea	(mp_param,n,8), mp
-	lea	(up,n,8), up
-	neg	n
-	imul	u0inv, q0		C first iteration q0
-
-	test	$1, R8(n)
-	jz	L(bx0)
-
-L(bx1):	test	$2, R8(n)
-	jz	L(b3)
-
-L(b1):	cmp	$-1, R32(n)
-	jz	L(n1)
-
-L(otp1):lea	1(n), i
-	mov	(mp,n,8), %rax
-	mul	q0
-	mov	%rax, w2
-	mov	%rdx, w3
-	mov	8(mp,n,8), %rax
-	mul	q0
-	mov	%rax, %rbx
-	mov	%rdx, w1
-	add	(up,n,8), w2
-	adc	w3, %rbx
-	adc	$0, w1
-	mov	16(mp,n,8), %rax
-	mul	q0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	8(up,n,8), %rbx
-	mov	%rbx, 8(up,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e1)
-
-	ALIGNx
-L(tp1):	add	w0, -16(up,i,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, -8(up,i,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	8(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	w0, (up,i,8)
-	adc	w1, w2
-	adc	$0, w3
-L(e1):	mov	16(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, 8(up,i,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	24(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, i
-	js	L(tp1)
-
-L(ed1):	add	w0, I(-16(up),-16(up,i,8))
-	adc	w1, w2
-	adc	$0, w3
-	add	w2, I(-8(up),-8(up,i,8))
-	adc	$0, w3
-	mov	w3, (up,n,8)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp1)
-	jmp	L(cj)
-
-L(b3):	cmp	$-3, R32(n)
-	jz	L(n3)
-
-L(otp3):lea	3(n), i
-	mov	(mp,n,8), %rax
-	mul	q0
-	mov	%rax, w2
-	mov	%rdx, w3
-	mov	8(mp,n,8), %rax
-	mul	q0
-	mov	%rax, %rbx
-	mov	%rdx, w1
-	add	(up,n,8), w2
-	adc	w3, %rbx
-	adc	$0, w1
-	mov	16(mp,n,8), %rax
-	mul	q0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	8(up,n,8), %rbx
-	mov	%rbx, 8(up,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e3)
-
-	ALIGNx
-L(tp3):	add	w0, -16(up,i,8)
-	adc	w1, w2
-	adc	$0, w3
-L(e3):	mov	(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, -8(up,i,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	8(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	w0, (up,i,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	16(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, 8(up,i,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	24(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, i
-	js	L(tp3)
-
-L(ed3):	add	w0, I(-16(up),-16(up,i,8))
-	adc	w1, w2
-	adc	$0, w3
-	add	w2, I(-8(up),-8(up,i,8))
-	adc	$0, w3
-	mov	w3, (up,n,8)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp3)
-C	jmp	L(cj)
-
-L(cj):
-IFSTD(`	lea	(up,n,8), up		C param 2: up
-	lea	(up,n,8), %rdx		C param 3: up - n
-	neg	R32(n)		')	C param 4: n
-
-IFDOS(`	lea	(up,n,8), %rdx		C param 2: up
-	lea	(%rdx,n,8), %r8		C param 3: up - n
-	neg	R32(n)
-	mov	n, %r9			C param 4: n
-	mov	rp, %rcx	')	C param 1: rp
-
-	CALL(	mpn_add_n)
-
-L(ret):	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-L(bx0):	test	$2, R8(n)
-	jnz	L(b2)
-
-L(b0):
-L(otp0):lea	(n), i
-	mov	(mp,n,8), %rax
-	mul	q0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	8(mp,n,8), %rax
-	mul	q0
-	mov	%rax, %rbx
-	mov	%rdx, w3
-	add	(up,n,8), w0
-	adc	w1, %rbx
-	adc	$0, w3
-	mov	16(mp,n,8), %rax
-	mul	q0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	8(up,n,8), %rbx
-	mov	%rbx, 8(up,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e0)
-
-	ALIGNx
-L(tp0):	add	w0, -16(up,i,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, -8(up,i,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	8(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	w0, (up,i,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	16(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, 8(up,i,8)
-	adc	w3, w0
-	adc	$0, w1
-L(e0):	mov	24(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, i
-	js	L(tp0)
-
-L(ed0):	add	w0, I(-16(up),-16(up,i,8))
-	adc	w1, w2
-	adc	$0, w3
-	add	w2, I(-8(up),-8(up,i,8))
-	adc	$0, w3
-	mov	w3, (up,n,8)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp0)
-	jmp	L(cj)
-
-L(b2):	cmp	$-2, R32(n)
-	jz	L(n2)
-
-L(otp2):lea	2(n), i
-	mov	(mp,n,8), %rax
-	mul	q0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	8(mp,n,8), %rax
-	mul	q0
-	mov	%rax, %rbx
-	mov	%rdx, w3
-	add	(up,n,8), w0
-	adc	w1, %rbx
-	adc	$0, w3
-	mov	16(mp,n,8), %rax
-	mul	q0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	8(up,n,8), %rbx
-	mov	%rbx, 8(up,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e2)
-
-	ALIGNx
-L(tp2):	add	w0, -16(up,i,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, -8(up,i,8)
-	adc	w3, w0
-	adc	$0, w1
-L(e2):	mov	8(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	w0, (up,i,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	16(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, 8(up,i,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	24(mp,i,8), %rax
-	mul	q0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, i
-	js	L(tp2)
-
-L(ed2):	add	w0, I(-16(up),-16(up,i,8))
-	adc	w1, w2
-	adc	$0, w3
-	add	w2, I(-8(up),-8(up,i,8))
-	adc	$0, w3
-	mov	w3, (up,n,8)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp2)
-	jmp	L(cj)
-
-L(n1):	mov	(mp_param), %rax
-	mul	q0
-	add	-8(up), %rax
-	adc	(up), %rdx
-	mov	%rdx, (rp)
-	mov	$0, R32(%rax)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-
-L(n2):	mov	(mp_param), %rax
-	mov	-16(up), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-8(mp), %rax
-	mov	-8(up), %r10
-	mul	q0
-	add	%rax, %r10
-	mov	%rdx, %r11
-	adc	$0, %r11
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	%r10, q0
-	imul	u0inv, q0		C next q0
-	mov	-16(mp), %rax
-	mul	q0
-	add	%rax, %r10
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-8(mp), %rax
-	mov	(up), %r14
-	mul	q0
-	add	%rax, %r14
-	adc	$0, %rdx
-	add	%r9, %r14
-	adc	$0, %rdx
-	xor	R32(%rax), R32(%rax)
-	add	%r11, %r14
-	adc	8(up), %rdx
-	mov	%r14, (rp)
-	mov	%rdx, 8(rp)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-
-	ALIGNx
-L(n3):	mov	-24(mp), %rax
-	mov	-24(up), %r10
-	mul	q0
-	add	%rax, %r10
-	mov	-16(mp), %rax
-	mov	%rdx, %r11
-	adc	$0, %r11
-	mov	-16(up), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-8(mp), %rax
-	add	%r11, %rbp
-	mov	-8(up), %r10
-	adc	$0, %r9
-	mul	q0
-	mov	%rbp, q0
-	imul	u0inv, q0		C next q0
-	add	%rax, %r10
-	mov	%rdx, %r11
-	adc	$0, %r11
-	mov	%rbp, -16(up)
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	%r10, -8(up)
-	mov	%r11, -24(up)		C up[0]
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(n3)
-
-	mov	-48(up), %rdx
-	mov	-40(up), %rbx
-	xor	R32(%rax), R32(%rax)
-	add	%rbp, %rdx
-	adc	%r10, %rbx
-	adc	-8(up), %r11
-	mov	%rdx, (rp)
-	mov	%rbx, 8(rp)
-	mov	%r11, 16(rp)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/bobcat/sqr_basecase.asm b/gmp/mpn/x86_64/bobcat/sqr_basecase.asm
deleted file mode 100644
index 0e417a1ebe..0000000000
--- a/gmp/mpn/x86_64/bobcat/sqr_basecase.asm
+++ /dev/null
@@ -1,565 +0,0 @@
-dnl  AMD64 mpn_sqr_basecase optimised for AMD bobcat.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 4.5
-C AMD K10	 4.5
-C AMD bd1	 4.75
-C AMD bobcat	 5
-C Intel P4	17.7
-C Intel core2	 5.5
-C Intel NHM	 5.43
-C Intel SBR	 3.92
-C Intel atom	23
-C VIA nano	 5.63
-
-C This sqr_basecase is based on mul_1 and addmul_1, since these both run at the
-C multiply insn bandwidth, without any apparent loop branch exit pipeline
-C replays experienced on K8.  The structure is unusual: it falls into mul_1 in
-C the same way for all n, then it splits into 4 different wind-down blocks and
-C 4 separate addmul_1 loops.
-C
-C We have not tried using the same addmul_1 loops with a switch into feed-in
-C code, as we do in other basecase implementations.  Doing that could save
-C substantial code volume, but would also probably add some overhead.
-
-C TODO
-C  * Tune un < 4 code.
-C  * Perhaps implement a larger final corner (it is now 2 x 1).
-C  * Lots of space could be saved by replacing the "switch" code by gradual
-C    jumps out from mul_1 winddown code, perhaps with no added overhead.
-C  * Are the ALIGN(16) really necessary?  They add about 25 bytes of padding.
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-C Standard parameters
-define(`rp',              `%rdi')
-define(`up',              `%rsi')
-define(`un_param',        `%rdx')
-C Standard allocations
-define(`un',              `%rbx')
-define(`w0',              `%r8')
-define(`w1',              `%r9')
-define(`w2',              `%r10')
-define(`w3',              `%r11')
-define(`n',               `%rbp')
-define(`v0',              `%rcx')
-
-C Temp macro for allowing control over indexing.
-C Define to return $1 for more conservative ptr handling.
-define(`X',`$2')
-dnl define(`X',`$1')
-
-
-ASM_START()
-	TEXT
-	ALIGN(64)
-PROLOGUE(mpn_sqr_basecase)
-	FUNC_ENTRY(3)
-
-	mov	(up), %rax
-
-	cmp	$2, R32(un_param)
-	jae	L(ge2)
-
-	mul	%rax
-	mov	%rax, (rp)
-	mov	%rdx, 8(rp)
-	FUNC_EXIT()
-	ret
-
-L(ge2):	mov	(up), v0
-	jnz	L(g2)
-
-	mul	%rax
-	mov	%rax, (rp)
-	mov	8(up), %rax
-	mov	%rdx, w0
-	mul	v0
-	add	%rax, w0
-	mov	%rdx, w1
-	adc	$0, w1
-	mov	8(up), v0
-	mov	(up), %rax
-	mul	v0
-	add	%rax, w0
-	mov	w0, 8(rp)
-	mov	%rdx, w0		C CAUTION: r8 realloc
-	adc	$0, w0
-	mov	8(up), %rax
-	mul	v0
-	add	w1, w0
-	adc	$0, %rdx
-	add	w0, %rax
-	adc	$0, %rdx
-	mov	%rax, 16(rp)
-	mov	%rdx, 24(rp)
-	FUNC_EXIT()
-	ret
-
-L(g2):	cmp	$3, R32(un_param)
-	ja	L(g3)
-	mul	%rax
-	mov	%rax, (rp)
-	mov	%rdx, 8(rp)
-	mov	8(up), %rax
-	mul	%rax
-	mov	%rax, 16(rp)
-	mov	%rdx, 24(rp)
-	mov	16(up), %rax
-	mul	%rax
-	mov	%rax, 32(rp)
-	mov	%rdx, 40(rp)
-
-	mov	(up), v0
-	mov	8(up), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	16(up), %rax
-	mul	v0
-	xor	R32(w2), R32(w2)
-	add	%rax, w1
-	adc	%rdx, w2
-
-	mov	8(up), v0
-	mov	16(up), %rax
-	mul	v0
-	xor	R32(w3), R32(w3)
-	add	%rax, w2
-	adc	%rdx, w3
-	add	w0, w0
-	adc	w1, w1
-	adc	w2, w2
-	adc	w3, w3
-	mov	$0, R32(v0)
-	adc	v0, v0
-	add	w0, 8(rp)
-	adc	w1, 16(rp)
-	adc	w2, 24(rp)
-	adc	w3, 32(rp)
-	adc	v0, 40(rp)
-	FUNC_EXIT()
-	ret
-
-L(g3):	push	%rbx
-	push	%rbp
-
-	mov	8(up), %rax
-	lea	-24(rp,un_param,8), rp
-	lea	-24(up,un_param,8), up
-	neg	un_param
-	push	un_param		C for sqr_diag_addlsh1
-	lea	(un_param), un
-	lea	3(un_param), n
-
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	jmp	L(L3)
-
-	ALIGN(16)
-L(top):	mov	w0, -16(rp,n,8)
-	add	w1, w2
-	adc	$0, w3
-	mov	(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, -8(rp,n,8)
-	add	w3, w0
-	adc	$0, w1
-	mov	8(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	mov	w0, (rp,n,8)
-	add	w1, w2
-	adc	$0, w3
-L(L3):	mov	16(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, 8(rp,n,8)
-	add	w3, w0
-	adc	$0, w1
-	mov	24(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, n
-	js	L(top)
-
-	mov	w0, -16(rp,n,8)
-	add	w1, w2
-	adc	$0, w3
-
-	test	n, n
-	jz	L(r2)
-	cmp	$2, R32(n)
-	ja	L(r3)
-	jz	L(r0)
-
-
-L(r1):	mov	X((up,n,8),8(up)), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, X(-8(rp,n,8),(rp))
-	add	w3, w0
-	adc	$0, w1
-	mov	X(8(up,n,8),16(up)), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	mov	w0, X((rp,n,8),8(rp))
-	add	w1, w2
-	adc	$0, w3
-	mov	w2, X(8(rp,n,8),16(rp))
-	mov	w3, X(16(rp,n,8),24(rp))
-	add	$5, un
-	jmp	L(to0)
-
-L(r2):	mov	X((up,n,8),(up)), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, X(-8(rp,n,8),-8(rp))
-	add	w3, w0
-	adc	$0, w1
-	mov	X(8(up,n,8),8(up)), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	mov	w0, X((rp,n,8),(rp))
-	add	w1, w2
-	adc	$0, w3
-	mov	X(16(up,n,8),16(up)), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, X(8(rp,n,8),8(rp))
-	add	w3, w0
-	adc	$0, w1
-	mov	w0, X(16(rp,n,8),16(rp))
-	adc	$0, w3
-	mov	w1, X(24(rp,n,8),24(rp))
-	add	$6, un
-	jmp	L(to1)
-
-L(r3):	mov	w2, X(-8(rp,n,8),16(rp))
-	mov	w3, X((rp,n,8),24(rp))
-	add	$3, un
-	jmp	L(to2)
-
-L(r0):	mov	X((up,n,8),16(up)), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	w2, X(-8(rp,n,8),8(rp))
-	add	w3, w0
-	adc	$0, w1
-	mov	w0, X((rp,n,8),16(rp))
-	mov	w1, X(8(rp,n,8),24(rp))
-	add	$4, un
-C	jmp	L(to3)
-C fall through into main loop
-
-
-L(outer):
-	mov	un, n
-	mov	(up,un,8), v0
-	mov	8(up,un,8), %rax
-	lea	8(rp), rp
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	jmp	L(al3)
-
-	ALIGN(16)
-L(ta3):	add	w0, -16(rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, -8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	8(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	w0, (rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-L(al3):	mov	16(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, 8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	24(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, n
-	js	L(ta3)
-
-	add	w0, X(-16(rp,n,8),8(rp))
-	adc	w1, w2
-	adc	$0, w3
-	add	w2, X(-8(rp,n,8),16(rp))
-	adc	$0, w3
-	mov	w3, X((rp,n,8),24(rp))
-
-
-L(to2):	mov	un, n
-	cmp	$-4, R32(un)
-	jnc	L(end)
-	add	$4, un
-	mov	8(up,n,8), v0
-	mov	16(up,n,8), %rax
-	lea	8(rp), rp
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	jmp	L(al2)
-
-	ALIGN(16)
-L(ta2):	add	w0, -16(rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, -8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	8(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	w0, (rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	16(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, 8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-L(al2):	mov	24(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, n
-	js	L(ta2)
-
-	add	w0, X(-16(rp,n,8),8(rp))
-	adc	w1, w2
-	adc	$0, w3
-	add	w2, X(-8(rp,n,8),16(rp))
-	adc	$0, w3
-	mov	w3, X((rp,n,8),24(rp))
-
-
-L(to1):	mov	un, n
-	mov	-16(up,un,8), v0
-	mov	-8(up,un,8), %rax
-	lea	8(rp), rp
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	jmp	L(al1)
-
-	ALIGN(16)
-L(ta1):	add	w0, -16(rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-L(al1):	mov	(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, -8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	8(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	w0, (rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	16(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, 8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	24(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, n
-	js	L(ta1)
-
-	add	w0, X(-16(rp,n,8),8(rp))
-	adc	w1, w2
-	adc	$0, w3
-	add	w2, X(-8(rp,n,8),16(rp))
-	adc	$0, w3
-	mov	w3, X((rp,n,8),24(rp))
-
-
-L(to0):	mov	un, n
-	mov	-8(up,un,8), v0
-	mov	(up,un,8), %rax
-	lea	8(rp), rp
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	jmp	L(al0)
-
-	ALIGN(16)
-L(ta0):	add	w0, -16(rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, -8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-L(al0):	mov	8(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	w0, (rp,n,8)
-	adc	w1, w2
-	adc	$0, w3
-	mov	16(up,n,8), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	add	w2, 8(rp,n,8)
-	adc	w3, w0
-	adc	$0, w1
-	mov	24(up,n,8), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	$4, n
-	js	L(ta0)
-
-	add	w0, X(-16(rp,n,8),8(rp))
-	adc	w1, w2
-	adc	$0, w3
-	add	w2, X(-8(rp,n,8),16(rp))
-	adc	$0, w3
-	mov	w3, X((rp,n,8),24(rp))
-	jmp	L(outer)
-
-
-L(end):	mov	X(8(up,un,8),(up)), v0
-	mov	X(16(up,un,8),8(up)), %rax
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	X(24(up,un,8),16(up)), %rax
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w3
-	add	w0, X(24(rp,un,8),16(rp))
-	adc	w1, w2
-	adc	$0, w3
-	add	w2, X(32(rp,un,8),24(rp))
-	adc	$0, w3
-	mov	X(16(up,un,8),8(up)), v0
-	mov	X(24(up,un,8),16(up)), %rax
-	mul	v0
-	add	%rax, w3
-	mov	w3, X(40(rp,un,8),32(rp))
-	adc	$0, %rdx
-	mov	%rdx, X(48(rp,un,8),40(rp))
-
-
-C sqr_diag_addlsh1
-
-	lea	16(up), up
-	lea	40(rp), rp
-	pop	n
-	lea	2(n,n), n
-
-	mov	(up,n,4), %rax
-	mul	%rax
-	xor	R32(w2), R32(w2)
-
-	mov	8(rp,n,8), w0
-	mov	%rax, (rp,n,8)
-	jmp	L(lm)
-
-	ALIGN(8)
-L(tsd):	add	%rbx, w0
-	adc	%rax, w1
-	mov	w0, -8(rp,n,8)
-	mov	8(rp,n,8), w0
-	mov	w1, (rp,n,8)
-L(lm):	mov	16(rp,n,8), w1
-	adc	w0, w0
-	adc	w1, w1
-	lea	(%rdx,w2), %rbx
-	mov	8(up,n,4), %rax
-	setc	R8(w2)
-	mul	%rax
-	add	$2, n
-	js	L(tsd)
-
-L(esd):	add	%rbx, w0
-	adc	%rax, w1
-	mov	w0, X(-8(rp,n,8),-8(rp))
-	mov	w1, X((rp,n,8),(rp))
-	adc	w2, %rdx
-	mov	%rdx, X(8(rp,n,8),8(rp))
-
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/cnd_aors_n.asm b/gmp/mpn/x86_64/cnd_aors_n.asm
deleted file mode 100644
index 13a2ab3be9..0000000000
--- a/gmp/mpn/x86_64/cnd_aors_n.asm
+++ /dev/null
@@ -1,183 +0,0 @@
-dnl  AMD64 mpn_cnd_add_n, mpn_cnd_sub_n
-
-dnl  Copyright 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 2
-C AMD K10	 2
-C AMD bd1	 2.32
-C AMD bobcat	 3
-C Intel P4	13
-C Intel core2	 2.9
-C Intel NHM	 2.8
-C Intel SBR	 2.4
-C Intel atom	 5.33
-C VIA nano	 3
-
-C NOTES
-C  * It might seem natural to use the cmov insn here, but since this function
-C    is supposed to have the exact same execution pattern for cnd true and
-C    false, and since cmov's documentation is not clear about whether it
-C    actually reads both source operands and writes the register for a false
-C    condition, we cannot use it.
-C  * Two cases could be optimised: (1) cnd_add_n could use ADCSBB-from-memory
-C    to save one insn/limb, and (2) when up=rp cnd_add_n and cnd_sub_n could use
-C    ADCSBB-to-memory, again saving 1 insn/limb.
-C  * This runs optimally at decoder bandwidth on K10.  It has not been tuned
-C    for any other processor.
-
-C INPUT PARAMETERS
-define(`cnd',	`%rdi')	dnl rcx
-define(`rp',	`%rsi')	dnl rdx
-define(`up',	`%rdx')	dnl r8
-define(`vp',	`%rcx')	dnl r9
-define(`n',	`%r8')	dnl rsp+40
-
-ifdef(`OPERATION_cnd_add_n', `
-	define(ADDSUB,	      add)
-	define(ADCSBB,	      adc)
-	define(func,	      mpn_cnd_add_n)')
-ifdef(`OPERATION_cnd_sub_n', `
-	define(ADDSUB,	      sub)
-	define(ADCSBB,	      sbb)
-	define(func,	      mpn_cnd_sub_n)')
-
-MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), R32(%r8)')
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-
-	neg	cnd
-	sbb	cnd, cnd		C make cnd mask
-
-	lea	(vp,n,8), vp
-	lea	(up,n,8), up
-	lea	(rp,n,8), rp
-
-	mov	R32(n), R32(%rax)
-	neg	n
-	and	$3, R32(%rax)
-	jz	L(top)			C carry-save reg rax = 0 in this arc
-	cmp	$2, R32(%rax)
-	jc	L(b1)
-	jz	L(b2)
-
-L(b3):	mov	(vp,n,8), %r12
-	mov	8(vp,n,8), %r13
-	mov	16(vp,n,8), %r14
-	and	cnd, %r12
-	mov	(up,n,8), %r10
-	and	cnd, %r13
-	mov	8(up,n,8), %rbx
-	and	cnd, %r14
-	mov	16(up,n,8), %rbp
-	ADDSUB	%r12, %r10
-	mov	%r10, (rp,n,8)
-	ADCSBB	%r13, %rbx
-	mov	%rbx, 8(rp,n,8)
-	ADCSBB	%r14, %rbp
-	mov	%rbp, 16(rp,n,8)
-	sbb	R32(%rax), R32(%rax)	C save carry
-	add	$3, n
-	js	L(top)
-	jmp	L(end)
-
-L(b2):	mov	(vp,n,8), %r12
-	mov	8(vp,n,8), %r13
-	mov	(up,n,8), %r10
-	and	cnd, %r12
-	mov	8(up,n,8), %rbx
-	and	cnd, %r13
-	ADDSUB	%r12, %r10
-	mov	%r10, (rp,n,8)
-	ADCSBB	%r13, %rbx
-	mov	%rbx, 8(rp,n,8)
-	sbb	R32(%rax), R32(%rax)	C save carry
-	add	$2, n
-	js	L(top)
-	jmp	L(end)
-
-L(b1):	mov	(vp,n,8), %r12
-	mov	(up,n,8), %r10
-	and	cnd, %r12
-	ADDSUB	%r12, %r10
-	mov	%r10, (rp,n,8)
-	sbb	R32(%rax), R32(%rax)	C save carry
-	add	$1, n
-	jns	L(end)
-
-	ALIGN(16)
-L(top):	mov	(vp,n,8), %r12
-	mov	8(vp,n,8), %r13
-	mov	16(vp,n,8), %r14
-	mov	24(vp,n,8), %r11
-	and	cnd, %r12
-	mov	(up,n,8), %r10
-	and	cnd, %r13
-	mov	8(up,n,8), %rbx
-	and	cnd, %r14
-	mov	16(up,n,8), %rbp
-	and	cnd, %r11
-	mov	24(up,n,8), %r9
-	add	R32(%rax), R32(%rax)	C restore carry
-	ADCSBB	%r12, %r10
-	mov	%r10, (rp,n,8)
-	ADCSBB	%r13, %rbx
-	mov	%rbx, 8(rp,n,8)
-	ADCSBB	%r14, %rbp
-	mov	%rbp, 16(rp,n,8)
-	ADCSBB	%r11, %r9
-	mov	%r9, 24(rp,n,8)
-	sbb	R32(%rax), R32(%rax)	C save carry
-	add	$4, n
-	js	L(top)
-
-L(end):	neg	R32(%rax)
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/com.asm b/gmp/mpn/x86_64/com.asm
deleted file mode 100644
index 006acaf648..0000000000
--- a/gmp/mpn/x86_64/com.asm
+++ /dev/null
@@ -1,95 +0,0 @@
-dnl  AMD64 mpn_com.
-
-dnl  Copyright 2004-2006, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	    cycles/limb
-C AMD K8,K9	 1.25
-C AMD K10	 1.25
-C Intel P4	 2.78
-C Intel core2	 1.1
-C Intel corei	 1.5
-C Intel atom	 ?
-C VIA nano	 2
-
-C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_com)
-	FUNC_ENTRY(3)
-	movq	(up), %r8
-	movl	R32(%rdx), R32(%rax)
-	leaq	(up,n,8), up
-	leaq	(rp,n,8), rp
-	negq	n
-	andl	$3, R32(%rax)
-	je	L(b00)
-	cmpl	$2, R32(%rax)
-	jc	L(b01)
-	je	L(b10)
-
-L(b11):	notq	%r8
-	movq	%r8, (rp,n,8)
-	decq	n
-	jmp	L(e11)
-L(b10):	addq	$-2, n
-	jmp	L(e10)
-	.byte	0x90,0x90,0x90,0x90,0x90,0x90
-L(b01):	notq	%r8
-	movq	%r8, (rp,n,8)
-	incq	n
-	jz	L(ret)
-
-L(oop):	movq	(up,n,8), %r8
-L(b00):	movq	8(up,n,8), %r9
-	notq	%r8
-	notq	%r9
-	movq	%r8, (rp,n,8)
-	movq	%r9, 8(rp,n,8)
-L(e11):	movq	16(up,n,8), %r8
-L(e10):	movq	24(up,n,8), %r9
-	notq	%r8
-	notq	%r9
-	movq	%r8, 16(rp,n,8)
-	movq	%r9, 24(rp,n,8)
-	addq	$4, n
-	jnc	L(oop)
-L(ret):	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/com_n.asm b/gmp/mpn/x86_64/com_n.asm
new file mode 100644
index 0000000000..fba9384642
--- /dev/null
+++ b/gmp/mpn/x86_64/com_n.asm
@@ -0,0 +1,77 @@
+dnl  AMD64 mpn_com_n.
+
+dnl  Copyright 2004, 2005, 2006 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C	    cycles/limb
+C K8,K9:	1.25
+C K10:		1.25
+C P4:		2.78
+C P6-15:	1.1
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
+
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+PROLOGUE(mpn_com_n)
+	movq	(up), %r8
+	movl	%edx, %eax
+	leaq	(up,n,8), up
+	leaq	(rp,n,8), rp
+	negq	n
+	andl	$3, %eax
+	je	L(b00)
+	cmpl	$2, %eax
+	jc	L(b01)
+	je	L(b10)
+
+L(b11):	notq	%r8
+	movq	%r8, (rp,n,8)
+	decq	n
+	jmp	L(e11)
+L(b10):	addq	$-2, n
+	jmp	L(e10)
+	.byte	0x90,0x90,0x90,0x90,0x90,0x90
+L(b01):	notq	%r8
+	movq	%r8, (rp,n,8)
+	incq	n
+	jz	L(ret)
+
+L(oop):	movq	(up,n,8), %r8
+L(b00):	movq	8(up,n,8), %r9
+	notq	%r8
+	notq	%r9
+	movq	%r8, (rp,n,8)
+	movq	%r9, 8(rp,n,8)
+L(e11):	movq	16(up,n,8), %r8
+L(e10):	movq	24(up,n,8), %r9
+	notq	%r8
+	notq	%r9
+	movq	%r8, 16(rp,n,8)
+	movq	%r9, 24(rp,n,8)
+	addq	$4, n
+	jnc	L(oop)
+L(ret):	ret
+EPILOGUE()
diff --git a/gmp/mpn/x86_64/copyd.asm b/gmp/mpn/x86_64/copyd.asm
index a5e6e595e7..759b11d3ed 100644
--- a/gmp/mpn/x86_64/copyd.asm
+++ b/gmp/mpn/x86_64/copyd.asm
@@ -1,93 +1,74 @@
 dnl  AMD64 mpn_copyd -- copy limb vector, decrementing.
 
-dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C	     cycles/limb
-C AMD K8,K9	 1
-C AMD K10	 1
-C AMD bd1	 1.36
-C AMD bobcat	 1.71
-C Intel P4	 2-3
-C Intel core2	 1
-C Intel NHM	 1
-C Intel SBR	 1
-C Intel atom	 2
-C VIA nano	 2
 
+C	    cycles/limb
+C K8,K9:	1
+C K10:		1
+C P4:		2.8
+C P6-15:	1.2
 
-IFSTD(`define(`rp',`%rdi')')
-IFSTD(`define(`up',`%rsi')')
-IFSTD(`define(`n', `%rdx')')
 
-IFDOS(`define(`rp',`%rcx')')
-IFDOS(`define(`up',`%rdx')')
-IFDOS(`define(`n', `%r8')')
+C INPUT PARAMETERS
+C rp	rdi
+C up	rsi
+C n	rdx
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
 
 ASM_START()
 	TEXT
-	ALIGN(64)
+	ALIGN(16)
 PROLOGUE(mpn_copyd)
-	lea	-8(up,n,8), up
-	lea	(rp,n,8), rp
-	sub	$4, n
+	leaq	-8(up,n,8), up
+	leaq	(rp,n,8), rp
+	subq	$4, n
 	jc	L(end)
-	nop
-
-L(top):	mov	(up), %rax
-	mov	-8(up), %r9
-	lea	-32(rp), rp
-	mov	-16(up), %r10
-	mov	-24(up), %r11
-	lea	-32(up), up
-	mov	%rax, 24(rp)
-	mov	%r9, 16(rp)
-	sub	$4, n
-	mov	%r10, 8(rp)
-	mov	%r11, (rp)
-	jnc	L(top)
+	ALIGN(16)
+L(oop):	movq	(up), %r8
+	movq	-8(up), %r9
+	leaq	-32(rp), rp
+	movq	-16(up), %r10
+	movq	-24(up), %r11
+	leaq	-32(up), up
+	movq	%r8, 24(rp)
+	movq	%r9, 16(rp)
+	subq	$4, n
+	movq	%r10, 8(rp)
+	movq	%r11, (rp)
+	jnc	L(oop)
 
-L(end):	shr	R32(n)
+L(end):	shrl	%edx			C edx = lowpart(n)
 	jnc	1f
-	mov	(up), %rax
-	mov	%rax, -8(rp)
-	lea	-8(rp), rp
-	lea	-8(up), up
-1:	shr	R32(n)
+	movq	(up), %r8
+	movq	%r8, -8(rp)
+	leaq	-8(rp), rp
+	leaq	-8(up), up
+1:	shrl	%edx			C edx = lowpart(n)
 	jnc	1f
-	mov	(up), %rax
-	mov	-8(up), %r9
-	mov	%rax, -8(rp)
-	mov	%r9, -16(rp)
+	movq	(up), %r8
+	movq	-8(up), %r9
+	movq	%r8, -8(rp)
+	movq	%r9, -16(rp)
 1:	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/copyi.asm b/gmp/mpn/x86_64/copyi.asm
index bafce7a09e..506142be79 100644
--- a/gmp/mpn/x86_64/copyi.asm
+++ b/gmp/mpn/x86_64/copyi.asm
@@ -1,92 +1,73 @@
 dnl  AMD64 mpn_copyi -- copy limb vector, incrementing.
 
-dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-C	     cycles/limb
-C AMD K8,K9	 1
-C AMD K10	 1
-C AMD bd1	 1.36
-C AMD bobcat	 1.71
-C Intel P4	 2-3
-C Intel core2	 1
-C Intel NHM	 1
-C Intel SBR	 1
-C Intel atom	 2
-C VIA nano	 2
 
+C	    cycles/limb
+C K8,K9:	1
+C K10:		1
+C P4:		2.8
+C P6-15:	1.2
 
-IFSTD(`define(`rp',`%rdi')')
-IFSTD(`define(`up',`%rsi')')
-IFSTD(`define(`n', `%rdx')')
 
-IFDOS(`define(`rp',`%rcx')')
-IFDOS(`define(`up',`%rdx')')
-IFDOS(`define(`n', `%r8')')
+C INPUT PARAMETERS
+C rp	rdi
+C up	rsi
+C n	rdx
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
 
 ASM_START()
 	TEXT
-	ALIGN(64)
-	.byte	0,0,0,0,0,0
+	ALIGN(16)
 PROLOGUE(mpn_copyi)
-	lea	-8(rp), rp
-	sub	$4, n
+	leaq	-8(rp), rp
+	subq	$4, n
 	jc	L(end)
+	ALIGN(16)
+L(oop):	movq	(up), %r8
+	movq	8(up), %r9
+	leaq	32(rp), rp
+	movq	16(up), %r10
+	movq	24(up), %r11
+	leaq	32(up), up
+	movq	%r8, -24(rp)
+	movq	%r9, -16(rp)
+	subq	$4, n
+	movq	%r10, -8(rp)
+	movq	%r11, (rp)
+	jnc	L(oop)
 
-L(top):	mov	(up), %rax
-	mov	8(up), %r9
-	lea	32(rp), rp
-	mov	16(up), %r10
-	mov	24(up), %r11
-	lea	32(up), up
-	mov	%rax, -24(rp)
-	mov	%r9, -16(rp)
-	sub	$4, n
-	mov	%r10, -8(rp)
-	mov	%r11, (rp)
-	jnc	L(top)
-
-L(end):	shr	R32(n)
+L(end):	shrl	%edx			C edx = lowpart(n)
 	jnc	1f
-	mov	(up), %rax
-	mov	%rax, 8(rp)
-	lea	8(rp), rp
-	lea	8(up), up
-1:	shr	R32(n)
+	movq	(up), %r8
+	movq	%r8, 8(rp)
+	leaq	8(rp), rp
+	leaq	8(up), up
+1:	shrl	%edx			C edx = lowpart(n)
 	jnc	1f
-	mov	(up), %rax
-	mov	8(up), %r9
-	mov	%rax, 8(rp)
-	mov	%r9, 16(rp)
+	movq	(up), %r8
+	movq	8(up), %r9
+	movq	%r8, 8(rp)
+	movq	%r9, 16(rp)
 1:	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/core2/aorrlsh1_n.asm b/gmp/mpn/x86_64/core2/aorrlsh1_n.asm
deleted file mode 100644
index 7066bb4372..0000000000
--- a/gmp/mpn/x86_64/core2/aorrlsh1_n.asm
+++ /dev/null
@@ -1,53 +0,0 @@
-dnl  AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
-dnl  AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 1)
-define(RSH, 63)
-
-ifdef(`OPERATION_addlsh1_n', `
-	define(ADDSUB,	add)
-	define(ADCSBB,	adc)
-	define(func,	mpn_addlsh1_n)')
-ifdef(`OPERATION_rsblsh1_n', `
-	define(ADDSUB,	sub)
-	define(ADCSBB,	sbb)
-	define(func,	mpn_rsblsh1_n)')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-include_mpn(`x86_64/aorrlshC_n.asm')
diff --git a/gmp/mpn/x86_64/core2/aorrlsh2_n.asm b/gmp/mpn/x86_64/core2/aorrlsh2_n.asm
deleted file mode 100644
index 5065120857..0000000000
--- a/gmp/mpn/x86_64/core2/aorrlsh2_n.asm
+++ /dev/null
@@ -1,53 +0,0 @@
-dnl  AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
-dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 2)
-define(RSH, 62)
-
-ifdef(`OPERATION_addlsh2_n', `
-	define(ADDSUB,	add)
-	define(ADCSBB,	adc)
-	define(func,	mpn_addlsh2_n)')
-ifdef(`OPERATION_rsblsh2_n', `
-	define(ADDSUB,	sub)
-	define(ADCSBB,	sbb)
-	define(func,	mpn_rsblsh2_n)')
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-include_mpn(`x86_64/aorrlshC_n.asm')
diff --git a/gmp/mpn/x86_64/core2/aorrlsh_n.asm b/gmp/mpn/x86_64/core2/aorrlsh_n.asm
deleted file mode 100644
index 57abf31579..0000000000
--- a/gmp/mpn/x86_64/core2/aorrlsh_n.asm
+++ /dev/null
@@ -1,38 +0,0 @@
-dnl  AMD64 mpn_addlsh_n and mpn_rsblsh_n.  R = V2^k +- U.
-
-dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-include_mpn(`x86_64/coreinhm/aorrlsh_n.asm')
diff --git a/gmp/mpn/x86_64/core2/aors_err1_n.asm b/gmp/mpn/x86_64/core2/aors_err1_n.asm
deleted file mode 100644
index 3f875aefa4..0000000000
--- a/gmp/mpn/x86_64/core2/aors_err1_n.asm
+++ /dev/null
@@ -1,225 +0,0 @@
-dnl  Core 2 mpn_add_err1_n, mpn_sub_err1_n
-
-dnl  Contributed by David Harvey.
-
-dnl  Copyright 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C Intel P4	 ?
-C Intel core2	 4.14
-C Intel corei	 ?
-C Intel atom	 ?
-C VIA nano	 ?
-
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`vp',	`%rdx')
-define(`ep',	`%rcx')
-define(`yp',	`%r8')
-define(`n',	`%r9')
-define(`cy_param',	`8(%rsp)')
-
-define(`el',	`%rbx')
-define(`eh',	`%rbp')
-define(`t0',	`%r10')
-define(`t1',	`%r11')
-define(`t2',	`%r12')
-define(`t3',	`%r13')
-define(`w0',	`%r14')
-define(`w1',	`%r15')
-
-ifdef(`OPERATION_add_err1_n', `
-	define(ADCSBB,	      adc)
-	define(func,	      mpn_add_err1_n)')
-ifdef(`OPERATION_sub_err1_n', `
-	define(ADCSBB,	      sbb)
-	define(func,	      mpn_sub_err1_n)')
-
-MULFUNC_PROLOGUE(mpn_add_err1_n mpn_sub_err1_n)
-
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	mov	cy_param, %rax
-
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	lea	(up,n,8), up
-	lea	(vp,n,8), vp
-	lea	(rp,n,8), rp
-
-	mov	R32(n), R32(%r10)
-	and	$3, R32(%r10)
-	jz	L(0mod4)
-	cmp	$2, R32(%r10)
-	jc	L(1mod4)
-	jz	L(2mod4)
-L(3mod4):
-	xor	R32(el), R32(el)
-	xor	R32(eh), R32(eh)
-	xor	R32(t0), R32(t0)
-	xor	R32(t1), R32(t1)
-	lea	-24(yp,n,8), yp
-	neg	n
-
-	shr	$1, %al		   C restore carry
-	mov	(up,n,8), w0
-	mov	8(up,n,8), w1
-	ADCSBB	(vp,n,8), w0
-	mov	w0, (rp,n,8)
-	cmovc	16(yp), el
-	ADCSBB	8(vp,n,8), w1
-	mov	w1, 8(rp,n,8)
-	cmovc	8(yp), t0
-	mov	16(up,n,8), w0
-	ADCSBB	16(vp,n,8), w0
-	mov	w0, 16(rp,n,8)
-	cmovc	(yp), t1
-	setc	%al		   C save carry
-	add	t0, el
-	adc	$0, eh
-	add	t1, el
-	adc	$0, eh
-
-	add	$3, n
-	jnz	L(loop)
-	jmp	L(end)
-
-	ALIGN(16)
-L(0mod4):
-	xor	R32(el), R32(el)
-	xor	R32(eh), R32(eh)
-	lea	(yp,n,8), yp
-	neg	n
-	jmp	L(loop)
-
-	ALIGN(16)
-L(1mod4):
-	xor	R32(el), R32(el)
-	xor	R32(eh), R32(eh)
-	lea	-8(yp,n,8), yp
-	neg	n
-
-	shr	$1, %al		   C restore carry
-	mov	(up,n,8), w0
-	ADCSBB	(vp,n,8), w0
-	mov	w0, (rp,n,8)
-	cmovc	(yp), el
-	setc	%al		   C save carry
-
-	add	$1, n
-	jnz	L(loop)
-	jmp	L(end)
-
-	ALIGN(16)
-L(2mod4):
-	xor	R32(el), R32(el)
-	xor	R32(eh), R32(eh)
-	xor	R32(t0), R32(t0)
-	lea	-16(yp,n,8), yp
-	neg	n
-
-	shr	$1, %al		   C restore carry
-	mov	(up,n,8), w0
-	mov	8(up,n,8), w1
-	ADCSBB	(vp,n,8), w0
-	mov	w0, (rp,n,8)
-	cmovc	8(yp), el
-	ADCSBB	8(vp,n,8), w1
-	mov	w1, 8(rp,n,8)
-	cmovc	(yp), t0
-	setc	%al		   C save carry
-	add	t0, el
-	adc	$0, eh
-
-	add	$2, n
-	jnz	L(loop)
-	jmp	L(end)
-
-	ALIGN(32)
-L(loop):
-	mov	(up,n,8), w0
-	shr	$1, %al		   C restore carry
-	mov	-8(yp), t0
-	mov	$0, R32(t3)
-	ADCSBB	(vp,n,8), w0
-	cmovnc	t3, t0
-	mov	w0, (rp,n,8)
-	mov	8(up,n,8), w1
-	mov	16(up,n,8), w0
-	ADCSBB	8(vp,n,8), w1
-	mov	-16(yp), t1
-	cmovnc	t3, t1
-	mov	-24(yp), t2
-	mov	w1, 8(rp,n,8)
-	ADCSBB	16(vp,n,8), w0
-	cmovnc	t3, t2
-	mov	24(up,n,8), w1
-	ADCSBB	24(vp,n,8), w1
-	cmovc	-32(yp), t3
-	setc	%al		   C save carry
-	add	t0, el
-	adc	$0, eh
-	add	t1, el
-	adc	$0, eh
-	add	t2, el
-	adc	$0, eh
-	lea	-32(yp), yp
-	mov	w0, 16(rp,n,8)
-	add	t3, el
-	adc	$0, eh
-	add	$4, n
-	mov	w1, -8(rp,n,8)
-	jnz	L(loop)
-
-L(end):
-	mov	el, (ep)
-	mov	eh, 8(ep)
-
-	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/core2/aors_n.asm b/gmp/mpn/x86_64/core2/aors_n.asm
index 74a1bce48a..d26af866f9 100644
--- a/gmp/mpn/x86_64/core2/aors_n.asm
+++ b/gmp/mpn/x86_64/core2/aors_n.asm
@@ -1,45 +1,30 @@
-dnl  Intel mpn_add_n/mpn_sub_n optimised for Conroe, Nehalem.
+dnl  Intel P6-15 mpn_add_n/mpn_sub_n -- mpn add or subtract.
 
-dnl  Copyright 2006, 2007, 2011-2013 Free Software Foundation, Inc.
+dnl  Copyright 2006, 2007 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	 2
-C AMD K10	 2
-C Intel P4	10
-C Intel core2	 2
-C Intel NHM	 2
-C Intel SBR	 2
-C Intel atom	 9
-C VIA nano	 3
+C K8,K9:	 2.25
+C K10:		 2
+C P4:		10
+C P6-15:	 2.05
 
 C INPUT PARAMETERS
 define(`rp',	`%rdi')
@@ -59,83 +44,80 @@ ifdef(`OPERATION_sub_n', `
 
 MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
+
 	TEXT
 	ALIGN(16)
+
+PROLOGUE(func_nc)
+	jmp	L(start)
+EPILOGUE()
+
 PROLOGUE(func)
-	FUNC_ENTRY(4)
 	xor	%r8, %r8
 L(start):
 	mov	(up), %r10
 	mov	(vp), %r11
 
-	lea	(up,n,8), up
-	lea	(vp,n,8), vp
-	lea	(rp,n,8), rp
-	mov	R32(n), R32(%rax)
+	lea	-8(up,n,8), up
+	lea	-8(vp,n,8), vp
+	lea	-16(rp,n,8), rp
+	mov	%ecx, %eax
 	neg	n
-	and	$3, R32(%rax)
+	and	$3, %eax
 	je	L(b00)
-	add	%rax, n			C clear low rcx bits for jrcxz
-	cmp	$2, R32(%rax)
+	add	%rax, n		C clear low rcx bits for jrcxz
+	cmp	$2, %eax
 	jl	L(b01)
 	je	L(b10)
 
-L(b11):	neg	%r8			C set cy
+L(b11):	shr	%r8			C set cy
 	jmp	L(e11)
 
-L(b00):	neg	%r8			C set cy
+L(b00):	shr	%r8			C set cy
 	mov	%r10, %r8
 	mov	%r11, %r9
 	lea	4(n), n
 	jmp	L(e00)
 
-	nop
-	nop
-	nop
-L(b01):	neg	%r8			C set cy
-	jmp	L(top)
+L(b01):	shr	%r8			C set cy
+	jmp	L(e01)
 
-L(b10):	neg	%r8			C set cy
+L(b10):	shr	%r8			C set cy
 	mov	%r10, %r8
 	mov	%r11, %r9
 	jmp	L(e10)
 
 L(end):	ADCSBB	%r11, %r10
-	mov	%r10, -8(rp)
-	mov	R32(%rcx), R32(%rax)	C clear eax, ecx contains 0
-	adc	R32(%rax), R32(%rax)
-	FUNC_EXIT()
+	mov	%r10, 8(rp)
+	mov	%ecx, %eax		C clear eax, ecx contains 0
+	adc	%eax, %eax
 	ret
 
 	ALIGN(16)
-L(top):	jrcxz	L(end)
-	mov	(up,n,8), %r8
-	mov	(vp,n,8), %r9
-	lea	4(n), n
-	ADCSBB	%r11, %r10
-	mov	%r10, -40(rp,n,8)
-L(e00):	mov	-24(up,n,8), %r10
-	mov	-24(vp,n,8), %r11
-	ADCSBB	%r9, %r8
-	mov	%r8, -32(rp,n,8)
-L(e11):	mov	-16(up,n,8), %r8
-	mov	-16(vp,n,8), %r9
+L(top):
+	mov	-24(up,n,8), %r8
+	mov	-24(vp,n,8), %r9
 	ADCSBB	%r11, %r10
 	mov	%r10, -24(rp,n,8)
-L(e10):	mov	-8(up,n,8), %r10
-	mov	-8(vp,n,8), %r11
+L(e00):
+	mov	-16(up,n,8), %r10
+	mov	-16(vp,n,8), %r11
 	ADCSBB	%r9, %r8
 	mov	%r8, -16(rp,n,8)
+L(e11):
+	mov	-8(up,n,8), %r8
+	mov	-8(vp,n,8), %r9
+	ADCSBB	%r11, %r10
+	mov	%r10, -8(rp,n,8)
+L(e10):
+	mov	(up,n,8), %r10
+	mov	(vp,n,8), %r11
+	ADCSBB	%r9, %r8
+	mov	%r8, (rp,n,8)
+L(e01):
+	jrcxz	L(end)
+	lea	4(n), n
 	jmp	L(top)
-EPILOGUE()
 
-PROLOGUE(func_nc)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	jmp	L(start)
 EPILOGUE()
-
diff --git a/gmp/mpn/x86_64/core2/sublshC_n.asm b/gmp/mpn/x86_64/core2/aorslsh1_n.asm
index 5acc46b032..18db7c96f8 100644
--- a/gmp/mpn/x86_64/core2/sublshC_n.asm
+++ b/gmp/mpn/x86_64/core2/aorslsh1_n.asm
@@ -1,45 +1,29 @@
-dnl  AMD64 mpn_sublshC_n -- rp[] = up[] - (vp[] << 1), optimised for Core 2 and
-dnl  Core iN.
+dnl  x86-64 mpn_addlsh1_n and mpn_sublsh1_n, optimized for "Core" 2.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
+dnl  Copyright 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9	 4.25
-C AMD K10	 ?
-C Intel P4	 ?
-C Intel core2	 3
-C Intel NHM	 3.1
-C Intel SBR	 2.47
-C Intel atom	 ?
-C VIA nano	 ?
+C K8,K9:	 4.25
+C K10:		 ?
+C P4:		 ?
+C P6-15:	 3
 
 C INPUT PARAMETERS
 define(`rp',`%rdi')
@@ -47,11 +31,21 @@ define(`up',`%rsi')
 define(`vp',`%rdx')
 define(`n', `%rcx')
 
+ifdef(`OPERATION_addlsh1_n', `
+	define(ADDSUB,	add)
+	define(ADCSBB,	adc)
+	define(func,	mpn_addlsh1_n)')
+ifdef(`OPERATION_sublsh1_n', `
+	define(ADDSUB,	sub)
+	define(ADCSBB,	sbb)
+	define(func,	mpn_sublsh1_n)')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
 ASM_START()
 	TEXT
 	ALIGN(8)
 PROLOGUE(func)
-	FUNC_ENTRY(4)
 	push	%rbx
 	push	%r12
 
@@ -64,7 +58,7 @@ PROLOGUE(func)
 	xor	R32(%r11), R32(%r11)
 
 	mov	-24(vp,n,8), %r8	C do first limb early
-	shrd	$RSH, %r8, %r11
+	shrd	$63, %r8, %r11
 
 	and	$3, R32(%rax)
 	je	L(b0)
@@ -73,9 +67,9 @@ PROLOGUE(func)
 	je	L(b2)
 
 L(b3):	mov	-16(vp,n,8), %r9
-	shrd	$RSH, %r9, %r8
+	shrd	$63, %r9, %r8
 	mov	-8(vp,n,8), %r10
-	shrd	$RSH, %r10, %r9
+	shrd	$63, %r10, %r9
 	mov	-24(up,n,8), %r12
 	ADDSUB	%r11, %r12
 	mov	%r12, -24(rp,n,8)
@@ -101,7 +95,7 @@ L(b1):	mov	-24(up,n,8), %r12
 	jmp	L(end)
 
 L(b2):	mov	-16(vp,n,8), %r9
-	shrd	$RSH, %r9, %r8
+	shrd	$63, %r9, %r8
 	mov	-24(up,n,8), %r12
 	ADDSUB	%r11, %r12
 	mov	%r12, -24(rp,n,8)
@@ -116,13 +110,13 @@ L(b2):	mov	-16(vp,n,8), %r9
 
 	ALIGN(16)
 L(top):	mov	-24(vp,n,8), %r8
-	shrd	$RSH, %r8, %r11
+	shrd	$63, %r8, %r11
 L(b0):	mov	-16(vp,n,8), %r9
-	shrd	$RSH, %r9, %r8
+	shrd	$63, %r9, %r8
 	mov	-8(vp,n,8), %r10
-	shrd	$RSH, %r10, %r9
+	shrd	$63, %r10, %r9
 	mov	(vp,n,8), %rbx
-	shrd	$RSH, %rbx, %r10
+	shrd	$63, %rbx, %r10
 
 	add	R32(%rax), R32(%rax)	C restore cy
 
@@ -148,11 +142,10 @@ L(b0):	mov	-16(vp,n,8), %r9
 	add	$4, n
 	js	L(top)
 
-L(end):	shr	$RSH, %r11
+L(end):	add	%r11, %r11
 	pop	%r12
 	pop	%rbx
-	sub	R32(%r11), R32(%rax)
+	sbb	$0, R32(%rax)
 	neg	R32(%rax)
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/core2/aorsmul_1.asm b/gmp/mpn/x86_64/core2/aorsmul_1.asm
index 6b313dd836..1d05b30b59 100644
--- a/gmp/mpn/x86_64/core2/aorsmul_1.asm
+++ b/gmp/mpn/x86_64/core2/aorsmul_1.asm
@@ -1,46 +1,29 @@
 dnl  x86-64 mpn_addmul_1 and mpn_submul_1, optimized for "Core 2".
 
-dnl  Copyright 2003-2005, 2007-2009, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9	 4
-C AMD K10	 4
-C AMD bd1	 5.1
-C AMD bobcat
-C Intel P4	 ?
-C Intel core2	 4.3-4.5 (fluctuating)
-C Intel NHM	 5.0
-C Intel SBR	 4.1
-C Intel atom	 ?
-C VIA nano	 5.25
+C K8,K9:	 4
+C K10:		 4
+C P4:		 ?
+C P6-15:	 4.3-4.7 (fluctuating)
 
 C INPUT PARAMETERS
 define(`rp',	`%rdi')
@@ -50,129 +33,111 @@ define(`v0',	`%rcx')
 
 ifdef(`OPERATION_addmul_1',`
       define(`ADDSUB',        `add')
-      define(`func',     `mpn_addmul_1')
-      define(`func_1c',  `mpn_addmul_1c')
+      define(`func',  `mpn_addmul_1')
 ')
 ifdef(`OPERATION_submul_1',`
       define(`ADDSUB',        `sub')
-      define(`func',     `mpn_submul_1')
-      define(`func_1c',  `mpn_submul_1c')
+      define(`func',  `mpn_submul_1')
 ')
 
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-	C For DOS, on the stack we have four saved registers, return address,
-	C space for four register arguments, and finally the carry input.
-
-IFDOS(` define(`carry_in', `72(%rsp)')') dnl
-IFSTD(` define(`carry_in', `%r8')') dnl
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
 
 ASM_START()
 	TEXT
 	ALIGN(16)
-PROLOGUE(func_1c)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-	lea	(%rdx), %rbx
-	neg	%rbx
-
-	mov	(up), %rax
-	mov	(rp), %r10
-
-	lea	-16(rp,%rdx,8), rp
-	lea	(up,%rdx,8), up
-	mul	%rcx
-	add	carry_in, %rax
-	adc	$0, %rdx
-	jmp	L(start_nc)
-EPILOGUE()
-
-	ALIGN(16)
 PROLOGUE(func)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-	lea	(%rdx), %rbx
-	neg	%rbx
+	push	%r15
+	push	%r12
+	push	%r13
+	lea	(%rdx), %r15
+	neg	%r15
 
 	mov	(up), %rax
-	mov	(rp), %r10
 
-	lea	-16(rp,%rdx,8), rp
+	bt	$0, %r15
+	jc	L(odd)
+
+	lea	(rp,%rdx,8), rp
 	lea	(up,%rdx,8), up
 	mul	%rcx
 
-L(start_nc):
-	bt	$0, R32(%rbx)
-	jc	L(odd)
-
 	lea	(%rax), %r11
-	mov	8(up,%rbx,8), %rax
-	lea	(%rdx), %rbp
-	mul	%rcx
-	add	$2, %rbx
+	mov	8(up,%r15,8), %rax
+	mov	(rp,%r15,8), %r13
+	lea	(%rdx), %r12
+
+	add	$2, %r15
 	jns	L(n2)
 
+	mul	%rcx
 	lea	(%rax), %r8
-	mov	(up,%rbx,8), %rax
+	mov	(up,%r15,8), %rax
+	mov	-8(rp,%r15,8), %r10
 	lea	(%rdx), %r9
-	jmp	L(mid)
+	jmp	L(m)
 
-L(odd):	add	$1, %rbx
+L(odd):	lea	(rp,%rdx,8), rp
+	lea	(up,%rdx,8), up
+	mul	%rcx
+	add	$1, %r15
 	jns	L(n1)
 
-	lea	(%rax), %r8
-	mov	(up,%rbx,8), %rax
+L(gt1):	lea	(%rax), %r8
+	mov	(up,%r15,8), %rax
+	mov	-8(rp,%r15,8), %r10
 	lea	(%rdx), %r9
 	mul	%rcx
 	lea	(%rax), %r11
-	mov	8(up,%rbx,8), %rax
-	lea	(%rdx), %rbp
-	jmp	L(e)
+	mov	8(up,%r15,8), %rax
+	mov	(rp,%r15,8), %r13
+	lea	(%rdx), %r12
+	add	$2, %r15
+	jns	L(end)
 
 	ALIGN(16)
 L(top):	mul	%rcx
 	ADDSUB	%r8, %r10
 	lea	(%rax), %r8
-	mov	(up,%rbx,8), %rax
+	mov	0(up,%r15,8), %rax
 	adc	%r9, %r11
-	mov	%r10, -8(rp,%rbx,8)
-	mov	(rp,%rbx,8), %r10
+	mov	%r10, -24(rp,%r15,8)
+	mov	-8(rp,%r15,8), %r10
 	lea	(%rdx), %r9
-	adc	$0, %rbp
-L(mid):	mul	%rcx
-	ADDSUB	%r11, %r10
+	adc	$0, %r12
+L(m):	mul	%rcx
+	ADDSUB	%r11, %r13
 	lea	(%rax), %r11
-	mov	8(up,%rbx,8), %rax
-	adc	%rbp, %r8
-	mov	%r10, (rp,%rbx,8)
-	mov	8(rp,%rbx,8), %r10
-	lea	(%rdx), %rbp
+	mov	8(up,%r15,8), %rax
+	adc	%r12, %r8
+	mov	%r13, -16(rp,%r15,8)
+	mov	0(rp,%r15,8), %r13
+	lea	(%rdx), %r12
 	adc	$0, %r9
-L(e):	add	$2, %rbx
+
+	add	$2, %r15
 	js	L(top)
 
-	mul	%rcx
+L(end):	mul	%rcx
 	ADDSUB	%r8, %r10
 	adc	%r9, %r11
-	mov	%r10, -8(rp)
-	adc	$0, %rbp
-L(n2):	mov	(rp), %r10
-	ADDSUB	%r11, %r10
-	adc	%rbp, %rax
-	mov	%r10, (rp)
+	mov	%r10, -24(rp,%r15,8)
+	mov	-8(rp,%r15,8), %r10
+	adc	$0, %r12
+L(r):	ADDSUB	%r11, %r13
+	adc	%r12, %rax
+	mov	%r13, -16(rp,%r15,8)
 	adc	$0, %rdx
-L(n1):	mov	8(rp), %r10
-	ADDSUB	%rax, %r10
-	mov	%r10, 8(rp)
-	mov	R32(%rbx), R32(%rax)	C zero rax
+L(x):	ADDSUB	%rax, %r10
+	mov	%r10, -8(rp,%r15,8)
+	mov	$0, %eax
 	adc	%rdx, %rax
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
+L(ret):	pop	%r13
+	pop	%r12
+	pop	%r15
 	ret
+L(n2):	mul	%rcx
+	mov	-8(rp,%r15,8), %r10
+	jmp	L(r)
+L(n1):	mov	-8(rp,%r15,8), %r10
+	jmp	L(x)
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/core2/copyd.asm b/gmp/mpn/x86_64/core2/copyd.asm
deleted file mode 100644
index f0dc54a55e..0000000000
--- a/gmp/mpn/x86_64/core2/copyd.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_copyd optimised for Intel Sandy Bridge.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_copyd)
-include_mpn(`x86_64/fastsse/copyd-palignr.asm')
diff --git a/gmp/mpn/x86_64/core2/copyi.asm b/gmp/mpn/x86_64/core2/copyi.asm
deleted file mode 100644
index 9c26e00c52..0000000000
--- a/gmp/mpn/x86_64/core2/copyi.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_copyi optimised for Intel Sandy Bridge.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_copyi)
-include_mpn(`x86_64/fastsse/copyi-palignr.asm')
diff --git a/gmp/mpn/x86_64/core2/divrem_1.asm b/gmp/mpn/x86_64/core2/divrem_1.asm
deleted file mode 100644
index 623bea386c..0000000000
--- a/gmp/mpn/x86_64/core2/divrem_1.asm
+++ /dev/null
@@ -1,237 +0,0 @@
-dnl  x86-64 mpn_divrem_1 -- mpn by limb division.
-
-dnl  Copyright 2004, 2005, 2007-2010, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C		norm	unorm	frac
-C AMD K8,K9	15	15	12
-C AMD K10	15	15	12
-C Intel P4	44	44	43
-C Intel core2	24	24	19.5
-C Intel corei	19	19	18
-C Intel atom	51	51	36
-C VIA nano	46	44	22.5
-
-C mp_limb_t
-C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
-C               mp_srcptr np, mp_size_t nn, mp_limb_t d)
-
-C mp_limb_t
-C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
-C                      mp_srcptr np, mp_size_t nn, mp_limb_t d,
-C                      mp_limb_t dinv, int cnt)
-
-C INPUT PARAMETERS
-define(`qp',		`%rdi')
-define(`fn_param',	`%rsi')
-define(`up_param',	`%rdx')
-define(`un_param',	`%rcx')
-define(`d',		`%r8')
-define(`dinv',		`%r9')		C only for mpn_preinv_divrem_1
-C       shift passed on stack		C only for mpn_preinv_divrem_1
-
-define(`cnt',		`%rcx')
-define(`up',		`%rsi')
-define(`fn',		`%r12')
-define(`un',		`%rbx')
-
-
-C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
-C         cnt         qp      d  dinv
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-IFSTD(`define(`CNTOFF',		`40($1)')')
-IFDOS(`define(`CNTOFF',		`104($1)')')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_preinv_divrem_1)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-IFDOS(`	mov	64(%rsp), %r9	')
-	xor	R32(%rax), R32(%rax)
-	push	%r13
-	push	%r12
-	push	%rbp
-	push	%rbx
-
-	mov	fn_param, fn
-	mov	un_param, un
-	add	fn_param, un_param
-	mov	up_param, up
-
-	lea	-8(qp,un_param,8), qp
-
-	mov	CNTOFF(%rsp), R8(cnt)
-	shl	R8(cnt), d
-	jmp	L(ent)
-EPILOGUE()
-
-	ALIGN(16)
-PROLOGUE(mpn_divrem_1)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	xor	R32(%rax), R32(%rax)
-	push	%r13
-	push	%r12
-	push	%rbp
-	push	%rbx
-
-	mov	fn_param, fn
-	mov	un_param, un
-	add	fn_param, un_param
-	mov	up_param, up
-	je	L(ret)
-
-	lea	-8(qp,un_param,8), qp
-	xor	R32(%rbp), R32(%rbp)
-
-L(unnormalized):
-	test	un, un
-	je	L(44)
-	mov	-8(up,un,8), %rax
-	cmp	d, %rax
-	jae	L(44)
-	mov	%rbp, (qp)
-	mov	%rax, %rbp
-	lea	-8(qp), qp
-	je	L(ret)
-	dec	un
-L(44):
-	bsr	d, %rcx
-	not	R32(%rcx)
-	sal	R8(%rcx), d
-	sal	R8(%rcx), %rbp
-
-	push	%rcx
-IFSTD(`	push	%rdi		')
-IFSTD(`	push	%rsi		')
-	push	%r8
-IFSTD(`	mov	d, %rdi		')
-IFDOS(`	mov	d, %rcx		')
-	CALL(	mpn_invert_limb)
-	pop	%r8
-IFSTD(`	pop	%rsi		')
-IFSTD(`	pop	%rdi		')
-	pop	%rcx
-
-	mov	%rax, dinv
-	mov	%rbp, %rax
-	test	un, un
-	je	L(frac)
-L(ent):	mov	-8(up,un,8), %rbp
-	shr	R8(%rcx), %rax
-	shld	R8(%rcx), %rbp, %rax
-	sub	$2, un
-	js	L(end)
-
-	ALIGN(16)
-L(top):	lea	1(%rax), %r11
-	mul	dinv
-	mov	(up,un,8), %r10
-	shld	R8(%rcx), %r10, %rbp
-	mov	%rbp, %r13
-	add	%rax, %r13
-	adc	%r11, %rdx
-	mov	%rdx, %r11
-	imul	d, %rdx
-	sub	%rdx, %rbp
-	lea	(d,%rbp), %rax
-	sub	$8, qp
-	cmp	%r13, %rbp
-	cmovc	%rbp, %rax
-	adc	$-1, %r11
-	cmp	d, %rax
-	jae	L(ufx)
-L(uok):	dec	un
-	mov	%r11, 8(qp)
-	mov	%r10, %rbp
-	jns	L(top)
-
-L(end):	lea	1(%rax), %r11
-	sal	R8(%rcx), %rbp
-	mul	dinv
-	add	%rbp, %rax
-	adc	%r11, %rdx
-	mov	%rax, %r11
-	mov	%rdx, %r13
-	imul	d, %rdx
-	sub	%rdx, %rbp
-	mov	d, %rax
-	add	%rbp, %rax
-	cmp	%r11, %rbp
-	cmovc	%rbp, %rax
-	adc	$-1, %r13
-	cmp	d, %rax
-	jae	L(efx)
-L(eok):	mov	%r13, (qp)
-	sub	$8, qp
-	jmp	L(frac)
-
-L(ufx):	sub	d, %rax
-	inc	%r11
-	jmp	L(uok)
-L(efx):	sub	d, %rax
-	inc	%r13
-	jmp	L(eok)
-
-L(frac):mov	d, %rbp
-	neg	%rbp
-	jmp	L(fent)
-
-	ALIGN(16)			C	    K8-K10  P6-CNR P6-NHM  P4
-L(ftop):mul	dinv			C	      0,12   0,17   0,17
-	add	%r11, %rdx		C	      5      8     10
-	mov	%rax, %r11		C	      4      8      3
-	mov	%rdx, %r13		C	      6      9     11
-	imul	%rbp, %rdx		C	      6      9     11
-	mov	d, %rax			C
-	add	%rdx, %rax		C	     10     14     14
-	cmp	%r11, %rdx		C	     10     14     14
-	cmovc	%rdx, %rax		C	     11     15     15
-	adc	$-1, %r13		C
-	mov	%r13, (qp)		C
-	sub	$8, qp			C
-L(fent):lea	1(%rax), %r11		C
-	dec	fn			C
-	jns	L(ftop)			C
-
-	shr	R8(%rcx), %rax
-L(ret):	pop	%rbx
-	pop	%rbp
-	pop	%r12
-	pop	%r13
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/core2/gcd_1.asm b/gmp/mpn/x86_64/core2/gcd_1.asm
deleted file mode 100644
index e0cab9b4e4..0000000000
--- a/gmp/mpn/x86_64/core2/gcd_1.asm
+++ /dev/null
@@ -1,144 +0,0 @@
-dnl  AMD64 mpn_gcd_1 optimised for Intel C2, NHM, SBR and AMD K10, BD.
-
-dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for AMD64 by Torbjorn
-dnl  Granlund.
-
-dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/bit (approx)
-C AMD K8,K9	 8.50
-C AMD K10	 4.30
-C AMD bd1	 5.00
-C AMD bobcat	10.0
-C Intel P4	18.6
-C Intel core2	 3.83
-C Intel NHM	 5.17
-C Intel SBR	 4.69
-C Intel atom	17.0
-C VIA nano	 5.44
-C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
-
-C TODO
-C  * Optimise inner-loop for specific CPUs.
-C  * Use DIV for 1-by-1 reductions, at least for some CPUs.
-
-C Threshold of when to call bmod when U is one limb.  Should be about
-C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
-define(`BMOD_THRES_LOG2', 6)
-
-C INPUT PARAMETERS
-define(`up',    `%rdi')
-define(`n',     `%rsi')
-define(`v0',    `%rdx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-IFDOS(`define(`STACK_ALLOC', 40)')
-IFSTD(`define(`STACK_ALLOC', 8)')
-
-C Undo some configure cleverness.
-C The problem is that C only defines the '1c' variant, and that configure
-C therefore considers modexact_1c to be the base function.  It then adds a
-C special fat rule for mpn_modexact_1_odd, messing up things when a cpudep
-C gcd_1 exists without a corresponding cpudep mode1o.
-ifdef(`WANT_FAT_BINARY', `
-  define(`mpn_modexact_1_odd', `MPN_PREFIX`modexact_1_odd_x86_64'')')
-
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_gcd_1)
-	FUNC_ENTRY(3)
-	mov	(up), %rax	C U low limb
-	or	v0, %rax
-	bsf	%rax, %rax	C min(ctz(u0),ctz(v0))
-
-	bsf	v0, %rcx
-	shr	R8(%rcx), v0
-
-	push	%rax		C preserve common twos over call
-	push	v0		C preserve v0 argument over call
-	sub	$STACK_ALLOC, %rsp	C maintain ABI required rsp alignment
-
-	cmp	$1, n
-	jnz	L(reduce_nby1)
-
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
-	mov	(up), %r8
-	mov	%r8, %rax
-	shr	$BMOD_THRES_LOG2, %r8
-	cmp	%r8, v0
-	ja	L(reduced)
-	jmp	L(bmod)
-
-L(reduce_nby1):
-	cmp	$BMOD_1_TO_MOD_1_THRESHOLD, n
-	jl	L(bmod)
-IFDOS(`	mov	%rdx, %r8	')
-IFDOS(`	mov	%rsi, %rdx	')
-IFDOS(`	mov	%rdi, %rcx	')
-	CALL(	mpn_mod_1)
-	jmp	L(reduced)
-L(bmod):
-IFDOS(`	mov	%rdx, %r8	')
-IFDOS(`	mov	%rsi, %rdx	')
-IFDOS(`	mov	%rdi, %rcx	')
-	CALL(	mpn_modexact_1_odd)
-L(reduced):
-
-	add	$STACK_ALLOC, %rsp
-	pop	%rdx
-
-	bsf	%rax, %rcx
-C	test	%rax, %rax	C FIXME: does this lower latency?
-	jnz	L(mid)
-	jmp	L(end)
-
-	ALIGN(16)		C               K10   BD    C2    NHM   SBR
-L(top):	cmovc	%r10, %rax	C if x-y < 0    0,3   0,3   0,6   0,5   0,5
-	cmovc	%r9, %rdx	C use x,y-x     0,3   0,3   2,8   1,7   1,7
-L(mid):	shr	R8(%rcx), %rax	C               1,7   1,6   2,8   2,8   2,8
-	mov	%rdx, %r10	C               1     1     4     3     3
-	sub	%rax, %r10	C               2     2     5     4     4
-	bsf	%r10, %rcx	C               3     3     6     5     5
-	mov	%rax, %r9	C               2     2     3     3     4
-	sub	%rdx, %rax	C               2     2     4     3     4
-	jnz	L(top)		C
-
-L(end):	pop	%rcx
-	mov	%rdx, %rax
-	shl	R8(%rcx), %rax
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/core2/gmp-mparam.h b/gmp/mpn/x86_64/core2/gmp-mparam.h
index 0f4f88f780..8207da4895 100644
--- a/gmp/mpn/x86_64/core2/gmp-mparam.h
+++ b/gmp/mpn/x86_64/core2/gmp-mparam.h
@@ -1,217 +1,78 @@
-/* Core 2 gmp-mparam.h -- Compiler/machine parameter header file.
+/* "Core 2" gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 2000-2010, 2012, 2014 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
 
-or both in parallel, as here.
+/* 2133 MHz "Core 2" / 65nm / 4096 Kibyte cache / socket 775 */
 
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 2133 MHz Core 2 (65nm) */
-/* FFT tuning limit = 60000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        16
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      9
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           24
-
-#define MUL_TOOM22_THRESHOLD                23
-#define MUL_TOOM33_THRESHOLD                65
-#define MUL_TOOM44_THRESHOLD               179
-#define MUL_TOOM6H_THRESHOLD               268
-#define MUL_TOOM8H_THRESHOLD               357
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      69
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      73
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      78
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     100
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 28
-#define SQR_TOOM3_THRESHOLD                102
-#define SQR_TOOM4_THRESHOLD                160
-#define SQR_TOOM6_THRESHOLD                222
-#define SQR_TOOM8_THRESHOLD                296
-
-#define MULMID_TOOM42_THRESHOLD             28
-
-#define MULMOD_BNM1_THRESHOLD               12
-#define SQRMOD_BNM1_THRESHOLD               13
-
-#define MUL_FFT_MODF_THRESHOLD             372  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    372, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     21, 7}, {     11, 6}, {     23, 7}, {     12, 6}, \
-    {     25, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
-    {     13, 7}, {     27, 8}, {     15, 7}, {     32, 8}, \
-    {     17, 7}, {     36, 8}, {     19, 7}, {     40, 8}, \
-    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
-    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     43,10}, \
-    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
-    {     63,10}, {     39, 9}, {     83,10}, {     47, 9}, \
-    {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
-    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255,10}, {    135, 9}, {    271,11}, \
-    {     79,10}, {    159, 9}, {    319,10}, {    167,11}, \
-    {     95,10}, {    191, 9}, {    383,10}, {    207,11}, \
-    {    111,12}, {     63,11}, {    127,10}, {    271,11}, \
-    {    143,10}, {    287, 9}, {    575,10}, {    303,11}, \
-    {    159,10}, {    319,12}, {     95,11}, {    191,10}, \
-    {    383,11}, {    207,10}, {    415,11}, {    223,13}, \
-    {     63,12}, {    127,11}, {    271,10}, {    543,11}, \
-    {    287,10}, {    575,11}, {    303,10}, {    607,12}, \
-    {    159,11}, {    319,10}, {    639,11}, {    351,12}, \
-    {    191,11}, {    415,12}, {    223,11}, {    479,13}, \
-    {    127,12}, {    255,11}, {    543,12}, {    287,11}, \
-    {    607,12}, {    319,11}, {    639,12}, {    351,11}, \
-    {    703,13}, {    191,12}, {    415,11}, {    831,12}, \
-    {    479,14}, {    127,13}, {    255,12}, {    607,13}, \
-    {    319,12}, {    703,13}, {    383,12}, {    831,13}, \
-    {    447,12}, {    959,14}, {    255,13}, {    511,12}, \
-    {   1023,13}, {    575,12}, {   1215,13}, {    639,12}, \
-    {   1279,13}, {    703,14}, {    383,13}, {    831,12}, \
-    {   1663,13}, {    895,15}, {    255,14}, {    511,13}, \
-    {   1151,14}, {    639,13}, {   1343,14}, {    767,13}, \
-    {   1599,14}, {    895,15}, {    511,14}, {   1279,13}, \
-    {   2687,14}, {   1407,13}, {   2815,15}, {    767,14}, \
-    {   1535,13}, {   3199,14}, {   1663,13}, {   3455,16}, \
-    {    511,15}, {   1023,14}, {   2047,13}, {   4095,14}, \
-    {   2175,12}, {   8959,14}, {   2303,13}, {   4607,12}, \
-    {   9471,14}, {   2431,13}, {   4863,12}, {   9983,15}, \
-    {   1279,14}, {   2559,12}, {  10239,14}, {   2687,12}, \
-    {  11775,15}, {   1535,14}, {   3327,13}, {   6655,14}, \
-    {   3455,13}, {   6911,14}, {   3583,12}, {  14335,11}, \
-    {  28671,10}, {  57343,11}, {   2048,12}, {   4096,13}, \
-    {   8192,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 183
-#define MUL_FFT_THRESHOLD                 4736
-
-#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    340, 5}, {     15, 6}, {      8, 5}, {     17, 6}, \
-    {      9, 5}, {     19, 6}, {     23, 7}, {     12, 6}, \
-    {     25, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
-    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
-    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
-    {     33, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     43,10}, \
-    {     23, 9}, {     55,10}, {     31, 9}, {     67,10}, \
-    {     39, 9}, {     79,10}, {     47,11}, {     31,10}, \
-    {     79,11}, {     47,10}, {     95,12}, {     31,11}, \
-    {     63,10}, {    127, 9}, {    255, 8}, {    511, 9}, \
-    {    271, 8}, {    543,11}, {     79, 9}, {    319, 8}, \
-    {    639,11}, {     95,10}, {    191, 9}, {    383,10}, \
-    {    207, 9}, {    415,12}, {     63,11}, {    127,10}, \
-    {    271, 9}, {    543,10}, {    287, 9}, {    575,10}, \
-    {    303, 9}, {    607,10}, {    319, 9}, {    639,11}, \
-    {    175,12}, {     95,11}, {    191,10}, {    383,11}, \
-    {    207,10}, {    415,13}, {     63,12}, {    127,11}, \
-    {    271,10}, {    543,11}, {    287,10}, {    575,11}, \
-    {    303,10}, {    607,11}, {    319,10}, {    639,11}, \
-    {    351,12}, {    191,11}, {    415,10}, {    831,12}, \
-    {    223,11}, {    447,10}, {    895,11}, {    479,13}, \
-    {    127,12}, {    255,11}, {    543,12}, {    287,11}, \
-    {    607,12}, {    319,11}, {    639,12}, {    351,13}, \
-    {    191,12}, {    415,11}, {    831,12}, {    479,14}, \
-    {    127,13}, {    255,12}, {    607,13}, {    319,12}, \
-    {    703,13}, {    383,12}, {    831,13}, {    447,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1023,13}, \
-    {    575,12}, {   1215,13}, {    639,12}, {   1279,13}, \
-    {    703,14}, {    383,13}, {    831,12}, {   1663,13}, \
-    {    959,15}, {    255,14}, {    511,13}, {   1087,12}, \
-    {   2175,13}, {   1215,14}, {    639,13}, {   1343,12}, \
-    {   2687,13}, {   1407,12}, {   2815,14}, {    767,13}, \
-    {   1663,14}, {    895,15}, {    511,14}, {   1023,13}, \
-    {   2175,14}, {   1151,13}, {   2303,12}, {   4607,13}, \
-    {   2431,12}, {   4863,14}, {   1279,13}, {   2687,14}, \
-    {   1407,15}, {    767,14}, {   1535,13}, {   3071,14}, \
-    {   1663,13}, {   3455,12}, {   6911,14}, {   1791,13}, \
-    {   3583,16}, {    511,15}, {   1023,14}, {   2175,13}, \
-    {   4351,14}, {   2303,13}, {   4607,14}, {   2431,13}, \
-    {   4863,15}, {   1279,14}, {   2815,13}, {   5631,14}, \
-    {   2943,13}, {   5887,15}, {   1535,14}, {   3455,13}, \
-    {   6911,14}, {  16384,15}, {  32768,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 179
-#define SQR_FFT_THRESHOLD                 3008
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  62
-#define MULLO_MUL_N_THRESHOLD             9174
-
-#define DC_DIV_QR_THRESHOLD                 46
-#define DC_DIVAPPR_Q_THRESHOLD             155
-#define DC_BDIV_QR_THRESHOLD                50
-#define DC_BDIV_Q_THRESHOLD                 94
-
-#define INV_MULMOD_BNM1_THRESHOLD           48
-#define INV_NEWTON_THRESHOLD               156
-#define INV_APPR_THRESHOLD                 155
-
-#define BINV_NEWTON_THRESHOLD              234
-#define REDC_1_TO_REDC_2_THRESHOLD          22
-#define REDC_2_TO_REDC_N_THRESHOLD          48
-
-#define MU_DIV_QR_THRESHOLD               1187
-#define MU_DIVAPPR_Q_THRESHOLD            1142
-#define MUPI_DIV_QR_THRESHOLD               74
-#define MU_BDIV_QR_THRESHOLD              1017
-#define MU_BDIV_Q_THRESHOLD               1187
-
-#define POWM_SEC_TABLE  1,64,131,269,466
-
-#define MATRIX22_STRASSEN_THRESHOLD         19
-#define HGCD_THRESHOLD                     117
-#define HGCD_APPR_THRESHOLD                151
-#define HGCD_REDUCE_THRESHOLD             2121
-#define GCD_DC_THRESHOLD                   427
-#define GCDEXT_DC_THRESHOLD                342
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                11
-#define GET_STR_PRECOMPUTE_THRESHOLD        18
-#define SET_STR_DC_THRESHOLD               552
-#define SET_STR_PRECOMPUTE_THRESHOLD      1561
-
-#define FAC_DSC_THRESHOLD                  656
-#define FAC_ODD_THRESHOLD                   23
+/* Generated by tuneup.c, 2009-01-14, gcc 4.2 */
+
+#define MUL_KARATSUBA_THRESHOLD          18
+#define MUL_TOOM3_THRESHOLD              65
+#define MUL_TOOM44_THRESHOLD            166
+
+#define SQR_BASECASE_THRESHOLD            0  /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD          32
+#define SQR_TOOM3_THRESHOLD              97
+#define SQR_TOOM4_THRESHOLD             163
+
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD              20
+#define MULLOW_MUL_N_THRESHOLD          232
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 60
+#define POWM_THRESHOLD                   77
+
+#define MATRIX22_STRASSEN_THRESHOLD      25
+#define HGCD_THRESHOLD                  140
+#define GCD_DC_THRESHOLD                691
+#define GCDEXT_DC_THRESHOLD             760
+#define JACOBI_BASE_METHOD                1
+
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1_THRESHOLD                 3
+#define MOD_1_2_THRESHOLD                 5
+#define MOD_1_4_THRESHOLD                20
+#define USE_PREINV_DIVREM_1               1  /* native */
+#define USE_PREINV_MOD_1                  1
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             10
+#define GET_STR_PRECOMPUTE_THRESHOLD     16
+#define SET_STR_DC_THRESHOLD            668
+#define SET_STR_PRECOMPUTE_THRESHOLD   2052
+
+#define MUL_FFT_TABLE  { 336, 672, 1600, 2816, 7168, 20480, 81920, 327680, 786432, 0 }
+#define MUL_FFT_MODF_THRESHOLD          352
+#define MUL_FFT_THRESHOLD              3456
+
+#define SQR_FFT_TABLE  { 336, 736, 1728, 3328, 7168, 20480, 81920, 327680, 0 }
+#define SQR_FFT_MODF_THRESHOLD          352
+#define SQR_FFT_THRESHOLD              2432
+
+/* Generated 2009-01-12, gcc 4.2 */
+
+#define MUL_FFT_TABLE2 {{1,4}, {273,5}, {545,6}, {1217,7}, {3201,8}, {6913,9}, {7681,8}, {8449,9}, {9729,8}, {10497,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {28161,11}, {30721,10}, {31745,9}, {34305,10}, {39937,9}, {42497,10}, {56321,11}, {63489,10}, {81409,11}, {92161,10}, {93185,11}, {96257,12}, {126977,11}, {131073,10}, {138241,11}, {167937,10}, {169473,11}, {169985,10}, {172033,11}, {195585,9}, {196097,11}, {198657,10}, {208897,11}, {217089,12}, {258049,11}, {261121,9}, {262657,10}, {275457,11}, {302081,10}, {307201,11}, {331777,12}, {389121,11}, {425985,13}, {516097,12}, {520193,11}, {598017,12}, {610305,11}, {614401,12}, {651265,11}, {653313,10}, {654337,11}, {673793,10}, {674817,11}, {677889,10}, {679937,11}, {718849,10}, {719873,12}, {782337,11}, {850945,12}, {913409,11}, {925697,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1244161,12}, {1306625,11}, {1310721,12}, {1327105,11}, {1347585,12}, {1355777,11}, {1366017,12}, {1439745,13}, {1564673,12}, {1835009,14}, {1900545,12}, {1904641,14}, {2080769,13}, {2088961,12}, {2488321,13}, {2613249,12}, {2879489,13}, {2932737,12}, {2940929,13}, {3137537,12}, {3403777,13}, {3661825,12}, {3928065,14}, {4177921,13}, {4186113,12}, {4452353,13}, {4710401,12}, {4978689,13}, {5234689,12}, {5500929,13}, {5758977,14}, {6275073,13}, {7856129,15}, {8355841,14}, {8372225,13}, {9957377,14}, {MP_SIZE_T_MAX, 0}}
+
+#define SQR_FFT_TABLE2 {{1,4}, {241,5}, {545,6}, {1345,7}, {3201,8}, {6913,9}, {7681,8}, {8961,9}, {9729,8}, {10497,9}, {13825,10}, {15361,9}, {19969,10}, {23553,9}, {28161,11}, {30721,10}, {31745,9}, {34305,10}, {55297,11}, {63489,10}, {80897,11}, {94209,10}, {97281,12}, {126977,11}, {129025,9}, {130049,10}, {138753,11}, {162817,9}, {164353,11}, {170497,10}, {178177,11}, {183297,10}, {184321,11}, {194561,10}, {208897,12}, {219137,11}, {221185,12}, {258049,11}, {261121,9}, {261633,10}, {267777,9}, {268289,11}, {270337,10}, {274945,9}, {276481,10}, {278529,11}, {292865,9}, {293377,10}, {295937,9}, {296449,10}, {306177,9}, {309249,10}, {310273,11}, {328705,12}, {331777,11}, {335873,12}, {344065,11}, {346113,12}, {352257,11}, {356353,12}, {389121,11}, {395265,10}, {398337,11}, {419841,10}, {421889,11}, {423937,13}, {516097,12}, {520193,11}, {546817,10}, {550913,11}, {561153,10}, {563201,11}, {579585,10}, {585729,11}, {621569,12}, {636929,11}, {638977,12}, {651265,11}, {714753,10}, {716801,11}, {718849,12}, {782337,11}, {849921,12}, {913409,11}, {954369,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,12}, {1437697,13}, {1564673,12}, {1961985,14}, {2080769,13}, {2088961,12}, {2486273,13}, {2613249,12}, {2879489,13}, {3137537,12}, {3272705,13}, {3661825,12}, {3928065,14}, {4177921,13}, {4186113,12}, {4452353,13}, {4710401,12}, {4976641,13}, {5234689,12}, {5320705,13}, {5324801,12}, {5447681,13}, {5455873,12}, {5500929,13}, {5758977,14}, {6275073,13}, {6283265,12}, {6549505,13}, {7856129,15}, {8355841,14}, {8372225,13}, {9953281,14}, {MP_SIZE_T_MAX, 0}}
diff --git a/gmp/mpn/x86_64/core2/lshift.asm b/gmp/mpn/x86_64/core2/lshift.asm
index 8ccafeca6c..60518901eb 100644
--- a/gmp/mpn/x86_64/core2/lshift.asm
+++ b/gmp/mpn/x86_64/core2/lshift.asm
@@ -1,83 +1,64 @@
 dnl  x86-64 mpn_lshift optimized for "Core 2".
 
-dnl  Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
+dnl  Copyright 2007 Free Software Foundation, Inc.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	 4.25
-C AMD K10	 4.25
-C Intel P4	14.7
-C Intel core2	 1.27
-C Intel NHM	 1.375	(up to about n = 260, then 1.5)
-C Intel SBR	 1.87
-C Intel atom	 ?
-C VIA nano	 ?
+C K8,K9:	 4.25
+C K10:		 4.25
+C P4:		14.7
+C P6-15:	 1.27
 
 
 C INPUT PARAMETERS
 define(`rp',	`%rdi')
 define(`up',	`%rsi')
 define(`n',	`%rdx')
-define(`cnt',	`%rcx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
+define(`cnt',	`%cl')
 
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_lshift)
-	FUNC_ENTRY(4)
 	lea	-8(rp,n,8), rp
 	lea	-8(up,n,8), up
 
-	mov	R32(%rdx), R32(%rax)
-	and	$3, R32(%rax)
+	mov	%edx, %eax		C eax = low 32 bits of n
+	and	$3, %eax		C eax = n mod 4, picks the entry path
 	jne	L(nb00)
 L(b00):	C n = 4, 8, 12, ...
 	mov	(up), %r10
 	mov	-8(up), %r11
-	xor	R32(%rax), R32(%rax)
-	shld	R8(cnt), %r10, %rax
+	xor	%eax, %eax		C rax = 0 before the shld below
+	shld	%cl, %r10, %rax		C rax = bits shifted out of top limb
 	mov	-16(up), %r8
 	lea	24(rp), rp
 	sub	$4, n
 	jmp	L(00)
 
 L(nb00):C n = 1, 5, 9, ...
-	cmp	$2, R32(%rax)
+	cmp	$2, %eax		C eax is 1, 2 or 3 here
 	jae	L(nb01)
 L(b01):	mov	(up), %r9
-	xor	R32(%rax), R32(%rax)
-	shld	R8(cnt), %r9, %rax
+	xor	%eax, %eax		C rax = 0 before the shld below
+	shld	%cl, %r9, %rax		C rax = bits shifted out of top limb
 	sub	$2, n
 	jb	L(le1)
 	mov	-8(up), %r10
@@ -85,65 +66,62 @@ L(b01):	mov	(up), %r9
 	lea	-8(up), up
 	lea	16(rp), rp
 	jmp	L(01)
-L(le1):	shl	R8(cnt), %r9
+L(le1):	shl	%cl, %r9		C n = 1: only limb, zeros shifted in
 	mov	%r9, (rp)
-	FUNC_EXIT()
 	ret
 
 L(nb01):C n = 2, 6, 10, ...
 	jne	L(b11)
 L(b10):	mov	(up), %r8
 	mov	-8(up), %r9
-	xor	R32(%rax), R32(%rax)
-	shld	R8(cnt), %r8, %rax
+	xor	%eax, %eax		C rax = 0 before the shld below
+	shld	%cl, %r8, %rax		C rax = bits shifted out of top limb
 	sub	$3, n
 	jb	L(le2)
 	mov	-16(up), %r10
 	lea	-16(up), up
 	lea	8(rp), rp
 	jmp	L(10)
-L(le2):	shld	R8(cnt), %r9, %r8
+L(le2):	shld	%cl, %r9, %r8		C n = 2: top limb takes high bits of r9
 	mov	%r8, (rp)
-	shl	R8(cnt), %r9
+	shl	%cl, %r9		C lowest limb, zeros shifted in
 	mov	%r9, -8(rp)
-	FUNC_EXIT()
 	ret
 
 	ALIGN(16)			C performance critical!
 L(b11):	C n = 3, 7, 11, ...
 	mov	(up), %r11
 	mov	-8(up), %r8
-	xor	R32(%rax), R32(%rax)
-	shld	R8(cnt), %r11, %rax
+	xor	%eax, %eax		C rax = 0 before the shld below
+	shld	%cl, %r11, %rax		C rax = bits shifted out of top limb
 	mov	-16(up), %r9
 	lea	-24(up), up
 	sub	$4, n
 	jb	L(end)
 
 	ALIGN(16)
-L(top):	shld	R8(cnt), %r8, %r11
+L(top):	shld	%cl, %r8, %r11		C r11 shifted left, filled from top of r8
 	mov	(up), %r10
 	mov	%r11, (rp)
-L(10):	shld	R8(cnt), %r9, %r8
+L(10):	shld	%cl, %r9, %r8		C loop entry for n mod 4 = 2
 	mov	-8(up), %r11
 	mov	%r8, -8(rp)
-L(01):	shld	R8(cnt), %r10, %r9
+L(01):	shld	%cl, %r10, %r9		C loop entry for n mod 4 = 1
 	mov	-16(up), %r8
 	mov	%r9, -16(rp)
-L(00):	shld	R8(cnt), %r11, %r10
+L(00):	shld	%cl, %r11, %r10		C loop entry for n mod 4 = 0
 	mov	-24(up), %r9
+	lea	-32(up), up		C move up down by 4 limbs
 	mov	%r10, -24(rp)
-	add	$-32, up
 	lea	-32(rp), rp
 	sub	$4, n
 	jnc	L(top)
 
-L(end):	shld	R8(cnt), %r8, %r11
+L(end):	shld	%cl, %r8, %r11		C wind-down: r11, r8, r9 hold last 3 limbs
 	mov	%r11, (rp)
-	shld	R8(cnt), %r9, %r8
+	shld	%cl, %r9, %r8		C middle limb takes high bits of r9
 	mov	%r8, -8(rp)
-	shl	R8(cnt), %r9
+	shl	%cl, %r9		C lowest limb, zeros shifted in
 	mov	%r9, -16(rp)
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/core2/lshiftc.asm b/gmp/mpn/x86_64/core2/lshiftc.asm
deleted file mode 100644
index 65c7b2f1b8..0000000000
--- a/gmp/mpn/x86_64/core2/lshiftc.asm
+++ /dev/null
@@ -1,159 +0,0 @@
-dnl  x86-64 mpn_lshiftc optimized for "Core 2".
-
-dnl  Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C Intel P4	 ?
-C Intel core2	 1.5
-C Intel NHM	 2.25	(up to about n = 260, then 1.875)
-C Intel SBR	 2.25
-C Intel atom	 ?
-C VIA nano	 ?
-
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`n',	`%rdx')
-define(`cnt',	`%rcx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_lshiftc)
-	FUNC_ENTRY(4)
-	lea	-8(rp,n,8), rp
-	lea	-8(up,n,8), up
-
-	mov	R32(%rdx), R32(%rax)
-	and	$3, R32(%rax)
-	jne	L(nb00)
-L(b00):	C n = 4, 8, 12, ...
-	mov	(up), %r10
-	mov	-8(up), %r11
-	xor	R32(%rax), R32(%rax)
-	shld	R8(cnt), %r10, %rax
-	mov	-16(up), %r8
-	lea	24(rp), rp
-	sub	$4, n
-	jmp	L(00)
-
-L(nb00):C n = 1, 5, 9, ...
-	cmp	$2, R32(%rax)
-	jae	L(nb01)
-L(b01):	mov	(up), %r9
-	xor	R32(%rax), R32(%rax)
-	shld	R8(cnt), %r9, %rax
-	sub	$2, n
-	jb	L(le1)
-	mov	-8(up), %r10
-	mov	-16(up), %r11
-	lea	-8(up), up
-	lea	16(rp), rp
-	jmp	L(01)
-L(le1):	shl	R8(cnt), %r9
-	not	%r9
-	mov	%r9, (rp)
-	FUNC_EXIT()
-	ret
-
-L(nb01):C n = 2, 6, 10, ...
-	jne	L(b11)
-L(b10):	mov	(up), %r8
-	mov	-8(up), %r9
-	xor	R32(%rax), R32(%rax)
-	shld	R8(cnt), %r8, %rax
-	sub	$3, n
-	jb	L(le2)
-	mov	-16(up), %r10
-	lea	-16(up), up
-	lea	8(rp), rp
-	jmp	L(10)
-L(le2):	shld	R8(cnt), %r9, %r8
-	not	%r8
-	mov	%r8, (rp)
-	shl	R8(cnt), %r9
-	not	%r9
-	mov	%r9, -8(rp)
-	FUNC_EXIT()
-	ret
-
-	ALIGN(16)			C performance critical!
-L(b11):	C n = 3, 7, 11, ...
-	mov	(up), %r11
-	mov	-8(up), %r8
-	xor	R32(%rax), R32(%rax)
-	shld	R8(cnt), %r11, %rax
-	mov	-16(up), %r9
-	lea	-24(up), up
-	sub	$4, n
-	jb	L(end)
-
-	ALIGN(16)
-L(top):	shld	R8(cnt), %r8, %r11
-	mov	(up), %r10
-	not	%r11
-	mov	%r11, (rp)
-L(10):	shld	R8(cnt), %r9, %r8
-	mov	-8(up), %r11
-	not	%r8
-	mov	%r8, -8(rp)
-L(01):	shld	R8(cnt), %r10, %r9
-	mov	-16(up), %r8
-	not	%r9
-	mov	%r9, -16(rp)
-L(00):	shld	R8(cnt), %r11, %r10
-	mov	-24(up), %r9
-	not	%r10
-	mov	%r10, -24(rp)
-	add	$-32, up
-	lea	-32(rp), rp
-	sub	$4, n
-	jnc	L(top)
-
-L(end):	shld	R8(cnt), %r8, %r11
-	not	%r11
-	mov	%r11, (rp)
-	shld	R8(cnt), %r9, %r8
-	not	%r8
-	mov	%r8, -8(rp)
-	shl	R8(cnt), %r9
-	not	%r9
-	mov	%r9, -16(rp)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/core2/mul_basecase.asm b/gmp/mpn/x86_64/core2/mul_basecase.asm
deleted file mode 100644
index d16be852f7..0000000000
--- a/gmp/mpn/x86_64/core2/mul_basecase.asm
+++ /dev/null
@@ -1,975 +0,0 @@
-dnl  X86-64 mpn_mul_basecase optimised for Intel Nehalem/Westmere.
-dnl  It also seems good for Conroe/Wolfdale.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb	mul_1		mul_2		mul_3		addmul_2
-C AMD K8,K9
-C AMD K10
-C AMD bull
-C AMD pile
-C AMD steam
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel core	 4.0		 4.0		 -		4.18-4.25
-C Intel NHM	 3.75		 3.8		 -		4.06-4.2
-C Intel SBR
-C Intel IBR
-C Intel HWL
-C Intel BWL
-C Intel atom
-C VIA nano
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-C Code structure:
-C
-C
-C               m_1(0m4)        m_1(1m4)        m_1(2m4)        m_1(3m4)
-C                  |               |               |               |
-C        m_2(0m4)  |     m_2(1m4)  |     m_2(2m4)  |     m_2(3m4)  |
-C           |      /        |      /        |      /        |      /
-C           |     /         |     /         |     /         |     /
-C           |    /          |    /          |    /          |    /
-C          \|/ |/_         \|/ |/_         \|/ |/_         \|/ |/_
-C             _____           _____           _____           _____
-C            /     \         /     \         /     \         /     \
-C          \|/      |      \|/      |      \|/      |      \|/      |
-C        am_2(0m4)  |    am_2(1m4)  |    am_2(2m4)  |    am_2(3m4)  |
-C           \      /|\      \      /|\      \      /|\      \      /|\
-C            \_____/         \_____/         \_____/         \_____/
-
-C TODO
-C  * Tune.  None done so far.
-C  * Currently 2687 bytes, making it smaller would be nice.
-C  * Implement some basecases, say for un < 4.
-C  * Try zeroing with xor in m2 loops.
-C  * Try re-rolling the m2 loops to avoid the current 9 insn code duplication
-C    between loop header and wind-down code.
-C  * Consider adc reg,reg instead of adc $0,reg in m2 loops.  This save a byte.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-C Define this to $1 to use late loop index variable as zero, $2 to use an
-C explicit $0.
-define(`Z',`$1')
-
-define(`rp',       `%rdi')
-define(`up',       `%rsi')
-define(`un_param', `%rdx')
-define(`vp_param', `%rcx')	C FIXME reallocate vp to rcx but watch performance!
-define(`vn_param', `%r8')
-
-define(`un',       `%r9')
-define(`vn',       `(%rsp)')
-
-define(`v0',       `%r10')
-define(`v1',       `%r11')
-define(`w0',       `%rbx')
-define(`w1',       `%rcx')
-define(`w2',       `%rbp')
-define(`w3',       `%r12')
-define(`i',        `%r13')
-define(`vp',       `%r14')
-
-define(`X0',       `%r8')
-define(`X1',       `%r15')
-
-C rax rbx rcx rdx rdi rsi rbp r8 r9 r10 r11 r12 r13 r14 r15
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-define(`ALIGNx', `ALIGN(16)')
-
-define(`N', 85)
-ifdef(`N',,`define(`N',0)')
-define(`MOV', `ifelse(eval(N & $3),0,`mov	$1, $2',`lea	($1), $2')')
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_mul_basecase)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')
-	mov	(up), %rax		C shared for mul_1 and mul_2
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-
-	mov	(vp_param), v0		C shared for mul_1 and mul_2
-
-	xor	un, un
-	sub	un_param, un		C un = -un_param
-
-	lea	(up,un_param,8), up
-	lea	(rp,un_param,8), rp
-
-	mul	v0			C shared for mul_1 and mul_2
-
-	test	$1, R8(vn_param)
-	jz	L(m2)
-
-	lea	8(vp_param), vp		C FIXME: delay until known needed
-
-	test	$1, R8(un)
-	jnz	L(m1x1)
-
-L(m1x0):test	$2, R8(un)
-	jnz	L(m1s2)
-
-L(m1s0):
-	lea	(un), i
-	mov	%rax, (rp,un,8)
-	mov	8(up,un,8), %rax
-	mov	%rdx, w0		C FIXME: Use lea?
-	lea	L(do_am0)(%rip), %rbp
-	jmp	L(m1e0)
-
-L(m1s2):
-	lea	2(un), i
-	mov	%rax, (rp,un,8)
-	mov	8(up,un,8), %rax
-	mov	%rdx, w0		C FIXME: Use lea?
-	mul	v0
-	lea	L(do_am2)(%rip), %rbp
-	test	i, i
-	jnz	L(m1e2)
-	add	%rax, w0
-	adc	$0, %rdx
-	mov	w0, I(-8(rp),8(rp,un,8))
-	mov	%rdx, I((rp),16(rp,un,8))
-	jmp	L(ret2)
-
-L(m1x1):test	$2, R8(un)
-	jz	L(m1s3)
-
-L(m1s1):
-	lea	1(un), i
-	mov	%rax, (rp,un,8)
-	test	i, i
-	jz	L(1)
-	mov	8(up,un,8), %rax
-	mov	%rdx, w1		C FIXME: Use lea?
-	lea	L(do_am1)(%rip), %rbp
-	jmp	L(m1e1)
-L(1):	mov	%rdx, I((rp),8(rp,un,8))
-	jmp	L(ret2)
-
-L(m1s3):
-	lea	-1(un), i
-	mov	%rax, (rp,un,8)
-	mov	8(up,un,8), %rax
-	mov	%rdx, w1		C FIXME: Use lea?
-	lea	L(do_am3)(%rip), %rbp
-	jmp	L(m1e3)
-
-	ALIGNx
-L(m1top):
-	mul	v0
-	mov	w1, -16(rp,i,8)
-L(m1e2):xor	R32(w1), R32(w1)
-	add	%rax, w0
-	mov	(up,i,8), %rax
-	adc	%rdx, w1
-	mov	w0, -8(rp,i,8)
-L(m1e1):xor	R32(w0), R32(w0)
-	mul	v0
-	add	%rax, w1
-	mov	8(up,i,8), %rax
-	adc	%rdx, w0
-	mov	w1, (rp,i,8)
-L(m1e0):xor	R32(w1), R32(w1)
-	mul	v0
-	add	%rax, w0
-	mov	16(up,i,8), %rax
-	adc	%rdx, w1
-	mov	w0, 8(rp,i,8)
-L(m1e3):xor	R32(w0), R32(w0)
-	mul	v0
-	add	%rax, w1
-	mov	24(up,i,8), %rax
-	adc	%rdx, w0
-	add	$4, i
-	js	L(m1top)
-
-	mul	v0
-	mov	w1, I(-16(rp),-16(rp,i,8))
-	add	%rax, w0
-	adc	$0, %rdx
-	mov	w0, I(-8(rp),-8(rp,i,8))
-	mov	%rdx, I((rp),(rp,i,8))
-
-	dec	vn_param
-	jz	L(ret2)
-	lea	-8(rp), rp
-	jmp	*%rbp
-
-L(m2):
-	mov	8(vp_param), v1
-	lea	16(vp_param), vp	C FIXME: delay until known needed
-
-	test	$1, R8(un)
-	jnz	L(bx1)
-
-L(bx0):	test	$2, R8(un)
-	jnz	L(b10)
-
-L(b00):	lea	(un), i
-	mov	%rax, (rp,un,8)
-	mov	%rdx, w1		C FIXME: Use lea?
-	mov	(up,un,8), %rax
-	mov	$0, R32(w2)
-	jmp	L(m2e0)
-
-L(b10):	lea	-2(un), i
-	mov	%rax, w2		C FIXME: Use lea?
-	mov	(up,un,8), %rax
-	mov	%rdx, w3		C FIXME: Use lea?
-	mov	$0, R32(w0)
-	jmp	L(m2e2)
-
-L(bx1):	test	$2, R8(un)
-	jz	L(b11)
-
-L(b01):	lea	1(un), i
-	mov	%rax, (rp,un,8)
-	mov	(up,un,8), %rax
-	mov	%rdx, w0		C FIXME: Use lea?
-	mov	$0, R32(w1)
-	jmp	L(m2e1)
-
-L(b11):	lea	-1(un), i
-	mov	%rax, w1		C FIXME: Use lea?
-	mov	(up,un,8), %rax
-	mov	%rdx, w2		C FIXME: Use lea?
-	mov	$0, R32(w3)
-	jmp	L(m2e3)
-
-	ALIGNx
-L(m2top0):
-	mul	v0
-	add	%rax, w3
-	mov	-8(up,i,8), %rax
-	mov	w3, -8(rp,i,8)
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-	mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	$0, R32(w2)
-	mov	(up,i,8), %rax
-	mul	v0
-	add	%rax, w0
-	mov	w0, (rp,i,8)
-	adc	%rdx, w1
-	mov	(up,i,8), %rax
-	adc	$0, R32(w2)
-L(m2e0):mul	v1
-	add	%rax, w1
-	adc	%rdx, w2
-	mov	8(up,i,8), %rax
-	mul	v0
-	mov	$0, R32(w3)
-	add	%rax, w1
-	adc	%rdx, w2
-	adc	$0, R32(w3)
-	mov	8(up,i,8), %rax
-	mul	v1
-	add	%rax, w2
-	mov	w1, 8(rp,i,8)
-	adc	%rdx, w3
-	mov	$0, R32(w0)
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	%rax, w2
-	mov	16(up,i,8), %rax
-	adc	%rdx, w3
-	adc	$0, R32(w0)
-	mul	v1
-	mov	$0, R32(w1)
-	add	%rax, w3
-	mov	24(up,i,8), %rax
-	mov	w2, 16(rp,i,8)
-	adc	%rdx, w0
-	add	$4, i
-	js	L(m2top0)
-
-	mul	v0
-	add	%rax, w3
-	mov	I(-8(up),-8(up,i,8)), %rax
-	mov	w3, I(-8(rp),-8(rp,i,8))
-	adc	%rdx, w0
-	adc	R32(w1), R32(w1)
-	mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	w0, I((rp),(rp,i,8))
-	mov	w1, I(8(rp),8(rp,i,8))
-
-	add	$-2, vn_param
-	jz	L(ret2)
-
-L(do_am0):
-	push	%r15
-	push	vn_param
-
-L(olo0):
-	mov	(vp), v0
-	mov	8(vp), v1
-	lea	16(vp), vp
-	lea	16(rp), rp
-	mov	(up,un,8), %rax
-C	lea	0(un), i
-	mov	un, i
-	mul	v0
-	mov	%rax, X0
-	mov	(up,un,8), %rax
-	MOV(	%rdx, X1, 2)
-	mul	v1
-	MOV(	%rdx, w0, 4)
-	mov	(rp,un,8), w2
-	mov	%rax, w3
-	jmp	L(lo0)
-
-	ALIGNx
-L(am2top0):
-	mul	v1
-	add	w0, w1
-	adc	%rax, w2
-	mov	(up,i,8), %rax
-	MOV(	%rdx, w3, 1)
-	adc	$0, w3
-	mul	v0
-	add	w1, X1
-	mov	X1, -8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 2)
-	adc	$0, X1
-	mov	(up,i,8), %rax
-	mul	v1
-	MOV(	%rdx, w0, 4)
-	mov	(rp,i,8), w1
-	add	w1, w2
-	adc	%rax, w3
-	adc	$0, w0
-L(lo0):	mov	8(up,i,8), %rax
-	mul	v0
-	add	w2, X0
-	adc	%rax, X1
-	mov	X0, (rp,i,8)
-	MOV(	%rdx, X0, 8)
-	adc	$0, X0
-	mov	8(up,i,8), %rax
-	mov	8(rp,i,8), w2
-	mul	v1
-	add	w2, w3
-	adc	%rax, w0
-	MOV(	%rdx, w1, 16)
-	adc	$0, w1
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	w3, X1
-	mov	X1, 8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 32)
-	mov	16(rp,i,8), w3
-	adc	$0, X1
-	mov	16(up,i,8), %rax
-	mul	v1
-	add	w3, w0
-	MOV(	%rdx, w2, 64)
-	adc	%rax, w1
-	mov	24(up,i,8), %rax
-	adc	$0, w2
-	mul	v0
-	add	w0, X0
-	mov	X0, 16(rp,i,8)
-	MOV(	%rdx, X0, 128)
-	adc	%rax, X1
-	mov	24(up,i,8), %rax
-	mov	24(rp,i,8), w0
-	adc	$0, X0
-	add	$4, i
-	jnc	L(am2top0)
-
-	mul	v1
-	add	w0, w1
-	adc	%rax, w2
-	adc	Z(i,$0), %rdx
-	add	w1, X1
-	adc	Z(i,$0), X0
-	mov	X1, I(-8(rp),-8(rp,i,8))
-	add	w2, X0
-	mov	X0, I((rp),(rp,i,8))
-	adc	Z(i,$0), %rdx
-	mov	%rdx, I(8(rp),8(rp,i,8))
-
-	addl	$-2, vn
-	jnz	L(olo0)
-
-L(ret):	pop	%rax
-	pop	%r15
-L(ret2):pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-
-	ALIGNx
-L(m2top1):
-	mul	v0
-	add	%rax, w3
-	mov	-8(up,i,8), %rax
-	mov	w3, -8(rp,i,8)
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-L(m2e1):mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	$0, R32(w2)
-	mov	(up,i,8), %rax
-	mul	v0
-	add	%rax, w0
-	mov	w0, (rp,i,8)
-	adc	%rdx, w1
-	mov	(up,i,8), %rax
-	adc	$0, R32(w2)
-	mul	v1
-	add	%rax, w1
-	adc	%rdx, w2
-	mov	8(up,i,8), %rax
-	mul	v0
-	mov	$0, R32(w3)
-	add	%rax, w1
-	adc	%rdx, w2
-	adc	$0, R32(w3)
-	mov	8(up,i,8), %rax
-	mul	v1
-	add	%rax, w2
-	mov	w1, 8(rp,i,8)
-	adc	%rdx, w3
-	mov	$0, R32(w0)
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	%rax, w2
-	mov	16(up,i,8), %rax
-	adc	%rdx, w3
-	adc	$0, R32(w0)
-	mul	v1
-	mov	$0, R32(w1)
-	add	%rax, w3
-	mov	24(up,i,8), %rax
-	mov	w2, 16(rp,i,8)
-	adc	%rdx, w0
-	add	$4, i
-	js	L(m2top1)
-
-	mul	v0
-	add	%rax, w3
-	mov	I(-8(up),-8(up,i,8)), %rax
-	mov	w3, I(-8(rp),-8(rp,i,8))
-	adc	%rdx, w0
-	adc	R32(w1), R32(w1)
-	mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	w0, I((rp),(rp,i,8))
-	mov	w1, I(8(rp),8(rp,i,8))
-
-	add	$-2, vn_param
-	jz	L(ret2)
-
-L(do_am1):
-	push	%r15
-	push	vn_param
-
-L(olo1):
-	mov	(vp), v0
-	mov	8(vp), v1
-	lea	16(vp), vp
-	lea	16(rp), rp
-	mov	(up,un,8), %rax
-	lea	1(un), i
-	mul	v0
-	mov	%rax, X1
-	MOV(	%rdx, X0, 128)
-	mov	(up,un,8), %rax
-	mov	(rp,un,8), w1
-	mul	v1
-	mov	%rax, w2
-	mov	8(up,un,8), %rax
-	MOV(	%rdx, w3, 1)
-	jmp	L(lo1)
-
-	ALIGNx
-L(am2top1):
-	mul	v1
-	add	w0, w1
-	adc	%rax, w2
-	mov	(up,i,8), %rax
-	MOV(	%rdx, w3, 1)
-	adc	$0, w3
-L(lo1):	mul	v0
-	add	w1, X1
-	mov	X1, -8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 2)
-	adc	$0, X1
-	mov	(up,i,8), %rax
-	mul	v1
-	MOV(	%rdx, w0, 4)
-	mov	(rp,i,8), w1
-	add	w1, w2
-	adc	%rax, w3
-	adc	$0, w0
-	mov	8(up,i,8), %rax
-	mul	v0
-	add	w2, X0
-	adc	%rax, X1
-	mov	X0, (rp,i,8)
-	MOV(	%rdx, X0, 8)
-	adc	$0, X0
-	mov	8(up,i,8), %rax
-	mov	8(rp,i,8), w2
-	mul	v1
-	add	w2, w3
-	adc	%rax, w0
-	MOV(	%rdx, w1, 16)
-	adc	$0, w1
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	w3, X1
-	mov	X1, 8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 32)
-	mov	16(rp,i,8), w3
-	adc	$0, X1
-	mov	16(up,i,8), %rax
-	mul	v1
-	add	w3, w0
-	MOV(	%rdx, w2, 64)
-	adc	%rax, w1
-	mov	24(up,i,8), %rax
-	adc	$0, w2
-	mul	v0
-	add	w0, X0
-	mov	X0, 16(rp,i,8)
-	MOV(	%rdx, X0, 128)
-	adc	%rax, X1
-	mov	24(up,i,8), %rax
-	mov	24(rp,i,8), w0
-	adc	$0, X0
-	add	$4, i
-	jnc	L(am2top1)
-
-	mul	v1
-	add	w0, w1
-	adc	%rax, w2
-	adc	Z(i,$0), %rdx
-	add	w1, X1
-	adc	Z(i,$0), X0
-	mov	X1, I(-8(rp),-8(rp,i,8))
-	add	w2, X0
-	mov	X0, I((rp),(rp,i,8))
-	adc	Z(i,$0), %rdx
-	mov	%rdx, I(8(rp),8(rp,i,8))
-
-	addl	$-2, vn
-	jnz	L(olo1)
-
-	pop	%rax
-	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-
-	ALIGNx
-L(m2top2):
-	mul	v0
-	add	%rax, w3
-	mov	-8(up,i,8), %rax
-	mov	w3, -8(rp,i,8)
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-	mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	$0, R32(w2)
-	mov	(up,i,8), %rax
-	mul	v0
-	add	%rax, w0
-	mov	w0, (rp,i,8)
-	adc	%rdx, w1
-	mov	(up,i,8), %rax
-	adc	$0, R32(w2)
-	mul	v1
-	add	%rax, w1
-	adc	%rdx, w2
-	mov	8(up,i,8), %rax
-	mul	v0
-	mov	$0, R32(w3)
-	add	%rax, w1
-	adc	%rdx, w2
-	adc	$0, R32(w3)
-	mov	8(up,i,8), %rax
-	mul	v1
-	add	%rax, w2
-	mov	w1, 8(rp,i,8)
-	adc	%rdx, w3
-	mov	$0, R32(w0)
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	%rax, w2
-	mov	16(up,i,8), %rax
-	adc	%rdx, w3
-	adc	$0, R32(w0)
-L(m2e2):mul	v1
-	mov	$0, R32(w1)
-	add	%rax, w3
-	mov	24(up,i,8), %rax
-	mov	w2, 16(rp,i,8)
-	adc	%rdx, w0
-	add	$4, i
-	js	L(m2top2)
-
-	mul	v0
-	add	%rax, w3
-	mov	I(-8(up),-8(up,i,8)), %rax
-	mov	w3, I(-8(rp),-8(rp,i,8))
-	adc	%rdx, w0
-	adc	R32(w1), R32(w1)
-	mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	w0, I((rp),(rp,i,8))
-	mov	w1, I(8(rp),8(rp,i,8))
-
-	add	$-2, vn_param
-	jz	L(ret2)
-
-L(do_am2):
-	push	%r15
-	push	vn_param
-
-L(olo2):
-	mov	(vp), v0
-	mov	8(vp), v1
-	lea	16(vp), vp
-	lea	16(rp), rp
-	mov	(up,un,8), %rax
-	lea	-2(un), i
-	mul	v0
-	mov	%rax, X0
-	MOV(	%rdx, X1, 32)
-	mov	(up,un,8), %rax
-	mov	(rp,un,8), w0
-	mul	v1
-	mov	%rax, w1
-	lea	(%rdx), w2
-	mov	8(up,un,8), %rax
-	jmp	L(lo2)
-
-	ALIGNx
-L(am2top2):
-	mul	v1
-	add	w0, w1
-	adc	%rax, w2
-	mov	(up,i,8), %rax
-	MOV(	%rdx, w3, 1)
-	adc	$0, w3
-	mul	v0
-	add	w1, X1
-	mov	X1, -8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 2)
-	adc	$0, X1
-	mov	(up,i,8), %rax
-	mul	v1
-	MOV(	%rdx, w0, 4)
-	mov	(rp,i,8), w1
-	add	w1, w2
-	adc	%rax, w3
-	adc	$0, w0
-	mov	8(up,i,8), %rax
-	mul	v0
-	add	w2, X0
-	adc	%rax, X1
-	mov	X0, (rp,i,8)
-	MOV(	%rdx, X0, 8)
-	adc	$0, X0
-	mov	8(up,i,8), %rax
-	mov	8(rp,i,8), w2
-	mul	v1
-	add	w2, w3
-	adc	%rax, w0
-	MOV(	%rdx, w1, 16)
-	adc	$0, w1
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	w3, X1
-	mov	X1, 8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 32)
-	mov	16(rp,i,8), w3
-	adc	$0, X1
-	mov	16(up,i,8), %rax
-	mul	v1
-	add	w3, w0
-	MOV(	%rdx, w2, 64)
-	adc	%rax, w1
-	mov	24(up,i,8), %rax
-	adc	$0, w2
-L(lo2):	mul	v0
-	add	w0, X0
-	mov	X0, 16(rp,i,8)
-	MOV(	%rdx, X0, 128)
-	adc	%rax, X1
-	mov	24(up,i,8), %rax
-	mov	24(rp,i,8), w0
-	adc	$0, X0
-	add	$4, i
-	jnc	L(am2top2)
-
-	mul	v1
-	add	w0, w1
-	adc	%rax, w2
-	adc	Z(i,$0), %rdx
-	add	w1, X1
-	adc	Z(i,$0), X0
-	mov	X1, I(-8(rp),-8(rp,i,8))
-	add	w2, X0
-	mov	X0, I((rp),(rp,i,8))
-	adc	Z(i,$0), %rdx
-	mov	%rdx, I(8(rp),8(rp,i,8))
-
-	addl	$-2, vn
-	jnz	L(olo2)
-
-	pop	%rax
-	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-
-	ALIGNx
-L(m2top3):
-	mul	v0
-	add	%rax, w3
-	mov	-8(up,i,8), %rax
-	mov	w3, -8(rp,i,8)
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-	mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	$0, R32(w2)
-	mov	(up,i,8), %rax
-	mul	v0
-	add	%rax, w0
-	mov	w0, (rp,i,8)
-	adc	%rdx, w1
-	mov	(up,i,8), %rax
-	adc	$0, R32(w2)
-	mul	v1
-	add	%rax, w1
-	adc	%rdx, w2
-	mov	8(up,i,8), %rax
-	mul	v0
-	mov	$0, R32(w3)
-	add	%rax, w1
-	adc	%rdx, w2
-	adc	$0, R32(w3)
-	mov	8(up,i,8), %rax
-L(m2e3):mul	v1
-	add	%rax, w2
-	mov	w1, 8(rp,i,8)
-	adc	%rdx, w3
-	mov	$0, R32(w0)
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	%rax, w2
-	mov	16(up,i,8), %rax
-	adc	%rdx, w3
-	adc	$0, R32(w0)
-	mul	v1
-	mov	$0, R32(w1)
-	add	%rax, w3
-	mov	24(up,i,8), %rax
-	mov	w2, 16(rp,i,8)
-	adc	%rdx, w0
-	add	$4, i
-	js	L(m2top3)
-
-	mul	v0
-	add	%rax, w3
-	mov	I(-8(up),-8(up,i,8)), %rax
-	mov	w3, I(-8(rp),-8(rp,i,8))
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-	mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	w0, I((rp),(rp,i,8))
-	mov	w1, I(8(rp),8(rp,i,8))
-
-	add	$-2, vn_param
-	jz	L(ret2)
-
-L(do_am3):
-	push	%r15
-	push	vn_param
-
-L(olo3):
-	mov	(vp), v0
-	mov	8(vp), v1
-	lea	16(vp), vp
-	lea	16(rp), rp
-	mov	(up,un,8), %rax
-	lea	-1(un), i
-	mul	v0
-	mov	%rax, X1
-	MOV(	%rdx, X0, 8)
-	mov	(up,un,8), %rax
-	mov	(rp,un,8), w3
-	mul	v1
-	mov	%rax, w0
-	MOV(	%rdx, w1, 16)
-	mov	8(up,un,8), %rax
-	jmp	L(lo3)
-
-	ALIGNx
-L(am2top3):
-	mul	v1
-	add	w0, w1
-	adc	%rax, w2
-	mov	(up,i,8), %rax
-	MOV(	%rdx, w3, 1)
-	adc	$0, w3
-	mul	v0
-	add	w1, X1
-	mov	X1, -8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 2)
-	adc	$0, X1
-	mov	(up,i,8), %rax
-	mul	v1
-	MOV(	%rdx, w0, 4)
-	mov	(rp,i,8), w1
-	add	w1, w2
-	adc	%rax, w3
-	adc	$0, w0
-	mov	8(up,i,8), %rax
-	mul	v0
-	add	w2, X0
-	adc	%rax, X1
-	mov	X0, (rp,i,8)
-	MOV(	%rdx, X0, 8)
-	adc	$0, X0
-	mov	8(up,i,8), %rax
-	mov	8(rp,i,8), w2
-	mul	v1
-	add	w2, w3
-	adc	%rax, w0
-	MOV(	%rdx, w1, 16)
-	adc	$0, w1
-	mov	16(up,i,8), %rax
-L(lo3):	mul	v0
-	add	w3, X1
-	mov	X1, 8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 32)
-	mov	16(rp,i,8), w3
-	adc	$0, X1
-	mov	16(up,i,8), %rax
-	mul	v1
-	add	w3, w0
-	MOV(	%rdx, w2, 64)
-	adc	%rax, w1
-	mov	24(up,i,8), %rax
-	adc	$0, w2
-	mul	v0
-	add	w0, X0
-	mov	X0, 16(rp,i,8)
-	MOV(	%rdx, X0, 128)
-	adc	%rax, X1
-	mov	24(up,i,8), %rax
-	mov	24(rp,i,8), w0
-	adc	$0, X0
-	add	$4, i
-	jnc	L(am2top3)
-
-	mul	v1
-	add	w0, w1
-	adc	%rax, w2
-	adc	Z(i,$0), %rdx
-	add	w1, X1
-	adc	Z(i,$0), X0
-	mov	X1, I(-8(rp),-8(rp,i,8))
-	add	w2, X0
-	mov	X0, I((rp),(rp,i,8))
-	adc	Z(i,$0), %rdx
-	mov	%rdx, I(8(rp),8(rp,i,8))
-
-	addl	$-2, vn
-	jnz	L(olo3)
-
-	pop	%rax
-	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/core2/mullo_basecase.asm b/gmp/mpn/x86_64/core2/mullo_basecase.asm
deleted file mode 100644
index 0f03d867f6..0000000000
--- a/gmp/mpn/x86_64/core2/mullo_basecase.asm
+++ /dev/null
@@ -1,427 +0,0 @@
-dnl  AMD64 mpn_mullo_basecase optimised for Conroe/Wolfdale/Nehalem/Westmere.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2008, 2009, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb	mul_2		addmul_2
-C AMD K8,K9
-C AMD K10
-C AMD bull
-C AMD pile
-C AMD steam
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel core	 4.0		4.18-4.25
-C Intel NHM	 3.75		4.06-4.2
-C Intel SBR
-C Intel IBR
-C Intel HWL
-C Intel BWL
-C Intel atom
-C VIA nano
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-C TODO
-C   * Implement proper cor2, replacing current cor0.
-C   * Offset n by 2 in order to avoid the outer loop cmp.  (And sqr_basecase?)
-C   * Micro-optimise.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-define(`rp',       `%rdi')
-define(`up',       `%rsi')
-define(`vp_param', `%rdx')
-define(`n_param',  `%rcx')
-
-define(`v0',       `%r10')
-define(`v1',       `%r11')
-define(`w0',       `%rbx')
-define(`w1',       `%rcx')
-define(`w2',       `%rbp')
-define(`w3',       `%r12')
-define(`n',        `%r9')
-define(`i',        `%r13')
-define(`vp',       `%r8')
-
-define(`X0',       `%r14')
-define(`X1',       `%r15')
-
-C rax rbx rcx rdx rdi rsi rbp r8 r9 r10 r11 r12 r13 r14 r15
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-define(`ALIGNx', `ALIGN(16)')
-
-define(`N', 85)	C bit mask consumed by MOV below; 85 = 1+4+16+64
-ifdef(`N',,`define(`N',0)')	C defines N as 0 only when N is undefined; has no effect here because N was defined on the previous line
-define(`MOV', `ifelse(eval(N & $3),0,`mov	$1, $2',`lea	($1), $2')')	C MOV(src, dst, bit): emits lea (src), dst when bit is set in N and mov src, dst otherwise; both copy src to dst
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_mullo_basecase)	C low-half product: rp[] receives only the low n_param limbs of up[] * vp[]
-	FUNC_ENTRY(4)
-
-	mov	(up), %rax		C rax = up[0], the first multiplicand on every path
-	mov	vp_param, vp		C copy vp out of %rdx, which mul overwrites
-
-	cmp	$4, n_param
-	jb	L(small)		C n < 4 is handled by straight-line code without pushes
-
-	mov	(vp_param), v0		C v0 = vp[0]
-	push	%rbx
-	lea	(rp,n_param,8), rp	C point rp at R[un]
-	push	%rbp
-	lea	(up,n_param,8), up	C point up right after U's end
-	push	%r12
-	mov	$0, R32(n)		C FIXME
-	sub	n_param, n		C n = -n_param, a negative index into the end-relative pointers
-	push	%r13
-	mul	v0			C rdx:rax = up[0] * v0
-	mov	8(vp), v1		C v1 = vp[1]
-
-	test	$1, R8(n_param)		C choose the mul_2 loop entry from n_param mod 4
-	jnz	L(m2x1)
-
-L(m2x0):test	$2, R8(n_param)
-	jnz	L(m2b2)
-
-L(m2b0):lea	(n), i			C n_param mod 4 = 0
-	mov	%rax, (rp,n,8)
-	mov	%rdx, w1		C w1 is %rcx, the same register as n_param, which is no longer read
-	mov	(up,n,8), %rax
-	xor	R32(w2), R32(w2)
-	jmp	L(m2e0)
-
-L(m2b2):lea	-2(n), i		C n_param mod 4 = 2
-	mov	%rax, w2
-	mov	(up,n,8), %rax
-	mov	%rdx, w3
-	xor	R32(w0), R32(w0)
-	jmp	L(m2e2)
-
-L(m2x1):test	$2, R8(n_param)
-	jnz	L(m2b3)
-
-L(m2b1):lea	1(n), i			C n_param mod 4 = 1
-	mov	%rax, (rp,n,8)
-	mov	(up,n,8), %rax
-	mov	%rdx, w0
-	xor	R32(w1), R32(w1)
-	jmp	L(m2e1)
-
-L(m2b3):lea	-1(n), i		C n_param mod 4 = 3
-	xor	R32(w3), R32(w3)
-	mov	%rax, w1
-	mov	%rdx, w2
-	mov	(up,n,8), %rax
-	jmp	L(m2e3)
-
-	ALIGNx
-L(m2tp):mul	v0			C first row loop: each pass multiplies four u limbs by v0 and by v1 and stores to rp[]
-	add	%rax, w3
-	mov	-8(up,i,8), %rax
-	mov	w3, -8(rp,i,8)
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-L(m2e1):mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	$0, R32(w2)
-	mov	(up,i,8), %rax
-	mul	v0
-	add	%rax, w0
-	mov	w0, (rp,i,8)
-	adc	%rdx, w1
-	mov	(up,i,8), %rax
-	adc	$0, R32(w2)
-L(m2e0):mul	v1
-	add	%rax, w1
-	adc	%rdx, w2
-	mov	8(up,i,8), %rax
-	mul	v0
-	mov	$0, R32(w3)
-	add	%rax, w1
-	adc	%rdx, w2
-	adc	$0, R32(w3)
-	mov	8(up,i,8), %rax
-L(m2e3):mul	v1
-	add	%rax, w2
-	mov	w1, 8(rp,i,8)
-	adc	%rdx, w3
-	mov	$0, R32(w0)
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	%rax, w2
-	mov	16(up,i,8), %rax
-	adc	%rdx, w3
-	adc	$0, R32(w0)
-L(m2e2):mul	v1
-	mov	$0, R32(w1)		C FIXME: dead in last iteration
-	add	%rax, w3
-	mov	24(up,i,8), %rax
-	mov	w2, 16(rp,i,8)
-	adc	%rdx, w0		C FIXME: dead in last iteration
-	add	$4, i
-	js	L(m2tp)			C loop while i is still negative
-
-L(m2ed):imul	v0, %rax		C top limb of this row: only the low half of the product is used
-	add	w3, %rax
-	mov	%rax, I(-8(rp),-8(rp,i,8))
-
-	add	$2, n			C two v limbs consumed
-	lea	16(vp), vp		C step to the next pair of v limbs
-	lea	-16(up), up		C up - 16 with n + 2 leaves the address (up,n,8) unchanged
-	cmp	$-2, n
-	jge	L(cor1)			C n >= -2: skip the outer loop and go to the corner code
-
-	push	%r14			C X0 and X1 are only used by the outer loop
-	push	%r15
-
-L(outer):
-	mov	(vp), v0		C outer loop: each pass adds up[] times the limb pair v0, v1 into rp[]
-	mov	8(vp), v1
-	mov	(up,n,8), %rax
-	mul	v0
-	test	$1, R8(n)		C the inner loop entry is chosen from the low two bits of n
-	jnz	L(a1x1)
-
-L(a1x0):mov	%rax, X1
-	MOV(	%rdx, X0, 8)
-	mov	(up,n,8), %rax
-	mul	v1
-	test	$2, R8(n)
-	jnz	L(a110)
-
-L(a100):lea	(n), i
-	mov	(rp,n,8), w3
-	mov	%rax, w0
-	MOV(	%rdx, w1, 16)
-	jmp	L(lo0)
-
-L(a110):lea	2(n), i
-	mov	(rp,n,8), w1
-	mov	%rax, w2
-	mov	8(up,n,8), %rax
-	MOV(	%rdx, w3, 1)
-	jmp	L(lo2)
-
-L(a1x1):mov	%rax, X0
-	MOV(	%rdx, X1, 2)
-	mov	(up,n,8), %rax
-	mul	v1
-	test	$2, R8(n)
-	jz	L(a111)
-
-L(a101):lea	1(n), i
-	MOV(	%rdx, w0, 4)
-	mov	(rp,n,8), w2
-	mov	%rax, w3
-	jmp	L(lo1)
-
-L(a111):lea	-1(n), i
-	MOV(	%rdx, w2, 64)
-	mov	%rax, w1
-	mov	(rp,n,8), w0
-	mov	8(up,n,8), %rax
-	jmp	L(lo3)
-
-	ALIGNx
-L(top):	mul	v1			C inner loop: four limbs per pass; the v0 products go through X0/X1 and the v1 products plus loaded rp limbs through w0-w3
-	add	w0, w1
-	adc	%rax, w2
-	mov	-8(up,i,8), %rax
-	MOV(	%rdx, w3, 1)
-	adc	$0, w3
-L(lo2):	mul	v0
-	add	w1, X1
-	mov	X1, -16(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 2)
-	adc	$0, X1
-	mov	-8(up,i,8), %rax
-	mul	v1
-	MOV(	%rdx, w0, 4)
-	mov	-8(rp,i,8), w1
-	add	w1, w2
-	adc	%rax, w3
-	adc	$0, w0
-L(lo1):	mov	(up,i,8), %rax
-	mul	v0
-	add	w2, X0
-	adc	%rax, X1
-	mov	X0, -8(rp,i,8)
-	MOV(	%rdx, X0, 8)
-	adc	$0, X0
-	mov	(up,i,8), %rax
-	mov	(rp,i,8), w2
-	mul	v1
-	add	w2, w3
-	adc	%rax, w0
-	MOV(	%rdx, w1, 16)
-	adc	$0, w1
-L(lo0):	mov	8(up,i,8), %rax
-	mul	v0
-	add	w3, X1
-	mov	X1, (rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 32)
-	mov	8(rp,i,8), w3
-	adc	$0, X1
-	mov	8(up,i,8), %rax
-	mul	v1
-	add	w3, w0
-	MOV(	%rdx, w2, 64)
-	adc	%rax, w1
-	mov	16(up,i,8), %rax
-	adc	$0, w2
-L(lo3):	mul	v0
-	add	w0, X0
-	mov	X0, 8(rp,i,8)
-	MOV(	%rdx, X0, 128)
-	adc	%rax, X1
-	mov	16(up,i,8), %rax
-	mov	16(rp,i,8), w0
-	adc	$0, X0
-	add	$4, i
-	jnc	L(top)			C exit when adding 4 to the negative index i carries out
-
-L(end):	imul	v1, %rax		C row wind-down: the last two products use imul because only their low halves are used
-	add	w0, w1
-	adc	%rax, w2
-	mov	I(-8(up),-8(up,i,8)), %rax
-	imul	v0, %rax
-	add	w1, X1
-	mov	X1, I(-16(rp),-16(rp,i,8))
-	adc	X0, %rax
-	mov	I(-8(rp),-8(rp,i,8)), w1
-	add	w1, w2
-	add	w2, %rax
-	mov	%rax, I(-8(rp),-8(rp,i,8))
-
-	add	$2, n			C same row advance as after the first row above
-	lea	16(vp), vp
-	lea	-16(up), up
-	cmp	$-2, n
-	jl	L(outer)
-
-	pop	%r15			C pop does not modify flags, so jnz below still tests the cmp above
-	pop	%r14
-
-	jnz	L(cor0)			C n != -2: take the one-limb corner; n = -2 falls into cor1
-
-L(cor1):mov	(vp), v0
-	mov	8(vp), v1
-	mov	-16(up), %rax
-	mul	v0			C u0 x v2
-	add	-16(rp), %rax		C FIXME: rp[0] still available in reg?
-	adc	-8(rp), %rdx		C FIXME: rp[1] still available in reg?
-	mov	-8(up), %rbx
-	imul	v0, %rbx
-	mov	-16(up), %rcx
-	imul	v1, %rcx
-	mov	%rax, -16(rp)
-	add	%rbx, %rcx
-	add	%rdx, %rcx
-	mov	%rcx, -8(rp)
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-L(cor0):mov	(vp), %r11		C one v limb left
-	imul	-8(up), %r11		C low half of that limb times the limb at -8(up)
-	add	%rax, %r11		C rax still holds the value L(end) stored to -8(rp)
-	mov	%r11, -8(rp)
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-	ALIGN(16)
-L(small):
-	cmp	$2, n_param
-	jae	L(gt1)
-L(n1):	imul	(vp_param), %rax	C n < 2: rp[0] = low half of up[0] * vp[0]
-	mov	%rax, (rp)
-	FUNC_EXIT()
-	ret
-L(gt1):	ja	L(gt2)			C flags still come from the cmp $2 above
-L(n2):	mov	(vp_param), %r9
-	mul	%r9			C u0 x v0, full product
-	mov	%rax, (rp)
-	mov	8(up), %rax
-	imul	%r9, %rax		C u1 x v0, low half only
-	add	%rax, %rdx
-	mov	8(vp), %r9
-	mov	(up), %rcx
-	imul	%r9, %rcx		C u0 x v1, low half only
-	add	%rcx, %rdx
-	mov	%rdx, 8(rp)
-	FUNC_EXIT()
-	ret
-L(gt2):
-L(n3):	mov	(vp_param), %r9
-	mul	%r9		C u0 x v0
-	mov	%rax, (rp)
-	mov	%rdx, %r10
-	mov	8(up), %rax
-	mul	%r9		C u1 x v0
-	imul	16(up), %r9	C u2 x v0
-	add	%rax, %r10
-	adc	%rdx, %r9
-	mov	8(vp), %r11
-	mov	(up), %rax
-	mul	%r11		C u0 x v1
-	add	%rax, %r10
-	adc	%rdx, %r9
-	imul	8(up), %r11	C u1 x v1
-	add	%r11, %r9
-	mov	%r10, 8(rp)
-	mov	16(vp), %r10
-	mov	(up), %rax
-	imul	%rax, %r10	C u0 x v2
-	add	%r10, %r9
-	mov	%r9, 16(rp)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/core2/popcount.asm b/gmp/mpn/x86_64/core2/popcount.asm
index e935cf1892..6c22999ff4 100644
--- a/gmp/mpn/x86_64/core2/popcount.asm
+++ b/gmp/mpn/x86_64/core2/popcount.asm
@@ -3,33 +3,21 @@ dnl  x86-64 mpn_popcount optimized for "Core 2".
 dnl  Copyright 2007 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
 
-MULFUNC_PROLOGUE(mpn_popcount)
 include_mpn(`x86/pentium4/sse2/popcount.asm')
diff --git a/gmp/mpn/x86_64/core2/redc_1.asm b/gmp/mpn/x86_64/core2/redc_1.asm
deleted file mode 100644
index d0e96ef1cb..0000000000
--- a/gmp/mpn/x86_64/core2/redc_1.asm
+++ /dev/null
@@ -1,425 +0,0 @@
-dnl  X86-64 mpn_redc_1 optimised for Intel Conroe and Wolfdale.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C AMD bull	 ?
-C AMD pile	 ?
-C AMD steam	 ?
-C AMD bobcat	 ?
-C AMD jaguar	 ?
-C Intel P4	 ?
-C Intel core	 4.5  (fluctuating)
-C Intel NHM	 ?
-C Intel SBR	 ?
-C Intel IBR	 ?
-C Intel HWL	 ?
-C Intel BWL	 ?
-C Intel atom	 ?
-C VIA nano	 ?
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-C TODO
-C  * Micro-optimise, none performed thus far.
-C  * Consider inlining mpn_add_n.
-C  * Single basecases out before the pushes.
-C  * Keep up[i] in registers for basecases (might require pushes).
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-define(`rp',          `%rdi')   C rcx
-define(`up',          `%rsi')   C rdx
-define(`mp_param',    `%rdx')   C r8
-define(`n',           `%rcx')   C r9
-define(`u0inv',       `%r8')    C stack
-
-define(`i',           `%r14')
-define(`j',           `%r15')
-define(`mp',          `%r12')
-define(`q0',          `%r13')
-
-C rax rbx rcx rdx rdi rsi rbp r8 r9 r10 r11 r12 r13 r14 r15
-C  X  q0'  n   X  rp  up      u0i           mp   q0 i   j
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-define(`ALIGNx', `ALIGN(16)')
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_redc_1)	C each outer pass forms q0 = (low limb of up) * u0inv and adds q0 * mp[] into up[]; the result is written to rp[]
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')	C DOS64 only: load u0inv from the stack into %r8
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	mov	(up), q0		C q0 = up[0]
-	mov	n, j			C outer loop induction var
-	lea	(mp_param,n,8), mp	C mp points just past the n limbs at mp_param
-	lea	-16(up,n,8), up		C up = up + 8*n - 16, so 16(up,n,8) is up[0] once n is negated
-	neg	n			C n becomes a negative index
-	imul	u0inv, q0		C first iteration q0
-
-	test	$1, R8(n)		C odd n falls into b1, even n goes to b0
-	jz	L(b0)
-
-L(b1):	cmp	$-1, R32(n)
-	jz	L(n1)			C n = 1 has its own straight-line code
-	cmp	$-3, R32(n)
-	jz	L(n3)			C n = 3 has its own loop
-
-	push	rp			C %rdi is used as scratch in the loops below; restored at L(cj)
-
-L(otp1):lea	3(n), i
-	mov	(mp,n,8), %rax
-	mul	q0
-	lea	(%rax), %rbp		C lea is used here as a plain register copy
-	mov	8(mp,n,8), %rax
-	lea	(%rdx), %r9
-	mul	q0
-	lea	(%rax), %r11
-	mov	16(mp,n,8), %rax
-	mov	16(up,n,8), %r10
-	lea	(%rdx), %rdi
-	mul	q0
-	add	%rbp, %r10
-	lea	(%rax), %rbp
-	mov	24(mp,n,8), %rax
-	adc	%r9, %r11
-	mov	24(up,n,8), %rbx
-	lea	(%rdx), %r9
-	adc	$0, %rdi
-	mul	q0
-	add	%r11, %rbx
-	lea	(%rax), %r11
-	mov	32(mp,n,8), %rax
-	adc	%rdi, %rbp
-	mov	%rbx, 24(up,n,8)
-	mov	32(up,n,8), %r10
-	lea	(%rdx), %rdi
-	adc	$0, %r9
-	imul	u0inv, %rbx		C next q limb
-	add	$2, i
-	jns	L(ed1)			C skip the inner loop when i is already non-negative
-
-	ALIGNx
-L(tp1):	mul	q0			C inner loop: two limbs of q0 * mp[] added into up[] per pass
-	add	%rbp, %r10
-	lea	(%rax), %rbp
-	mov	(mp,i,8), %rax
-	adc	%r9, %r11
-	mov	%r10, -8(up,i,8)
-	mov	(up,i,8), %r10
-	lea	(%rdx), %r9
-	adc	$0, %rdi
-	mul	q0
-	add	%r11, %r10
-	lea	(%rax), %r11
-	mov	8(mp,i,8), %rax
-	adc	%rdi, %rbp
-	mov	%r10, (up,i,8)
-	mov	8(up,i,8), %r10
-	lea	(%rdx), %rdi
-	adc	$0, %r9
-	add	$2, i
-	js	L(tp1)
-
-L(ed1):	mul	q0			C wind-down: last product plus the pending partial sums
-	add	%rbp, %r10
-	adc	%r9, %r11
-	mov	%r10, I(-8(up),-8(up,i,8))
-	mov	I((up),(up,i,8)), %r10
-	adc	$0, %rdi
-	add	%r11, %r10
-	adc	%rdi, %rax
-	mov	%r10, I((up),(up,i,8))
-	mov	I(8(up),8(up,i,8)), %r10
-	adc	$0, %rdx
-	add	%rax, %r10
-	mov	%r10, I(8(up),8(up,i,8))
-	adc	$0, %rdx
-	mov	%rdx, 16(up,n,8)	C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp1)			C j started at n, so the outer loop runs n times
-	jmp	L(cj)
-
-L(b0):	cmp	$-2, R32(n)
-	jz	L(n2)			C n = 2 has its own straight-line code
-	cmp	$-4, R32(n)
-	jz	L(n4)			C n = 4 has its own loop
-
-	push	rp			C %rdi is used as scratch in the loops below; restored at L(cj)
-
-L(otp0):lea	4(n), i
-	mov	(mp,n,8), %rax
-	mul	q0
-	lea	(%rax), %r11
-	mov	8(mp,n,8), %rax
-	lea	(%rdx), %rdi
-	mul	q0
-	lea	(%rax), %rbp
-	mov	16(mp,n,8), %rax
-	mov	16(up,n,8), %r10
-	lea	(%rdx), %r9
-	mul	q0
-	add	%r11, %r10
-	lea	(%rax), %r11
-	mov	24(mp,n,8), %rax
-	adc	%rdi, %rbp
-	mov	24(up,n,8), %rbx
-	lea	(%rdx), %rdi
-	adc	$0, %r9
-	mul	q0
-	add	%rbp, %rbx
-	lea	(%rax), %rbp
-	mov	32(mp,n,8), %rax
-	adc	%r9, %r11
-	mov	%rbx, 24(up,n,8)
-	mov	32(up,n,8), %r10
-	lea	(%rdx), %r9
-	adc	$0, %rdi
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e0)			C enter the inner loop at its second half
-
-	ALIGNx
-L(tp0):	mul	q0			C inner loop: two limbs of q0 * mp[] added into up[] per pass
-	add	%rbp, %r10
-	lea	(%rax), %rbp
-	mov	(mp,i,8), %rax
-	adc	%r9, %r11
-	mov	%r10, -8(up,i,8)
-	mov	(up,i,8), %r10
-	lea	(%rdx), %r9
-	adc	$0, %rdi
-L(e0):	mul	q0
-	add	%r11, %r10
-	lea	(%rax), %r11
-	mov	8(mp,i,8), %rax
-	adc	%rdi, %rbp
-	mov	%r10, (up,i,8)
-	mov	8(up,i,8), %r10
-	lea	(%rdx), %rdi
-	adc	$0, %r9
-	add	$2, i
-	js	L(tp0)
-
-L(ed0):	mul	q0			C wind-down: last product plus the pending partial sums
-	add	%rbp, %r10
-	adc	%r9, %r11
-	mov	%r10, I(-8(up),-8(up,i,8))
-	mov	I((up),(up,i,8)), %r10
-	adc	$0, %rdi
-	add	%r11, %r10
-	adc	%rdi, %rax
-	mov	%r10, I((up),(up,i,8))
-	mov	I(8(up),8(up,i,8)), %r10
-	adc	$0, %rdx
-	add	%rax, %r10
-	mov	%r10, I(8(up),8(up,i,8))
-	adc	$0, %rdx
-	mov	%rdx, 16(up,n,8)	C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp0)			C j started at n, so the outer loop runs n times
-
-L(cj):	lea	16(up), up		C FIXME
-	pop	rp			C restore the rp saved before the outer loop
-L(add_n):
-IFSTD(`	lea	(up,n,8), up		C param 2: up
-	lea	(up,n,8), %rdx		C param 3: up - n
-	neg	R32(n)		')	C param 4: n
-
-IFDOS(`	lea	(up,n,8), %rdx		C param 2: up
-	lea	(%rdx,n,8), %r8		C param 3: up - n
-	neg	R32(n)
-	mov	n, %r9			C param 4: n
-	mov	rp, %rcx	')	C param 1: rp
-
-	CALL(	mpn_add_n)		C rax is not written again before ret, so the callee's rax is what this function returns
-
-L(ret):	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-L(n1):	mov	(mp_param), %rax	C n = 1: a single q0 * mp[0] product
-	mul	q0
-	add	8(up), %rax		C the sum is discarded; only its carry is used
-	adc	16(up), %rdx
-	mov	%rdx, (rp)
-	mov	$0, R32(%rax)		C mov, unlike xor, leaves the carry flag intact
-	adc	R32(%rax), R32(%rax)	C return value = carry out
-	jmp	L(ret)
-
-L(n2):	mov	(mp_param), %rax	C n = 2: both passes unrolled
-	mov	(up), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-8(mp), %rax
-	mov	8(up), %r10
-	mul	q0
-	add	%rax, %r10
-	mov	%rdx, %r11
-	adc	$0, %r11
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	%r10, q0
-	imul	u0inv, q0		C next q0
-	mov	-16(mp), %rax
-	mul	q0
-	add	%rax, %r10
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-8(mp), %rax
-	mov	16(up), %r14
-	mul	q0
-	add	%rax, %r14
-	adc	$0, %rdx
-	add	%r9, %r14
-	adc	$0, %rdx
-	xor	R32(%rax), R32(%rax)	C clear rax before the flag-producing adds below
-	add	%r11, %r14
-	adc	24(up), %rdx
-	mov	%r14, (rp)
-	mov	%rdx, 8(rp)
-	adc	R32(%rax), R32(%rax)	C return value = carry out
-	jmp	L(ret)
-
-	ALIGNx
-L(n3):	mov	-24(mp), %rax		C n = 3: one pass per loop trip, three trips (j = n)
-	mov	-8(up), %r10
-	mul	q0
-	add	%rax, %r10
-	mov	-16(mp), %rax
-	mov	%rdx, %r11
-	adc	$0, %r11
-	mov	(up), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-8(mp), %rax
-	add	%r11, %rbp
-	mov	8(up), %r10
-	adc	$0, %r9
-	mul	q0
-	mov	%rbp, q0
-	imul	u0inv, q0		C next q0
-	add	%rax, %r10
-	mov	%rdx, %r11
-	adc	$0, %r11
-	mov	%rbp, (up)
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	%r10, 8(up)
-	mov	%r11, -8(up)		C up[0]
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(n3)
-
-	mov	-32(up), %rdx		C final add is done inline here instead of calling mpn_add_n
-	mov	-24(up), %rbx
-	xor	R32(%rax), R32(%rax)
-	add	%rbp, %rdx
-	adc	%r10, %rbx
-	adc	8(up), %r11
-	mov	%rdx, (rp)
-	mov	%rbx, 8(rp)
-	mov	%r11, 16(rp)
-	adc	R32(%rax), R32(%rax)	C return value = carry out
-	jmp	L(ret)
-
-	ALIGNx
-L(n4):	mov	-32(mp), %rax		C n = 4: one pass per loop trip, four trips (j = n); rp is not pushed because this loop does not write %rdi
-	mul	q0
-	lea	(%rax), %r11
-	mov	-24(mp), %rax
-	lea	(%rdx), %r14
-	mul	q0
-	lea	(%rax), %rbp
-	mov	-16(mp), %rax
-	mov	-16(up), %r10
-	lea	(%rdx), %r9
-	mul	q0
-	add	%r11, %r10
-	lea	(%rax), %r11
-	mov	-8(mp), %rax
-	adc	%r14, %rbp
-	mov	-8(up), %rbx
-	lea	(%rdx), %r14
-	adc	$0, %r9
-	mul	q0
-	add	%rbp, %rbx
-	adc	%r9, %r11
-	mov	%rbx, -8(up)
-	mov	(up), %r10
-	adc	$0, %r14
-	imul	u0inv, %rbx		C next q limb
-	add	%r11, %r10
-	adc	%r14, %rax
-	mov	%r10, (up)
-	mov	8(up), %r10
-	adc	$0, %rdx
-	add	%rax, %r10
-	mov	%r10, 8(up)
-	adc	$0, %rdx
-	mov	%rdx, -16(up)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(n4)
-	lea	16(up), up		C same adjustment as at L(cj), without the pop
-	jmp	L(add_n)
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/core2/rsh1aors_n.asm b/gmp/mpn/x86_64/core2/rsh1aors_n.asm
deleted file mode 100644
index 27eed3712d..0000000000
--- a/gmp/mpn/x86_64/core2/rsh1aors_n.asm
+++ /dev/null
@@ -1,169 +0,0 @@
-dnl  X86-64 mpn_rsh1add_n, mpn_rsh1sub_n optimised for Intel Conroe/Penryn.
-
-dnl  Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C Intel P4	 ?
-C Intel core2	 3.05
-C Intel NHM	 3.3
-C Intel SBR	 2.5
-C Intel atom	 ?
-C VIA nano	 ?
-
-C TODO
-C  * Loopmix to approach 2.5 c/l on NHM.
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`vp', `%rdx')
-define(`n',  `%rcx')
-
-ifdef(`OPERATION_rsh1add_n', `
-	define(ADDSUB,	      add)
-	define(ADCSBB,	      adc)
-	define(func_n,	      mpn_rsh1add_n)
-	define(func_nc,	      mpn_rsh1add_nc)')
-ifdef(`OPERATION_rsh1sub_n', `
-	define(ADDSUB,	      sub)
-	define(ADCSBB,	      sbb)
-	define(func_n,	      mpn_rsh1sub_n)
-	define(func_nc,	      mpn_rsh1sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func_nc)	C carry-in entry point: handles the first limb with the incoming carry, then joins func_n at L(ent)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')	C DOS64 only: load the carry parameter from the stack into %r8
-	push	%rbx			C same two pushes as func_n, whose epilogue pops them
-	push	%rbp
-
-	neg	%r8			C set C flag from parameter
-	mov	(up), %r8		C r8 = up[0]; mov leaves the carry flag intact
-	ADCSBB	(vp), %r8		C r8 = up[0] +/- vp[0] including the carry-in
-	jmp	L(ent)			C continue in func_n with the flags from ADCSBB
-EPILOGUE()
-
-	ALIGN(16)
-PROLOGUE(func_n)	C rp[] = (up[] +/- vp[]) shifted right by one bit over n limbs; returns the bit shifted out
-	FUNC_ENTRY(4)
-	push	%rbx			C rbx holds the saved carry between groups of limbs
-	push	%rbp			C rbp holds the limb waiting for its top bit
-
-	mov	(up), %r8
-	ADDSUB	(vp), %r8		C r8 = up[0] +/- vp[0]
-L(ent):	sbb	R32(%rbx), R32(%rbx)	C save cy
-	mov	%r8, %rax
-	and	$1, R32(%rax)		C return value
-
-	lea	(up,n,8), up		C point all three pointers past the end of their operands
-	lea	(vp,n,8), vp
-	lea	(rp,n,8), rp
-	mov	R32(n), R32(%rbp)
-	neg	n			C n becomes a negative index
-	and	$3, R32(%rbp)		C rbp = n mod 4 selects the loop entry
-	jz	L(b0)
-	cmp	$2, R32(%rbp)
-	jae	L(n1)
-
-L(b1):	mov	%r8, %rbp		C n mod 4 = 1: only the first limb is pending
-	inc	n
-	js	L(top)			C more limbs remain
-	jmp	L(end)			C n was 1
-
-L(n1):	jnz	L(b3)			C flags from cmp $2: not equal means n mod 4 = 3
-	add	R32(%rbx), R32(%rbx)	C restore cy
-	mov	8(up,n,8), %r11
-	ADCSBB	8(vp,n,8), %r11
-	sbb	R32(%rbx), R32(%rbx)	C save cy
-	mov	%r8, %r10
-	add	$-2, n
-	jmp	L(2)
-
-L(b3):	add	R32(%rbx), R32(%rbx)	C restore cy
-	mov	8(up,n,8), %r10
-	mov	16(up,n,8), %r11
-	ADCSBB	8(vp,n,8), %r10
-	ADCSBB	16(vp,n,8), %r11
-	sbb	R32(%rbx), R32(%rbx)	C save cy
-	mov	%r8, %r9
-	dec	n
-	jmp	L(3)
-
-L(b0):	add	R32(%rbx), R32(%rbx)	C restore cy
-	mov	8(up,n,8), %r9
-	mov	16(up,n,8), %r10
-	mov	24(up,n,8), %r11
-	ADCSBB	8(vp,n,8), %r9
-	ADCSBB	16(vp,n,8), %r10
-	ADCSBB	24(vp,n,8), %r11
-	sbb	R32(%rbx), R32(%rbx)	C save cy
-	jmp	L(4)
-
-	ALIGN(16)
-
-L(top):	add	R32(%rbx), R32(%rbx)	C restore cy
-	mov	(up,n,8), %r8		C main loop: four limbs per pass
-	mov	8(up,n,8), %r9
-	mov	16(up,n,8), %r10
-	mov	24(up,n,8), %r11
-	ADCSBB	(vp,n,8), %r8
-	ADCSBB	8(vp,n,8), %r9
-	ADCSBB	16(vp,n,8), %r10
-	ADCSBB	24(vp,n,8), %r11
-	sbb	R32(%rbx), R32(%rbx)	C save cy
-	shrd	$1, %r8, %rbp		C shift the pending limb right, taking its top bit from the low bit of r8
-	mov	%rbp, -8(rp,n,8)
-L(4):	shrd	$1, %r9, %r8
-	mov	%r8, (rp,n,8)
-L(3):	shrd	$1, %r10, %r9
-	mov	%r9, 8(rp,n,8)
-L(2):	shrd	$1, %r11, %r10
-	mov	%r10, 16(rp,n,8)
-L(1):	add	$4, n
-	mov	%r11, %rbp		C r11 becomes the pending limb; mov leaves the flags from add intact for js
-	js	L(top)
-
-L(end):	shrd	$1, %rbx, %rbp		C the top bit of the last limb is the saved carry, held in the low bit of rbx
-	mov	%rbp, -8(rp)
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/core2/rshift.asm b/gmp/mpn/x86_64/core2/rshift.asm
index ab32ec85df..9a3fc46f9a 100644
--- a/gmp/mpn/x86_64/core2/rshift.asm
+++ b/gmp/mpn/x86_64/core2/rshift.asm
@@ -1,69 +1,50 @@
 dnl  x86-64 mpn_rshift optimized for "Core 2".
 
-dnl  Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
+dnl  Copyright 2007 Free Software Foundation, Inc.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	 4.25
-C AMD K10	 4.25
-C Intel P4	14.7
-C Intel core2	 1.27
-C Intel NHM	 1.375	(up to about n = 260, then 1.5)
-C Intel SBR	 1.77
-C Intel atom	 ?
-C VIA nano	 ?
+C K8,K9:	 4.25
+C K10:		 4.25
+C P4:		14.7
+C P6-15:	 1.27
 
 
 C INPUT PARAMETERS
 define(`rp',	`%rdi')
 define(`up',	`%rsi')
 define(`n',	`%rdx')
-define(`cnt',	`%rcx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
+define(`cnt',	`%cl')
 
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_rshift)
-	FUNC_ENTRY(4)
-	mov	R32(%rdx), R32(%rax)
-	and	$3, R32(%rax)
+	mov	%edx, %eax
+	and	$3, %eax
 	jne	L(nb00)
 L(b00):	C n = 4, 8, 12, ...
 	mov	(up), %r10
 	mov	8(up), %r11
-	xor	R32(%rax), R32(%rax)
-	shrd	R8(cnt), %r10, %rax
+	xor	%eax, %eax
+	shrd	%cl, %r10, %rax
 	mov	16(up), %r8
 	lea	8(up), up
 	lea	-24(rp), rp
@@ -71,11 +52,11 @@ L(b00):	C n = 4, 8, 12, ...
 	jmp	L(00)
 
 L(nb00):C n = 1, 5, 9, ...
-	cmp	$2, R32(%rax)
+	cmp	$2, %eax
 	jae	L(nb01)
 L(b01):	mov	(up), %r9
-	xor	R32(%rax), R32(%rax)
-	shrd	R8(cnt), %r9, %rax
+	xor	%eax, %eax
+	shrd	%cl, %r9, %rax
 	sub	$2, n
 	jb	L(le1)
 	mov	8(up), %r10
@@ -83,65 +64,62 @@ L(b01):	mov	(up), %r9
 	lea	16(up), up
 	lea	-16(rp), rp
 	jmp	L(01)
-L(le1):	shr	R8(cnt), %r9
+L(le1):	shr	%cl, %r9
 	mov	%r9, (rp)
-	FUNC_EXIT()
 	ret
 
 L(nb01):C n = 2, 6, 10, ...
 	jne	L(b11)
 L(b10):	mov	(up), %r8
 	mov	8(up), %r9
-	xor	R32(%rax), R32(%rax)
-	shrd	R8(cnt), %r8, %rax
+	xor	%eax, %eax
+	shrd	%cl, %r8, %rax
 	sub	$3, n
 	jb	L(le2)
 	mov	16(up), %r10
 	lea	24(up), up
 	lea	-8(rp), rp
 	jmp	L(10)
-L(le2):	shrd	R8(cnt), %r9, %r8
+L(le2):	shrd	%cl, %r9, %r8
 	mov	%r8, (rp)
-	shr	R8(cnt), %r9
+	shr	%cl, %r9
 	mov	%r9, 8(rp)
-	FUNC_EXIT()
 	ret
 
 	ALIGN(16)
 L(b11):	C n = 3, 7, 11, ...
 	mov	(up), %r11
 	mov	8(up), %r8
-	xor	R32(%rax), R32(%rax)
-	shrd	R8(cnt), %r11, %rax
+	xor	%eax, %eax
+	shrd	%cl, %r11, %rax
 	mov	16(up), %r9
 	lea	32(up), up
 	sub	$4, n
 	jb	L(end)
 
 	ALIGN(16)
-L(top):	shrd	R8(cnt), %r8, %r11
+L(top):	shrd	%cl, %r8, %r11
 	mov	-8(up), %r10
 	mov	%r11, (rp)
-L(10):	shrd	R8(cnt), %r9, %r8
+L(10):	shrd	%cl, %r9, %r8
 	mov	(up), %r11
 	mov	%r8, 8(rp)
-L(01):	shrd	R8(cnt), %r10, %r9
+L(01):	shrd	%cl, %r10, %r9
 	mov	8(up), %r8
 	mov	%r9, 16(rp)
-L(00):	shrd	R8(cnt), %r11, %r10
+L(00):	shrd	%cl, %r11, %r10
 	mov	16(up), %r9
+	lea	32(up), up
 	mov	%r10, 24(rp)
-	add	$32, up
 	lea	32(rp), rp
 	sub	$4, n
 	jnc	L(top)
 
-L(end):	shrd	R8(cnt), %r8, %r11
+L(end):	shrd	%cl, %r8, %r11
 	mov	%r11, (rp)
-	shrd	R8(cnt), %r9, %r8
+	shrd	%cl, %r9, %r8
 	mov	%r8, 8(rp)
-	shr	R8(cnt), %r9
+	shr	%cl, %r9
 	mov	%r9, 16(rp)
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/core2/sec_tabselect.asm b/gmp/mpn/x86_64/core2/sec_tabselect.asm
deleted file mode 100644
index e4360341d9..0000000000
--- a/gmp/mpn/x86_64/core2/sec_tabselect.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_sec_tabselect.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_sec_tabselect)
-include_mpn(`x86_64/fastsse/sec_tabselect.asm')
diff --git a/gmp/mpn/x86_64/core2/sqr_basecase.asm b/gmp/mpn/x86_64/core2/sqr_basecase.asm
deleted file mode 100644
index a112c1b52e..0000000000
--- a/gmp/mpn/x86_64/core2/sqr_basecase.asm
+++ /dev/null
@@ -1,984 +0,0 @@
-dnl  X86-64 mpn_sqr_basecase optimised for Intel Nehalem/Westmere.
-dnl  It also seems good for Conroe/Wolfdale.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb	mul_2		addmul_2	sqr_diag_addlsh1
-C AMD K8,K9
-C AMD K10
-C AMD bull
-C AMD pile
-C AMD steam
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel core	 4.9		4.18-4.25		 3.87
-C Intel NHM	 3.8		4.06-4.2		 3.5
-C Intel SBR
-C Intel IBR
-C Intel HWL
-C Intel BWL
-C Intel atom
-C VIA nano
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-C Code structure:
-C
-C
-C        m_2(0m4)        m_2(2m4)        m_2(1m4)        m_2(3m4)
-C           |               |               |               |
-C           |               |               |               |
-C           |               |               |               |
-C          \|/             \|/             \|/             \|/
-C              ____________                   ____________
-C             /            \                 /            \
-C            \|/            \               \|/            \
-C         am_2(3m4)       am_2(1m4)       am_2(0m4)       am_2(2m4)
-C            \            /|\                \            /|\
-C             \____________/                  \____________/
-C                       \                        /
-C                        \                      /
-C                         \                    /
-C                       tail(0m2)          tail(1m2)
-C                            \              /
-C                             \            /
-C                            sqr_diag_addlsh1
-
-C TODO
-C  * Tune.  None done so far.
-C  * Currently 2761 bytes, making it smaller would be nice.
-C  * Consider using a jumptab-based entry sequence.  One might even use a mask-
-C    less sequence, if the table is large enough to support tuneup's needs.
-C    The code would be, using non-PIC code,
-C        lea tab(%rip),%rax; jmp *(n,%rax)
-C    or,
-C        lea tab(%rip),%rax; lea (%rip),%rbx; add (n,%rax),%rbx; jmp *%rbx
-C    using PIC code.  The table entries would be Ln1,Ln2,Ln3,Lm0,Lm1,Lm2,Lm3,..
-C    with the last four entries repeated a safe number of times.
-C  * Consider expanding feed-in code in order to avoid zeroing registers.
-C  * Zero consistently with xor.
-C  * Check if using "lea (reg),reg" should be done in more places; we have some
-C    explicit "mov %rax,reg" now.
-C  * Try zeroing with xor in m2 loops.
-C  * Try re-rolling the m2 loops to avoid the current 9 insn code duplication
-C    between loop header and wind-down code.
-C  * Consider adc reg,reg instead of adc $0,reg in m2 loops.  This save a byte.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-C Define this to $1 to use late loop index variable as zero, $2 to use an
-C explicit $0.
-define(`Z',`$1')
-
-define(`rp',       `%rdi')
-define(`up',       `%rsi')
-define(`n_param',  `%rdx')
-
-define(`n',        `%r8')
-
-define(`v0',       `%r10')
-define(`v1',       `%r11')
-define(`w0',       `%rbx')
-define(`w1',       `%rcx')
-define(`w2',       `%rbp')
-define(`w3',       `%r9')
-define(`i',        `%r13')
-
-define(`X0',       `%r12')
-define(`X1',       `%r14')
-
-C rax rbx rcx rdx rdi rsi rbp r8 r9 r10 r11 r12 r13 r14 r15
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-define(`ALIGNx', `ALIGN(16)')
-
-define(`N', 85)
-ifdef(`N',,`define(`N',0)')
-define(`MOV', `ifelse(eval(N & $3),0,`mov	$1, $2',`lea	($1), $2')')
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_sqr_basecase)
-	FUNC_ENTRY(3)
-
-	cmp	$4, n_param
-	jl	L(small)
-
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-
-	mov	(up), v0
-	mov	8(up), %rax
-	mov	%rax, v1
-
-	mov	$1, R32(n)
-	sub	n_param, n		C n = -n_param+1
-	push	n
-
-	lea	(up,n_param,8), up
-	lea	(rp,n_param,8), rp
-
-	mul	v0
-
-	test	$1, R8(n)
-	jnz	L(bx1)
-
-L(bx0):	test	$2, R8(n)
-	mov	%rax, (rp,n,8)
-	jnz	L(b10)
-
-L(b00):	lea	(n), i			C n = 5, 9, ...
-	mov	%rdx, w1		C FIXME: Use lea?
-	xor	R32(w2), R32(w2)
-	jmp	L(m2e0)
-
-L(b10):	lea	2(n), i			C n = 7, 11, ...
-	mov	8(up,n,8), %rax
-	mov	%rdx, w3		C FIXME: Use lea?
-	xor	R32(w0), R32(w0)
-	xor	R32(w1), R32(w1)
-	jmp	L(m2e2)
-
-L(bx1):	test	$2, R8(n)
-	mov	%rax, (rp,n,8)
-	jz	L(b11)
-
-L(b01):	lea	1(n), i			C n = 6, 10, ...
-	mov	%rdx, w0		C FIXME: Use lea?
-	xor	R32(w1), R32(w1)
-	jmp	L(m2e1)
-
-L(b11):	lea	-1(n), i		C n = 4, 8, 12, ...
-	mov	%rdx, w2		C FIXME: Use lea?
-	xor	R32(w3), R32(w3)
-	jmp	L(m2e3)
-
-
-	ALIGNx
-L(m2top1):
-	mul	v0
-	add	%rax, w3
-	mov	-8(up,i,8), %rax
-	mov	w3, -8(rp,i,8)
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-	mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-L(m2e1):mov	$0, R32(w2)
-	mov	(up,i,8), %rax
-	mul	v0
-	add	%rax, w0
-	mov	w0, (rp,i,8)
-	adc	%rdx, w1
-	mov	(up,i,8), %rax
-	adc	$0, R32(w2)
-	mul	v1
-	add	%rax, w1
-	adc	%rdx, w2
-	mov	8(up,i,8), %rax
-	mul	v0
-	mov	$0, R32(w3)
-	add	%rax, w1
-	adc	%rdx, w2
-	adc	$0, R32(w3)
-	mov	8(up,i,8), %rax
-	mul	v1
-	add	%rax, w2
-	mov	w1, 8(rp,i,8)
-	adc	%rdx, w3
-	mov	$0, R32(w0)
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	%rax, w2
-	mov	16(up,i,8), %rax
-	adc	%rdx, w3
-	adc	$0, R32(w0)
-	mul	v1
-	mov	$0, R32(w1)
-	add	%rax, w3
-	mov	24(up,i,8), %rax
-	mov	w2, 16(rp,i,8)
-	adc	%rdx, w0
-	add	$4, i
-	js	L(m2top1)
-
-	mul	v0
-	add	%rax, w3
-	mov	I(-8(up),-8(up,i,8)), %rax
-	mov	w3, I(-8(rp),-8(rp,i,8))
-	adc	%rdx, w0
-	adc	R32(w1), R32(w1)
-	mul	v1
-	add	w0, %rax
-	adc	w1, %rdx
-	mov	%rax, I((rp),(rp,i,8))
-	mov	%rdx, I(8(rp),8(rp,i,8))
-
-	lea	16(rp), rp
-	add	$2, n			C decrease |n|
-	jmp	L(am2o3)
-
-	ALIGNx
-L(m2top3):
-	mul	v0
-	add	%rax, w3
-	mov	-8(up,i,8), %rax
-	mov	w3, -8(rp,i,8)
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-	mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	$0, R32(w2)
-	mov	(up,i,8), %rax
-	mul	v0
-	add	%rax, w0
-	mov	w0, (rp,i,8)
-	adc	%rdx, w1
-	mov	(up,i,8), %rax
-	adc	$0, R32(w2)
-	mul	v1
-	add	%rax, w1
-	adc	%rdx, w2
-	mov	8(up,i,8), %rax
-	mul	v0
-	mov	$0, R32(w3)
-	add	%rax, w1
-	adc	%rdx, w2
-	adc	$0, R32(w3)
-	mov	8(up,i,8), %rax
-	mul	v1
-	add	%rax, w2
-	mov	w1, 8(rp,i,8)
-	adc	%rdx, w3
-L(m2e3):mov	$0, R32(w0)
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	%rax, w2
-	mov	16(up,i,8), %rax
-	adc	%rdx, w3
-	adc	$0, R32(w0)
-	mul	v1
-	mov	$0, R32(w1)
-	add	%rax, w3
-	mov	24(up,i,8), %rax
-	mov	w2, 16(rp,i,8)
-	adc	%rdx, w0
-	add	$4, i
-	js	L(m2top3)
-
-	mul	v0
-	add	%rax, w3
-	mov	I(-8(up),-8(up,i,8)), %rax
-	mov	w3, I(-8(rp),-8(rp,i,8))
-	adc	%rdx, w0
-	adc	R32(w1), R32(w1)
-	mul	v1
-	add	w0, %rax
-	adc	w1, %rdx
-	mov	%rax, I((rp),(rp,i,8))
-	mov	%rdx, I(8(rp),8(rp,i,8))
-
-	lea	16(rp), rp
-	add	$2, n			C decrease |n|
-	cmp	$-1, n
-	jz	L(cor1)			C jumps iff entry n = 4
-
-L(am2o1):
-	mov	-8(up,n,8), v0
-	mov	(up,n,8), %rax
-	mov	%rax, v1
-	lea	1(n), i
-	mul	v0
-	mov	%rax, X1
-	MOV(	%rdx, X0, 128)
-	mov	(rp,n,8), w1
-	xor	R32(w2), R32(w2)
-	mov	8(up,n,8), %rax
-	xor	R32(w3), R32(w3)
-	jmp	L(lo1)
-
-	ALIGNx
-L(am2top1):
-	mul	v1
-	add	w0, w1
-	adc	%rax, w2
-	mov	(up,i,8), %rax
-	MOV(	%rdx, w3, 1)
-	adc	$0, w3
-L(lo1):	mul	v0
-	add	w1, X1
-	mov	X1, -8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 2)
-	adc	$0, X1
-	mov	(up,i,8), %rax
-	mul	v1
-	MOV(	%rdx, w0, 4)
-	mov	(rp,i,8), w1
-	add	w1, w2
-	adc	%rax, w3
-	adc	$0, w0
-	mov	8(up,i,8), %rax
-	mul	v0
-	add	w2, X0
-	adc	%rax, X1
-	mov	X0, (rp,i,8)
-	MOV(	%rdx, X0, 8)
-	adc	$0, X0
-	mov	8(up,i,8), %rax
-	mov	8(rp,i,8), w2
-	mul	v1
-	add	w2, w3
-	adc	%rax, w0
-	MOV(	%rdx, w1, 16)
-	adc	$0, w1
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	w3, X1
-	mov	X1, 8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 32)
-	mov	16(rp,i,8), w3
-	adc	$0, X1
-	mov	16(up,i,8), %rax
-	mul	v1
-	add	w3, w0
-	MOV(	%rdx, w2, 64)
-	adc	%rax, w1
-	mov	24(up,i,8), %rax
-	adc	$0, w2
-	mul	v0
-	add	w0, X0
-	mov	X0, 16(rp,i,8)
-	MOV(	%rdx, X0, 128)
-	adc	%rax, X1
-	mov	24(up,i,8), %rax
-	mov	24(rp,i,8), w0
-	adc	$0, X0
-	add	$4, i
-	jnc	L(am2top1)
-
-	mul	v1
-	add	w0, w1
-	adc	w2, %rax
-	adc	Z(i,$0), %rdx
-	add	w1, X1
-	adc	Z(i,$0), X0
-	mov	X1, I(-8(rp),-8(rp,i,8))
-	add	X0, %rax
-	mov	%rax, I((rp),(rp,i,8))
-	adc	Z(i,$0), %rdx
-	mov	%rdx, I(8(rp),8(rp,i,8))
-
-	lea	16(rp), rp
-	add	$2, n
-
-L(am2o3):
-	mov	-8(up,n,8), v0
-	mov	(up,n,8), %rax
-	mov	%rax, v1
-	lea	-1(n), i
-	mul	v0
-	mov	%rax, X1
-	MOV(	%rdx, X0, 8)
-	mov	(rp,n,8), w3
-	xor	R32(w0), R32(w0)
-	xor	R32(w1), R32(w1)
-	mov	8(up,n,8), %rax
-	jmp	L(lo3)
-
-	ALIGNx
-L(am2top3):
-	mul	v1
-	add	w0, w1
-	adc	%rax, w2
-	mov	(up,i,8), %rax
-	MOV(	%rdx, w3, 1)
-	adc	$0, w3
-	mul	v0
-	add	w1, X1
-	mov	X1, -8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 2)
-	adc	$0, X1
-	mov	(up,i,8), %rax
-	mul	v1
-	MOV(	%rdx, w0, 4)
-	mov	(rp,i,8), w1
-	add	w1, w2
-	adc	%rax, w3
-	adc	$0, w0
-	mov	8(up,i,8), %rax
-	mul	v0
-	add	w2, X0
-	adc	%rax, X1
-	mov	X0, (rp,i,8)
-	MOV(	%rdx, X0, 8)
-	adc	$0, X0
-	mov	8(up,i,8), %rax
-	mov	8(rp,i,8), w2
-	mul	v1
-	add	w2, w3
-	adc	%rax, w0
-	MOV(	%rdx, w1, 16)
-	adc	$0, w1
-	mov	16(up,i,8), %rax
-L(lo3):	mul	v0
-	add	w3, X1
-	mov	X1, 8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 32)
-	mov	16(rp,i,8), w3
-	adc	$0, X1
-	mov	16(up,i,8), %rax
-	mul	v1
-	add	w3, w0
-	MOV(	%rdx, w2, 64)
-	adc	%rax, w1
-	mov	24(up,i,8), %rax
-	adc	$0, w2
-	mul	v0
-	add	w0, X0
-	mov	X0, 16(rp,i,8)
-	MOV(	%rdx, X0, 128)
-	adc	%rax, X1
-	mov	24(up,i,8), %rax
-	mov	24(rp,i,8), w0
-	adc	$0, X0
-	add	$4, i
-	jnc	L(am2top3)
-
-	mul	v1
-	add	w0, w1
-	adc	w2, %rax
-	adc	Z(i,$0), %rdx
-	add	w1, X1
-	adc	Z(i,$0), X0
-	mov	X1, I(-8(rp),-8(rp,i,8))
-	add	X0, %rax
-	mov	%rax, I((rp),(rp,i,8))
-	adc	Z(i,$0), %rdx
-	mov	%rdx, I(8(rp),8(rp,i,8))
-
-	lea	16(rp), rp
-	add	$2, n
-	cmp	$-1, n
-	jnz	L(am2o1)
-
-L(cor1):pop	n
-	mov	%rdx, w3
-	mov	-16(up), v0
-	mov	-8(up), %rax
-	mul	v0
-	add	w3, %rax
-	adc	$0, %rdx
-	mov	%rax, -8(rp)
-	mov	%rdx, (rp)
-	jmp	L(sqr_diag_addlsh1)
-
-	ALIGNx
-L(m2top2):
-L(m2e2):mul	v0
-	add	%rax, w3
-	mov	-8(up,i,8), %rax
-	mov	w3, -8(rp,i,8)
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-	mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	$0, R32(w2)
-	mov	(up,i,8), %rax
-	mul	v0
-	add	%rax, w0
-	mov	w0, (rp,i,8)
-	adc	%rdx, w1
-	mov	(up,i,8), %rax
-	adc	$0, R32(w2)
-	mul	v1
-	add	%rax, w1
-	adc	%rdx, w2
-	mov	8(up,i,8), %rax
-	mul	v0
-	mov	$0, R32(w3)
-	add	%rax, w1
-	adc	%rdx, w2
-	adc	$0, R32(w3)
-	mov	8(up,i,8), %rax
-	mul	v1
-	add	%rax, w2
-	mov	w1, 8(rp,i,8)
-	adc	%rdx, w3
-	mov	$0, R32(w0)
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	%rax, w2
-	mov	16(up,i,8), %rax
-	adc	%rdx, w3
-	adc	$0, R32(w0)
-	mul	v1
-	mov	$0, R32(w1)
-	add	%rax, w3
-	mov	24(up,i,8), %rax
-	mov	w2, 16(rp,i,8)
-	adc	%rdx, w0
-	add	$4, i
-	js	L(m2top2)
-
-	mul	v0
-	add	%rax, w3
-	mov	I(-8(up),-8(up,i,8)), %rax
-	mov	w3, I(-8(rp),-8(rp,i,8))
-	adc	%rdx, w0
-	adc	R32(w1), R32(w1)
-	mul	v1
-	add	w0, %rax
-	adc	w1, %rdx
-	mov	%rax, I((rp),(rp,i,8))
-	mov	%rdx, I(8(rp),8(rp,i,8))
-
-	lea	16(rp), rp
-	add	$2, n			C decrease |n|
-	jmp	L(am2o0)
-
-	ALIGNx
-L(m2top0):
-	mul	v0
-	add	%rax, w3
-	mov	-8(up,i,8), %rax
-	mov	w3, -8(rp,i,8)
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-	mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	$0, R32(w2)
-	mov	(up,i,8), %rax
-	mul	v0
-	add	%rax, w0
-	mov	w0, (rp,i,8)
-	adc	%rdx, w1
-	mov	(up,i,8), %rax
-	adc	$0, R32(w2)
-	mul	v1
-	add	%rax, w1
-	adc	%rdx, w2
-L(m2e0):mov	8(up,i,8), %rax
-	mul	v0
-	mov	$0, R32(w3)
-	add	%rax, w1
-	adc	%rdx, w2
-	adc	$0, R32(w3)
-	mov	8(up,i,8), %rax
-	mul	v1
-	add	%rax, w2
-	mov	w1, 8(rp,i,8)
-	adc	%rdx, w3
-	mov	$0, R32(w0)
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	%rax, w2
-	mov	16(up,i,8), %rax
-	adc	%rdx, w3
-	adc	$0, R32(w0)
-	mul	v1
-	mov	$0, R32(w1)
-	add	%rax, w3
-	mov	24(up,i,8), %rax
-	mov	w2, 16(rp,i,8)
-	adc	%rdx, w0
-	add	$4, i
-	js	L(m2top0)
-
-	mul	v0
-	add	%rax, w3
-	mov	I(-8(up),-8(up,i,8)), %rax
-	mov	w3, I(-8(rp),-8(rp,i,8))
-	adc	%rdx, w0
-	adc	R32(w1), R32(w1)
-	mul	v1
-	add	w0, %rax
-	adc	w1, %rdx
-	mov	%rax, I((rp),(rp,i,8))
-	mov	%rdx, I(8(rp),8(rp,i,8))
-
-	lea	16(rp), rp
-	add	$2, n			C decrease |n|
-	cmp	$-2, n
-	jz	L(cor2)			C jumps iff entry n = 5
-
-L(am2o2):
-	mov	-8(up,n,8), v0
-	mov	(up,n,8), %rax
-	mov	%rax, v1
-	lea	-2(n), i
-	mul	v0
-	mov	%rax, X0
-	MOV(	%rdx, X1, 32)
-	mov	(rp,n,8), w0
-	xor	R32(w1), R32(w1)
-	xor	R32(w2), R32(w2)
-	mov	8(up,n,8), %rax
-	jmp	L(lo2)
-
-	ALIGNx
-L(am2top2):
-	mul	v1
-	add	w0, w1
-	adc	%rax, w2
-	mov	(up,i,8), %rax
-	MOV(	%rdx, w3, 1)
-	adc	$0, w3
-	mul	v0
-	add	w1, X1
-	mov	X1, -8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 2)
-	adc	$0, X1
-	mov	(up,i,8), %rax
-	mul	v1
-	MOV(	%rdx, w0, 4)
-	mov	(rp,i,8), w1
-	add	w1, w2
-	adc	%rax, w3
-	adc	$0, w0
-	mov	8(up,i,8), %rax
-	mul	v0
-	add	w2, X0
-	adc	%rax, X1
-	mov	X0, (rp,i,8)
-	MOV(	%rdx, X0, 8)
-	adc	$0, X0
-	mov	8(up,i,8), %rax
-	mov	8(rp,i,8), w2
-	mul	v1
-	add	w2, w3
-	adc	%rax, w0
-	MOV(	%rdx, w1, 16)
-	adc	$0, w1
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	w3, X1
-	mov	X1, 8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 32)
-	mov	16(rp,i,8), w3
-	adc	$0, X1
-	mov	16(up,i,8), %rax
-	mul	v1
-	add	w3, w0
-	MOV(	%rdx, w2, 64)
-	adc	%rax, w1
-	mov	24(up,i,8), %rax
-	adc	$0, w2
-L(lo2):	mul	v0
-	add	w0, X0
-	mov	X0, 16(rp,i,8)
-	MOV(	%rdx, X0, 128)
-	adc	%rax, X1
-	mov	24(up,i,8), %rax
-	mov	24(rp,i,8), w0
-	adc	$0, X0
-	add	$4, i
-	jnc	L(am2top2)
-
-	mul	v1
-	add	w0, w1
-	adc	w2, %rax
-	adc	Z(i,$0), %rdx
-	add	w1, X1
-	adc	Z(i,$0), X0
-	mov	X1, I(-8(rp),-8(rp,i,8))
-	add	X0, %rax
-	mov	%rax, I((rp),(rp,i,8))
-	adc	Z(i,$0), %rdx
-	mov	%rdx, I(8(rp),8(rp,i,8))
-
-	lea	16(rp), rp
-	add	$2, n
-
-L(am2o0):
-	mov	-8(up,n,8), v0
-	mov	(up,n,8), %rax
-	mov	%rax, v1
-	lea	0(n), i
-	mul	v0
-	mov	%rax, X0
-	MOV(	%rdx, X1, 2)
-	xor	R32(w0), R32(w0)
-	mov	(rp,n,8), w2
-	xor	R32(w3), R32(w3)
-	jmp	L(lo0)
-
-	ALIGNx
-L(am2top0):
-	mul	v1
-	add	w0, w1
-	adc	%rax, w2
-	mov	(up,i,8), %rax
-	MOV(	%rdx, w3, 1)
-	adc	$0, w3
-	mul	v0
-	add	w1, X1
-	mov	X1, -8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 2)
-	adc	$0, X1
-	mov	(up,i,8), %rax
-	mul	v1
-	MOV(	%rdx, w0, 4)
-	mov	(rp,i,8), w1
-	add	w1, w2
-	adc	%rax, w3
-	adc	$0, w0
-L(lo0):	mov	8(up,i,8), %rax
-	mul	v0
-	add	w2, X0
-	adc	%rax, X1
-	mov	X0, (rp,i,8)
-	MOV(	%rdx, X0, 8)
-	adc	$0, X0
-	mov	8(up,i,8), %rax
-	mov	8(rp,i,8), w2
-	mul	v1
-	add	w2, w3
-	adc	%rax, w0
-	MOV(	%rdx, w1, 16)
-	adc	$0, w1
-	mov	16(up,i,8), %rax
-	mul	v0
-	add	w3, X1
-	mov	X1, 8(rp,i,8)
-	adc	%rax, X0
-	MOV(	%rdx, X1, 32)
-	mov	16(rp,i,8), w3
-	adc	$0, X1
-	mov	16(up,i,8), %rax
-	mul	v1
-	add	w3, w0
-	MOV(	%rdx, w2, 64)
-	adc	%rax, w1
-	mov	24(up,i,8), %rax
-	adc	$0, w2
-	mul	v0
-	add	w0, X0
-	mov	X0, 16(rp,i,8)
-	MOV(	%rdx, X0, 128)
-	adc	%rax, X1
-	mov	24(up,i,8), %rax
-	mov	24(rp,i,8), w0
-	adc	$0, X0
-	add	$4, i
-	jnc	L(am2top0)
-
-	mul	v1
-	add	w0, w1
-	adc	w2, %rax
-	adc	Z(i,$0), %rdx
-	add	w1, X1
-	adc	Z(i,$0), X0
-	mov	X1, I(-8(rp),-8(rp,i,8))
-	add	X0, %rax
-	mov	%rax, I((rp),(rp,i,8))
-	adc	Z(i,$0), %rdx
-	mov	%rdx, I(8(rp),8(rp,i,8))
-
-	lea	16(rp), rp
-	add	$2, n
-	cmp	$-2, n
-	jnz	L(am2o2)
-
-L(cor2):pop	n
-	mov	-24(up), v0
-	mov	%rax, w2
-	mov	%rdx, w0
-	mov	-16(up), %rax
-	mov	%rax, v1
-	mul	v0
-	mov	%rax, X0
-	MOV(	%rdx, X1, 32)
-	mov	-8(up), %rax
-	mul	v0
-	add	w2, X0
-	mov	X0, -16(rp)
-	MOV(	%rdx, X0, 128)
-	adc	%rax, X1
-	mov	-8(up), %rax
-	adc	$0, X0
-	mul	v1
-	add	w0, X1
-	adc	$0, X0
-	mov	X1, -8(rp)
-	add	X0, %rax
-	mov	%rax, (rp)
-	adc	$0, %rdx
-	mov	%rdx, 8(rp)
-	lea	8(rp), rp
-
-L(sqr_diag_addlsh1):
-	mov	-8(up,n,8), %rax
-	shl	n
-	xor	R32(%rbx), R32(%rbx)
-	mul	%rax
-	mov	8(rp,n,8), %r11
-	lea	(%rdx), %r10
-	mov	16(rp,n,8), %r9
-	add	%r11, %r11
-	jmp	L(dm)
-
-	ALIGNx
-L(dtop):mul	%rax
-	add	%r11, %r10
-	mov	8(rp,n,8), %r11
-	mov	%r10, -8(rp,n,8)
-	adc	%r9, %rax
-	lea	(%rdx,%rbx), %r10
-	mov	16(rp,n,8), %r9
-	adc	%r11, %r11
-L(dm):	mov	%rax, (rp,n,8)
-	mov	(up,n,4), %rax
-	adc	%r9, %r9
-	setc	R8(%rbx)
-	add	$2, n
-	js	L(dtop)
-
-	mul	%rax
-	add	%r11, %r10
-	mov	%r10, -8(rp)
-	adc	%r9, %rax
-	lea	(%rdx,%rbx), %r10
-	mov	%rax, (rp)
-	adc	$0, %r10
-	mov	%r10, 8(rp)
-
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-	ALIGN(16)
-L(small):
-	mov	(up), %rax
-	cmp	$2, n_param
-	jae	L(gt1)
-L(n1):
-	mul	%rax
-	mov	%rax, (rp)
-	mov	%rdx, 8(rp)
-	FUNC_EXIT()
-	ret
-
-L(gt1):	jne	L(gt2)
-L(n2):	mov	%rax, %r8
-	mul	%rax
-	mov	8(up), %r11
-	mov	%rax, (rp)
-	mov	%r11, %rax
-	mov	%rdx, %r9
-	mul	%rax
-	mov	%rax, %r10
-	mov	%r11, %rax
-	mov	%rdx, %r11
-	mul	%r8
-	xor	%r8, %r8
-	add	%rax, %r9
-	adc	%rdx, %r10
-	adc	%r8, %r11
-	add	%rax, %r9
-	mov	%r9, 8(rp)
-	adc	%rdx, %r10
-	mov	%r10, 16(rp)
-	adc	%r8, %r11
-	mov	%r11, 24(rp)
-	FUNC_EXIT()
-	ret
-
-L(gt2):
-L(n3):	mov	%rax, %r10
-	mul	%rax
-	mov	8(up), %r11
-	mov	%rax, (rp)
-	mov	%r11, %rax
-	mov	%rdx, 8(rp)
-	mul	%rax
-	mov	16(up), %rcx
-	mov	%rax, 16(rp)
-	mov	%rcx, %rax
-	mov	%rdx, 24(rp)
-	mul	%rax
-	mov	%rax, 32(rp)
-	mov	%rdx, 40(rp)
-
-	mov	%r11, %rax
-	mul	%r10
-	mov	%rax, %r8
-	mov	%rcx, %rax
-	mov	%rdx, %r9
-	mul	%r10
-	xor	%r10, %r10
-	add	%rax, %r9
-	mov	%r11, %rax
-	mov	%r10, %r11
-	adc	%rdx, %r10
-
-	mul	%rcx
-	add	%rax, %r10
-	adc	%r11, %rdx
-	add	%r8, %r8
-	adc	%r9, %r9
-	adc	%r10, %r10
-	adc	%rdx, %rdx
-	adc	%r11, %r11
-	add	%r8, 8(rp)
-	adc	%r9, 16(rp)
-	adc	%r10, 24(rp)
-	adc	%rdx, 32(rp)
-	adc	%r11, 40(rp)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/core2/sublsh1_n.asm b/gmp/mpn/x86_64/core2/sublsh1_n.asm
deleted file mode 100644
index 46488fcafe..0000000000
--- a/gmp/mpn/x86_64/core2/sublsh1_n.asm
+++ /dev/null
@@ -1,47 +0,0 @@
-dnl  AMD64 mpn_sublsh1_n optimised for Core 2 and Core iN.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 1)
-define(RSH, 63)
-
-define(ADDSUB,	sub)
-define(ADCSBB,	sbb)
-define(func,	mpn_sublsh1_n)
-
-MULFUNC_PROLOGUE(mpn_sublsh1_n)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-include_mpn(`x86_64/core2/sublshC_n.asm')
diff --git a/gmp/mpn/x86_64/core2/sublsh2_n.asm b/gmp/mpn/x86_64/core2/sublsh2_n.asm
deleted file mode 100644
index f3b1e28464..0000000000
--- a/gmp/mpn/x86_64/core2/sublsh2_n.asm
+++ /dev/null
@@ -1,47 +0,0 @@
-dnl  AMD64 mpn_sublsh2_n optimised for Core 2 and Core iN.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 2)
-define(RSH, 62)
-
-define(ADDSUB,	sub)
-define(ADCSBB,	sbb)
-define(func,	mpn_sublsh2_n)
-
-MULFUNC_PROLOGUE(mpn_sublsh2_n)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-include_mpn(`x86_64/core2/sublshC_n.asm')
diff --git a/gmp/mpn/x86_64/coreihwl/addmul_2.asm b/gmp/mpn/x86_64/coreihwl/addmul_2.asm
deleted file mode 100644
index 54aebc888d..0000000000
--- a/gmp/mpn/x86_64/coreihwl/addmul_2.asm
+++ /dev/null
@@ -1,238 +0,0 @@
-dnl  AMD64 mpn_addmul_2 optimised for Intel Haswell.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	n/a
-C AMD K10	n/a
-C AMD bull	n/a
-C AMD pile	n/a
-C AMD steam	 ?
-C AMD bobcat	n/a
-C AMD jaguar	 ?
-C Intel P4	n/a
-C Intel core	n/a
-C Intel NHM	n/a
-C Intel SBR	n/a
-C Intel IBR	n/a
-C Intel HWL	 2.15
-C Intel BWL	 ?
-C Intel atom	n/a
-C VIA nano	n/a
-
-C The loop of this code is the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-define(`rp',     `%rdi')
-define(`up',     `%rsi')
-define(`n_param',`%rdx')
-define(`vp',     `%rcx')
-
-define(`v0', `%r8')
-define(`v1', `%r9')
-define(`w0', `%rbx')
-define(`w1', `%rcx')
-define(`w2', `%rbp')
-define(`w3', `%r10')
-define(`n',  `%r11')
-define(`X0', `%r12')
-define(`X1', `%r13')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_addmul_2)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	mov	n_param, n
-	shr	$2, n
-
-	test	$1, R8(n_param)
-	jnz	L(bx1)
-
-L(bx0):	mov	(rp), X0
-	mov	8(rp), X1
-	test	$2, R8(n_param)
-	jnz	L(b10)
-
-L(b00):	mov	(up), %rdx
-	lea	16(up), up
-	mulx(	v0, %rax, w1)
-	add	%rax, X0
-	mulx(	v1, %rax, w2)
-	adc	$0, w1
-	mov	X0, (rp)
-	add	%rax, X1
-	adc	$0, w2
-	mov	-8(up), %rdx
-	lea	16(rp), rp
-	jmp	L(lo0)
-
-L(b10):	mov	(up), %rdx
-	inc	n
-	mulx(	v0, %rax, w1)
-	add	%rax, X0
-	adc	$0, w1
-	mulx(	v1, %rax, w2)
-	mov	X0, (rp)
-	mov	16(rp), X0
-	add	%rax, X1
-	adc	$0, w2
-	xor	w0, w0
-	jmp	L(lo2)
-
-L(bx1):	mov	(rp), X1
-	mov	8(rp), X0
-	test	$2, R8(n_param)
-	jnz	L(b11)
-
-L(b01):	mov	(up), %rdx
-	mulx(	v0, %rax, w3)
-	add	%rax, X1
-	adc	$0, w3
-	mulx(	v1, %rax, w0)
-	add	%rax, X0
-	adc	$0, w0
-	mov	8(up), %rdx
-	mov	X1, (rp)
-	mov	16(rp), X1
-	mulx(	v0, %rax, w1)
-	lea	24(rp), rp
-	lea	24(up), up
-	jmp	L(lo1)
-
-L(b11):	mov	(up), %rdx
-	inc	n
-	mulx(	v0, %rax, w3)
-	add	%rax, X1
-	adc	$0, w3
-	mulx(	v1, %rax, w0)
-	add	%rax, X0
-	adc	$0, w0
-	mov	X1, (rp)
-	mov	8(up), %rdx
-	mulx(	v0, %rax, w1)
-	lea	8(rp), rp
-	lea	8(up), up
-	jmp	L(lo3)
-
-	ALIGN(16)
-L(top):	mulx(	v0, %rax, w3)
-	add	w0, X1
-	adc	$0, w2
-	add	%rax, X1
-	adc	$0, w3
-	mulx(	v1, %rax, w0)
-	add	%rax, X0
-	adc	$0, w0
-	lea	32(rp), rp
-	add	w1, X1
-	mov	-16(up), %rdx
-	mov	X1, -24(rp)
-	adc	$0, w3
-	add	w2, X0
-	mov	-8(rp), X1
-	mulx(	v0, %rax, w1)
-	adc	$0, w0
-L(lo1):	add	%rax, X0
-	mulx(	v1, %rax, w2)
-	adc	$0, w1
-	add	w3, X0
-	mov	X0, -16(rp)
-	adc	$0, w1
-	add	%rax, X1
-	adc	$0, w2
-	add	w0, X1
-	mov	-8(up), %rdx
-	adc	$0, w2
-L(lo0):	mulx(	v0, %rax, w3)
-	add	%rax, X1
-	adc	$0, w3
-	mov	(rp), X0
-	mulx(	v1, %rax, w0)
-	add	%rax, X0
-	adc	$0, w0
-	add	w1, X1
-	mov	X1, -8(rp)
-	adc	$0, w3
-	mov	(up), %rdx
-	add	w2, X0
-	mulx(	v0, %rax, w1)
-	adc	$0, w0
-L(lo3):	add	%rax, X0
-	adc	$0, w1
-	mulx(	v1, %rax, w2)
-	add	w3, X0
-	mov	8(rp), X1
-	mov	X0, (rp)
-	mov	16(rp), X0
-	adc	$0, w1
-	add	%rax, X1
-	adc	$0, w2
-L(lo2):	mov	8(up), %rdx
-	lea	32(up), up
-	dec	n
-	jnz	L(top)
-
-L(end):	mulx(	v0, %rax, w3)
-	add	w0, X1
-	adc	$0, w2
-	add	%rax, X1
-	adc	$0, w3
-	mulx(	v1, %rdx, %rax)
-	add	w1, X1
-	mov	X1, 8(rp)
-	adc	$0, w3
-	add	w2, %rdx
-	adc	$0, %rax
-	add	w3, %rdx
-	mov	%rdx, 16(rp)
-	adc	$0, %rax
-
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreihwl/aorsmul_1.asm b/gmp/mpn/x86_64/coreihwl/aorsmul_1.asm
deleted file mode 100644
index fd5a26d00f..0000000000
--- a/gmp/mpn/x86_64/coreihwl/aorsmul_1.asm
+++ /dev/null
@@ -1,198 +0,0 @@
-dnl  AMD64 mpn_addmul_1 and mpn_submul_1 optimised for Intel Haswell.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	n/a
-C AMD K10	n/a
-C AMD bull	n/a
-C AMD pile	n/a
-C AMD steam	 ?
-C AMD bobcat	n/a
-C AMD jaguar	 ?
-C Intel P4	n/a
-C Intel core	n/a
-C Intel NHM	n/a
-C Intel SBR	n/a
-C Intel IBR	n/a
-C Intel HWL	 2.32
-C Intel BWL	 ?
-C Intel atom	n/a
-C VIA nano	n/a
-
-C The loop of this code is the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-C TODO
-C  * Handle small n separately, for lower overhead.
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`v0_param',`%rcx')   C r9
-
-define(`n',       `%rbp')
-define(`v0',      `%rdx')
-
-ifdef(`OPERATION_addmul_1',`
-  define(`ADDSUB',        `add')
-  define(`ADCSBB',        `adc')
-  define(`func',  `mpn_addmul_1')
-')
-ifdef(`OPERATION_submul_1',`
-  define(`ADDSUB',        `sub')
-  define(`ADCSBB',        `sbb')
-  define(`func',  `mpn_submul_1')
-')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-
-	mov	n_param, n
-	mov	v0_param, v0
-
-	test	$1, R8(n)
-	jnz	L(bx1)
-
-L(bx0):	shr	$2, n
-	jc	L(b10)
-
-L(b00):	mulx(	(up), %r13, %r12)
-	mulx(	8,(up), %rbx, %rax)
-	add	%r12, %rbx
-	adc	$0, %rax
-	mov	(rp), %r12
-	mov	8(rp), %rcx
-	mulx(	16,(up), %r9, %r8)
-	lea	-16(rp), rp
-	lea	16(up), up
-	ADDSUB	%r13, %r12
-	jmp	L(lo0)
-
-L(bx1):	shr	$2, n
-	jc	L(b11)
-
-L(b01):	mulx(	(up), %r11, %r10)
-	jnz	L(gt1)
-L(n1):	ADDSUB	%r11, (rp)
-	mov	$0, R32(%rax)
-	adc	%r10, %rax
-	jmp	L(ret)
-
-L(gt1):	mulx(	8,(up), %r13, %r12)
-	mulx(	16,(up), %rbx, %rax)
-	lea	24(up), up
-	add	%r10, %r13
-	adc	%r12, %rbx
-	adc	$0, %rax
-	mov	(rp), %r10
-	mov	8(rp), %r12
-	mov	16(rp), %rcx
-	lea	-8(rp), rp
-	ADDSUB	%r11, %r10
-	jmp	L(lo1)
-
-L(b11):	mulx(	(up), %rbx, %rax)
-	mov	(rp), %rcx
-	mulx(	8,(up), %r9, %r8)
-	lea	8(up), up
-	lea	-24(rp), rp
-	inc	n			C adjust n
-	ADDSUB	%rbx, %rcx
-	jmp	L(lo3)
-
-L(b10):	mulx(	(up), %r9, %r8)
-	mulx(	8,(up), %r11, %r10)
-	lea	-32(rp), rp
-	mov	$0, R32(%rax)
-	clc				C clear cf
-	jz	L(end)			C depends on old shift
-
-	ALIGN(16)
-L(top):	adc	%rax, %r9
-	lea	32(rp), rp
-	adc	%r8, %r11
-	mulx(	16,(up), %r13, %r12)
-	mov	(rp), %r8
-	mulx(	24,(up), %rbx, %rax)
-	lea	32(up), up
-	adc	%r10, %r13
-	adc	%r12, %rbx
-	adc	$0, %rax
-	mov	8(rp), %r10
-	mov	16(rp), %r12
-	ADDSUB	%r9, %r8
-	mov	24(rp), %rcx
-	mov	%r8, (rp)
-	ADCSBB	%r11, %r10
-L(lo1):	mulx(	(up), %r9, %r8)
-	mov	%r10, 8(rp)
-	ADCSBB	%r13, %r12
-L(lo0):	mov	%r12, 16(rp)
-	ADCSBB	%rbx, %rcx
-L(lo3):	mulx(	8,(up), %r11, %r10)
-	mov	%rcx, 24(rp)
-	dec	n
-	jnz	L(top)
-
-L(end):	adc	%rax, %r9
-	adc	%r8, %r11
-	mov	32(rp), %r8
-	mov	%r10, %rax
-	adc	$0, %rax
-	mov	40(rp), %r10
-	ADDSUB	%r9, %r8
-	mov	%r8, 32(rp)
-	ADCSBB	%r11, %r10
-	mov	%r10, 40(rp)
-	adc	$0, %rax
-
-L(ret):	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreihwl/gmp-mparam.h b/gmp/mpn/x86_64/coreihwl/gmp-mparam.h
deleted file mode 100644
index eef44b3a81..0000000000
--- a/gmp/mpn/x86_64/coreihwl/gmp-mparam.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/* Haswell gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 2900 MHz Core i5 Haswell */
-/* FFT tuning limit = 75000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        10
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        26
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           25
-
-#define MUL_TOOM22_THRESHOLD                22
-#define MUL_TOOM33_THRESHOLD                74
-#define MUL_TOOM44_THRESHOLD               195
-#define MUL_TOOM6H_THRESHOLD               298
-#define MUL_TOOM8H_THRESHOLD               406
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     121
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     138
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     128
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     132
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     170
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 34
-#define SQR_TOOM3_THRESHOLD                117
-#define SQR_TOOM4_THRESHOLD                336
-#define SQR_TOOM6_THRESHOLD                426
-#define SQR_TOOM8_THRESHOLD                562
-
-#define MULMID_TOOM42_THRESHOLD             42
-
-#define MULMOD_BNM1_THRESHOLD               13
-#define SQRMOD_BNM1_THRESHOLD               17
-
-#define MUL_FFT_MODF_THRESHOLD             376  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    376, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     21, 8}, \
-    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
-    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
-    {     39, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     55,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     83,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255,10}, \
-    {    135,11}, {     79,10}, {    159, 9}, {    319,10}, \
-    {    167,11}, {     95,10}, {    191, 9}, {    383,11}, \
-    {    111,12}, {     63,11}, {    127,10}, {    255, 9}, \
-    {    511,10}, {    271, 9}, {    543,11}, {    143,10}, \
-    {    287, 9}, {    575,10}, {    303, 9}, {    607,11}, \
-    {    159,10}, {    319, 9}, {    639,12}, {     95,11}, \
-    {    191,10}, {    383,11}, {    207,10}, {    415,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    271,10}, {    543, 9}, {   1087,11}, {    287,10}, \
-    {    607,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    335,10}, {    671,11}, {    351,10}, {    703,11}, \
-    {    367,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,10}, {    831,12}, {    223,11}, {    447,10}, \
-    {    895,11}, {    479,13}, {    127,12}, {    255,11}, \
-    {    511,10}, {   1023,11}, {    543,10}, {   1087,12}, \
-    {    287,11}, {    575,10}, {   1151,11}, {    607,12}, \
-    {    319,11}, {    639,10}, {   1279,11}, {    671,12}, \
-    {    351,11}, {    703,10}, {   1407,11}, {    735,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,10}, {   1663,12}, {    447,11}, {    895,12}, \
-    {    479,14}, {    127,12}, {    511,11}, {   1023,12}, \
-    {    543,11}, {   1087,12}, {    575,11}, {   1151,12}, \
-    {    607,11}, {   1215,13}, {    319,12}, {    671,11}, \
-    {   1343,12}, {    703,11}, {   1407,12}, {    735,13}, \
-    {    383,12}, {    767,11}, {   1535,12}, {    831,13}, \
-    {    447,12}, {    959,11}, {   1919,13}, {    511,12}, \
-    {   1087,13}, {    575,12}, {   1215,13}, {    639,12}, \
-    {   1343,13}, {    703,12}, {   1407,11}, {   2815,14}, \
-    {    383,13}, {    767,12}, {   1535,13}, {    831,12}, \
-    {   1727,13}, {    959,12}, {   1919,14}, {    511,13}, \
-    {   1023,12}, {   2047,13}, {   1087,12}, {   2175,13}, \
-    {   1215,12}, {   2431,14}, {    639,13}, {   1279,12}, \
-    {   2559,13}, {   1343,12}, {   2687,13}, {   1407,12}, \
-    {   2815,13}, {   1471,12}, {   2943,14}, {    767,13}, \
-    {   1535,12}, {   3071,13}, {   1727,14}, {    895,13}, \
-    {   1791,12}, {   3583,13}, {   1919,15}, {    511,14}, \
-    {   1023,13}, {   2175,14}, {   1151,13}, {   2431,12}, \
-    {   4863,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
-    {   2943,15}, {    767,14}, {   1535,13}, {   3199,14}, \
-    {   1663,13}, {   3455,12}, {   6911,14}, {   1791,13}, \
-    {   3583,14}, {   1919,16}, {    511,15}, {   1023,14}, \
-    {   2175,13}, {   4351,14}, {   2431,13}, {   4863,15}, \
-    {   1279,14}, {   2943,13}, {   5887,15}, {   1535,14}, \
-    {   3455,13}, {   6911,15}, {   1791,14}, {   3839,13}, \
-    {   7679,16}, {   1023,15}, {   2047,14}, {   4351,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 237
-#define MUL_FFT_THRESHOLD                 4224
-
-#define SQR_FFT_MODF_THRESHOLD             344  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    344, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     10, 5}, {     21, 6}, {     21, 7}, {     11, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     21, 8}, \
-    {     11, 7}, {     25, 8}, {     13, 7}, {     28, 8}, \
-    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
-    {     27, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
-    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     55,11}, {     31,10}, {     79,11}, \
-    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255, 8}, {    511,10}, {    135,11}, \
-    {     79,10}, {    159, 9}, {    319,11}, {     95,10}, \
-    {    191, 9}, {    383,11}, {    111,12}, {     63,11}, \
-    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
-    {    543,11}, {    143,10}, {    287, 9}, {    575,10}, \
-    {    303, 9}, {    607,11}, {    159,10}, {    319, 9}, \
-    {    639,12}, {     95,11}, {    191,10}, {    383, 9}, \
-    {    767,11}, {    207,10}, {    415,13}, {     63,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
-    {    543, 9}, {   1087,10}, {    575,11}, {    303,10}, \
-    {    607,11}, {    319,10}, {    671,11}, {    351,10}, \
-    {    735,11}, {    383,10}, {    767,11}, {    415,10}, \
-    {    831,11}, {    447,10}, {    895,11}, {    479,13}, \
-    {    127,12}, {    255,11}, {    543,10}, {   1087,11}, \
-    {    607,10}, {   1215,11}, {    671,12}, {    351,11}, \
-    {    735,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,10}, {   1663,12}, {    447,11}, {    895,12}, \
-    {    479,14}, {    127,12}, {    511,11}, {   1023,12}, \
-    {    543,11}, {   1087,12}, {    607,11}, {   1215,13}, \
-    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
-    {   1343,12}, {    735,13}, {    383,12}, {    767,11}, \
-    {   1535,12}, {    831,13}, {    447,12}, {    959,13}, \
-    {    511,12}, {   1087,13}, {    575,12}, {   1215,13}, \
-    {    639,12}, {   1343,13}, {    703,12}, {   1407,14}, \
-    {    383,13}, {    767,12}, {   1535,13}, {    831,12}, \
-    {   1663,13}, {    959,14}, {    511,13}, {   1087,12}, \
-    {   2175,13}, {   1215,12}, {   2431,14}, {    639,13}, \
-    {   1343,12}, {   2687,13}, {   1407,12}, {   2815,13}, \
-    {   1471,14}, {    767,13}, {   1599,12}, {   3199,13}, \
-    {   1663,14}, {    895,13}, {   1791,12}, {   3583,15}, \
-    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
-    {   2431,12}, {   4863,14}, {   1279,13}, {   2687,14}, \
-    {   1407,13}, {   2815,15}, {    767,14}, {   1535,13}, \
-    {   3199,14}, {   1663,13}, {   3455,12}, {   6911,14}, \
-    {   1791,13}, {   3583,16}, {    511,15}, {   1023,14}, \
-    {   2431,13}, {   4863,15}, {   1279,14}, {   2943,13}, \
-    {   5887,15}, {   1535,14}, {   3455,13}, {   6911,15}, \
-    {   1791,14}, {   3839,16}, {   1023,15}, {   2047,14}, \
-    {   4223,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 206
-#define SQR_FFT_THRESHOLD                 3712
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  78
-#define MULLO_MUL_N_THRESHOLD             8207
-
-#define DC_DIV_QR_THRESHOLD                 63
-#define DC_DIVAPPR_Q_THRESHOLD             195
-#define DC_BDIV_QR_THRESHOLD                56
-#define DC_BDIV_Q_THRESHOLD                128
-
-#define INV_MULMOD_BNM1_THRESHOLD           42
-#define INV_NEWTON_THRESHOLD               199
-#define INV_APPR_THRESHOLD                 181
-
-#define BINV_NEWTON_THRESHOLD              236
-#define REDC_1_TO_REDC_2_THRESHOLD          47
-#define REDC_2_TO_REDC_N_THRESHOLD          62
-
-#define MU_DIV_QR_THRESHOLD               1470
-#define MU_DIVAPPR_Q_THRESHOLD            1589
-#define MUPI_DIV_QR_THRESHOLD               78
-#define MU_BDIV_QR_THRESHOLD              1442
-#define MU_BDIV_Q_THRESHOLD               1470
-
-#define POWM_SEC_TABLE  3,22,194,257,1099
-
-#define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                     112
-#define HGCD_APPR_THRESHOLD                 52
-#define HGCD_REDUCE_THRESHOLD             2681
-#define GCD_DC_THRESHOLD                   807
-#define GCDEXT_DC_THRESHOLD                416
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        21
-#define SET_STR_DC_THRESHOLD              1326
-#define SET_STR_PRECOMPUTE_THRESHOLD      2627
-
-#define FAC_DSC_THRESHOLD                  767
-#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/gmp/mpn/x86_64/coreihwl/mul_1.asm b/gmp/mpn/x86_64/coreihwl/mul_1.asm
deleted file mode 100644
index 1e3c338f4e..0000000000
--- a/gmp/mpn/x86_64/coreihwl/mul_1.asm
+++ /dev/null
@@ -1,155 +0,0 @@
-dnl  AMD64 mpn_mul_1 using mulx optimised for Intel Haswell.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb	best
-C AMD K8,K9	n/a
-C AMD K10	n/a
-C AMD bd1	n/a
-C AMD bd2	 ?
-C AMD bobcat	n/a
-C AMD jaguar	 ?
-C Intel P4	n/a
-C Intel PNR	n/a
-C Intel NHM	n/a
-C Intel SBR	n/a
-C Intel IBR	n/a
-C Intel HWL	 1.57		this
-C Intel BWL	 ?
-C Intel atom	n/a
-C VIA nano	n/a
-
-C The loop of this code is the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`v0_param',`%rcx')   C r9
-
-define(`n',       `%rbp')
-define(`v0',      `%rdx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_mul_1)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-	push	%r12
-
-	mov	n_param, n
-	shr	$2, n
-
-	test	$1, R8(n_param)
-	jnz	L(bx1)
-
-L(bx0):	test	$2, R8(n_param)
-	mov	v0_param, v0
-	jnz	L(b10)
-
-L(b00):	mulx(	(up), %r9, %r8)
-	mulx(	8,(up), %r11, %r10)
-	mulx(	16,(up), %rcx, %r12)
-	lea	-32(rp), rp
-	jmp	L(lo0)
-
-L(b10):	mulx(	(up), %rcx, %r12)
-	mulx(	8,(up), %rbx, %rax)
-	lea	-16(rp), rp
-	test	n, n
-	jz	L(cj2)
-	mulx(	16,(up), %r9, %r8)
-	lea	16(up), up
-	jmp	L(lo2)
-
-L(bx1):	test	$2, R8(n_param)
-	mov	v0_param, v0
-	jnz	L(b11)
-
-L(b01):	mulx(	(up), %rbx, %rax)
-	lea	-24(rp), rp
-	test	n, n
-	jz	L(cj1)
-	mulx(	8,(up), %r9, %r8)
-	lea	8(up), up
-	jmp	L(lo1)
-
-L(b11):	mulx(	(up), %r11, %r10)
-	mulx(	8,(up), %rcx, %r12)
-	mulx(	16,(up), %rbx, %rax)
-	lea	-8(rp), rp
-	test	n, n
-	jz	L(cj3)
-	lea	24(up), up
-	jmp	L(lo3)
-
-	ALIGN(32)
-L(top):	lea	32(rp), rp
-	mov	%r9, (rp)
-	adc	%r8, %r11
-L(lo3):	mulx(	(up), %r9, %r8)
-	mov	%r11, 8(rp)
-	adc	%r10, %rcx
-L(lo2):	mov	%rcx, 16(rp)
-	adc	%r12, %rbx
-L(lo1):	mulx(	8,(up), %r11, %r10)
-	adc	%rax, %r9
-	mulx(	16,(up), %rcx, %r12)
-	mov	%rbx, 24(rp)
-L(lo0):	mulx(	24,(up), %rbx, %rax)
-	lea	32(up), up
-	dec	n
-	jnz	L(top)
-
-L(end):	lea	32(rp), rp
-	mov	%r9, (rp)
-	adc	%r8, %r11
-L(cj3):	mov	%r11, 8(rp)
-	adc	%r10, %rcx
-L(cj2):	mov	%rcx, 16(rp)
-	adc	%r12, %rbx
-L(cj1):	mov	%rbx, 24(rp)
-	adc	$0, %rax
-
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/coreihwl/mul_2.asm b/gmp/mpn/x86_64/coreihwl/mul_2.asm
deleted file mode 100644
index 5bdb1aa645..0000000000
--- a/gmp/mpn/x86_64/coreihwl/mul_2.asm
+++ /dev/null
@@ -1,173 +0,0 @@
-dnl  AMD64 mpn_mul_2 optimised for Intel Haswell.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	n/a
-C AMD K10	n/a
-C AMD bull	n/a
-C AMD pile	n/a
-C AMD steam	 ?
-C AMD bobcat	n/a
-C AMD jaguar	 ?
-C Intel P4	n/a
-C Intel core	n/a
-C Intel NHM	n/a
-C Intel SBR	n/a
-C Intel IBR	n/a
-C Intel HWL	 1.86
-C Intel BWL	 ?
-C Intel atom	n/a
-C VIA nano	n/a
-
-C The loop of this code is the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-C TODO
-C  * Move test and jcc together, for insn fusion.
-
-define(`rp',     `%rdi')
-define(`up',     `%rsi')
-define(`n_param',`%rdx')
-define(`vp',     `%rcx')
-
-define(`v0', `%r8')
-define(`v1', `%r9')
-define(`w0', `%rbx')
-define(`w1', `%rcx')
-define(`w2', `%rbp')
-define(`w3', `%r10')
-define(`n',  `%r11')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_mul_2)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	lea	3(n_param), n
-	shr	$2, n
-
-	test	$1, R8(n_param)
-	jnz	L(bx1)
-
-L(bx0):	xor	w0, w0
-	test	$2, R8(n_param)
-	mov	(up), %rdx
-	mulx(	v0, w2, w1)
-	jz	L(lo0)
-
-L(b10):	lea	-16(rp), rp
-	lea	-16(up), up
-	jmp	L(lo2)
-
-L(bx1):	xor	w2, w2
-	test	$2, R8(n_param)
-	mov	(up), %rdx
-	mulx(	v0, w0, w3)
-	jnz	L(b11)
-
-L(b01):	lea	-24(rp), rp
-	lea	8(up), up
-	jmp	L(lo1)
-
-L(b11):	lea	-8(rp), rp
-	lea	-8(up), up
-	jmp	L(lo3)
-
-	ALIGN(16)
-L(top):	mulx(	v1, %rax, w0)
-	add	%rax, w2		C 0
-	mov	(up), %rdx
-	mulx(	v0, %rax, w1)
-	adc	$0, w0			C 1
-	add	%rax, w2		C 0
-	adc	$0, w1			C 1
-	add	w3, w2			C 0
-L(lo0):	mov	w2, (rp)		C 0
-	adc	$0, w1			C 1
-	mulx(	v1, %rax, w2)
-	add	%rax, w0		C 1
-	mov	8(up), %rdx
-	adc	$0, w2			C 2
-	mulx(	v0, %rax, w3)
-	add	%rax, w0		C 1
-	adc	$0, w3			C 2
-	add	w1, w0			C 1
-L(lo3):	mov	w0, 8(rp)		C 1
-	adc	$0, w3			C 2
-	mulx(	v1, %rax, w0)
-	add	%rax, w2		C 2
-	mov	16(up), %rdx
-	mulx(	v0, %rax, w1)
-	adc	$0, w0			C 3
-	add	%rax, w2		C 2
-	adc	$0, w1			C 3
-	add	w3, w2			C 2
-L(lo2):	mov	w2, 16(rp)		C 2
-	adc	$0, w1			C 3
-	mulx(	v1, %rax, w2)
-	add	%rax, w0		C 3
-	mov	24(up), %rdx
-	adc	$0, w2			C 4
-	mulx(	v0, %rax, w3)
-	add	%rax, w0		C 3
-	adc	$0, w3			C 4
-	add	w1, w0			C 3
-	lea	32(up), up
-L(lo1):	mov	w0, 24(rp)		C 3
-	adc	$0, w3			C 4
-	dec	n
-	lea	32(rp), rp
-	jnz	L(top)
-
-L(end):	mulx(	v1, %rdx, %rax)
-	add	%rdx, w2
-	adc	$0, %rax
-	add	w3, w2
-	mov	w2, (rp)
-	adc	$0, %rax
-
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreihwl/mul_basecase.asm b/gmp/mpn/x86_64/coreihwl/mul_basecase.asm
deleted file mode 100644
index b2656c8e9b..0000000000
--- a/gmp/mpn/x86_64/coreihwl/mul_basecase.asm
+++ /dev/null
@@ -1,441 +0,0 @@
-dnl  AMD64 mpn_mul_basecase optimised for Intel Haswell.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb	mul_1		mul_2		mul_3		addmul_2
-C AMD K8,K9	n/a		n/a		 -		n/a
-C AMD K10	n/a		n/a		 -		n/a
-C AMD bull	n/a		n/a		 -		n/a
-C AMD pile	n/a		n/a		 -		n/a
-C AMD steam	 ?		 ?		 -		 ?
-C AMD bobcat	n/a		n/a		 -		n/a
-C AMD jaguar	 ?		 ?		 -		 ?
-C Intel P4	n/a		n/a		 -		n/a
-C Intel core	n/a		n/a		 -		n/a
-C Intel NHM	n/a		n/a		 -		n/a
-C Intel SBR	n/a		n/a		 -		n/a
-C Intel IBR	n/a		n/a		 -		n/a
-C Intel HWL	 1.77		 1.86		 -		 2.15
-C Intel BWL	 ?		 ?		 -		 ?
-C Intel atom	n/a		n/a		 -		n/a
-C VIA nano	n/a		n/a		 -		n/a
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-C TODO
-C  * Adjoin a mul_3.
-C  * Further micro-optimise.
-
-define(`rp',      `%rdi')
-define(`up',      `%rsi')
-define(`un_param',`%rdx')
-define(`vp',      `%rcx')
-define(`vn',      `%r8')
-
-define(`un',      `%rbx')
-
-define(`w0',	`%r10')
-define(`w1',	`%r11')
-define(`w2',	`%r12')
-define(`w3',	`%r13')
-define(`n',	`%rbp')
-define(`v0',	`%r9')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mul_basecase)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	mov	un_param, un		C free up rdx
-	neg	un
-
-	mov	un_param, n		C FIXME: share
-	sar	$2, n			C FIXME: share
-
-	test	$1, R8(vn)
-	jz	L(do_mul_2)
-
-define(`w4',	`%r9')
-define(`w5',	`%r14')
-
-	mov	(vp), %rdx
-
-L(do_mul_1):
-	test	$1, R8(un)
-	jnz	L(m1x1)
-
-L(m1x0):test	$2, R8(un)
-	jnz	L(m110)
-
-L(m100):
-	mulx(	(up), w5, w2)
-	mulx(	8,(up), w1, w3)
-	lea	-24(rp), rp
-	jmp	L(m1l0)
-
-L(m110):
-	mulx(	(up), w3, w4)
-	mulx(	8,(up), w1, w5)
-	lea	-8(rp), rp
-	test	n, n
-	jz	L(cj2)
-	mulx(	16,(up), w0, w2)
-	lea	16(up), up
-	jmp	L(m1l2)
-
-L(m1x1):test	$2, R8(un)
-	jz	L(m111)
-
-L(m101):
-	mulx(	(up), w4, w5)
-	lea	-16(rp), rp
-	test	n, n
-	jz	L(cj1)
-	mulx(	8,(up), w0, w2)
-	lea	8(up), up
-	jmp	L(m1l1)
-
-L(m111):
-	mulx(	(up), w2, w3)
-	mulx(	8,(up), w0, w4)
-	mulx(	16,(up), w1, w5)
-	lea	24(up), up
-	test	n, n
-	jnz	L(gt3)
-	add	w0, w3
-	jmp	L(cj3)
-L(gt3):	add	w0, w3
-	jmp	L(m1l3)
-
-	ALIGN(32)
-L(m1tp):lea	32(rp), rp
-L(m1l3):mov	w2, (rp)
-	mulx(	(up), w0, w2)
-L(m1l2):mov	w3, 8(rp)
-	adc	w1, w4
-L(m1l1):adc	w0, w5
-	mov	w4, 16(rp)
-	mulx(	8,(up), w1, w3)
-L(m1l0):mov	w5, 24(rp)
-	mulx(	16,(up), w0, w4)
-	adc	w1, w2
-	mulx(	24,(up), w1, w5)
-	adc	w0, w3
-	lea	32(up), up
-	dec	n
-	jnz	L(m1tp)
-
-L(m1ed):lea	32(rp), rp
-L(cj3):	mov	w2, (rp)
-L(cj2):	mov	w3, 8(rp)
-	adc	w1, w4
-L(cj1):	mov	w4, 16(rp)
-	adc	$0, w5
-	mov	w5, 24(rp)
-
-	dec	R32(vn)
-	jz	L(ret5)
-
-	lea	8(vp), vp
-	lea	32(rp), rp
-C	push	%r12
-C	push	%r13
-C	push	%r14
-	jmp	L(do_addmul)
-
-L(do_mul_2):
-define(`v1',	`%r14')
-C	push	%r12
-C	push	%r13
-C	push	%r14
-
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	lea	(un), n
-	sar	$2, n
-
-	test	$1, R8(un)
-	jnz	L(m2x1)
-
-L(m2x0):xor	w0, w0
-	test	$2, R8(un)
-	mov	(up), %rdx
-	mulx(	v0, w2, w1)
-	jz	L(m2l0)
-
-L(m210):lea	-16(rp), rp
-	lea	-16(up), up
-	jmp	L(m2l2)
-
-L(m2x1):xor	w2, w2
-	test	$2, R8(un)
-	mov	(up), %rdx
-	mulx(	v0, w0, w3)
-	jz	L(m211)
-
-L(m201):lea	-24(rp), rp
-	lea	8(up), up
-	jmp	L(m2l1)
-
-L(m211):lea	-8(rp), rp
-	lea	-8(up), up
-	jmp	L(m2l3)
-
-	ALIGN(16)
-L(m2tp):mulx(	v1, %rax, w0)
-	add	%rax, w2
-	mov	(up), %rdx
-	mulx(	v0, %rax, w1)
-	adc	$0, w0
-	add	%rax, w2
-	adc	$0, w1
-	add	w3, w2
-L(m2l0):mov	w2, (rp)
-	adc	$0, w1
-	mulx(	v1, %rax, w2)
-	add	%rax, w0
-	mov	8(up), %rdx
-	adc	$0, w2
-	mulx(	v0, %rax, w3)
-	add	%rax, w0
-	adc	$0, w3
-	add	w1, w0
-L(m2l3):mov	w0, 8(rp)
-	adc	$0, w3
-	mulx(	v1, %rax, w0)
-	add	%rax, w2
-	mov	16(up), %rdx
-	mulx(	v0, %rax, w1)
-	adc	$0, w0
-	add	%rax, w2
-	adc	$0, w1
-	add	w3, w2
-L(m2l2):mov	w2, 16(rp)
-	adc	$0, w1
-	mulx(	v1, %rax, w2)
-	add	%rax, w0
-	mov	24(up), %rdx
-	adc	$0, w2
-	mulx(	v0, %rax, w3)
-	add	%rax, w0
-	adc	$0, w3
-	add	w1, w0
-	lea	32(up), up
-L(m2l1):mov	w0, 24(rp)
-	adc	$0, w3
-	inc	n
-	lea	32(rp), rp
-	jnz	L(m2tp)
-
-L(m2ed):mulx(	v1, %rdx, %rax)
-	add	%rdx, w2
-	adc	$0, %rax
-	add	w3, w2
-	mov	w2, (rp)
-	adc	$0, %rax
-	mov	%rax, 8(rp)
-
-	add	$-2, R32(vn)
-	jz	L(ret5)
-	lea	16(vp), vp
-	lea	16(rp), rp
-
-
-L(do_addmul):
-	push	%r15
-	push	vn			C save vn in new stack slot
-define(`vn',	`(%rsp)')
-define(`X0',	`%r14')
-define(`X1',	`%r15')
-define(`v1',	`%r8')
-
-	lea	(rp,un,8), rp
-	lea	(up,un,8), up
-
-L(outer):
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	lea	2(un), n
-	sar	$2, n
-
-	mov	(up), %rdx
-	test	$1, R8(un)
-	jnz	L(bx1)
-
-L(bx0):	mov	(rp), X0
-	mov	8(rp), X1
-	mulx(	v0, %rax, w1)
-	add	%rax, X0
-	mulx(	v1, %rax, w2)
-	adc	$0, w1
-	mov	X0, (rp)
-	add	%rax, X1
-	adc	$0, w2
-	mov	8(up), %rdx
-	test	$2, R8(un)
-	jnz	L(b10)
-
-L(b00):	lea	16(up), up
-	lea	16(rp), rp
-	jmp	L(lo0)
-
-L(b10):	mov	16(rp), X0
-	lea	32(up), up
-	mulx(	v0, %rax, w3)
-	jmp	L(lo2)
-
-L(bx1):	mov	(rp), X1
-	mov	8(rp), X0
-	mulx(	v0, %rax, w3)
-	add	%rax, X1
-	adc	$0, w3
-	mulx(	v1, %rax, w0)
-	add	%rax, X0
-	adc	$0, w0
-	mov	8(up), %rdx
-	mov	X1, (rp)
-	mulx(	v0, %rax, w1)
-	test	$2, R8(un)
-	jz	L(b11)
-
-L(b01):	mov	16(rp), X1
-	lea	24(rp), rp
-	lea	24(up), up
-	jmp	L(lo1)
-
-L(b11):	lea	8(rp), rp
-	lea	8(up), up
-	jmp	L(lo3)
-
-	ALIGN(16)
-L(top):	mulx(	v0, %rax, w3)
-	add	w0, X1
-	adc	$0, w2
-L(lo2):	add	%rax, X1
-	adc	$0, w3
-	mulx(	v1, %rax, w0)
-	add	%rax, X0
-	adc	$0, w0
-	lea	32(rp), rp
-	add	w1, X1
-	mov	-16(up), %rdx
-	mov	X1, -24(rp)
-	adc	$0, w3
-	add	w2, X0
-	mov	-8(rp), X1
-	mulx(	v0, %rax, w1)
-	adc	$0, w0
-L(lo1):	add	%rax, X0
-	mulx(	v1, %rax, w2)
-	adc	$0, w1
-	add	w3, X0
-	mov	X0, -16(rp)
-	adc	$0, w1
-	add	%rax, X1
-	adc	$0, w2
-	add	w0, X1
-	mov	-8(up), %rdx
-	adc	$0, w2
-L(lo0):	mulx(	v0, %rax, w3)
-	add	%rax, X1
-	adc	$0, w3
-	mov	(rp), X0
-	mulx(	v1, %rax, w0)
-	add	%rax, X0
-	adc	$0, w0
-	add	w1, X1
-	mov	X1, -8(rp)
-	adc	$0, w3
-	mov	(up), %rdx
-	add	w2, X0
-	mulx(	v0, %rax, w1)
-	adc	$0, w0
-L(lo3):	add	%rax, X0
-	adc	$0, w1
-	mulx(	v1, %rax, w2)
-	add	w3, X0
-	mov	8(rp), X1
-	mov	X0, (rp)
-	mov	16(rp), X0
-	adc	$0, w1
-	add	%rax, X1
-	adc	$0, w2
-	mov	8(up), %rdx
-	lea	32(up), up
-	inc	n
-	jnz	L(top)
-
-L(end):	mulx(	v0, %rax, w3)
-	add	w0, X1
-	adc	$0, w2
-	add	%rax, X1
-	adc	$0, w3
-	mulx(	v1, %rdx, %rax)
-	add	w1, X1
-	mov	X1, 8(rp)
-	adc	$0, w3
-	add	w2, %rdx
-	adc	$0, %rax
-	add	w3, %rdx
-	mov	%rdx, 16(rp)
-	adc	$0, %rax
-	mov	%rax, 24(rp)
-
-	addl	$-2, vn
-	lea	16(vp), vp
-	lea	-16(up,un,8), up
-	lea	32(rp,un,8), rp
-	jnz	L(outer)
-
-	pop	%rax		C deallocate vn slot
-	pop	%r15
-L(ret5):pop	%r14
-L(ret4):pop	%r13
-L(ret3):pop	%r12
-L(ret2):pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreihwl/mullo_basecase.asm b/gmp/mpn/x86_64/coreihwl/mullo_basecase.asm
deleted file mode 100644
index 9986e8bcfa..0000000000
--- a/gmp/mpn/x86_64/coreihwl/mullo_basecase.asm
+++ /dev/null
@@ -1,426 +0,0 @@
-dnl  AMD64 mpn_mullo_basecase optimised for Intel Haswell.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2008, 2009, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb	mul_2		addmul_2
-C AMD K8,K9	n/a		n/a
-C AMD K10	n/a		n/a
-C AMD bull	n/a		n/a
-C AMD pile	n/a		n/a
-C AMD steam	 ?		 ?
-C AMD bobcat	n/a		n/a
-C AMD jaguar	 ?		 ?
-C Intel P4	n/a		n/a
-C Intel core	n/a		n/a
-C Intel NHM	n/a		n/a
-C Intel SBR	n/a		n/a
-C Intel IBR	n/a		n/a
-C Intel HWL	 1.86		 2.15
-C Intel BWL	 ?		 ?
-C Intel atom	n/a		n/a
-C VIA nano	n/a		n/a
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-C TODO
-C   * Implement proper cor2, replacing current cor0.
-C   * Micro-optimise.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-define(`rp',       `%rdi')
-define(`up',       `%rsi')
-define(`vp_param', `%rdx')
-define(`n',        `%rcx')
-
-define(`vp',       `%r8')
-define(`X0',       `%r14')
-define(`X1',       `%r15')
-
-define(`w0',       `%r10')
-define(`w1',       `%r11')
-define(`w2',       `%r12')
-define(`w3',       `%r13')
-define(`i',        `%rbp')
-define(`v0',       `%r9')
-define(`v1',       `%rbx')
-
-C rax rbx rcx rdx rdi rsi rbp r8 r9 r10 r11 r12 r13 r14 r15
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_mullo_basecase)
-	FUNC_ENTRY(4)
-
-	mov	vp_param, vp
-	mov	(up), %rdx
-
-	cmp	$4, n
-	jb	L(small)
-
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	lea	2(n), i
-	shr	$2, i
-	neg	n
-	add	$2, n
-
-	push	up			C put entry `up' on stack
-
-	test	$1, R8(n)
-	jnz	L(m2x1)
-
-L(m2x0):mulx(	v0, w0, w3)
-	xor	R32(w2), R32(w2)
-	test	$2, R8(n)
-	jz	L(m2b2)
-
-L(m2b0):lea	-8(rp), rp
-	lea	-8(up), up
-	jmp	L(m2e0)
-
-L(m2b2):lea	-24(rp), rp
-	lea	8(up), up
-	jmp	L(m2e2)
-
-L(m2x1):mulx(	v0, w2, w1)
-	xor	R32(w0), R32(w0)
-	test	$2, R8(n)
-	jnz	L(m2b3)
-
-L(m2b1):jmp	L(m2e1)
-
-L(m2b3):lea	-16(rp), rp
-	lea	-16(up), up
-	jmp	L(m2e3)
-
-	ALIGN(16)
-L(m2tp):mulx(	v1, %rax, w0)
-	add	%rax, w2
-	mov	(up), %rdx
-	mulx(	v0, %rax, w1)
-	adc	$0, w0
-	add	%rax, w2
-	adc	$0, w1
-	add	w3, w2
-L(m2e1):mov	w2, (rp)
-	adc	$0, w1
-	mulx(	v1, %rax, w2)
-	add	%rax, w0
-	mov	8(up), %rdx
-	adc	$0, w2
-	mulx(	v0, %rax, w3)
-	add	%rax, w0
-	adc	$0, w3
-	add	w1, w0
-L(m2e0):mov	w0, 8(rp)
-	adc	$0, w3
-	mulx(	v1, %rax, w0)
-	add	%rax, w2
-	mov	16(up), %rdx
-	mulx(	v0, %rax, w1)
-	adc	$0, w0
-	add	%rax, w2
-	adc	$0, w1
-	add	w3, w2
-L(m2e3):mov	w2, 16(rp)
-	adc	$0, w1
-	mulx(	v1, %rax, w2)
-	add	%rax, w0
-	mov	24(up), %rdx
-	adc	$0, w2
-	mulx(	v0, %rax, w3)
-	add	%rax, w0
-	adc	$0, w3
-	add	w1, w0
-	lea	32(up), up
-L(m2e2):mov	w0, 24(rp)
-	adc	$0, w3
-	dec	i
-	lea	32(rp), rp
-	jnz	L(m2tp)
-
-L(m2ed):mulx(	v1, %rax, w0)
-	add	%rax, w2
-	mov	(up), %rdx
-	mulx(	v0, %rax, w1)
-	add	w2, %rax
-	add	w3, %rax
-	mov	%rax, (rp)
-
-	mov	(%rsp), up		C restore `up' to beginning
-	lea	16(vp), vp
-	lea	8(rp,n,8), rp		C put back rp to old rp + 2
-	add	$2, n
-	jge	L(cor1)
-
-	push	%r14
-	push	%r15
-
-L(outer):
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	lea	(n), i
-	sar	$2, i
-
-	mov	(up), %rdx
-	test	$1, R8(n)
-	jnz	L(bx1)
-
-L(bx0):	mov	(rp), X1
-	mov	8(rp), X0
-	mulx(	v0, %rax, w3)
-	add	%rax, X1
-	adc	$0, w3
-	mulx(	v1, %rax, w0)
-	add	%rax, X0
-	adc	$0, w0
-	mov	8(up), %rdx
-	mov	X1, (rp)
-	mulx(	v0, %rax, w1)
-	test	$2, R8(n)
-	jz	L(b2)
-
-L(b0):	lea	8(rp), rp
-	lea	8(up), up
-	jmp	L(lo0)
-
-L(b2):	mov	16(rp), X1
-	lea	24(rp), rp
-	lea	24(up), up
-	jmp	L(lo2)
-
-L(bx1):	mov	(rp), X0
-	mov	8(rp), X1
-	mulx(	v0, %rax, w1)
-	add	%rax, X0
-	mulx(	v1, %rax, w2)
-	adc	$0, w1
-	mov	X0, (rp)
-	add	%rax, X1
-	adc	$0, w2
-	mov	8(up), %rdx
-	test	$2, R8(n)
-	jnz	L(b3)
-
-L(b1):	lea	16(up), up
-	lea	16(rp), rp
-	jmp	L(lo1)
-
-L(b3):	mov	16(rp), X0
-	lea	32(up), up
-	mulx(	v0, %rax, w3)
-	inc	i
-	jz	L(cj3)
-	jmp	L(lo3)
-
-	ALIGN(16)
-L(top):	mulx(	v0, %rax, w3)
-	add	w0, X1
-	adc	$0, w2
-L(lo3):	add	%rax, X1
-	adc	$0, w3
-	mulx(	v1, %rax, w0)
-	add	%rax, X0
-	adc	$0, w0
-	lea	32(rp), rp
-	add	w1, X1
-	mov	-16(up), %rdx
-	mov	X1, -24(rp)
-	adc	$0, w3
-	add	w2, X0
-	mov	-8(rp), X1
-	mulx(	v0, %rax, w1)
-	adc	$0, w0
-L(lo2):	add	%rax, X0
-	mulx(	v1, %rax, w2)
-	adc	$0, w1
-	add	w3, X0
-	mov	X0, -16(rp)
-	adc	$0, w1
-	add	%rax, X1
-	adc	$0, w2
-	add	w0, X1
-	mov	-8(up), %rdx
-	adc	$0, w2
-L(lo1):	mulx(	v0, %rax, w3)
-	add	%rax, X1
-	adc	$0, w3
-	mov	(rp), X0
-	mulx(	v1, %rax, w0)
-	add	%rax, X0
-	adc	$0, w0
-	add	w1, X1
-	mov	X1, -8(rp)
-	adc	$0, w3
-	mov	(up), %rdx
-	add	w2, X0
-	mulx(	v0, %rax, w1)
-	adc	$0, w0
-L(lo0):	add	%rax, X0
-	adc	$0, w1
-	mulx(	v1, %rax, w2)
-	add	w3, X0
-	mov	8(rp), X1
-	mov	X0, (rp)
-	mov	16(rp), X0
-	adc	$0, w1
-	add	%rax, X1
-	adc	$0, w2
-	mov	8(up), %rdx
-	lea	32(up), up
-	inc	i
-	jnz	L(top)
-
-L(end):	mulx(	v0, %rax, w3)
-	add	w0, X1
-	adc	$0, w2
-L(cj3):	add	%rax, X1
-	adc	$0, w3
-	mulx(	v1, %rax, w0)
-	add	%rax, X0
-	add	w1, X1
-	mov	-16(up), %rdx
-	mov	X1, 8(rp)
-	adc	$0, w3
-	add	w2, X0
-	mulx(	v0, %rax, w1)
-	add	X0, %rax
-	add	w3, %rax
-	mov	%rax, 16(rp)
-
-	mov	16(%rsp), up		C restore `up' to beginning
-	lea	16(vp), vp
-	lea	24(rp,n,8), rp		C put back rp to old rp + 2
-	add	$2, n
-	jl	L(outer)
-
-	pop	%r15
-	pop	%r14
-
-	jnz	L(cor0)
-
-L(cor1):mov	(vp), v0
-	mov	8(vp), v1
-	mov	(up), %rdx
-	mulx(	v0, %r12, %rbp)		C u0 x v2
-	add	(rp), %r12		C FIXME: rp[0] still available in reg?
-	adc	%rax, %rbp
-	mov	8(up), %r10
-	imul	v0, %r10
-	imul	v1, %rdx
-	mov	%r12, (rp)
-	add	%r10, %rdx
-	add	%rbp, %rdx
-	mov	%rdx, 8(rp)
-	pop	%rax			C deallocate `up' copy
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-L(cor0):mov	(vp), %r11
-	imul	(up), %r11
-	add	%rax, %r11
-	mov	%r11, (rp)
-	pop	%rax			C deallocate `up' copy
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-	ALIGN(16)
-L(small):
-	cmp	$2, n
-	jae	L(gt1)
-L(n1):	imul	(vp), %rdx
-	mov	%rdx, (rp)
-	FUNC_EXIT()
-	ret
-L(gt1):	ja	L(gt2)
-L(n2):	mov	(vp), %r9
-	mulx(	%r9, %rax, %rdx)
-	mov	%rax, (rp)
-	mov	8(up), %rax
-	imul	%r9, %rax
-	add	%rax, %rdx
-	mov	8(vp), %r9
-	mov	(up), %rcx
-	imul	%r9, %rcx
-	add	%rcx, %rdx
-	mov	%rdx, 8(rp)
-	FUNC_EXIT()
-	ret
-L(gt2):
-L(n3):	mov	(vp), %r9
-	mulx(	%r9, %rax, %r10)	C u0 x v0
-	mov	%rax, (rp)
-	mov	8(up), %rdx
-	mulx(	%r9, %rax, %rdx)	C u1 x v0
-	imul	16(up), %r9		C u2 x v0
-	add	%rax, %r10
-	adc	%rdx, %r9
-	mov	8(vp), %r11
-	mov	(up), %rdx
-	mulx(	%r11, %rax, %rdx)	C u0 x v1
-	add	%rax, %r10
-	adc	%rdx, %r9
-	imul	8(up), %r11		C u1 x v1
-	add	%r11, %r9
-	mov	%r10, 8(rp)
-	mov	16(vp), %r10
-	mov	(up), %rax
-	imul	%rax, %r10		C u0 x v2
-	add	%r10, %r9
-	mov	%r9, 16(rp)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreihwl/redc_1.asm b/gmp/mpn/x86_64/coreihwl/redc_1.asm
deleted file mode 100644
index f1a475e53c..0000000000
--- a/gmp/mpn/x86_64/coreihwl/redc_1.asm
+++ /dev/null
@@ -1,433 +0,0 @@
-dnl  AMD64 mpn_redc_1 optimised for Intel Haswell.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	n/a
-C AMD K10	n/a
-C AMD bull	n/a
-C AMD pile	n/a
-C AMD steam	 ?
-C AMD bobcat	n/a
-C AMD jaguar	 ?
-C Intel P4	n/a
-C Intel core	n/a
-C Intel NHM	n/a
-C Intel SBR	n/a
-C Intel IBR	n/a
-C Intel HWL	 2.32
-C Intel BWL	 ?
-C Intel atom	n/a
-C VIA nano	n/a
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-C TODO
-C  * Micro-optimise.
-C  * Consider inlining mpn_add_n.  Tests indicate that this saves just 1-2
-C    cycles, though.
-
-define(`rp',          `%rdi')   C rcx
-define(`up',          `%rsi')   C rdx
-define(`mp_param',    `%rdx')   C r8
-define(`n',           `%rcx')   C r9
-define(`u0inv_param', `%r8')    C stack
-
-define(`i',           `%r14')
-define(`j',           `%r15')
-define(`mp',          `%rdi')
-define(`u0inv',       `(%rsp)')  C stack
-
-ABI_SUPPORT(DOS64)    C FIXME: needs verification
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_redc_1)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-	push	rp
-	mov	mp_param, mp		C note that rp and mp shares register
-	mov	(up), %rdx
-
-	neg	n
-	push	%r8			C put u0inv on stack
-	imul	u0inv_param, %rdx	C first iteration q0
-	mov	n, j			C outer loop induction var
-
-	test	$1, R8(n)
-	jnz	L(bx1)
-
-L(bx0):	test	$2, R8(n)
-	jz	L(o0b)
-
-	cmp	$-2, R32(n)
-	jnz	L(o2)
-
-C Special code for n = 2 since general code cannot handle it
-	mov	8(%rsp), %rbx		C rp
-	lea	16(%rsp), %rsp		C deallocate two slots
-	mulx(	(mp), %r9, %r12)
-	mulx(	8,(mp), %r11, %r10)
-	add	%r12, %r11
-	adc	$0, %r10
-	add	(up), %r9		C = 0
-	adc	8(up), %r11		C r11 = up[1]
-	adc	$0, %r10		C -> up[0]
-	mov	%r11, %rdx
-	imul	u0inv_param, %rdx
-	mulx(	(mp), %r13, %r12)
-	mulx(	8,(mp), %r14, %r15)
-	xor	R32(%rax), R32(%rax)
-	add	%r12, %r14
-	adc	$0, %r15
-	add	%r11, %r13		C = 0
-	adc	16(up), %r14		C rp[2]
-	adc	$0, %r15		C -> up[1]
-	add	%r14, %r10
-	adc	24(up), %r15
-	mov	%r10, (%rbx)
-	mov	%r15, 8(%rbx)
-	setc	R8(%rax)
-	jmp	L(ret)
-
-L(o2):	lea	2(n), i			C inner loop induction var
-	mulx(	(mp), %r9, %r8)
-	mulx(	8,(mp), %r11, %r10)
-	sar	$2, i
-	add	%r8, %r11
-	jmp	L(lo2)
-
-	ALIGN(16)
-L(tp2):	adc	%rax, %r9
-	lea	32(up), up
-	adc	%r8, %r11
-L(lo2):	mulx(	16,(mp), %r13, %r12)
-	mov	(up), %r8
-	mulx(	24,(mp), %rbx, %rax)
-	lea	32(mp), mp
-	adc	%r10, %r13
-	adc	%r12, %rbx
-	adc	$0, %rax
-	mov	8(up), %r10
-	mov	16(up), %r12
-	add	%r9, %r8
-	mov	24(up), %rbp
-	mov	%r8, (up)
-	adc	%r11, %r10
-	mulx(	(mp), %r9, %r8)
-	mov	%r10, 8(up)
-	adc	%r13, %r12
-	mov	%r12, 16(up)
-	adc	%rbx, %rbp
-	mulx(	8,(mp), %r11, %r10)
-	mov	%rbp, 24(up)
-	inc	i
-	jnz	L(tp2)
-
-L(ed2):	mov	56(up,n,8), %rdx	C next iteration up[0]
-	lea	16(mp,n,8), mp		C mp = (last starting mp)
-	adc	%rax, %r9
-	adc	%r8, %r11
-	mov	32(up), %r8
-	adc	$0, %r10
-	imul	u0inv, %rdx		C next iteration q0
-	mov	40(up), %rax
-	add	%r9, %r8
-	mov	%r8, 32(up)
-	adc	%r11, %rax
-	mov	%rax, 40(up)
-	lea	56(up,n,8), up		C up = (last starting up) + 1
-	adc	$0, %r10
-	mov	%r10, -8(up)
-	inc	j
-	jnz	L(o2)
-
-	jmp	L(cj)
-
-
-L(bx1):	test	$2, R8(n)
-	jz	L(o3a)
-
-L(o1a):	cmp	$-1, R32(n)
-	jnz	L(o1b)
-
-C Special code for n = 1 since general code cannot handle it
-	mov	8(%rsp), %rbx		C rp
-	lea	16(%rsp), %rsp		C deallocate two slots
-	mulx(	(mp), %r11, %r10)
-	add	(up), %r11
-	adc	8(up), %r10
-	mov	%r10, (%rbx)
-	mov	$0, R32(%rax)
-	setc	R8(%rax)
-	jmp	L(ret)
-
-L(o1b):	lea	24(mp), mp
-L(o1):	lea	1(n), i			C inner loop induction var
-	mulx(	-24,(mp), %r11, %r10)
-	mulx(	-16,(mp), %r13, %r12)
-	mulx(	-8,(mp), %rbx, %rax)
-	sar	$2, i
-	add	%r10, %r13
-	adc	%r12, %rbx
-	adc	$0, %rax
-	mov	(up), %r10
-	mov	8(up), %r12
-	mov	16(up), %rbp
-	add	%r11, %r10
-	jmp	L(lo1)
-
-	ALIGN(16)
-L(tp1):	adc	%rax, %r9
-	lea	32(up), up
-	adc	%r8, %r11
-	mulx(	16,(mp), %r13, %r12)
-	mov	-8(up), %r8
-	mulx(	24,(mp), %rbx, %rax)
-	lea	32(mp), mp
-	adc	%r10, %r13
-	adc	%r12, %rbx
-	adc	$0, %rax
-	mov	(up), %r10
-	mov	8(up), %r12
-	add	%r9, %r8
-	mov	16(up), %rbp
-	mov	%r8, -8(up)
-	adc	%r11, %r10
-L(lo1):	mulx(	(mp), %r9, %r8)
-	mov	%r10, (up)
-	adc	%r13, %r12
-	mov	%r12, 8(up)
-	adc	%rbx, %rbp
-	mulx(	8,(mp), %r11, %r10)
-	mov	%rbp, 16(up)
-	inc	i
-	jnz	L(tp1)
-
-L(ed1):	mov	48(up,n,8), %rdx	C next iteration up[0]
-	lea	40(mp,n,8), mp		C mp = (last starting mp)
-	adc	%rax, %r9
-	adc	%r8, %r11
-	mov	24(up), %r8
-	adc	$0, %r10
-	imul	u0inv, %rdx		C next iteration q0
-	mov	32(up), %rax
-	add	%r9, %r8
-	mov	%r8, 24(up)
-	adc	%r11, %rax
-	mov	%rax, 32(up)
-	lea	48(up,n,8), up		C up = (last starting up) + 1
-	adc	$0, %r10
-	mov	%r10, -8(up)
-	inc	j
-	jnz	L(o1)
-
-	jmp	L(cj)
-
-L(o3a):	cmp	$-3, R32(n)
-	jnz	L(o3b)
-
-C Special code for n = 3 since general code cannot handle it
-L(n3):	mulx(	(mp), %rbx, %rax)
-	mulx(	8,(mp), %r9, %r14)
-	add	(up), %rbx
-	mulx(	16,(mp), %r11, %r10)
-	adc	%rax, %r9		C W 1
-	adc	%r14, %r11		C W 2
-	mov	8(up), %r14
-	mov	u0inv_param, %rdx
-	adc	$0, %r10		C W 3
-	mov	16(up), %rax
-	add	%r9, %r14		C W 1
-	mov	%r14, 8(up)
-	mulx(	%r14, %rdx, %r13)	C next iteration q0
-	adc	%r11, %rax		C W 2
-	mov	%rax, 16(up)
-	adc	$0, %r10		C W 3
-	mov	%r10, (up)
-	lea	8(up), up		C up = (last starting up) + 1
-	inc	j
-	jnz	L(n3)
-
-	jmp	L(cj)
-
-L(o3b):	lea	8(mp), mp
-L(o3):	lea	4(n), i			C inner loop induction var
-	mulx(	-8,(mp), %rbx, %rax)
-	mulx(	(mp), %r9, %r8)
-	mov	(up), %rbp
-	mulx(	8,(mp), %r11, %r10)
-	sar	$2, i
-	add	%rbx, %rbp
-	nop
-	adc	%rax, %r9
-	jmp	L(lo3)
-
-	ALIGN(16)
-L(tp3):	adc	%rax, %r9
-	lea	32(up), up
-L(lo3):	adc	%r8, %r11
-	mulx(	16,(mp), %r13, %r12)
-	mov	8(up), %r8
-	mulx(	24,(mp), %rbx, %rax)
-	lea	32(mp), mp
-	adc	%r10, %r13
-	adc	%r12, %rbx
-	adc	$0, %rax
-	mov	16(up), %r10
-	mov	24(up), %r12
-	add	%r9, %r8
-	mov	32(up), %rbp
-	mov	%r8, 8(up)
-	adc	%r11, %r10
-	mulx(	(mp), %r9, %r8)
-	mov	%r10, 16(up)
-	adc	%r13, %r12
-	mov	%r12, 24(up)
-	adc	%rbx, %rbp
-	mulx(	8,(mp), %r11, %r10)
-	mov	%rbp, 32(up)
-	inc	i
-	jnz	L(tp3)
-
-L(ed3):	mov	64(up,n,8), %rdx	C next iteration up[0]
-	lea	24(mp,n,8), mp		C mp = (last starting mp)
-	adc	%rax, %r9
-	adc	%r8, %r11
-	mov	40(up), %r8
-	adc	$0, %r10
-	imul	u0inv, %rdx		C next iteration q0
-	mov	48(up), %rax
-	add	%r9, %r8
-	mov	%r8, 40(up)
-	adc	%r11, %rax
-	mov	%rax, 48(up)
-	lea	64(up,n,8), up		C up = (last starting up) + 1
-	adc	$0, %r10
-	mov	%r10, -8(up)
-	inc	j
-	jnz	L(o3)
-
-	jmp	L(cj)
-
-L(o0b):	lea	16(mp), mp
-L(o0):	mov	n, i			C inner loop induction var
-	mulx(	-16,(mp), %r13, %r12)
-	mulx(	-8,(mp), %rbx, %rax)
-	sar	$2, i
-	add	%r12, %rbx
-	adc	$0, %rax
-	mov	(up), %r12
-	mov	8(up), %rbp
-	mulx(	(mp), %r9, %r8)
-	add	%r13, %r12
-	jmp	L(lo0)
-
-	ALIGN(16)
-L(tp0):	adc	%rax, %r9
-	lea	32(up), up
-	adc	%r8, %r11
-	mulx(	16,(mp), %r13, %r12)
-	mov	-16(up), %r8
-	mulx(	24,(mp), %rbx, %rax)
-	lea	32(mp), mp
-	adc	%r10, %r13
-	adc	%r12, %rbx
-	adc	$0, %rax
-	mov	-8(up), %r10
-	mov	(up), %r12
-	add	%r9, %r8
-	mov	8(up), %rbp
-	mov	%r8, -16(up)
-	adc	%r11, %r10
-	mulx(	(mp), %r9, %r8)
-	mov	%r10, -8(up)
-	adc	%r13, %r12
-	mov	%r12, (up)
-L(lo0):	adc	%rbx, %rbp
-	mulx(	8,(mp), %r11, %r10)
-	mov	%rbp, 8(up)
-	inc	i
-	jnz	L(tp0)
-
-L(ed0):	mov	40(up,n,8), %rdx	C next iteration up[0]
-	lea	32(mp,n,8), mp		C mp = (last starting mp)
-	adc	%rax, %r9
-	adc	%r8, %r11
-	mov	16(up), %r8
-	adc	$0, %r10
-	imul	u0inv, %rdx		C next iteration q0
-	mov	24(up), %rax
-	add	%r9, %r8
-	mov	%r8, 16(up)
-	adc	%r11, %rax
-	mov	%rax, 24(up)
-	lea	40(up,n,8), up		C up = (last starting up) + 1
-	adc	$0, %r10
-	mov	%r10, -8(up)
-	inc	j
-	jnz	L(o0)
-
-L(cj):
-IFSTD(`	mov	8(%rsp), %rdi		C param 1: rp
-	lea	16(%rsp), %rsp		C deallocate two slots
-	lea	(up,n,8), %rdx		C param 3: up - n
-	neg	R32(n)		')	C param 4: n
-
-IFDOS(`	mov	up, %rdx		C param 2: up
-	lea	(up,n,8), %r8		C param 3: up - n
-	neg	R32(n)
-	mov	n, %r9			C param 4: n
-	mov	8(%rsp), %rcx		C param 1: rp
-	lea	16(%rsp), %rsp	')	C deallocate two slots
-
-	CALL(	mpn_add_n)
-
-L(ret):	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreihwl/sqr_basecase.asm b/gmp/mpn/x86_64/coreihwl/sqr_basecase.asm
deleted file mode 100644
index 641cdf349a..0000000000
--- a/gmp/mpn/x86_64/coreihwl/sqr_basecase.asm
+++ /dev/null
@@ -1,506 +0,0 @@
-dnl  AMD64 mpn_sqr_basecase optimised for Intel Haswell.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2008, 2009, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb	mul_2		addmul_2	sqr_diag_addlsh1
-C AMD K8,K9	n/a		n/a			n/a
-C AMD K10	n/a		n/a			n/a
-C AMD bull	n/a		n/a			n/a
-C AMD pile	n/a		n/a			n/a
-C AMD steam	 ?		 ?			 ?
-C AMD bobcat	n/a		n/a			n/a
-C AMD jaguar	 ?		 ?			 ?
-C Intel P4	n/a		n/a			n/a
-C Intel core	n/a		n/a			n/a
-C Intel NHM	n/a		n/a			n/a
-C Intel SBR	n/a		n/a			n/a
-C Intel IBR	n/a		n/a			n/a
-C Intel HWL	 1.86		 2.15			~2.5
-C Intel BWL	 ?		 ?			 ?
-C Intel atom	n/a		n/a			n/a
-C VIA nano	n/a		n/a			n/a
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund, except
-C that the sqr_diag_addlsh1 loop was manually written.
-
-C TODO
-C  * Replace current unoptimised sqr_diag_addlsh1 loop; 1.75 c/l might be
-C    possible.
-C  * Consider splitting outer loop into 2, one for n = 1 (mod 2) and one for
-C    n = 0 (mod 2).  These loops could fall into specific "corner" code.
-C  * Consider splitting outer loop into 4.
-C  * Streamline pointer updates.
-C  * Perhaps suppress a few more xor insns in feed-in code.
-C  * Make sure we write no dead registers in feed-in code.
-C  * We might use 32-bit size ops, since n >= 2^32 is non-terminating.  Watch
-C    out for negative sizes being zero-extended, though.
-C  * Provide straight-line code for n = 4; then look for simplifications in
-C    main code.
-
-define(`rp',	  `%rdi')
-define(`up',	  `%rsi')
-define(`un_param',`%rdx')
-
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_sqr_basecase)
-	FUNC_ENTRY(3)
-
-	cmp	$2, un_param
-	jae	L(gt1)
-
-	mov	(up), %rdx
-	mulx(	%rdx, %rax, %rdx)
-	mov	%rax, (rp)
-	mov	%rdx, 8(rp)
-	FUNC_EXIT()
-	ret
-
-L(gt1):	jne	L(gt2)
-
-	mov	(up), %rdx
-	mov	8(up), %rcx
-	mulx(	%rcx, %r9, %r10)	C v0 * v1	W 1 2
-	mulx(	%rdx, %rax, %r8)	C v0 * v0	W 0 1
-	mov	%rcx, %rdx
-	mulx(	%rdx, %r11, %rdx)	C v1 * v1	W 2 3
-	add	%r9, %r9		C		W 1
-	adc	%r10, %r10		C		W 2
-	adc	$0, %rdx		C		W 3
-	add	%r9, %r8		C W 1
-	adc	%r11, %r10		C W 2
-	adc	$0, %rdx		C W 3
-	mov	%rax, (rp)
-	mov	%r8, 8(rp)
-	mov	%r10, 16(rp)
-	mov	%rdx, 24(rp)
-	FUNC_EXIT()
-	ret
-
-L(gt2):	cmp	$4, un_param
-	jae	L(gt3)
-define(`v0', `%r8')
-define(`v1', `%r9')
-define(`w0', `%r10')
-define(`w2', `%r11')
-
-	mov	(up), v0
-	mov	8(up), %rdx
-	mov	%rdx, v1
-	mulx(	v0, w2, %rax)
-	mov	16(up), %rdx
-	mulx(	v0, w0, %rcx)
-	mov	w2, %r8
-	add	%rax, w0
-	adc	$0, %rcx
-	mulx(	v1, %rdx, %rax)
-	add	%rcx, %rdx
-	mov	%rdx, 24(rp)
-	adc	$0, %rax
-	mov	%rax, 32(rp)
-	xor	R32(%rcx), R32(%rcx)
-	mov	(up), %rdx
-	mulx(	%rdx, %rax, w2)
-	mov	%rax, (rp)
-	add	%r8, %r8
-	adc	w0, w0
-	setc	R8(%rcx)
-	mov	8(up), %rdx
-	mulx(	%rdx, %rax, %rdx)
-	add	w2, %r8
-	adc	%rax, w0
-	mov	%r8, 8(rp)
-	mov	w0, 16(rp)
-	mov	24(rp), %r8
-	mov	32(rp), w0
-	lea	(%rdx,%rcx), w2
-	adc	%r8, %r8
-	adc	w0, w0
-	setc	R8(%rcx)
-	mov	16(up), %rdx
-	mulx(	%rdx, %rax, %rdx)
-	add	w2, %r8
-	adc	%rax, w0
-	mov	%r8, 24(rp)
-	mov	w0, 32(rp)
-	adc	%rcx, %rdx
-	mov	%rdx, 40(rp)
-	FUNC_EXIT()
-	ret
-
-L(gt3):
-
-define(`v0', `%r8')
-define(`v1', `%r9')
-define(`w0', `%r10')
-define(`w1', `%r11')
-define(`w2', `%rbx')
-define(`w3', `%rbp')
-define(`un', `%r12')
-define(`n',  `%rcx')
-
-define(`X0', `%r13')
-define(`X1', `%r14')
-
-L(do_mul_2):
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	mov	$0, R32(un)
-	sub	un_param, un		C free up rdx
-	push	un
-	mov	(up), v0
-	mov	8(up), %rdx
-	lea	2(un), n
-	sar	$2, n			C FIXME: suppress, change loop?
-	inc	un			C decrement |un|
-	mov	%rdx, v1
-
-	test	$1, R8(un)
-	jnz	L(mx1)
-
-L(mx0):	mulx(	v0, w2, w1)
-	mov	16(up), %rdx
-	mov	w2, 8(rp)
-	xor	w2, w2
-	mulx(	v0, w0, w3)
-	test	$2, R8(un)
-	jz	L(m00)
-
-L(m10):	lea	-8(rp), rp
-	lea	-8(up), up
-	jmp	L(mlo2)
-
-L(m00):	lea	8(up), up
-	lea	8(rp), rp
-	jmp	L(mlo0)
-
-L(mx1):	mulx(	v0, w0, w3)
-	mov	16(up), %rdx
-	mov	w0, 8(rp)
-	xor	w0, w0
-	mulx(	v0, w2, w1)
-	test	$2, R8(un)
-	jz	L(mlo3)
-
-L(m01):	lea	16(rp), rp
-	lea	16(up), up
-	jmp	L(mlo1)
-
-	ALIGN(32)
-L(mtop):mulx(	v1, %rax, w0)
-	add	%rax, w2		C 0
-	mov	(up), %rdx
-	mulx(	v0, %rax, w1)
-	adc	$0, w0			C 1
-	add	%rax, w2		C 0
-L(mlo1):adc	$0, w1			C 1
-	add	w3, w2			C 0
-	mov	w2, (rp)		C 0
-	adc	$0, w1			C 1
-	mulx(	v1, %rax, w2)
-	add	%rax, w0		C 1
-	mov	8(up), %rdx
-	adc	$0, w2			C 2
-	mulx(	v0, %rax, w3)
-	add	%rax, w0		C 1
-	adc	$0, w3			C 2
-L(mlo0):add	w1, w0			C 1
-	mov	w0, 8(rp)		C 1
-	adc	$0, w3			C 2
-	mulx(	v1, %rax, w0)
-	add	%rax, w2		C 2
-	mov	16(up), %rdx
-	mulx(	v0, %rax, w1)
-	adc	$0, w0			C 3
-	add	%rax, w2		C 2
-	adc	$0, w1			C 3
-L(mlo3):add	w3, w2			C 2
-	mov	w2, 16(rp)		C 2
-	adc	$0, w1			C 3
-	mulx(	v1, %rax, w2)
-	add	%rax, w0		C 3
-	mov	24(up), %rdx
-	adc	$0, w2			C 4
-	mulx(	v0, %rax, w3)
-	add	%rax, w0		C 3
-	adc	$0, w3			C 4
-L(mlo2):add	w1, w0			C 3
-	lea	32(up), up
-	mov	w0, 24(rp)		C 3
-	adc	$0, w3			C 4
-	inc	n
-	lea	32(rp), rp
-	jnz	L(mtop)
-
-L(mend):mulx(	v1, %rdx, %rax)
-	add	%rdx, w2
-	adc	$0, %rax
-	add	w3, w2
-	mov	w2, (rp)
-	adc	$0, %rax
-	mov	%rax, 8(rp)
-
-	lea	16(up), up
-	lea	-16(rp), rp
-
-L(do_addmul_2):
-L(outer):
-	lea	(up,un,8), up		C put back up to 2 positions above last time
-	lea	48(rp,un,8), rp		C put back rp to 4 positions above last time
-
-	mov	-8(up), v0		C shared between addmul_2 and corner
-
-	add	$2, un			C decrease |un|
-	cmp	$-2, un
-	jge	L(corner)
-
-	mov	(up), v1
-
-	lea	1(un), n
-	sar	$2, n			C FIXME: suppress, change loop?
-
-	mov	v1, %rdx
-	test	$1, R8(un)
-	jnz	L(bx1)
-
-L(bx0):	mov	(rp), X0
-	mov	8(rp), X1
-	mulx(	v0, %rax, w1)
-	add	%rax, X0
-	adc	$0, w1
-	mov	X0, (rp)
-	xor	w2, w2
-	test	$2, R8(un)
-	jnz	L(b10)
-
-L(b00):	mov	8(up), %rdx
-	lea	16(rp), rp
-	lea	16(up), up
-	jmp	L(lo0)
-
-L(b10):	mov	8(up), %rdx
-	mov	16(rp), X0
-	lea	32(up), up
-	inc	n
-	mulx(	v0, %rax, w3)
-	jz	L(ex)
-	jmp	L(lo2)
-
-L(bx1):	mov	(rp), X1
-	mov	8(rp), X0
-	mulx(	v0, %rax, w3)
-	mov	8(up), %rdx
-	add	%rax, X1
-	adc	$0, w3
-	xor	w0, w0
-	mov	X1, (rp)
-	mulx(	v0, %rax, w1)
-	test	$2, R8(un)
-	jz	L(b11)
-
-L(b01):	mov	16(rp), X1
-	lea	24(rp), rp
-	lea	24(up), up
-	jmp	L(lo1)
-
-L(b11):	lea	8(rp), rp
-	lea	8(up), up
-	jmp	L(lo3)
-
-	ALIGN(32)
-L(top):	mulx(	v0, %rax, w3)
-	add	w0, X1
-	adc	$0, w2
-L(lo2):	add	%rax, X1
-	adc	$0, w3
-	mulx(	v1, %rax, w0)
-	add	%rax, X0
-	adc	$0, w0
-	lea	32(rp), rp
-	add	w1, X1
-	mov	-16(up), %rdx
-	mov	X1, -24(rp)
-	adc	$0, w3
-	add	w2, X0
-	mov	-8(rp), X1
-	mulx(	v0, %rax, w1)
-	adc	$0, w0
-L(lo1):	add	%rax, X0
-	mulx(	v1, %rax, w2)
-	adc	$0, w1
-	add	w3, X0
-	mov	X0, -16(rp)
-	adc	$0, w1
-	add	%rax, X1
-	adc	$0, w2
-	add	w0, X1
-	mov	-8(up), %rdx
-	adc	$0, w2
-L(lo0):	mulx(	v0, %rax, w3)
-	add	%rax, X1
-	adc	$0, w3
-	mov	(rp), X0
-	mulx(	v1, %rax, w0)
-	add	%rax, X0
-	adc	$0, w0
-	add	w1, X1
-	mov	X1, -8(rp)
-	adc	$0, w3
-	mov	(up), %rdx
-	add	w2, X0
-	mulx(	v0, %rax, w1)
-	adc	$0, w0
-L(lo3):	add	%rax, X0
-	adc	$0, w1
-	mulx(	v1, %rax, w2)
-	add	w3, X0
-	mov	8(rp), X1
-	mov	X0, (rp)
-	mov	16(rp), X0
-	adc	$0, w1
-	add	%rax, X1
-	adc	$0, w2
-	mov	8(up), %rdx
-	lea	32(up), up
-	inc	n
-	jnz	L(top)
-
-L(end):	mulx(	v0, %rax, w3)
-	add	w0, X1
-	adc	$0, w2
-L(ex):	add	%rax, X1
-	adc	$0, w3
-	mulx(	v1, %rdx, %rax)
-	add	w1, X1
-	mov	X1, 8(rp)
-	adc	$0, w3
-	add	w2, %rdx
-	adc	$0, %rax
-	add	%rdx, w3
-	mov	w3, 16(rp)
-	adc	$0, %rax
-	mov	%rax, 24(rp)
-
-	jmp	L(outer)		C loop until a small corner remains
-
-L(corner):
-	pop	un
-	mov	(up), %rdx
-	jg	L(small_corner)
-
-	mov	%rdx, v1
-	mov	(rp), X0
-	mov	%rax, X1		C Tricky rax reuse of last iteration
-	mulx(	v0, %rax, w1)
-	add	%rax, X0
-	adc	$0, w1
-	mov	X0, (rp)
-	mov	8(up), %rdx
-	mulx(	v0, %rax, w3)
-	add	%rax, X1
-	adc	$0, w3
-	mulx(	v1, %rdx, %rax)
-	add	w1, X1
-	mov	X1, 8(rp)
-	adc	$0, w3
-	add	w3, %rdx
-	mov	%rdx, 16(rp)
-	adc	$0, %rax
-	mov	%rax, 24(rp)
-	lea	32(rp), rp
-	lea	16(up), up
-	jmp	L(com)
-
-L(small_corner):
-	mulx(	v0, X1, w3)
-	add	%rax, X1		C Tricky rax reuse of last iteration
-	adc	$0, w3
-	mov	X1, (rp)
-	mov	w3, 8(rp)
-	lea	16(rp), rp
-	lea	8(up), up
-
-L(com):
-
-L(sqr_diag_addlsh1):
-	lea	8(up,un,8), up		C put back up at its very beginning
-	lea	(rp,un,8), rp
-	lea	(rp,un,8), rp		C put back rp at its very beginning
-	inc	un
-
-	mov	-8(up), %rdx
-	xor	R32(%rbx), R32(%rbx)	C clear CF as side effect
-	mulx(	%rdx, %rax, %r10)
-	mov	%rax, 8(rp)
-	mov	16(rp), %r8
-	mov	24(rp), %r9
-	jmp	L(dm)
-
-	ALIGN(16)
-L(dtop):mov	32(rp), %r8
-	mov	40(rp), %r9
-	lea	16(rp), rp
-	lea	(%rdx,%rbx), %r10
-L(dm):	adc	%r8, %r8
-	adc	%r9, %r9
-	setc	R8(%rbx)
-	mov	(up), %rdx
-	lea	8(up), up
-	mulx(	%rdx, %rax, %rdx)
-	add	%r10, %r8
-	adc	%rax, %r9
-	mov	%r8, 16(rp)
-	mov	%r9, 24(rp)
-	inc	un
-	jnz	L(dtop)
-
-L(dend):adc	%rbx, %rdx
-	mov	%rdx, 32(rp)
-
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreinhm/aorrlsh_n.asm b/gmp/mpn/x86_64/coreinhm/aorrlsh_n.asm
deleted file mode 100644
index eed64e701e..0000000000
--- a/gmp/mpn/x86_64/coreinhm/aorrlsh_n.asm
+++ /dev/null
@@ -1,200 +0,0 @@
-dnl  AMD64 mpn_addlsh_n -- rp[] = up[] + (vp[] << k)
-dnl  AMD64 mpn_rsblsh_n -- rp[] = (vp[] << k) - up[]
-dnl  Optimised for Nehalem.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 4.75
-C Intel P4	 ?
-C Intel core2	 2.8-3
-C Intel NHM	 2.8
-C Intel SBR	 3.55
-C Intel atom	 ?
-C VIA nano	 ?
-
-C The inner-loop probably runs close to optimally on Nehalem (using 4-way
-C unrolling).  The rest of the code is quite crude, and could perhaps be made
-C both smaller and faster.
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`vp',	`%rdx')
-define(`n',	`%rcx')
-define(`cnt',	`%r8')
-define(`cy',	`%r9')			C for _nc variant
-
-ifdef(`OPERATION_addlsh_n', `
-	define(ADDSUB,	add)
-	define(ADCSBB,	adc)
-	define(IFRSB,	)
-	define(func_n,	mpn_addlsh_n)
-	define(func_nc,	mpn_addlsh_nc)')
-ifdef(`OPERATION_rsblsh_n', `
-	define(ADDSUB,	sub)
-	define(ADCSBB,	sbb)
-	define(IFRSB,	`$1')
-	define(func_n,	mpn_rsblsh_n)
-	define(func_nc,	mpn_rsblsh_nc)')
-
-C mpn_rsblsh_nc removed below, its idea of carry-in is inconsistent with
-C refmpn_rsblsh_nc
-MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(func_n)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')	C cnt
-	push	%rbx
-	xor	R32(%rbx), R32(%rbx)	C clear CF save register
-L(ent):	push	%rbp
-	mov	R32(n), R32(%rbp)
-	mov	n, %rax
-
-	mov	R32(cnt), R32(%rcx)
-	neg	R32(%rcx)
-
-	lea	-8(up,%rax,8), up
-	lea	-8(vp,%rax,8), vp
-	lea	-40(rp,%rax,8), rp
-	neg	%rax
-
-	and	$3, R32(%rbp)
-	jz	L(b0)
-	cmp	$2, R32(%rbp)
-	jc	L(b1)
-	jz	L(b2)
-
-L(b3):	xor	R32(%r9), R32(%r9)
-	mov	8(vp,%rax,8), %r10
-	mov	16(vp,%rax,8), %r11
-	shrd	%cl, %r10, %r9
-	shrd	%cl, %r11, %r10
-	add	R32(%rbx), R32(%rbx)
-	ADCSBB	8(up,%rax,8), %r9
-	mov	24(vp,%rax,8), %r8
-	ADCSBB	16(up,%rax,8), %r10
-	sbb	R32(%rbx), R32(%rbx)
-	add	$3, %rax
-	jmp	L(lo3)
-
-L(b0):	mov	8(vp,%rax,8), %r9
-	xor	R32(%r8), R32(%r8)
-	shrd	%cl, %r9, %r8
-	mov	16(vp,%rax,8), %r10
-	mov	24(vp,%rax,8), %r11
-	shrd	%cl, %r10, %r9
-	shrd	%cl, %r11, %r10
-	add	R32(%rbx), R32(%rbx)
-	ADCSBB	8(up,%rax,8), %r8
-	mov	%r8, 40(rp,%rax,8)	C offset 40
-	ADCSBB	16(up,%rax,8), %r9
-	mov	32(vp,%rax,8), %r8
-	ADCSBB	24(up,%rax,8), %r10
-	sbb	R32(%rbx), R32(%rbx)
-	add	$4, %rax
-	jmp	L(lo0)
-
-L(b1):	mov	8(vp,%rax,8), %r8
-	add	$1, %rax
-	jz	L(1)
-	mov	8(vp,%rax,8), %r9
-	xor	R32(%rbp), R32(%rbp)
-	jmp	L(lo1)
-L(1):	xor	R32(%r11), R32(%r11)
-	jmp	L(wd1)
-
-L(b2):	xor	%r10, %r10
-	mov	8(vp,%rax,8), %r11
-	shrd	%cl, %r11, %r10
-	add	R32(%rbx), R32(%rbx)
-	mov	16(vp,%rax,8), %r8
-	ADCSBB	8(up,%rax,8), %r10
-	sbb	R32(%rbx), R32(%rbx)
-	add	$2, %rax
-	jz	L(end)
-
-	ALIGN(16)
-L(top):	mov	8(vp,%rax,8), %r9
-	mov	%r11, %rbp
-L(lo2):	mov	%r10, 24(rp,%rax,8)	C offset 24
-L(lo1):	shrd	%cl, %r8, %rbp
-	shrd	%cl, %r9, %r8
-	mov	16(vp,%rax,8), %r10
-	mov	24(vp,%rax,8), %r11
-	shrd	%cl, %r10, %r9
-	shrd	%cl, %r11, %r10
-	add	R32(%rbx), R32(%rbx)
-	ADCSBB	(up,%rax,8), %rbp
-	ADCSBB	8(up,%rax,8), %r8
-	mov	%r8, 40(rp,%rax,8)	C offset 40
-	ADCSBB	16(up,%rax,8), %r9
-	mov	32(vp,%rax,8), %r8
-	ADCSBB	24(up,%rax,8), %r10
-	sbb	R32(%rbx), R32(%rbx)
-	add	$4, %rax
-	mov	%rbp, (rp,%rax,8)	C offset 32
-L(lo0):
-L(lo3):	mov	%r9, 16(rp,%rax,8)	C offset 48
-	jnz	L(top)
-
-L(end):	mov	%r10, 24(rp,%rax,8)
-L(wd1):	shrd	%cl, %r8, %r11
-	add	R32(%rbx), R32(%rbx)
-	ADCSBB	(up,%rax,8), %r11
-	mov	%r11, 32(rp,%rax,8)	C offset 32
-	adc	R32(%rax), R32(%rax)	C rax is zero after loop
-	shr	R8(%rcx), %r8
-	ADDSUB	%r8, %rax
-IFRSB(	neg	%rax)
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
-PROLOGUE(func_nc)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')	C cnt
-IFDOS(`	mov	64(%rsp), %r9	')	C cy
-	push	%rbx
-	neg	cy
-	sbb	R32(%rbx), R32(%rbx)	C initialise CF save register
-	jmp	L(ent)
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreinhm/aorsmul_1.asm b/gmp/mpn/x86_64/coreinhm/aorsmul_1.asm
deleted file mode 100644
index b768905b93..0000000000
--- a/gmp/mpn/x86_64/coreinhm/aorsmul_1.asm
+++ /dev/null
@@ -1,187 +0,0 @@
-dnl  AMD64 mpn_addmul_1 and mpn_submul_1 optimised for Intel Nehalem.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9
-C AMD K10
-C AMD bull
-C AMD pile
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel core
-C Intel NHM	 4.55  with minor fluctuations
-C Intel SBR
-C Intel IBR
-C Intel HWL
-C Intel BWL
-C Intel atom
-C VIA nano
-
-C The loop of this code is the result of running a code generation and
-C optimization tool suite written by David Harvey and Torbjorn Granlund.
-
-C N.B.: Be careful if editing, making sure the loop alignment padding does not
-C become large, as we currently fall into it.
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`v0',      `%rcx')   C r9
-
-define(`n',       `%rbx')
-
-ifdef(`OPERATION_addmul_1',`
-  define(`ADDSUB', `add')
-  define(`func',   `mpn_addmul_1')
-')
-ifdef(`OPERATION_submul_1',`
-  define(`ADDSUB', `sub')
-  define(`func',   `mpn_submul_1')
-')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(func)
-	FUNC_ENTRY(4)
-	push	%rbx
-
-	mov	(up), %rax
-	lea	-8(up,n_param,8), up
-	mov	(rp), %r8
-	lea	-8(rp,n_param,8), rp
-
-	test	$1, R8(n_param)
-	jnz	L(bx1)
-
-L(bx0):	test	$2, R8(n_param)
-	jnz	L(b10)
-
-L(b00):	mov	$3, R32(n)
-	sub	n_param, n
-	mul	v0
-	mov	$0, R32(%r11)
-	mov	%r8, %r10
-	ADDSUB	%rax, %r10
-	mov	-8(up,n,8), %rax
-	adc	%rdx, %r11
-	jmp	L(lo0)
-
-L(b10):	mov	$1, R32(n)
-	sub	n_param, n
-	mul	v0
-	mov	%r8, %r10
-	mov	$0, R32(%r11)
-	ADDSUB	%rax, %r10
-	mov	8(up,n,8), %rax
-	adc	%rdx, %r11
-	jmp	L(lo2)
-
-L(bx1):	test	$2, R8(n_param)
-	jz	L(b01)
-
-L(b11):	mov	$2, R32(n)
-	sub	n_param, n
-	mul	v0
-	ADDSUB	%rax, %r8
-	mov	$0, R32(%r9)
-	mov	(up,n,8), %rax
-	adc	%rdx, %r9
-	jmp	L(lo3)
-
-L(b01):	mov	$0, R32(n)
-	sub	n_param, n
-	xor	%r11, %r11
-	add	$4, n
-	jc	L(end)
-
-	ALIGN(32)
-L(top):	mul	v0
-	ADDSUB	%rax, %r8
-	mov	$0, R32(%r9)
-	mov	-16(up,n,8), %rax
-	adc	%rdx, %r9
-L(lo1):	mul	v0
-	ADDSUB	%r11, %r8
-	mov	$0, R32(%r11)
-	mov	-16(rp,n,8), %r10
-	adc	$0, %r9
-	ADDSUB	%rax, %r10
-	mov	-8(up,n,8), %rax
-	adc	%rdx, %r11
-	mov	%r8, -24(rp,n,8)
-	ADDSUB	%r9, %r10
-	adc	$0, %r11
-L(lo0):	mov	-8(rp,n,8), %r8
-	mul	v0
-	ADDSUB	%rax, %r8
-	mov	$0, R32(%r9)
-	mov	(up,n,8), %rax
-	adc	%rdx, %r9
-	mov	%r10, -16(rp,n,8)
-	ADDSUB	%r11, %r8
-	adc	$0, %r9
-L(lo3):	mul	v0
-	mov	(rp,n,8), %r10
-	mov	$0, R32(%r11)
-	ADDSUB	%rax, %r10
-	mov	8(up,n,8), %rax
-	adc	%rdx, %r11
-	mov	%r8, -8(rp,n,8)
-	ADDSUB	%r9, %r10
-	adc	$0, %r11
-L(lo2):	mov	8(rp,n,8), %r8
-	mov	%r10, (rp,n,8)
-	add	$4, n
-	jnc	L(top)
-
-L(end):	mul	v0
-	ADDSUB	%rax, %r8
-	mov	$0, R32(%rax)
-	adc	%rdx, %rax
-	ADDSUB	%r11, %r8
-	adc	$0, %rax
-	mov	%r8, (rp)
-
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/coreinhm/gmp-mparam.h b/gmp/mpn/x86_64/coreinhm/gmp-mparam.h
deleted file mode 100644
index 6a7c03639f..0000000000
--- a/gmp/mpn/x86_64/coreinhm/gmp-mparam.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/* Nehalem gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2012, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 2667 MHz Core i7 Nehalem */
-/* FFT tuning limit = 100000000 */
-/* Generated by tuneup.c, 2014-03-18, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          2
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        11
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        16
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      9
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           16
-
-#define MUL_TOOM22_THRESHOLD                18
-#define MUL_TOOM33_THRESHOLD                60
-#define MUL_TOOM44_THRESHOLD               166
-#define MUL_TOOM6H_THRESHOLD               228
-#define MUL_TOOM8H_THRESHOLD               309
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      63
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     114
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     104
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     113
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     147
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 28
-#define SQR_TOOM3_THRESHOLD                 93
-#define SQR_TOOM4_THRESHOLD                250
-#define SQR_TOOM6_THRESHOLD                351
-#define SQR_TOOM8_THRESHOLD                454
-
-#define MULMID_TOOM42_THRESHOLD             28
-
-#define MULMOD_BNM1_THRESHOLD               12
-#define SQRMOD_BNM1_THRESHOLD               15
-
-#define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    380, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     10, 5}, {     21, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     21, 8}, {     11, 7}, {     24, 8}, \
-    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
-    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
-    {     33, 9}, {     19, 8}, {     39, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     83,10}, {     47, 9}, \
-    {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
-    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255,10}, {    135,11}, {     79,10}, \
-    {    159, 9}, {    319,10}, {    167,11}, {     95,10}, \
-    {    191, 9}, {    383,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511,11}, {    143,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    319,12}, {     95,11}, \
-    {    191,10}, {    383, 9}, {    767,11}, {    207,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    271,10}, {    543,11}, {    287,10}, {    575,11}, \
-    {    303,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
-    {    767,11}, {    415,10}, {    831,12}, {    223,11}, \
-    {    447,10}, {    895,13}, {    127,12}, {    255,11}, \
-    {    511,10}, {   1023,11}, {    543,10}, {   1087,12}, \
-    {    287,11}, {    575,10}, {   1151,11}, {    607,12}, \
-    {    319,11}, {    671,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,12}, {    447,11}, {    895,12}, {    479,14}, \
-    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
-    {    543,11}, {   1087,12}, {    575,11}, {   1151,12}, \
-    {    607,13}, {    319,12}, {    703,11}, {   1407,13}, \
-    {    383,12}, {    831,13}, {    447,12}, {    959,14}, \
-    {    255,13}, {    511,12}, {   1087,11}, {   2175,13}, \
-    {    575,12}, {   1215,11}, {   2431,13}, {    639,12}, \
-    {   1279,13}, {    703,12}, {   1407,14}, {    383,13}, \
-    {    767,12}, {   1535,13}, {    831,12}, {   1663,13}, \
-    {    959,14}, {    511,13}, {   1087,12}, {   2175,13}, \
-    {   1215,12}, {   2431,14}, {    639,13}, {   1343,12}, \
-    {   2687,13}, {   1407,12}, {   2815,14}, {    767,13}, \
-    {   1663,14}, {    895,13}, {   1919,15}, {    511,14}, \
-    {   1023,13}, {   2175,14}, {   1151,13}, {   2431,12}, \
-    {   4863,14}, {   1279,13}, {   2687,14}, {   1407,13}, \
-    {   2815,15}, {    767,14}, {   1535,13}, {   3071,14}, \
-    {   1663,13}, {   3455,14}, {   1791,13}, {   3583,14}, \
-    {   1919,16}, {    511,15}, {   1023,14}, {   2431,13}, \
-    {   4863,15}, {   1279,14}, {   2943,13}, {   5887,15}, \
-    {   1535,14}, {   3455,15}, {   1791,14}, {   3839,16}, \
-    {   1023,15}, {   2047,14}, {   4223,15}, {   2303,14}, \
-    {   4863,15}, {   2815,14}, {   5887,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 203
-#define MUL_FFT_THRESHOLD                 4032
-
-#define SQR_FFT_MODF_THRESHOLD             312  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    312, 5}, {     17, 6}, {      9, 5}, {     19, 6}, \
-    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
-    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
-    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
-    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     83,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511,10}, {    135,11}, {     79,10}, {    159, 9}, \
-    {    319,11}, {     95,10}, {    191, 9}, {    383,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
-    {    271, 9}, {    543,11}, {    143,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    319, 9}, {    639,12}, \
-    {     95,11}, {    191,10}, {    383, 9}, {    767,11}, \
-    {    207,13}, {     63,12}, {    127,11}, {    255,10}, \
-    {    511, 9}, {   1023,11}, {    271,10}, {    543,11}, \
-    {    287,10}, {    575,11}, {    303,12}, {    159,11}, \
-    {    319,10}, {    639, 9}, {   1279,11}, {    351,10}, \
-    {    703,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,10}, {    831,12}, {    223,11}, {    447,10}, \
-    {    895,11}, {    479,13}, {    127,12}, {    255,11}, \
-    {    511,10}, {   1023,11}, {    543,10}, {   1087,12}, \
-    {    287,11}, {    575,10}, {   1151,11}, {    607,12}, \
-    {    319,11}, {    639,10}, {   1279,11}, {    671,12}, \
-    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
-    {    767,12}, {    415,11}, {    831,12}, {    447,11}, \
-    {    895,12}, {    479,11}, {    959,14}, {    127,13}, \
-    {    255,12}, {    511,11}, {   1023,12}, {    543,11}, \
-    {   1087,12}, {    575,11}, {   1151,12}, {    607,13}, \
-    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
-    {   1343,12}, {    703,11}, {   1407,13}, {    383,12}, \
-    {    767,11}, {   1535,12}, {    831,13}, {    447,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1087,13}, \
-    {    575,12}, {   1215,13}, {    639,12}, {   1343,13}, \
-    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
-    {   1535,13}, {    831,12}, {   1663,13}, {    959,14}, \
-    {    511,13}, {   1087,12}, {   2175,13}, {   1215,12}, \
-    {   2431,14}, {    639,13}, {   1279,12}, {   2559,13}, \
-    {   1343,12}, {   2687,13}, {   1407,12}, {   2815,14}, \
-    {    767,13}, {   1663,14}, {    895,13}, {   1791,12}, \
-    {   3583,13}, {   1919,15}, {    511,14}, {   1023,13}, \
-    {   2175,14}, {   1151,13}, {   2431,12}, {   4863,14}, \
-    {   1279,13}, {   2687,14}, {   1407,13}, {   2815,15}, \
-    {    767,14}, {   1663,13}, {   3455,14}, {   1791,13}, \
-    {   3583,16}, {    511,15}, {   1023,14}, {   2303,13}, \
-    {   4607,14}, {   2431,13}, {   4863,15}, {   1279,14}, \
-    {   2943,13}, {   5887,15}, {   1535,14}, {   3455,15}, \
-    {   1791,14}, {   3839,16}, {   1023,15}, {   2047,14}, \
-    {   4223,15}, {   2303,14}, {   4863,15}, {   2815,14}, \
-    {   5887,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 217
-#define SQR_FFT_THRESHOLD                 2752
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  45
-#define MULLO_MUL_N_THRESHOLD             8397
-
-#define DC_DIV_QR_THRESHOLD                 46
-#define DC_DIVAPPR_Q_THRESHOLD             135
-#define DC_BDIV_QR_THRESHOLD                38
-#define DC_BDIV_Q_THRESHOLD                 31
-
-#define INV_MULMOD_BNM1_THRESHOLD           34
-#define INV_NEWTON_THRESHOLD               212
-#define INV_APPR_THRESHOLD                 155
-
-#define BINV_NEWTON_THRESHOLD              254
-#define REDC_1_TO_REDC_2_THRESHOLD          32
-#define REDC_2_TO_REDC_N_THRESHOLD          50
-
-#define MU_DIV_QR_THRESHOLD               1334
-#define MU_DIVAPPR_Q_THRESHOLD            1360
-#define MUPI_DIV_QR_THRESHOLD               85
-#define MU_BDIV_QR_THRESHOLD              1142
-#define MU_BDIV_Q_THRESHOLD               1210
-
-#define POWM_SEC_TABLE  3,46,194,494,1678
-
-#define MATRIX22_STRASSEN_THRESHOLD         21
-#define HGCD_THRESHOLD                     141
-#define HGCD_APPR_THRESHOLD                175
-#define HGCD_REDUCE_THRESHOLD             2205
-#define GCD_DC_THRESHOLD                   330
-#define GCDEXT_DC_THRESHOLD                361
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                15
-#define GET_STR_PRECOMPUTE_THRESHOLD        21
-#define SET_STR_DC_THRESHOLD               517
-#define SET_STR_PRECOMPUTE_THRESHOLD      1430
-
-#define FAC_DSC_THRESHOLD                  351
-#define FAC_ODD_THRESHOLD                   43
diff --git a/gmp/mpn/x86_64/coreinhm/hamdist.asm b/gmp/mpn/x86_64/coreinhm/hamdist.asm
deleted file mode 100644
index 93e1e5632b..0000000000
--- a/gmp/mpn/x86_64/coreinhm/hamdist.asm
+++ /dev/null
@@ -1,38 +0,0 @@
-dnl  AMD64 mpn_hamdist -- hamming distance.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_hamdist)
-include_mpn(`x86_64/k10/hamdist.asm')
diff --git a/gmp/mpn/x86_64/coreinhm/popcount.asm b/gmp/mpn/x86_64/coreinhm/popcount.asm
deleted file mode 100644
index 8f22a715b6..0000000000
--- a/gmp/mpn/x86_64/coreinhm/popcount.asm
+++ /dev/null
@@ -1,38 +0,0 @@
-dnl  AMD64 mpn_popcount -- population count.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_popcount)
-include_mpn(`x86_64/k10/popcount.asm')
diff --git a/gmp/mpn/x86_64/coreinhm/redc_1.asm b/gmp/mpn/x86_64/coreinhm/redc_1.asm
deleted file mode 100644
index 4d9261d8f9..0000000000
--- a/gmp/mpn/x86_64/coreinhm/redc_1.asm
+++ /dev/null
@@ -1,544 +0,0 @@
-dnl  X86-64 mpn_redc_1 optimised for Intel Nehalem and Westmere.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C AMD bull	 ?
-C AMD pile	 ?
-C AMD steam	 ?
-C AMD bobcat	 ?
-C AMD jaguar	 ?
-C Intel P4	 ?
-C Intel core	 ?
-C Intel NHM	 ?
-C Intel SBR	 ?
-C Intel IBR	 ?
-C Intel HWL	 ?
-C Intel BWL	 ?
-C Intel atom	 ?
-C VIA nano	 ?
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-C TODO
-C  * Micro-optimise, none performed thus far.
-C  * Consider inlining mpn_add_n.
-C  * Single basecases out before the pushes.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-define(`rp',          `%rdi')   C rcx
-define(`up',          `%rsi')   C rdx
-define(`mp_param',    `%rdx')   C r8
-define(`n',           `%rcx')   C r9
-define(`u0inv',       `%r8')    C stack
-
-define(`i',           `%r14')
-define(`j',           `%r15')
-define(`mp',          `%r12')
-define(`q0',          `%r13')
-
-C rax rbx rcx rdx rdi rsi rbp r8 r9 r10 r11 r12 r13 r14 r15
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-define(`ALIGNx', `ALIGN(16)')
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_redc_1)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	mov	(up), q0
-	mov	n, j			C outer loop induction var
-	lea	(mp_param,n,8), mp
-	lea	(up,n,8), up
-	neg	n
-	imul	u0inv, q0		C first iteration q0
-
-	test	$1, R8(n)
-	jz	L(bx0)
-
-L(bx1):	test	$2, R8(n)
-	jz	L(b3)
-
-L(b1):	cmp	$-1, R32(n)
-	jz	L(n1)
-
-L(otp1):lea	3(n), i
-	mov	(mp,n,8), %rax
-	mov	(up,n,8), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	$0, R32(%r9)
-	mov	8(mp,n,8), %rax
-	adc	%rdx, %r9
-	mul	q0
-	mov	$0, R32(%r11)
-	mov	8(up,n,8), %rbx
-	add	%rax, %rbx
-	mov	16(mp,n,8), %rax
-	adc	%rdx, %r11
-	add	%r9, %rbx
-	adc	$0, %r11
-	mov	16(up,n,8), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	$0, R32(%r9)
-	mov	24(mp,n,8), %rax
-	adc	%rdx, %r9
-	mov	%rbx, 8(up,n,8)
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e1)
-
-	ALIGNx
-L(tp1):	mul	q0
-	add	%rax, %rbp
-	mov	$0, R32(%r9)
-	mov	-16(mp,i,8), %rax
-	adc	%rdx, %r9
-	mul	q0
-	add	%r11, %rbp
-	mov	$0, R32(%r11)
-	mov	-16(up,i,8), %r10
-	adc	$0, %r9
-	add	%rax, %r10
-	mov	-8(mp,i,8), %rax
-	adc	%rdx, %r11
-	mov	%rbp, -24(up,i,8)
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	-8(up,i,8), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	$0, R32(%r9)
-	mov	(mp,i,8), %rax
-	adc	%rdx, %r9
-	mov	%r10, -16(up,i,8)
-L(e1):	add	%r11, %rbp
-	adc	$0, %r9
-	mul	q0
-	mov	(up,i,8), %r10
-	mov	$0, R32(%r11)
-	add	%rax, %r10
-	mov	8(mp,i,8), %rax
-	adc	%rdx, %r11
-	mov	%rbp, -8(up,i,8)
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	8(up,i,8), %rbp
-	mov	%r10, (up,i,8)
-	add	$4, i
-	jnc	L(tp1)
-
-L(ed1):	mul	q0
-	add	%rax, %rbp
-	adc	$0, %rdx
-	add	%r11, %rbp
-	adc	$0, %rdx
-	mov	%rbp, I(-8(up),-24(up,i,8))
-	mov	%rdx, (up,n,8)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp1)
-	jmp	L(cj)
-
-L(b3):	cmp	$-3, R32(n)
-	jz	L(n3)
-
-L(otp3):lea	5(n), i
-	mov	(mp,n,8), %rax
-	mov	(up,n,8), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	$0, R32(%r9)
-	mov	8(mp,n,8), %rax
-	adc	%rdx, %r9
-	mul	q0
-	mov	8(up,n,8), %rbx
-	mov	$0, R32(%r11)
-	add	%rax, %rbx
-	mov	16(mp,n,8), %rax
-	adc	%rdx, %r11
-	add	%r9, %rbx
-	adc	$0, %r11
-	mov	16(up,n,8), %rbp
-	mov	%rbx, 8(up,n,8)
-	imul	u0inv, %rbx		C next q limb
-C	jmp	L(tp3)
-
-	ALIGNx
-L(tp3):	mul	q0
-	add	%rax, %rbp
-	mov	$0, R32(%r9)
-	mov	-16(mp,i,8), %rax
-	adc	%rdx, %r9
-	mul	q0
-	add	%r11, %rbp
-	mov	$0, R32(%r11)
-	mov	-16(up,i,8), %r10
-	adc	$0, %r9
-	add	%rax, %r10
-	mov	-8(mp,i,8), %rax
-	adc	%rdx, %r11
-	mov	%rbp, -24(up,i,8)
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	-8(up,i,8), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	$0, R32(%r9)
-	mov	(mp,i,8), %rax
-	adc	%rdx, %r9
-	mov	%r10, -16(up,i,8)
-	add	%r11, %rbp
-	adc	$0, %r9
-	mul	q0
-	mov	(up,i,8), %r10
-	mov	$0, R32(%r11)
-	add	%rax, %r10
-	mov	8(mp,i,8), %rax
-	adc	%rdx, %r11
-	mov	%rbp, -8(up,i,8)
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	8(up,i,8), %rbp
-	mov	%r10, (up,i,8)
-	add	$4, i
-	jnc	L(tp3)
-
-L(ed3):	mul	q0
-	add	%rax, %rbp
-	adc	$0, %rdx
-	add	%r11, %rbp
-	adc	$0, %rdx
-	mov	%rbp, I(-8(up),-24(up,i,8))
-	mov	%rdx, (up,n,8)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp3)
-C	jmp	L(cj)
-
-L(cj):
-IFSTD(`	lea	(up,n,8), up		C param 2: up
-	lea	(up,n,8), %rdx		C param 3: up - n
-	neg	R32(n)		')	C param 4: n
-
-IFDOS(`	lea	(up,n,8), %rdx		C param 2: up
-	lea	(%rdx,n,8), %r8		C param 3: up - n
-	neg	R32(n)
-	mov	n, %r9			C param 4: n
-	mov	rp, %rcx	')	C param 1: rp
-
-	CALL(	mpn_add_n)
-
-L(ret):	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-L(bx0):	test	$2, R8(n)
-	jnz	L(b2)
-
-L(b0):
-L(otp0):lea	2(n), i
-	mov	(mp,n,8), %rax
-	mul	q0
-	mov	$0, R32(%r11)
-	mov	(up,n,8), %r10
-	add	%rax, %r10
-	mov	8(mp,n,8), %rax
-	adc	%rdx, %r11
-	mov	8(up,n,8), %rbx
-	mul	q0
-	add	%rax, %rbx
-	mov	$0, R32(%r9)
-	mov	16(mp,n,8), %rax
-	adc	%rdx, %r9
-	add	%r11, %rbx
-	adc	$0, %r9
-	mul	q0
-	mov	16(up,n,8), %r10
-	mov	$0, R32(%r11)
-	add	%rax, %r10
-	mov	24(mp,n,8), %rax
-	adc	%rdx, %r11
-	mov	%rbx, 8(up,n,8)
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e0)
-
-	ALIGNx
-L(tp0):	mul	q0
-	add	%rax, %rbp
-	mov	$0, R32(%r9)
-	mov	-16(mp,i,8), %rax
-	adc	%rdx, %r9
-	mul	q0
-	add	%r11, %rbp
-	mov	$0, R32(%r11)
-	mov	-16(up,i,8), %r10
-	adc	$0, %r9
-	add	%rax, %r10
-	mov	-8(mp,i,8), %rax
-	adc	%rdx, %r11
-	mov	%rbp, -24(up,i,8)
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	-8(up,i,8), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	$0, R32(%r9)
-	mov	(mp,i,8), %rax
-	adc	%rdx, %r9
-	mov	%r10, -16(up,i,8)
-	add	%r11, %rbp
-	adc	$0, %r9
-	mul	q0
-	mov	(up,i,8), %r10
-	mov	$0, R32(%r11)
-	add	%rax, %r10
-	mov	8(mp,i,8), %rax
-	adc	%rdx, %r11
-	mov	%rbp, -8(up,i,8)
-L(e0):	add	%r9, %r10
-	adc	$0, %r11
-	mov	8(up,i,8), %rbp
-	mov	%r10, (up,i,8)
-	add	$4, i
-	jnc	L(tp0)
-
-L(ed0):	mul	q0
-	add	%rax, %rbp
-	adc	$0, %rdx
-	add	%r11, %rbp
-	adc	$0, %rdx
-	mov	%rbp, I(-8(up),-24(up,i,8))
-	mov	%rdx, (up,n,8)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp0)
-	jmp	L(cj)
-
-L(b2):	cmp	$-2, R32(n)
-	jz	L(n2)
-
-L(otp2):lea	4(n), i
-	mov	(mp,n,8), %rax
-	mul	q0
-	mov	(up,n,8), %r10
-	mov	$0, R32(%r11)
-	add	%rax, %r10
-	mov	8(mp,n,8), %rax
-	adc	%rdx, %r11
-	mov	8(up,n,8), %rbx
-	mul	q0
-	add	%rax, %rbx
-	mov	$0, R32(%r9)
-	mov	16(mp,n,8), %rax
-	adc	%rdx, %r9
-	mul	q0
-	add	%r11, %rbx
-	mov	$0, R32(%r11)
-	mov	16(up,n,8), %r10
-	adc	$0, %r9
-	add	%rax, %r10
-	mov	24(mp,n,8), %rax
-	adc	%rdx, %r11
-	mov	%rbx, 8(up,n,8)
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e2)
-
-	ALIGNx
-L(tp2):	mul	q0
-	add	%rax, %rbp
-	mov	$0, R32(%r9)
-	mov	-16(mp,i,8), %rax
-	adc	%rdx, %r9
-	mul	q0
-	add	%r11, %rbp
-	mov	$0, R32(%r11)
-	mov	-16(up,i,8), %r10
-	adc	$0, %r9
-	add	%rax, %r10
-	mov	-8(mp,i,8), %rax
-	adc	%rdx, %r11
-	mov	%rbp, -24(up,i,8)
-L(e2):	add	%r9, %r10
-	adc	$0, %r11
-	mov	-8(up,i,8), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	$0, R32(%r9)
-	mov	(mp,i,8), %rax
-	adc	%rdx, %r9
-	mov	%r10, -16(up,i,8)
-	add	%r11, %rbp
-	adc	$0, %r9
-	mul	q0
-	mov	(up,i,8), %r10
-	mov	$0, R32(%r11)
-	add	%rax, %r10
-	mov	8(mp,i,8), %rax
-	adc	%rdx, %r11
-	mov	%rbp, -8(up,i,8)
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	8(up,i,8), %rbp
-	mov	%r10, (up,i,8)
-	add	$4, i
-	jnc	L(tp2)
-
-L(ed2):	mul	q0
-	add	%rax, %rbp
-	adc	$0, %rdx
-	add	%r11, %rbp
-	adc	$0, %rdx
-	mov	%rbp, I(-8(up),-24(up,i,8))
-	mov	%rdx, (up,n,8)		C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp2)
-	jmp	L(cj)
-
-L(n1):	mov	(mp_param), %rax
-	mul	q0
-	add	-8(up), %rax
-	adc	(up), %rdx
-	mov	%rdx, (rp)
-	mov	$0, R32(%rax)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-
-L(n2):	mov	(mp_param), %rax
-	mov	-16(up), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-8(mp), %rax
-	mov	-8(up), %r10
-	mul	q0
-	add	%rax, %r10
-	mov	%rdx, %r11
-	adc	$0, %r11
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	%r10, q0
-	imul	u0inv, q0		C next q0
-	mov	-16(mp), %rax
-	mul	q0
-	add	%rax, %r10
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-8(mp), %rax
-	mov	(up), %r14
-	mul	q0
-	add	%rax, %r14
-	adc	$0, %rdx
-	add	%r9, %r14
-	adc	$0, %rdx
-	xor	R32(%rax), R32(%rax)
-	add	%r11, %r14
-	adc	8(up), %rdx
-	mov	%r14, (rp)
-	mov	%rdx, 8(rp)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-
-	ALIGNx
-L(n3):	mov	-24(mp), %rax
-	mov	-24(up), %r10
-	mul	q0
-	add	%rax, %r10
-	mov	-16(mp), %rax
-	mov	%rdx, %r11
-	adc	$0, %r11
-	mov	-16(up), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-8(mp), %rax
-	add	%r11, %rbp
-	mov	-8(up), %r10
-	adc	$0, %r9
-	mul	q0
-	mov	%rbp, q0
-	imul	u0inv, q0		C next q0
-	add	%rax, %r10
-	mov	%rdx, %r11
-	adc	$0, %r11
-	mov	%rbp, -16(up)
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	%r10, -8(up)
-	mov	%r11, -24(up)		C up[0]
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(n3)
-
-	mov	-48(up), %rdx
-	mov	-40(up), %rbx
-	xor	R32(%rax), R32(%rax)
-	add	%rbp, %rdx
-	adc	%r10, %rbx
-	adc	-8(up), %r11
-	mov	%rdx, (rp)
-	mov	%rbx, 8(rp)
-	mov	%r11, 16(rp)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/coreinhm/sec_tabselect.asm b/gmp/mpn/x86_64/coreinhm/sec_tabselect.asm
deleted file mode 100644
index e4360341d9..0000000000
--- a/gmp/mpn/x86_64/coreinhm/sec_tabselect.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_sec_tabselect.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_sec_tabselect)
-include_mpn(`x86_64/fastsse/sec_tabselect.asm')
diff --git a/gmp/mpn/x86_64/coreisbr/addmul_2.asm b/gmp/mpn/x86_64/coreisbr/addmul_2.asm
deleted file mode 100644
index 21f0bf465f..0000000000
--- a/gmp/mpn/x86_64/coreisbr/addmul_2.asm
+++ /dev/null
@@ -1,224 +0,0 @@
-dnl  AMD64 mpn_addmul_2 optimised for Intel Sandy Bridge.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb	best
-C AMD K8,K9
-C AMD K10
-C AMD bull
-C AMD pile
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel core
-C Intel NHM
-C Intel SBR	 2.93		this
-C Intel IBR	 2.66		this
-C Intel HWL	 2.5		 2.15
-C Intel BWL
-C Intel atom
-C VIA nano
-
-C This code is the result of running a code generation and optimisation tool
-C suite written by David Harvey and Torbjorn Granlund.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`vp',      `%rcx')   C r9
-
-define(`n',	  `%rcx')
-define(`v0',      `%rbx')
-define(`v1',      `%rbp')
-define(`w0',      `%r8')
-define(`w1',      `%r9')
-define(`w2',      `%r10')
-define(`w3',      `%r11')
-define(`X0',      `%r12')
-define(`X1',      `%r13')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_addmul_2)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	mov	(up), %rax
-
-	mov	n_param, n
-	neg	n
-
-	lea	(up,n_param,8), up
-	lea	8(rp,n_param,8), rp
-	mul	v0
-
-	test	$1, R8(n)
-	jnz	L(bx1)
-
-L(bx0):	mov	-8(rp,n,8), X0
-	mov	%rdx, w1
-	add	%rax, X0
-	adc	$0, w1
-	mov	(up,n,8), %rax
-	xor	w0, w0
-	xor	w3, w3
-	test	$2, R8(n)
-	jnz	L(b10)
-
-L(b00):	nop				C this nop make loop go faster on SBR!
-	mul	v1
-	mov	(rp,n,8), X1
-	jmp	L(lo0)
-
-L(b10):	lea	-2(n), n
-	jmp	L(lo2)
-
-L(bx1):	mov	-8(rp,n,8), X1
-	mov	%rdx, w3
-	add	%rax, X1
-	adc	$0, w3
-	mov	(up,n,8), %rax
-	xor	w1, w1
-	xor	w2, w2
-	test	$2, R8(n)
-	jz	L(b11)
-
-L(b01):	mov	(rp,n,8), X0
-	inc	n
-	jmp	L(lo1)
-
-L(b11):	dec	n
-	jmp	L(lo3)
-
-	ALIGN(32)
-L(top):
-L(lo1):	mul	v1
-	mov	%rdx, w0		C 1
-	add	%rax, X0		C 0
-	adc	$0, w0			C 1
-	add	w1, X1			C 3
-	adc	$0, w3			C 0
-	add	w2, X0			C 0
-	adc	$0, w0			C 1
-	mov	(up,n,8), %rax
-	mul	v0
-	add	%rax, X0		C 0
-	mov	%rdx, w1		C 1
-	adc	$0, w1			C 1
-	mov	(up,n,8), %rax
-	mul	v1
-	mov	X1, -16(rp,n,8)		C 3
-	mov	(rp,n,8), X1		C 1
-	add	w3, X0			C 0
-	adc	$0, w1			C 1
-L(lo0):	mov	%rdx, w2		C 2
-	mov	X0, -8(rp,n,8)		C 0
-	add	%rax, X1		C 1
-	adc	$0, w2			C 2
-	mov	8(up,n,8), %rax
-	add	w0, X1			C 1
-	adc	$0, w2			C 2
-	mul	v0
-	add	%rax, X1		C 1
-	mov	%rdx, w3		C 2
-	adc	$0, w3			C 2
-	mov	8(up,n,8), %rax
-L(lo3):	mul	v1
-	add	w1, X1			C 1
-	mov	8(rp,n,8), X0		C 2
-	adc	$0, w3			C 2
-	mov	%rdx, w0		C 3
-	add	%rax, X0		C 2
-	adc	$0, w0			C 3
-	mov	16(up,n,8), %rax
-	mul	v0
-	add	w2, X0			C 2
-	mov	X1, (rp,n,8)		C 1
-	mov	%rdx, w1		C 3
-	adc	$0, w0			C 3
-	add	%rax, X0		C 2
-	adc	$0, w1			C 3
-	mov	16(up,n,8), %rax
-	add	w3, X0			C 2
-	adc	$0, w1			C 3
-L(lo2):	mul	v1
-	mov	16(rp,n,8), X1		C 3
-	add	%rax, X1		C 3
-	mov	%rdx, w2		C 4
-	adc	$0, w2			C 4
-	mov	24(up,n,8), %rax
-	mov	X0, 8(rp,n,8)		C 2
-	mul	v0
-	add	w0, X1			C 3
-	mov	%rdx, w3		C 4
-	adc	$0, w2			C 4
-	add	%rax, X1		C 3
-	mov	24(up,n,8), %rax
-	mov	24(rp,n,8), X0		C 0	useless but harmless final read
-	adc	$0, w3			C 4
-	add	$4, n
-	jnc	L(top)
-
-L(end):	mul	v1
-	add	w1, X1
-	adc	$0, w3
-	add	w2, %rax
-	adc	$0, %rdx
-	mov	X1, I(-16(rp),-16(rp,n,8))
-	add	w3, %rax
-	adc	$0, %rdx
-	mov	%rax, I(-8(rp),-8(rp,n,8))
-	mov	%rdx, %rax
-
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreisbr/aorrlsh1_n.asm b/gmp/mpn/x86_64/coreisbr/aorrlsh1_n.asm
deleted file mode 100644
index 2319a80060..0000000000
--- a/gmp/mpn/x86_64/coreisbr/aorrlsh1_n.asm
+++ /dev/null
@@ -1,54 +0,0 @@
-dnl  AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
-dnl  AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 1)
-define(RSH, 63)
-
-ifdef(`OPERATION_addlsh1_n', `
-	define(ADDSUB,	add)
-	define(ADCSBB,	adc)
-	define(func_n,	mpn_addlsh1_n)
-	define(func_nc,	mpn_addlsh1_nc)')
-ifdef(`OPERATION_rsblsh1_n', `
-	define(ADDSUB,	sub)
-	define(ADCSBB,	sbb)
-	define(func_n,	mpn_rsblsh1_n)
-	define(func_nc,	mpn_rsblsh1_nc)')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
-include_mpn(`x86_64/coreisbr/aorrlshC_n.asm')
diff --git a/gmp/mpn/x86_64/coreisbr/aorrlsh2_n.asm b/gmp/mpn/x86_64/coreisbr/aorrlsh2_n.asm
deleted file mode 100644
index 9416d5a164..0000000000
--- a/gmp/mpn/x86_64/coreisbr/aorrlsh2_n.asm
+++ /dev/null
@@ -1,56 +0,0 @@
-dnl  AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 1)
-dnl  AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 1) - up[]
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 2)
-define(RSH, 62)
-
-ifdef(`OPERATION_addlsh2_n', `
-	define(ADDSUB,	add)
-	define(ADCSBB,	adc)
-	define(func_n,	mpn_addlsh2_n)
-	define(func_nc,	mpn_addlsh2_nc)')
-ifdef(`OPERATION_rsblsh2_n', `
-	define(ADDSUB,	sub)
-	define(ADCSBB,	sbb)
-	define(func_n,	mpn_rsblsh2_n)
-	define(func_nc,	mpn_rsblsh2_nc)')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-C mpn_rsblsh2_nc removed below, its idea of carry-in is inconsistent with
-C refmpn_rsblsh2_nc
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n)
-include_mpn(`x86_64/coreisbr/aorrlshC_n.asm')
diff --git a/gmp/mpn/x86_64/coreisbr/aorrlshC_n.asm b/gmp/mpn/x86_64/coreisbr/aorrlshC_n.asm
deleted file mode 100644
index 23ace41889..0000000000
--- a/gmp/mpn/x86_64/coreisbr/aorrlshC_n.asm
+++ /dev/null
@@ -1,173 +0,0 @@
-dnl  AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
-dnl  AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]
-
-dnl  Copyright 2009-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C Intel P4	 ?
-C Intel core2	 3.25
-C Intel NHM	 4
-C Intel SBR	 2  C (or 1.95 when L(top)'s alignment = 16 (mod 32))
-C Intel atom	 ?
-C VIA nano	 ?
-
-C This code probably runs close to optimally on Sandy Bridge (using 4-way
-C unrolling).  It also runs reasonably well on Core 2, but it runs poorly on
-C all other processors, including Nehalem.
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`vp',	`%rdx')
-define(`n',	`%rcx')
-define(`cy',	`%r8')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func_nc)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	push	%rbp
-	mov	cy, %rax
-	neg	%rax			C set msb on carry
-	xor	R32(%rbp), R32(%rbp)	C limb carry
-	mov	(vp), %r8
-	shrd	$RSH, %r8, %rbp
-	mov	R32(n), R32(%r9)
-	and	$3, R32(%r9)
-	je	L(b00)
-	cmp	$2, R32(%r9)
-	jc	L(b01)
-	je	L(b10)
-	jmp	L(b11)
-EPILOGUE()
-
-	ALIGN(16)
-PROLOGUE(func_n)
-	FUNC_ENTRY(4)
-	push	%rbp
-	xor	R32(%rbp), R32(%rbp)	C limb carry
-	mov	(vp), %r8
-	shrd	$RSH, %r8, %rbp
-	mov	R32(n), R32(%rax)
-	and	$3, R32(%rax)
-	je	L(b00)
-	cmp	$2, R32(%rax)
-	jc	L(b01)
-	je	L(b10)
-
-L(b11):	mov	8(vp), %r9
-	shrd	$RSH, %r9, %r8
-	mov	16(vp), %r10
-	shrd	$RSH, %r10, %r9
-	add	R32(%rax), R32(%rax)	C init carry flag
-	ADCSBB	(up), %rbp
-	ADCSBB	8(up), %r8
-	ADCSBB	16(up), %r9
-	mov	%rbp, (rp)
-	mov	%r8, 8(rp)
-	mov	%r9, 16(rp)
-	mov	%r10, %rbp
-	lea	24(up), up
-	lea	24(vp), vp
-	lea	24(rp), rp
-	sbb	R32(%rax), R32(%rax)	C save carry flag
-	sub	$3, n
-	ja	L(top)
-	jmp	L(end)
-
-L(b01):	add	R32(%rax), R32(%rax)	C init carry flag
-	ADCSBB	(up), %rbp
-	mov	%rbp, (rp)
-	mov	%r8, %rbp
-	lea	8(up), up
-	lea	8(vp), vp
-	lea	8(rp), rp
-	sbb	R32(%rax), R32(%rax)	C save carry flag
-	sub	$1, n
-	ja	L(top)
-	jmp	L(end)
-
-L(b10):	mov	8(vp), %r9
-	shrd	$RSH, %r9, %r8
-	add	R32(%rax), R32(%rax)	C init carry flag
-	ADCSBB	(up), %rbp
-	ADCSBB	8(up), %r8
-	mov	%rbp, (rp)
-	mov	%r8, 8(rp)
-	mov	%r9, %rbp
-	lea	16(up), up
-	lea	16(vp), vp
-	lea	16(rp), rp
-	sbb	R32(%rax), R32(%rax)	C save carry flag
-	sub	$2, n
-	ja	L(top)
-	jmp	L(end)
-
-	ALIGN(16)
-L(top):	mov	(vp), %r8
-	shrd	$RSH, %r8, %rbp
-L(b00):	mov	8(vp), %r9
-	shrd	$RSH, %r9, %r8
-	mov	16(vp), %r10
-	shrd	$RSH, %r10, %r9
-	mov	24(vp), %r11
-	shrd	$RSH, %r11, %r10
-	lea	32(vp), vp
-	add	R32(%rax), R32(%rax)	C restore carry flag
-	ADCSBB	(up), %rbp
-	ADCSBB	8(up), %r8
-	ADCSBB	16(up), %r9
-	ADCSBB	24(up), %r10
-	lea	32(up), up
-	mov	%rbp, (rp)
-	mov	%r8, 8(rp)
-	mov	%r9, 16(rp)
-	mov	%r10, 24(rp)
-	mov	%r11, %rbp
-	lea	32(rp), rp
-	sbb	R32(%rax), R32(%rax)	C save carry flag
-	sub	$4, n
-	jnz	L(top)
-
-L(end):	shr	$RSH, %rbp
-	add	R32(%rax), R32(%rax)	C restore carry flag
-	ADCSBB	$0, %rbp
-	mov	%rbp, %rax
-	pop	%rbp
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreisbr/aorrlsh_n.asm b/gmp/mpn/x86_64/coreisbr/aorrlsh_n.asm
deleted file mode 100644
index db8ee68849..0000000000
--- a/gmp/mpn/x86_64/coreisbr/aorrlsh_n.asm
+++ /dev/null
@@ -1,215 +0,0 @@
-dnl  AMD64 mpn_addlsh_n -- rp[] = up[] + (vp[] << k)
-dnl  AMD64 mpn_rsblsh_n -- rp[] = (vp[] << k) - up[]
-dnl  Optimised for Sandy Bridge.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 5.25
-C Intel P4	 ?
-C Intel core2	 3.1
-C Intel NHM	 3.95
-C Intel SBR	 2.75
-C Intel atom	 ?
-C VIA nano	 ?
-
-C The inner-loop probably runs close to optimally on Sandy Bridge (using 4-way
-C unrolling).  The rest of the code is quite crude, and could perhaps be made
-C both smaller and faster.
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`vp',	`%rdx')
-define(`n',	`%rcx')
-define(`cnt',	`%r8')
-define(`cy',	`%r9')			C for _nc variant
-
-ifdef(`OPERATION_addlsh_n', `
-	define(ADDSUB,	add)
-	define(ADCSBB,	adc)
-	define(IFRSB,	)
-	define(func_n,	mpn_addlsh_n)
-	define(func_nc,	mpn_addlsh_nc)')
-ifdef(`OPERATION_rsblsh_n', `
-	define(ADDSUB,	sub)
-	define(ADCSBB,	sbb)
-	define(IFRSB,	`$1')
-	define(func_n,	mpn_rsblsh_n)
-	define(func_nc,	mpn_rsblsh_nc)')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-C mpn_rsblsh_nc removed below, its idea of carry-in is inconsistent with
-C refmpn_rsblsh_nc
-MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(func_n)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')	C cnt
-	push	%rbx
-	xor	R32(%rbx), R32(%rbx)	C clear CF save register
-L(ent):	push	%rbp
-	mov	R32(n), R32(%rbp)
-	mov	n, %rax
-	mov	R32(cnt), R32(%rcx)
-	neg	R32(%rcx)
-	and	$3, R32(%rbp)
-	jz	L(b0)
-	lea	-32(vp,%rbp,8), vp
-	lea	-32(up,%rbp,8), up
-	lea	-32(rp,%rbp,8), rp
-	cmp	$2, R32(%rbp)
-	jc	L(b1)
-	jz	L(b2)
-
-L(b3):	xor	%r8, %r8
-	mov	8(vp), %r9
-	mov	16(vp), %r10
-	shrd	R8(%rcx), %r9, %r8
-	shrd	R8(%rcx), %r10, %r9
-	mov	24(vp), %r11
-	shrd	R8(%rcx), %r11, %r10
-	sub	$3, %rax
-	jz	L(3)
-	add	R32(%rbx), R32(%rbx)
-	lea	32(vp), vp
-	ADCSBB	8(up), %r8
-	ADCSBB	16(up), %r9
-	ADCSBB	24(up), %r10
-	lea	32(up), up
-	jmp	L(lo3)
-L(3):	add	R32(%rbx), R32(%rbx)
-	lea	32(vp), vp
-	ADCSBB	8(up), %r8
-	ADCSBB	16(up), %r9
-	ADCSBB	24(up), %r10
-	jmp	L(wd3)
-
-L(b0):	mov	(vp), %r8
-	mov	8(vp), %r9
-	xor	R32(%rbp), R32(%rbp)
-	jmp	L(lo0)
-
-L(b1):	xor	%r10, %r10
-	mov	24(vp), %r11
-	shrd	R8(%rcx), %r11, %r10
-	sub	$1, %rax
-	jz	L(1)
-	add	R32(%rbx), R32(%rbx)
-	lea	32(vp), vp
-	ADCSBB	24(up), %r10
-	lea	32(up), up
-	mov	(vp), %r8
-	jmp	L(lo1)
-L(1):	add	R32(%rbx), R32(%rbx)
-	ADCSBB	24(up), %r10
-	jmp	L(wd1)
-
-L(b2):	xor	%r9, %r9
-	mov	16(vp), %r10
-	shrd	R8(%rcx), %r10, %r9
-	mov	24(vp), %r11
-	shrd	R8(%rcx), %r11, %r10
-	sub	$2, %rax
-	jz	L(2)
-	add	R32(%rbx), R32(%rbx)
-	lea	32(vp), vp
-	ADCSBB	16(up), %r9
-	ADCSBB	24(up), %r10
-	lea	32(up), up
-	jmp	L(lo2)
-L(2):	add	R32(%rbx), R32(%rbx)
-	ADCSBB	16(up), %r9
-	ADCSBB	24(up), %r10
-	jmp	L(wd2)
-
-	ALIGN(32)			C 16-byte alignment is not enough!
-L(top):	shrd	R8(%rcx), %r11, %r10
-	add	R32(%rbx), R32(%rbx)
-	lea	32(vp), vp
-	ADCSBB	(up), %rbp
-	ADCSBB	8(up), %r8
-	ADCSBB	16(up), %r9
-	ADCSBB	24(up), %r10
-	mov	%rbp, (rp)
-	lea	32(up), up
-L(lo3):	mov	%r8, 8(rp)
-L(lo2):	mov	%r9, 16(rp)
-	mov	(vp), %r8
-L(lo1):	mov	%r10, 24(rp)
-	mov	8(vp), %r9
-	mov	%r11, %rbp
-	lea	32(rp), rp
-	sbb	R32(%rbx), R32(%rbx)
-L(lo0):	shrd	R8(%rcx), %r8, %rbp
-	mov	16(vp), %r10
-	shrd	R8(%rcx), %r9, %r8
-	shrd	R8(%rcx), %r10, %r9
-	mov	24(vp), %r11
-	sub	$4, %rax
-	jg	L(top)
-
-	shrd	R8(%rcx), %r11, %r10
-	add	R32(%rbx), R32(%rbx)
-	ADCSBB	(up), %rbp
-	ADCSBB	8(up), %r8
-	ADCSBB	16(up), %r9
-	ADCSBB	24(up), %r10
-	mov	%rbp, (rp)
-L(wd3):	mov	%r8, 8(rp)
-L(wd2):	mov	%r9, 16(rp)
-L(wd1):	mov	%r10, 24(rp)
-	adc	R32(%rax), R32(%rax)	C rax is zero after loop
-	shr	R8(%rcx), %r11
-	ADDSUB	%r11, %rax
-IFRSB(	neg	%rax)
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
-PROLOGUE(func_nc)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')	C cnt
-IFDOS(`	mov	64(%rsp), %r9	')	C cy
-	push	%rbx
-	neg	cy
-	sbb	R32(%rbx), R32(%rbx)	C initialise CF save register
-	jmp	L(ent)
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreisbr/aors_n.asm b/gmp/mpn/x86_64/coreisbr/aors_n.asm
deleted file mode 100644
index 01abf78a0d..0000000000
--- a/gmp/mpn/x86_64/coreisbr/aors_n.asm
+++ /dev/null
@@ -1,198 +0,0 @@
-dnl  AMD64 mpn_add_n, mpn_sub_n optimised for Sandy bridge, Ivy bridge, and
-dnl  Haswell.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2010-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9
-C AMD K10
-C AMD bull	 1.82		average over 400-600
-C AMD pile	 1.83		average over 400-600
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel core
-C Intel NHM
-C Intel SBR	 1.55		fluctuates
-C Intel IBR	 1.55		fluctuates
-C Intel HWL	 1.33		fluctuates
-C Intel BWL
-C Intel atom
-C VIA nano
-
-C The loop of this code was manually written.  It runs close to optimally on
-C Intel SBR, IBR, and HWL far as we know, except for the fluctuation problems.
-C It also runs slightly faster on average on AMD bull and pile.
-C
-C No micro-optimisation has been done.
-C
-C N.B.!  The loop alignment padding insns are executed.  If editing the code,
-C make sure the padding does not become excessive.  It is now a 4-byte nop.
-
-define(`rp',	`%rdi')	C rcx
-define(`up',	`%rsi')	C rdx
-define(`vp',	`%rdx')	C r8
-define(`n',	`%rcx')	C r9
-define(`cy',	`%r8')	C rsp+40    (mpn_add_nc and mpn_sub_nc)
-
-ifdef(`OPERATION_add_n', `
-  define(ADCSBB,    adc)
-  define(func,      mpn_add_n)
-  define(func_nc,   mpn_add_nc)')
-ifdef(`OPERATION_sub_n', `
-  define(ADCSBB,    sbb)
-  define(func,      mpn_sub_n)
-  define(func_nc,   mpn_sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(func)
-	FUNC_ENTRY(4)
-	xor	%r8, %r8
-
-L(ent):	mov	R32(n), R32(%rax)
-	shr	$2, n
-
-	test	$1, R8(%rax)
-	jnz	L(bx1)
-
-L(bx0):	test	$2, R8(%rax)
-	jnz	L(b10)
-
-L(b00):	neg	%r8
-	mov	(up), %r8
-	mov	8(up), %r9
-	ADCSBB	(vp), %r8
-	ADCSBB	8(vp), %r9
-	mov	16(up), %r10
-	mov	24(up), %r11
-	lea	32(up), up
-	ADCSBB	16(vp), %r10
-	ADCSBB	24(vp), %r11
-	lea	32(vp), vp
-	lea	-16(rp), rp
-	jmp	L(lo0)
-
-L(b10):	neg	%r8
-	mov	(up), %r10
-	mov	8(up), %r11
-	ADCSBB	0(vp), %r10
-	ADCSBB	8(vp), %r11
-	jrcxz	L(e2)
-	mov	16(up), %r8
-	mov	24(up), %r9
-	lea	16(up), up
-	ADCSBB	16(vp), %r8
-	ADCSBB	24(vp), %r9
-	lea	16(vp), vp
-	lea	(rp), rp
-	jmp	L(lo2)
-
-L(e2):	mov	%r10, (rp)
-	mov	%r11, 8(rp)
-	setc	R8(%rax)
-	FUNC_EXIT()
-	ret
-
-L(bx1):	test	$2, R8(%rax)
-	jnz	L(b11)
-
-L(b01):	neg	%r8
-	mov	(up), %r11
-	ADCSBB	(vp), %r11
-	jrcxz	L(e1)
-	mov	8(up), %r8
-	mov	16(up), %r9
-	lea	8(up), up
-	lea	-8(rp), rp
-	ADCSBB	8(vp), %r8
-	ADCSBB	16(vp), %r9
-	lea	8(vp), vp
-	jmp	L(lo1)
-
-L(e1):	mov	%r11, (rp)
-	setc	R8(%rax)
-	FUNC_EXIT()
-	ret
-
-L(b11):	neg	%r8
-	mov	(up), %r9
-	ADCSBB	(vp), %r9
-	mov	8(up), %r10
-	mov	16(up), %r11
-	lea	24(up), up
-	ADCSBB	8(vp), %r10
-	ADCSBB	16(vp), %r11
-	lea	24(vp), vp
-	mov	%r9, (rp)
-	lea	8(rp), rp
-	jrcxz	L(end)
-
-	ALIGN(32)
-L(top):	mov	(up), %r8
-	mov	8(up), %r9
-	ADCSBB	(vp), %r8
-	ADCSBB	8(vp), %r9
-L(lo2):	mov	%r10, (rp)
-L(lo1):	mov	%r11, 8(rp)
-	mov	16(up), %r10
-	mov	24(up), %r11
-	lea	32(up), up
-	ADCSBB	16(vp), %r10
-	ADCSBB	24(vp), %r11
-	lea	32(vp), vp
-L(lo0):	mov	%r8, 16(rp)
-L(lo3):	mov	%r9, 24(rp)
-	lea	32(rp), rp
-	dec	n
-	jnz	L(top)
-
-L(end):	mov	R32(n), R32(%rax)	C zero rax
-	mov	%r10, (rp)
-	mov	%r11, 8(rp)
-	setc	R8(%rax)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
-	ALIGN(16)
-PROLOGUE(func_nc)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	jmp	L(ent)
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreisbr/aorsmul_1.asm b/gmp/mpn/x86_64/coreisbr/aorsmul_1.asm
deleted file mode 100644
index 9f01d9c061..0000000000
--- a/gmp/mpn/x86_64/coreisbr/aorsmul_1.asm
+++ /dev/null
@@ -1,209 +0,0 @@
-dnl  X86-64 mpn_addmul_1 and mpn_submul_1 optimised for Intel Sandy Bridge.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9
-C AMD K10
-C AMD bull
-C AMD pile
-C AMD steam
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel core
-C Intel NHM
-C Intel SBR	 3.24 (average, fluctuating in 3.20-3.57)
-C Intel IBR	 3.04
-C Intel HWL
-C Intel BWL
-C Intel atom
-C VIA nano
-
-C The loop of this code is the result of running a code generation and
-C optimization tool suite written by David Harvey and Torbjörn Granlund.
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`v0',      `%rcx')   C r9
-
-define(`n',       `%rbx')
-
-define(`I',`$1')
-
-ifdef(`OPERATION_addmul_1',`
-      define(`ADDSUB',        `add')
-      define(`func',  `mpn_addmul_1')
-')
-ifdef(`OPERATION_submul_1',`
-      define(`ADDSUB',        `sub')
-      define(`func',  `mpn_submul_1')
-')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-IFDOS(`	define(`up',     ``%rsi'')') dnl
-IFDOS(`	define(`rp',     ``%rcx'')') dnl
-IFDOS(`	define(`v0',     ``%r9'')') dnl
-IFDOS(`	define(`r9',     ``rdi'')') dnl
-IFDOS(`	define(`n_param',``%r8'')') dnl
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(func)
-
-IFDOS(``push	%rsi		'')
-IFDOS(``push	%rdi		'')
-IFDOS(``mov	%rdx, %rsi	'')
-
-	mov	(up), %rax
-	push	%rbx
-	lea	(up,n_param,8), up
-	lea	(rp,n_param,8), rp
-
-	test	$1, R8(n_param)
-	jnz	L(b13)
-
-L(b02):	xor	R32(%r11), R32(%r11)
-	test	$2, R8(n_param)
-	jnz	L(b2)
-
-L(b0):	mov	$1, R32(n)
-	sub	n_param, n
-	mul	v0
-	mov	%rdx, %r9
-	mov	-8(rp,n,8), %r8
-	jmp	L(e0)
-
-	ALIGN(16)
-L(b2):	mov	$-1, n
-	sub	n_param, n
-	mul	v0
-	mov	8(rp,n,8), %r8
-	mov	%rdx, %r9
-	jmp	L(e2)
-
-	ALIGN(16)
-L(b13):	xor	R32(%r9), R32(%r9)
-	test	$2, R8(n_param)
-	jnz	L(b3)
-
-L(b1):	mov	$2, R32(n)
-	sub	n_param, n
-	jns	L(1)
-	mul	v0
-	mov	-16(rp,n,8), %r10
-	mov	%rdx, %r11
-	jmp	L(e1)
-
-	ALIGN(16)
-L(b3):	xor	R32(n), R32(n)
-	sub	n_param, n
-	mul	v0
-	mov	(rp,n,8), %r10
-	jmp	L(e3)
-
-	ALIGN(32)
-L(top):	mul	v0
-	mov	-16(rp,n,8), %r10
-	ADDSUB	%r11, %r8
-	mov	%rdx, %r11
-	adc	$0, %r9
-	mov	%r8, -24(rp,n,8)
-L(e1):	ADDSUB	%rax, %r10
-	mov	-8(up,n,8), %rax
-	adc	$0, %r11
-	mul	v0
-	ADDSUB	%r9, %r10
-	mov	%rdx, %r9
-	mov	-8(rp,n,8), %r8
-	adc	$0, %r11
-	mov	%r10, -16(rp,n,8)
-L(e0):	ADDSUB	%rax, %r8
-	adc	$0, %r9
-	mov	(up,n,8), %rax
-	mul	v0
-	mov	(rp,n,8), %r10
-	ADDSUB	%r11, %r8
-	mov	%r8, -8(rp,n,8)
-	adc	$0, %r9
-L(e3):	mov	%rdx, %r11
-	ADDSUB	%rax, %r10
-	mov	8(up,n,8), %rax
-	adc	$0, %r11
-	mul	v0
-	mov	8(rp,n,8), %r8
-	ADDSUB	%r9, %r10
-	mov	%rdx, %r9
-	mov	%r10, (rp,n,8)
-	adc	$0, %r11
-L(e2):	ADDSUB	%rax, %r8
-	adc	$0, %r9
-	mov	16(up,n,8), %rax
-	add	$4, n
-	jnc	L(top)
-
-L(end):	mul	v0
-	mov	I(-8(rp),-16(rp,n,8)), %r10
-	ADDSUB	%r11, %r8
-	mov	%rdx, %r11
-	adc	$0, %r9
-	mov	%r8, I(-16(rp),-24(rp,n,8))
-	ADDSUB	%rax, %r10
-	adc	$0, %r11
-	ADDSUB	%r9, %r10
-	adc	$0, %r11
-	mov	%r10, I(-8(rp),-16(rp,n,8))
-	mov	%r11, %rax
-
-	pop	%rbx
-IFDOS(``pop	%rdi		'')
-IFDOS(``pop	%rsi		'')
-	ret
-
-	ALIGN(16)
-L(1):	mul	v0
-	ADDSUB	%rax, -8(rp)
-	mov	%rdx, %rax
-	adc	$0, %rax
-	pop	%rbx
-IFDOS(``pop	%rdi		'')
-IFDOS(``pop	%rsi		'')
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/coreisbr/divrem_1.asm b/gmp/mpn/x86_64/coreisbr/divrem_1.asm
deleted file mode 100644
index d9f371f785..0000000000
--- a/gmp/mpn/x86_64/coreisbr/divrem_1.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_divrem_1
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_divrem_1 mpn_preinv_divrem_1)
-include_mpn(`x86_64/divrem_1.asm')
diff --git a/gmp/mpn/x86_64/coreisbr/gmp-mparam.h b/gmp/mpn/x86_64/coreisbr/gmp-mparam.h
deleted file mode 100644
index 3a91b4c30e..0000000000
--- a/gmp/mpn/x86_64/coreisbr/gmp-mparam.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/* Sandy Bridge gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-/* 3300 MHz Core i5 Sandy Bridge */
-/* FFT tuning limit = 100000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD         9
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        20
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     10
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           30
-
-#define MUL_TOOM22_THRESHOLD                20
-#define MUL_TOOM33_THRESHOLD                65
-#define MUL_TOOM44_THRESHOLD               166
-#define MUL_TOOM6H_THRESHOLD               254
-#define MUL_TOOM8H_THRESHOLD               333
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD     105
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     122
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     105
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     113
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     148
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 28
-#define SQR_TOOM3_THRESHOLD                 93
-#define SQR_TOOM4_THRESHOLD                250
-#define SQR_TOOM6_THRESHOLD                348
-#define SQR_TOOM8_THRESHOLD                454
-
-#define MULMID_TOOM42_THRESHOLD             36
-
-#define MULMOD_BNM1_THRESHOLD               12
-#define SQRMOD_BNM1_THRESHOLD               15
-
-#define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    380, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
-    {     23, 7}, {     21, 8}, {     11, 7}, {     25, 8}, \
-    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
-    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
-    {     33, 9}, {     19, 8}, {     39, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     83,10}, {     47, 9}, \
-    {     95,10}, {     55,11}, {     31,10}, {     79,11}, \
-    {     47,10}, {     95,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255,10}, {    135,11}, {     79,10}, \
-    {    159, 9}, {    319,10}, {    167,11}, {     95,10}, \
-    {    191, 9}, {    383, 8}, {    767, 7}, {   1599, 8}, \
-    {    831, 9}, {    447,10}, {    239,12}, {     63,11}, \
-    {    127,10}, {    255,11}, {    143,10}, {    287, 9}, \
-    {    575,12}, {     95,11}, {    191,10}, {    383,11}, \
-    {    207,10}, {    447,13}, {     63,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    271,10}, {    543, 8}, \
-    {   2175,11}, {    303,12}, {    159,11}, {    319,10}, \
-    {    671,11}, {    367,12}, {    191,11}, {    383,10}, \
-    {    767,11}, {    415,10}, {    831,12}, {    223,11}, \
-    {    447,10}, {    895,11}, {    479,13}, {    127,12}, \
-    {    255,11}, {    511,10}, {   1023,11}, {    543,12}, \
-    {    287,11}, {    575,10}, {   1151,11}, {    607,12}, \
-    {    319,11}, {    671,12}, {    351,11}, {    703,10}, \
-    {   1407,13}, {    191,12}, {    383,11}, {    767,12}, \
-    {    415,11}, {    831,12}, {    479,14}, {    127,13}, \
-    {    255,12}, {    511,11}, {   1023,12}, {    575,11}, \
-    {   1151,12}, {    607,13}, {    319,12}, {    671,11}, \
-    {   1343,12}, {    703,13}, {    383,12}, {    767,11}, \
-    {   1535,12}, {    831,13}, {    447,12}, {    959,11}, \
-    {   1919,14}, {    255,13}, {    511,12}, {   1087,13}, \
-    {    575,12}, {   1215,13}, {    639,12}, {   1279,13}, \
-    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
-    {   1535,13}, {    831,12}, {   1663,13}, {    959,14}, \
-    {    511,13}, {   1087,12}, {   2175,13}, {   1215,12}, \
-    {   2431,14}, {    639,13}, {   1343,12}, {   2687,13}, \
-    {   1407,12}, {   2815,13}, {   1471,12}, {   2943,14}, \
-    {    767,13}, {   1663,14}, {    895,13}, {   1919,15}, \
-    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
-    {   2431,12}, {   4863,14}, {   1279,13}, {   2687,14}, \
-    {   1407,13}, {   2943,15}, {    767,14}, {   1535,13}, \
-    {   3071,14}, {   1663,13}, {   3455,14}, {   1919,16}, \
-    {    511,15}, {   1023,14}, {   2431,13}, {   4863,15}, \
-    {   1279,14}, {   2943,13}, {   5887,15}, {   1535,14}, \
-    {   3455,15}, {   1791,14}, {   3839,13}, {   7679,16}, \
-    {   1023,15}, {   2047,14}, {   4223,15}, {   2303,14}, \
-    {   4863,15}, {   2815,14}, {   5887,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 203
-#define MUL_FFT_THRESHOLD                 4736
-
-#define SQR_FFT_MODF_THRESHOLD             336  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    336, 5}, {     11, 4}, {     23, 5}, {     19, 6}, \
-    {     10, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     25, 8}, \
-    {     13, 7}, {     27, 8}, {     15, 7}, {     31, 8}, \
-    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
-    {     33, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     47, 9}, {     27,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
-    {     63,10}, {     39, 9}, {     79,10}, {     55,11}, \
-    {     31,10}, {     79,11}, {     47,10}, {     95,12}, \
-    {     31,11}, {     63,10}, {    127, 9}, {    255, 8}, \
-    {    511,10}, {    135,11}, {     79, 9}, {    319, 8}, \
-    {    639,11}, {     95,10}, {    191, 9}, {    383,12}, \
-    {     63,11}, {    127,10}, {    255, 9}, {    511,10}, \
-    {    271, 9}, {    543,11}, {    143,10}, {    287, 8}, \
-    {   1151,10}, {    303, 6}, {   4863, 8}, {   1279, 9}, \
-    {    671,11}, {    175,10}, {    367,12}, {     95,11}, \
-    {    191,10}, {    383,11}, {    207, 9}, {    831,10}, \
-    {    447,13}, {     63,12}, {    127,11}, {    255,10}, \
-    {    511,11}, {    271, 9}, {   1087,10}, {    575,11}, \
-    {    303,10}, {    607,11}, {    319,10}, {    671,11}, \
-    {    367,12}, {    191,11}, {    383,10}, {    767,11}, \
-    {    415,12}, {    223,11}, {    447,10}, {    959,12}, \
-    {    255,11}, {    511,10}, {   1023,11}, {    575,10}, \
-    {   1151,11}, {    607,10}, {   1215,12}, {    319,11}, \
-    {    671, 9}, {   2687,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    415,11}, {    831,12}, {    479,14}, \
-    {    127,13}, {    255,12}, {    511,11}, {   1023,12}, \
-    {    607,13}, {    319,12}, {    671,11}, {   1343,12}, \
-    {    703,13}, {    383,12}, {    831,13}, {    447,12}, \
-    {    959,14}, {    255,13}, {    511,12}, {   1087,13}, \
-    {    575,12}, {   1215,13}, {    639,12}, {   1343,13}, \
-    {    703,14}, {    383,13}, {    767,12}, {   1535,13}, \
-    {    831,12}, {   1663,13}, {    959,14}, {    511,13}, \
-    {   1087,12}, {   2175,13}, {   1215,14}, {    639,13}, \
-    {   1343,12}, {   2687,13}, {   1407,12}, {   2815,14}, \
-    {    767,13}, {   1663,14}, {    895,13}, {   1791,15}, \
-    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
-    {   2431,12}, {   4863,14}, {   1279,13}, {   2687,14}, \
-    {   1407,13}, {   2815,15}, {    767,14}, {   1535,13}, \
-    {   3071,14}, {   1663,13}, {   3455,14}, {   1791,16}, \
-    {    511,15}, {   1023,14}, {   2431,13}, {   4863,15}, \
-    {   1279,14}, {   2943,13}, {   5887,15}, {   1535,14}, \
-    {   3455,15}, {   1791,14}, {   3839,16}, {   1023,15}, \
-    {   2047,14}, {   4223,15}, {   2303,14}, {   4863,15}, \
-    {   2815,14}, {   5887,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 190
-#define SQR_FFT_THRESHOLD                 3264
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  62
-#define MULLO_MUL_N_THRESHOLD             8907
-
-#define DC_DIV_QR_THRESHOLD                 52
-#define DC_DIVAPPR_Q_THRESHOLD             166
-#define DC_BDIV_QR_THRESHOLD                46
-#define DC_BDIV_Q_THRESHOLD                104
-
-#define INV_MULMOD_BNM1_THRESHOLD           42
-#define INV_NEWTON_THRESHOLD               166
-#define INV_APPR_THRESHOLD                 165
-
-#define BINV_NEWTON_THRESHOLD              228
-#define REDC_1_TO_REDC_2_THRESHOLD          32
-#define REDC_2_TO_REDC_N_THRESHOLD          52
-
-#define MU_DIV_QR_THRESHOLD               1334
-#define MU_DIVAPPR_Q_THRESHOLD            1387
-#define MUPI_DIV_QR_THRESHOLD               69
-#define MU_BDIV_QR_THRESHOLD              1187
-#define MU_BDIV_Q_THRESHOLD               1334
-
-#define POWM_SEC_TABLE  3,22,194,452,1167
-
-#define MATRIX22_STRASSEN_THRESHOLD         14
-#define HGCD_THRESHOLD                     119
-#define HGCD_APPR_THRESHOLD                 51
-#define HGCD_REDUCE_THRESHOLD             2479
-#define GCD_DC_THRESHOLD                   478
-#define GCDEXT_DC_THRESHOLD                368
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        22
-#define SET_STR_DC_THRESHOLD               802
-#define SET_STR_PRECOMPUTE_THRESHOLD      2042
-
-#define FAC_DSC_THRESHOLD                  644
-#define FAC_ODD_THRESHOLD                   24
diff --git a/gmp/mpn/x86_64/coreisbr/lshift.asm b/gmp/mpn/x86_64/coreisbr/lshift.asm
deleted file mode 100644
index a1cbc31f61..0000000000
--- a/gmp/mpn/x86_64/coreisbr/lshift.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_lshift optimised for Intel Sandy Bridge.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_lshift)
-include_mpn(`x86_64/fastsse/lshift-movdqu2.asm')
diff --git a/gmp/mpn/x86_64/coreisbr/lshiftc.asm b/gmp/mpn/x86_64/coreisbr/lshiftc.asm
deleted file mode 100644
index ac90edb76b..0000000000
--- a/gmp/mpn/x86_64/coreisbr/lshiftc.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_lshiftc optimised for Intel Sandy Bridge.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_lshiftc)
-include_mpn(`x86_64/fastsse/lshiftc-movdqu2.asm')
diff --git a/gmp/mpn/x86_64/coreisbr/mul_1.asm b/gmp/mpn/x86_64/coreisbr/mul_1.asm
deleted file mode 100644
index ded7d899c2..0000000000
--- a/gmp/mpn/x86_64/coreisbr/mul_1.asm
+++ /dev/null
@@ -1,161 +0,0 @@
-dnl  X86-64 mpn_mul_1 optimised for Intel Sandy Bridge.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9
-C AMD K10
-C AMD bd1
-C AMD bobcat
-C Intel P4
-C Intel core2
-C Intel NHM
-C Intel SBR	 2.5
-C Intel IBR	 2.4
-C Intel atom
-C VIA nano
-
-C The loop of this code is the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-
-C TODO
-C  * The loop is great, but the prologue code was quickly written.  Tune it!
-C  * Add mul_1c entry point.
-C  * We could preserve one less register under DOS64 calling conventions, using
-C    r10 instead of rsi.
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`v0',      `%rcx')   C r9
-
-define(`n',	  `%r11')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-IFDOS(`	define(`up',     ``%rsi'')') dnl
-IFDOS(`	define(`rp',     ``%rcx'')') dnl
-IFDOS(`	define(`v0',     ``%r9'')') dnl
-IFDOS(`	define(`r9',     ``rdi'')') dnl
-IFDOS(`	define(`n_param',``%r8'')') dnl
-IFDOS(`	define(`n',      ``%r8'')') dnl
-IFDOS(`	define(`r8',     ``r11'')') dnl
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mul_1)
-
-IFDOS(``push	%rsi		'')
-IFDOS(``push	%rdi		'')
-IFDOS(``mov	%rdx, %rsi	'')
-
-	mov	(up), %rax
-	mov	R32(`n_param'), R32(%r10)
-IFSTD(`	mov	n_param, n		')
-
-	lea	(up,n_param,8), up
-	lea	-8(rp,n_param,8), rp
-	neg	n
-	mul	v0
-	and	$3, R32(%r10)
-	jz	L(b0)
-	cmp	$2, R32(%r10)
-	jb	L(b1)
-	jz	L(b2)
-
-L(b3):	add	$-1, n
-	mov	%rax, %r9
-	mov	%rdx, %r8
-	mov	16(up,n,8), %rax
-	jmp	L(L3)
-
-L(b1):	mov	%rax, %r9
-	mov	%rdx, %r8
-	add	$1, n
-	jnc	L(L1)
-	mov	%rax, (rp)
-	mov	%rdx, %rax
-IFDOS(``pop	%rdi		'')
-IFDOS(``pop	%rsi		'')
-	ret
-
-L(b2):	add	$-2, n
-	mov	%rax, %r8
-	mov	%rdx, %r9
-	mov	24(up,n,8), %rax
-	jmp	L(L2)
-
-L(b0):	mov	%rax, %r8
-	mov	%rdx, %r9
-	mov	8(up,n,8), %rax
-	jmp	L(L0)
-
-	ALIGN(8)
-L(top):	mov	%rdx, %r8
-	add	%rax, %r9
-L(L1):	mov	0(up,n,8), %rax
-	adc	$0, %r8
-	mul	v0
-	add	%rax, %r8
-	mov	%r9, 0(rp,n,8)
-	mov	8(up,n,8), %rax
-	mov	%rdx, %r9
-	adc	$0, %r9
-L(L0):	mul	v0
-	mov	%r8, 8(rp,n,8)
-	add	%rax, %r9
-	mov	%rdx, %r8
-	mov	16(up,n,8), %rax
-	adc	$0, %r8
-L(L3):	mul	v0
-	mov	%r9, 16(rp,n,8)
-	mov	%rdx, %r9
-	add	%rax, %r8
-	mov	24(up,n,8), %rax
-	adc	$0, %r9
-L(L2):	mul	v0
-	mov	%r8, 24(rp,n,8)
-	add	$4, n
-	jnc	L(top)
-
-L(end):	add	%rax, %r9
-	mov	%rdx, %rax
-	adc	$0, %rax
-	mov	%r9, (rp)
-
-IFDOS(``pop	%rdi		'')
-IFDOS(``pop	%rsi		'')
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreisbr/mul_2.asm b/gmp/mpn/x86_64/coreisbr/mul_2.asm
deleted file mode 100644
index ffee78a385..0000000000
--- a/gmp/mpn/x86_64/coreisbr/mul_2.asm
+++ /dev/null
@@ -1,163 +0,0 @@
-dnl  AMD64 mpn_mul_2 optimised for Intel Sandy Bridge.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb	best
-C AMD K8,K9
-C AMD K10
-C AMD bull
-C AMD pile
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel core
-C Intel NHM
-C Intel SBR	 2.57		 2.52 using 4-way code
-C Intel IBR	 2.35		 2.32 using 4-way code
-C Intel HWL	 2.02		 1.86
-C Intel BWL
-C Intel atom
-C VIA nano
-
-C This code is the result of running a code generation and optimisation tool
-C suite written by David Harvey and Torbjorn Granlund.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`vp',      `%rcx')   C r9
-
-define(`n',	  `%rcx')
-define(`v0',      `%rbx')
-define(`v1',      `%rbp')
-
-define(`w0',	`%r8')
-define(`w1',	`%r9')
-define(`w2',	`%r10')
-define(`w3',	`%r11')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_mul_2)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	mov	(up), %rax
-	lea	(up,n_param,8), up
-	lea	(rp,n_param,8), rp
-
-	test	$1, R8(n_param)
-	jnz	L(b1)
-
-L(b0):	mov	$0, R32(n)
-	sub	n_param, n
-	xor	w0, w0
-	mul	v0
-	mov	%rax, w2
-	mov	%rdx, w1
-	mov	(up,n,8), %rax
-	jmp	L(lo0)
-
-L(b1):	mov	$1, R32(n)
-	sub	n_param, n
-	xor	w2, w2
-	mul	v0
-	mov	%rax, w0
-	mov	%rdx, w3
-	mov	-8(up,n,8), %rax
-	mul	v1
-	jmp	L(lo1)
-
-	ALIGN(32)
-L(top):	mul	v0
-	add	%rax, w0		C 1
-	mov	%rdx, w3		C 2
-	adc	$0, w3			C 2
-	mov	-8(up,n,8), %rax
-	mul	v1
-	add	w1, w0			C 1
-	adc	$0, w3			C 2
-L(lo1):	add	%rax, w2		C 2
-	mov	w0, -8(rp,n,8)		C 1
-	mov	%rdx, w0		C 3
-	adc	$0, w0			C 3
-	mov	(up,n,8), %rax
-	mul	v0
-	add	%rax, w2		C 2
-	mov	%rdx, w1		C 3
-	adc	$0, w1			C 3
-	add	w3, w2			C 2
-	mov	(up,n,8), %rax
-	adc	$0, w1			C 1
-L(lo0):	mul	v1
-	mov	w2, (rp,n,8)		C 2
-	add	%rax, w0		C 3
-	mov	%rdx, w2		C 4
-	mov	8(up,n,8), %rax
-	adc	$0, w2			C 4
-	add	$2, n
-	jnc	L(top)
-
-L(end):	mul	v0
-	add	%rax, w0
-	mov	%rdx, w3
-	adc	$0, w3
-	mov	I(-8(up),-8(up,n,8)), %rax
-	mul	v1
-	add	w1, w0
-	adc	$0, w3
-	add	%rax, w2
-	mov	w0, I(-8(rp),-8(rp,n,8))
-	adc	$0, %rdx
-	add	w3, w2
-	mov	w2, I((rp),(rp,n,8))
-	adc	$0, %rdx
-	mov	%rdx, %rax
-
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreisbr/mul_basecase.asm b/gmp/mpn/x86_64/coreisbr/mul_basecase.asm
deleted file mode 100644
index f026136ea0..0000000000
--- a/gmp/mpn/x86_64/coreisbr/mul_basecase.asm
+++ /dev/null
@@ -1,407 +0,0 @@
-dnl  AMD64 mpn_mul_basecase optimised for Intel Sandy bridge and Ivy bridge.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb	mul_1		mul_2		mul_3		addmul_2
-C AMD K8,K9
-C AMD K10
-C AMD bull
-C AMD pile
-C AMD steam
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel core
-C Intel NHM
-C Intel SBR	 2.5		 2.5		 -		 2.95
-C Intel IBR	 2.4		 2.3		 -		 2.68
-C Intel HWL	 2.35		 2.0		 -		 2.5
-C Intel BWL
-C Intel atom
-C VIA nano
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-
-C TODO
-C  * Fix the addmul_2 fluctuation affecting SBR.
-C  * Improve feed-in code, avoiding zeroing of many registers and dummy adds in
-C    the loops at the expense of code size.
-C  * Adjoin a mul_3, avoiding slow mul_1 for odd vn.
-C  * Consider replacing the 2-way mul_2 code with 4-way code, for a very slight
-C    speedup.
-C  * Further micro-optimise.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-
-define(`rp',      `%rdi')
-define(`up',      `%rsi')
-define(`un_param',`%rdx')
-define(`vp',      `%rcx')
-define(`vn',      `%r8')
-
-define(`un',      `%rbx')
-
-define(`w0',	`%r10')
-define(`w1',	`%r11')
-define(`w2',	`%r12')
-define(`w3',	`%r13')
-define(`n',	`%rbp')
-define(`v0',	`%r9')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mul_basecase)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')
-	push	%rbx
-	push	%rbp
-	mov	un_param, un		C free up rdx
-	neg	un
-
-	mov	(up), %rax		C shared for mul_1 and mul_2
-	lea	(up,un_param,8), up	C point at operand end
-	lea	(rp,un_param,8), rp	C point at rp[un-1]
-
-	mov	(vp), v0		C shared for mul_1 and mul_2
-	mul	v0			C shared for mul_1 and mul_2
-
-	test	$1, R8(vn)
-	jz	L(do_mul_2)
-
-L(do_mul_1):
-	test	$1, R8(un)
-	jnz	L(m1x1)
-
-L(m1x0):mov	%rax, w0		C un = 2, 4, 6, 8, ...
-	mov	%rdx, w1
-	mov	8(up,un,8), %rax
-	test	$2, R8(un)
-	jnz	L(m110)
-
-L(m100):lea	2(un), n		C un = 4, 8, 12, ...
-	jmp	L(m1l0)
-
-L(m110):lea	(un), n			C un = 2, 6, 10, ...
-	jmp	L(m1l2)
-
-L(m1x1):mov	%rax, w1		C un = 1, 3, 5, 7, ...
-	mov	%rdx, w0
-	test	$2, R8(un)
-	jz	L(m111)
-
-L(m101):lea	3(un), n		C un = 1, 5, 9, ...
-	test	n, n
-	js	L(m1l1)
-	mov	%rax, -8(rp)
-	mov	%rdx, (rp)
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-L(m111):lea	1(un), n		C un = 3, 7, 11, ...
-	mov	8(up,un,8), %rax
-	jmp	L(m1l3)
-
-	ALIGN(16)		C FIXME
-L(m1tp):mov	%rdx, w0
-	add	%rax, w1
-L(m1l1):mov	-16(up,n,8), %rax
-	adc	$0, w0
-	mul	v0
-	add	%rax, w0
-	mov	w1, -24(rp,n,8)
-	mov	-8(up,n,8), %rax
-	mov	%rdx, w1
-	adc	$0, w1
-L(m1l0):mul	v0
-	mov	w0, -16(rp,n,8)
-	add	%rax, w1
-	mov	%rdx, w0
-	mov	(up,n,8), %rax
-	adc	$0, w0
-L(m1l3):mul	v0
-	mov	w1, -8(rp,n,8)
-	mov	%rdx, w1
-	add	%rax, w0
-	mov	8(up,n,8), %rax
-	adc	$0, w1
-L(m1l2):mul	v0
-	mov	w0, (rp,n,8)
-	add	$4, n
-	jnc	L(m1tp)
-
-L(m1ed):add	%rax, w1
-	adc	$0, %rdx
-	mov	w1, I(-8(rp),-24(rp,n,8))
-	mov	%rdx, I((rp),-16(rp,n,8))
-
-	dec	R32(vn)
-	jz	L(ret2)
-
-	lea	8(vp), vp
-	lea	8(rp), rp
-	push	%r12
-	push	%r13
-	push	%r14
-	jmp	L(do_addmul)
-
-L(do_mul_2):
-define(`v1',	`%r14')
-	push	%r12
-	push	%r13
-	push	%r14
-
-	mov	8(vp), v1
-
-	test	$1, R8(un)
-	jnz	L(m2b1)
-
-L(m2b0):lea	(un), n
-	xor	w0, w0
-	mov	%rax, w2
-	mov	%rdx, w1
-	jmp	L(m2l0)
-
-L(m2b1):lea	1(un), n
-	xor	w1, w1
-	xor	w2, w2
-	mov	%rax, w0
-	mov	%rdx, w3
-	jmp	L(m2l1)
-
-	ALIGN(32)
-L(m2tp):mul	v0
-	add	%rax, w0
-	mov	%rdx, w3
-	adc	$0, w3
-L(m2l1):mov	-8(up,n,8), %rax
-	mul	v1
-	add	w1, w0
-	adc	$0, w3
-	add	%rax, w2
-	mov	w0, -8(rp,n,8)
-	mov	%rdx, w0
-	adc	$0, w0
-	mov	(up,n,8), %rax
-	mul	v0
-	add	%rax, w2
-	mov	%rdx, w1
-	adc	$0, w1
-	add	w3, w2
-L(m2l0):mov	(up,n,8), %rax
-	adc	$0, w1
-	mul	v1
-	mov	w2, (rp,n,8)
-	add	%rax, w0
-	mov	%rdx, w2
-	mov	8(up,n,8), %rax
-	adc	$0, w2
-	add	$2, n
-	jnc	L(m2tp)
-
-L(m2ed):mul	v0
-	add	%rax, w0
-	mov	%rdx, w3
-	adc	$0, w3
-	mov	I(-8(up),-8(up,n,8)), %rax
-	mul	v1
-	add	w1, w0
-	adc	$0, w3
-	add	%rax, w2
-	mov	w0, I(-8(rp),-8(rp,n,8))
-	adc	$0, %rdx
-	add	w3, w2
-	mov	w2, I((rp),(rp,n,8))
-	adc	$0, %rdx
-	mov	%rdx, I(8(rp),8(rp,n,8))
-
-	add	$-2, R32(vn)
-	jz	L(ret5)
-	lea	16(vp), vp
-	lea	16(rp), rp
-
-
-L(do_addmul):
-	push	%r15
-	push	vn			C save vn in new stack slot
-define(`vn',	`(%rsp)')
-define(`X0',	`%r14')
-define(`X1',	`%r15')
-define(`v1',	`%r8')
-
-L(outer):
-	mov	(vp), v0
-	mov	8(vp), v1
-	mov	(up,un,8), %rax
-	mul	v0
-	test	$1, R8(un)
-	jnz	L(a1x1)
-
-L(a1x0):mov	(rp,un,8), X0
-	xor	w0, w0
-	mov	%rdx, w1
-	test	$2, R8(un)
-	jnz	L(a110)
-
-L(a100):lea	2(un), n		C un = 4, 8, 12, ...
-	add	%rax, X0
-	adc	$0, w1
-	mov	(up,un,8), %rax
-	mul	v1
-	mov	8(rp,un,8), X1		C FIXME: Use un
-	jmp	L(lo0)
-
-L(a110):lea	(un), n			C un = 2, 6, 10, ...
-	xor	w3, w3
-	jmp	L(lo2)
-
-L(a1x1):mov	(rp,un,8), X1
-	xor	w2, w2
-	xor	w1, w1
-	test	$2, R8(un)
-	jz	L(a111)
-
-L(a101):lea	3(un), n		C un = 1, 5, 9, ...
-	mov	%rdx, w3
-	add	%rax, X1
-	mov	(up,un,8), %rax
-	mov	8(rp,un,8), X0
-	adc	$0, w3
-	jmp	L(top)
-
-L(a111):lea	1(un), n		C un = 3, 7, 11, ...
-	jmp	L(lo3)
-
-	ALIGN(32)
-L(top):	mul	v1
-	mov	%rdx, w0
-	add	%rax, X0
-	adc	$0, w0
-	add	w1, X1
-	adc	$0, w3
-	add	w2, X0
-	adc	$0, w0
-	mov	-16(up,n,8), %rax
-	mul	v0
-	add	%rax, X0
-	mov	%rdx, w1
-	adc	$0, w1
-	mov	-16(up,n,8), %rax
-	mul	v1
-	mov	X1, -24(rp,n,8)
-	mov	-8(rp,n,8), X1
-	add	w3, X0
-	adc	$0, w1
-L(lo0):	mov	%rdx, w2
-	mov	X0, -16(rp,n,8)
-	add	%rax, X1
-	adc	$0, w2
-	mov	-8(up,n,8), %rax
-	add	w0, X1
-	adc	$0, w2
-	mul	v0
-L(lo3):	add	%rax, X1
-	mov	%rdx, w3
-	adc	$0, w3
-	mov	-8(up,n,8), %rax
-	mul	v1
-	add	w1, X1
-	mov	(rp,n,8), X0
-	adc	$0, w3
-	mov	%rdx, w0
-	add	%rax, X0
-	adc	$0, w0
-	mov	(up,n,8), %rax
-	mul	v0
-	add	w2, X0
-	mov	X1, -8(rp,n,8)
-	mov	%rdx, w1
-	adc	$0, w0
-L(lo2):	add	%rax, X0
-	adc	$0, w1
-	mov	(up,n,8), %rax
-	add	w3, X0
-	adc	$0, w1
-	mul	v1
-	mov	8(rp,n,8), X1
-	add	%rax, X1
-	mov	%rdx, w2
-	adc	$0, w2
-	mov	8(up,n,8), %rax
-	mov	X0, (rp,n,8)
-	mul	v0
-	add	w0, X1
-	mov	%rdx, w3
-	adc	$0, w2
-	add	%rax, X1
-	mov	8(up,n,8), %rax
-	mov	16(rp,n,8), X0		C useless but harmless in final iter
-	adc	$0, w3
-	add	$4, n
-	jnc	L(top)
-
-L(end):	mul	v1
-	add	w1, X1
-	adc	$0, w3
-	add	w2, %rax
-	adc	$0, %rdx
-	mov	X1, I(-8(rp),-24(rp,n,8))
-	add	w3, %rax
-	adc	$0, %rdx
-	mov	%rax, I((rp),-16(rp,n,8))
-	mov	%rdx, I(8(rp),-8(rp,n,8))
-
-	addl	$-2, vn
-	lea	16(vp), vp
-	lea	16(rp), rp
-	jnz	L(outer)
-
-	pop	%rax		C deallocate vn slot
-	pop	%r15
-L(ret5):pop	%r14
-	pop	%r13
-	pop	%r12
-L(ret2):pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreisbr/mullo_basecase.asm b/gmp/mpn/x86_64/coreisbr/mullo_basecase.asm
deleted file mode 100644
index a41a8acee4..0000000000
--- a/gmp/mpn/x86_64/coreisbr/mullo_basecase.asm
+++ /dev/null
@@ -1,384 +0,0 @@
-dnl  AMD64 mpn_mullo_basecase optimised for Intel Sandy bridge and Ivy bridge.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2008, 2009, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb	mul_2		addmul_2
-C AMD K8,K9
-C AMD K10
-C AMD bull
-C AMD pile
-C AMD steam
-C AMD bobcat
-C AMD jaguar
-C Intel P4
-C Intel core
-C Intel NHM
-C Intel SBR	 2.5		 2.95
-C Intel IBR	 2.3		 2.68
-C Intel HWL	 2.0		 2.5
-C Intel BWL
-C Intel atom
-C VIA nano
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-C TODO
-C   * Implement proper cor2, replacing current cor0.
-C   * Offset n by 2 in order to avoid the outer loop cmp.  (And sqr_basecase?)
-C   * Micro-optimise.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-define(`rp',       `%rdi')
-define(`up',       `%rsi')
-define(`vp_param', `%rdx')
-define(`n',        `%rcx')
-
-define(`vp',       `%r8')
-define(`X0',       `%r14')
-define(`X1',       `%r15')
-
-define(`w0',       `%r10')
-define(`w1',       `%r11')
-define(`w2',       `%r12')
-define(`w3',       `%r13')
-define(`i',        `%rbp')
-define(`v0',       `%r9')
-define(`v1',       `%rbx')
-
-C rax rbx rcx rdx rdi rsi rbp r8 r9 r10 r11 r12 r13 r14 r15
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_mullo_basecase)
-	FUNC_ENTRY(4)
-
-	mov	(up), %rax
-	mov	vp_param, vp
-
-	cmp	$4, n
-	jb	L(small)
-
-	mov	(vp_param), v0
-	push	%rbx
-	lea	(rp,n,8), rp		C point rp at R[un]
-	push	%rbp
-	lea	(up,n,8), up		C point up right after U's end
-	push	%r12
-	neg	n
-	push	%r13
-	mul	v0
-	mov	8(vp), v1
-
-	test	$1, R8(n)
-	jnz	L(m2b1)
-
-L(m2b0):lea	(n), i
-	xor	w0, w0
-	mov	%rax, w2
-	mov	%rdx, w1
-	jmp	L(m2l0)
-
-L(m2b1):lea	1(n), i
-	xor	w1, w1
-	xor	w2, w2
-	mov	%rax, w0
-	mov	%rdx, w3
-	jmp	L(m2l1)
-
-	ALIGN(32)
-L(m2tp):mul	v0
-	add	%rax, w0
-	mov	%rdx, w3
-	adc	$0, w3
-L(m2l1):mov	-8(up,i,8), %rax
-	mul	v1
-	add	w1, w0
-	adc	$0, w3
-	add	%rax, w2
-	mov	w0, -8(rp,i,8)
-	mov	%rdx, w0
-	adc	$0, w0
-	mov	(up,i,8), %rax
-	mul	v0
-	add	%rax, w2
-	mov	%rdx, w1
-	adc	$0, w1
-	add	w3, w2
-L(m2l0):mov	(up,i,8), %rax
-	adc	$0, w1
-	mul	v1
-	mov	w2, (rp,i,8)
-	add	%rax, w0
-	mov	%rdx, w2		C FIXME: dead in last iteration
-	mov	8(up,i,8), %rax
-	adc	$0, w2			C FIXME: dead in last iteration
-	add	$2, i
-	jnc	L(m2tp)
-
-L(m2ed):imul	v0, %rax
-	add	w0, %rax
-	add	w1, %rax
-	mov	%rax, I(-8(rp),-8(rp,i,8))
-
-	add	$2, n
-	lea	16(vp), vp
-	lea	-16(up), up
-	cmp	$-2, n
-	jge	L(cor1)
-
-	push	%r14
-	push	%r15
-
-L(outer):
-	mov	(vp), v0
-	mov	8(vp), v1
-	mov	(up,n,8), %rax
-	mul	v0
-	test	$1, R8(n)
-	jnz	L(a1x1)
-
-L(a1x0):mov	(rp,n,8), X1
-	xor	w2, w2
-	xor	w1, w1
-	test	$2, R8(n)
-	jnz	L(a110)
-
-L(a100):lea	1(n), i
-	jmp	L(lo0)
-
-L(a110):lea	3(n), i
-	mov	%rdx, w3
-	add	%rax, X1
-	mov	(up,n,8), %rax
-	mov	8(rp,n,8), X0
-	adc	$0, w3
-	jmp	L(lo2)
-
-L(a1x1):mov	(rp,n,8), X0
-	xor	w0, w0
-	mov	%rdx, w1
-	test	$2, R8(n)
-	jz	L(a111)
-
-L(a101):lea	2(n), i
-	add	%rax, X0
-	adc	$0, w1
-	mov	(up,n,8), %rax
-	mul	v1
-	mov	8(rp,n,8), X1
-	jmp	L(lo1)
-
-L(a111):lea	(n), i
-	xor	w3, w3
-	jmp	L(lo3)
-
-	ALIGN(32)
-L(top):
-L(lo2):	mul	v1
-	mov	%rdx, w0
-	add	%rax, X0
-	adc	$0, w0
-	add	w1, X1
-	adc	$0, w3
-	add	w2, X0
-	adc	$0, w0
-	mov	-16(up,i,8), %rax
-	mul	v0
-	add	%rax, X0
-	mov	%rdx, w1
-	adc	$0, w1
-	mov	-16(up,i,8), %rax
-	mul	v1
-	mov	X1, -24(rp,i,8)
-	mov	-8(rp,i,8), X1
-	add	w3, X0
-	adc	$0, w1
-L(lo1):	mov	%rdx, w2
-	mov	X0, -16(rp,i,8)
-	add	%rax, X1
-	adc	$0, w2
-	mov	-8(up,i,8), %rax
-	add	w0, X1
-	adc	$0, w2
-	mul	v0
-L(lo0):	add	%rax, X1
-	mov	%rdx, w3
-	adc	$0, w3
-	mov	-8(up,i,8), %rax
-	mul	v1
-	add	w1, X1
-	mov	(rp,i,8), X0
-	adc	$0, w3
-	mov	%rdx, w0
-	add	%rax, X0
-	adc	$0, w0
-	mov	(up,i,8), %rax
-	mul	v0
-	add	w2, X0
-	mov	X1, -8(rp,i,8)
-	mov	%rdx, w1
-	adc	$0, w0
-L(lo3):	add	%rax, X0
-	adc	$0, w1
-	mov	(up,i,8), %rax
-	add	w3, X0
-	adc	$0, w1
-	mul	v1
-	mov	8(rp,i,8), X1
-	add	%rax, X1
-	mov	%rdx, w2
-	adc	$0, w2
-	mov	8(up,i,8), %rax
-	mov	X0, (rp,i,8)
-	mul	v0
-	add	w0, X1
-	mov	%rdx, w3
-	adc	$0, w2
-	add	%rax, X1
-	mov	8(up,i,8), %rax
-	mov	16(rp,i,8), X0
-	adc	$0, w3
-	add	$4, i
-	jnc	L(top)
-
-L(end):	imul	v1, %rax
-	add	%rax, X0
-	add	w1, X1
-	adc	$0, w3
-	add	w2, X0
-	mov	I(-8(up),-16(up,i,8)), %rax
-	imul	v0, %rax
-	add	X0, %rax
-	mov	X1, I(-16(rp),-24(rp,i,8))
-	add	w3, %rax
-	mov	%rax, I(-8(rp),-16(rp,i,8))
-
-	add	$2, n
-	lea	16(vp), vp
-	lea	-16(up), up
-	cmp	$-2, n
-	jl	L(outer)
-
-	pop	%r15
-	pop	%r14
-
-	jnz	L(cor0)
-
-L(cor1):mov	(vp), v0
-	mov	8(vp), v1
-	mov	-16(up), %rax
-	mul	v0			C u0 x v2
-	add	-16(rp), %rax		C FIXME: rp[0] still available in reg?
-	adc	-8(rp), %rdx		C FIXME: rp[1] still available in reg?
-	mov	-8(up), %r10
-	imul	v0, %r10
-	mov	-16(up), %r11
-	imul	v1, %r11
-	mov	%rax, -16(rp)
-	add	%r10, %r11
-	add	%rdx, %r11
-	mov	%r11, -8(rp)
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-L(cor0):mov	(vp), %r11
-	imul	-8(up), %r11
-	add	%rax, %r11
-	mov	%r11, -8(rp)
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-	ALIGN(16)
-L(small):
-	cmp	$2, n
-	jae	L(gt1)
-L(n1):	imul	(vp_param), %rax
-	mov	%rax, (rp)
-	FUNC_EXIT()
-	ret
-L(gt1):	ja	L(gt2)
-L(n2):	mov	(vp_param), %r9
-	mul	%r9
-	mov	%rax, (rp)
-	mov	8(up), %rax
-	imul	%r9, %rax
-	add	%rax, %rdx
-	mov	8(vp), %r9
-	mov	(up), %rcx
-	imul	%r9, %rcx
-	add	%rcx, %rdx
-	mov	%rdx, 8(rp)
-	FUNC_EXIT()
-	ret
-L(gt2):
-L(n3):	mov	(vp_param), %r9
-	mul	%r9		C u0 x v0
-	mov	%rax, (rp)
-	mov	%rdx, %r10
-	mov	8(up), %rax
-	mul	%r9		C u1 x v0
-	imul	16(up), %r9	C u2 x v0
-	add	%rax, %r10
-	adc	%rdx, %r9
-	mov	8(vp), %r11
-	mov	(up), %rax
-	mul	%r11		C u0 x v1
-	add	%rax, %r10
-	adc	%rdx, %r9
-	imul	8(up), %r11	C u1 x v1
-	add	%r11, %r9
-	mov	%r10, 8(rp)
-	mov	16(vp), %r10
-	mov	(up), %rax
-	imul	%rax, %r10	C u0 x v2
-	add	%r10, %r9
-	mov	%r9, 16(rp)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreisbr/popcount.asm b/gmp/mpn/x86_64/coreisbr/popcount.asm
deleted file mode 100644
index a5be33e6a7..0000000000
--- a/gmp/mpn/x86_64/coreisbr/popcount.asm
+++ /dev/null
@@ -1,118 +0,0 @@
-dnl  AMD64 mpn_popcount -- population count.
-
-dnl  Copyright 2008, 2010-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		    cycles/limb
-C AMD K8,K9		 n/a
-C AMD K10		 1.5		slower than 8-way non-pipelined code
-C AMD bd1		 4.2
-C AMD bobcat		 6.28		slower than 8-way non-pipelined code
-C Intel P4		 n/a
-C Intel core2		 n/a
-C Intel NHM		 1.32
-C Intel SBR		 1.05		fluctuating
-C Intel IBR		 1.05		fluctuating
-C Intel HSW		 1
-C Intel atom		 n/a
-C VIA nano		 n/a
-
-define(`up',		`%rdi')
-define(`n_param',	`%rsi')
-
-define(`n',		`%rcx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_popcount)
-	FUNC_ENTRY(2)
-
-	lea	(up,n_param,8), up
-	xor	R32(%rax), R32(%rax)
-
-	test	$1, R8(n_param)
-	jnz	L(bx1)
-
-L(bx0):	test	$2, R8(n_param)
-	jnz	L(b10)
-
-L(b00):	mov	$0, R32(n)
-	sub	n_param, n
-	.byte	0xf3,0x4c,0x0f,0xb8,0x04,0xcf		C popcnt (up,n,8), %r8
-	.byte	0xf3,0x4c,0x0f,0xb8,0x4c,0xcf,0x08	C popcnt 8(up,n,8), %r9
-	jmp	L(lo0)
-
-L(b10):	mov	$2, R32(n)
-	sub	n_param, n
-	.byte	0xf3,0x4c,0x0f,0xb8,0x54,0xcf,0xf0	C popcnt -16(up,n,8), %r10
-	.byte	0xf3,0x4c,0x0f,0xb8,0x5c,0xcf,0xf8	C popcnt -8(up,n,8), %r11
-	test	n, n
-	jz	L(cj2)
-	jmp	L(lo2)
-
-L(bx1):	test	$2, R8(n_param)
-	jnz	L(b11)
-
-L(b01):	mov	$1, R32(n)
-	sub	n_param, n
-	.byte	0xf3,0x4c,0x0f,0xb8,0x5c,0xcf,0xf8	C popcnt -8(up,n,8), %r11
-	test	n, n
-	jz	L(cj1)
-	.byte	0xf3,0x4c,0x0f,0xb8,0x04,0xcf		C popcnt 0(up,n,8), %r8
-	jmp	L(lo1)
-
-L(b11):	mov	$-1, n
-	sub	n_param, n
-	.byte	0xf3,0x4c,0x0f,0xb8,0x4c,0xcf,0x08	C popcnt 8(up,n,8), %r9
-	.byte	0xf3,0x4c,0x0f,0xb8,0x54,0xcf,0x10	C popcnt 16(up,n,8), %r10
-	jmp	L(lo3)
-
-	ALIGN(32)
-L(top):	add	%r9, %rax
-L(lo2):	.byte	0xf3,0x4c,0x0f,0xb8,0x04,0xcf		C popcnt 0(up,n,8), %r8
-	add	%r10, %rax
-L(lo1):	.byte	0xf3,0x4c,0x0f,0xb8,0x4c,0xcf,0x08	C popcnt 8(up,n,8), %r9
-	add	%r11, %rax
-L(lo0):	.byte	0xf3,0x4c,0x0f,0xb8,0x54,0xcf,0x10	C popcnt 16(up,n,8), %r10
-	add	%r8, %rax
-L(lo3):	.byte	0xf3,0x4c,0x0f,0xb8,0x5c,0xcf,0x18	C popcnt 24(up,n,8), %r11
-	add	$4, n
-	js	L(top)
-
-L(end):	add	%r9, %rax
-L(cj2):	add	%r10, %rax
-L(cj1):	add	%r11, %rax
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreisbr/redc_1.asm b/gmp/mpn/x86_64/coreisbr/redc_1.asm
deleted file mode 100644
index 8a5170e3fd..0000000000
--- a/gmp/mpn/x86_64/coreisbr/redc_1.asm
+++ /dev/null
@@ -1,541 +0,0 @@
-dnl  X86-64 mpn_redc_1 optimised for Intel Sandy Bridge and Ivy Bridge.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C AMD bull	 ?
-C AMD pile	 ?
-C AMD steam	 ?
-C AMD bobcat	 ?
-C AMD jaguar	 ?
-C Intel P4	 ?
-C Intel core	 ?
-C Intel NHM	 ?
-C Intel SBR	 3.24
-C Intel IBR	 3.04
-C Intel HWL	 ?
-C Intel BWL	 ?
-C Intel atom	 ?
-C VIA nano	 ?
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-C TODO
-C  * Micro-optimise, none performed thus far.
-C  * Consider inlining mpn_add_n.
-C  * Single basecases out before the pushes.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-define(`rp',          `%rdi')   C rcx
-define(`up',          `%rsi')   C rdx
-define(`mp_param',    `%rdx')   C r8
-define(`n',           `%rcx')   C r9
-define(`u0inv',       `%r8')    C stack
-
-define(`i',           `%r14')
-define(`j',           `%r15')
-define(`mp',          `%r12')
-define(`q0',          `%r13')
-
-C rax rbx rcx rdx rdi rsi rbp r8 r9 r10 r11 r12 r13 r14 r15
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-define(`ALIGNx', `ALIGN(16)')
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_redc_1)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	mov	(up), q0
-	mov	n, j			C outer loop induction var
-	lea	8(mp_param,n,8), mp
-	lea	8(up,n,8), up
-	neg	n
-	imul	u0inv, q0		C first iteration q0
-
-	test	$1, R8(n)
-	jz	L(bx0)
-
-L(bx1):	test	$2, R8(n)
-	jz	L(b3)
-
-L(b1):	cmp	$-1, R32(n)
-	jz	L(n1)
-
-L(otp1):lea	1(n), i
-	mov	-8(mp,n,8), %rax
-	mul	q0
-	mov	-8(up,n,8), %r10
-	mov	%rdx, %r11
-	add	%rax, %r10
-	mov	(mp,n,8), %rax
-	adc	$0, %r11
-	mul	q0
-	mov	%rdx, %r9
-	mov	(up,n,8), %rbx
-	add	%rax, %rbx
-	adc	$0, %r9
-	mov	(mp,i,8), %rax
-	mul	q0
-	mov	(up,i,8), %r10
-	add	%r11, %rbx
-	mov	%rbx, -8(up,i,8)	C next low remainder limb
-	adc	$0, %r9
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e1)
-
-	ALIGNx
-L(tp1):	mul	q0
-	mov	-16(up,i,8), %r10
-	add	%r11, %rbp
-	mov	%rdx, %r11
-	adc	$0, %r9
-	mov	%rbp, -24(up,i,8)
-	add	%rax, %r10
-	mov	-8(mp,i,8), %rax
-	adc	$0, %r11
-	mul	q0
-	add	%r9, %r10
-	mov	%rdx, %r9
-	mov	-8(up,i,8), %rbp
-	adc	$0, %r11
-	mov	%r10, -16(up,i,8)
-	add	%rax, %rbp
-	adc	$0, %r9
-	mov	(mp,i,8), %rax
-	mul	q0
-	mov	(up,i,8), %r10
-	add	%r11, %rbp
-	mov	%rbp, -8(up,i,8)
-	adc	$0, %r9
-L(e1):	mov	%rdx, %r11
-	add	%rax, %r10
-	mov	8(mp,i,8), %rax
-	adc	$0, %r11
-	mul	q0
-	mov	8(up,i,8), %rbp
-	add	%r9, %r10
-	mov	%rdx, %r9
-	mov	%r10, (up,i,8)
-	adc	$0, %r11
-	add	%rax, %rbp
-	adc	$0, %r9
-	mov	16(mp,i,8), %rax
-	add	$4, i
-	jnc	L(tp1)
-
-L(ed1):	mul	q0
-	mov	I(-16(up),-16(up,i,8)), %r10
-	add	%r11, %rbp
-	adc	$0, %r9
-	mov	%rbp, I(-24(up),-24(up,i,8))
-	add	%rax, %r10
-	adc	$0, %rdx
-	add	%r9, %r10
-	adc	$0, %rdx
-	mov	%r10, I(-16(up),-16(up,i,8))
-	mov	%rdx, -8(up,n,8)	C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp1)
-	jmp	L(cj)
-
-L(b3):	cmp	$-3, R32(n)
-	jz	L(n3)
-
-L(otp3):lea	3(n), i
-	mov	-8(mp,n,8), %rax
-	mul	q0
-	mov	-8(up,n,8), %r10
-	mov	%rdx, %r11
-	add	%rax, %r10
-	mov	(mp,n,8), %rax
-	adc	$0, %r11
-	mul	q0
-	mov	(up,n,8), %rbx
-	mov	%rdx, %r9
-	add	%rax, %rbx
-	adc	$0, %r9
-	mov	8(mp,n,8), %rax
-	mul	q0
-	mov	8(up,n,8), %r10
-	add	%r11, %rbx
-	mov	%rdx, %r11
-	adc	$0, %r9
-	mov	%rbx, (up,n,8)
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e3)
-
-	ALIGNx
-L(tp3):	mul	q0
-	mov	-16(up,i,8), %r10
-	add	%r11, %rbp
-	mov	%rdx, %r11
-	adc	$0, %r9
-	mov	%rbp, -24(up,i,8)
-L(e3):	add	%rax, %r10
-	mov	-8(mp,i,8), %rax
-	adc	$0, %r11
-	mul	q0
-	add	%r9, %r10
-	mov	%rdx, %r9
-	mov	-8(up,i,8), %rbp
-	adc	$0, %r11
-	mov	%r10, -16(up,i,8)
-	add	%rax, %rbp
-	adc	$0, %r9
-	mov	(mp,i,8), %rax
-	mul	q0
-	mov	(up,i,8), %r10
-	add	%r11, %rbp
-	mov	%rbp, -8(up,i,8)
-	adc	$0, %r9
-	mov	%rdx, %r11
-	add	%rax, %r10
-	mov	8(mp,i,8), %rax
-	adc	$0, %r11
-	mul	q0
-	mov	8(up,i,8), %rbp
-	add	%r9, %r10
-	mov	%rdx, %r9
-	mov	%r10, (up,i,8)
-	adc	$0, %r11
-	add	%rax, %rbp
-	adc	$0, %r9
-	mov	16(mp,i,8), %rax
-	add	$4, i
-	jnc	L(tp3)
-
-L(ed3):	mul	q0
-	mov	I(-16(up),-16(up,i,8)), %r10
-	add	%r11, %rbp
-	adc	$0, %r9
-	mov	%rbp, I(-24(up),-24(up,i,8))
-	add	%rax, %r10
-	adc	$0, %rdx
-	add	%r9, %r10
-	adc	$0, %rdx
-	mov	%r10, I(-16(up),-16(up,i,8))
-	mov	%rdx, -8(up,n,8)	C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp3)
-C	jmp	L(cj)
-
-L(cj):
-IFSTD(`	lea	-8(up,n,8), up		C param 2: up
-	lea	(up,n,8), %rdx		C param 3: up - n
-	neg	R32(n)		')	C param 4: n
-
-IFDOS(`	lea	-8(up,n,8), %rdx	C param 2: up
-	lea	(%rdx,n,8), %r8		C param 3: up - n
-	neg	R32(n)
-	mov	n, %r9			C param 4: n
-	mov	rp, %rcx	')	C param 1: rp
-
-	CALL(	mpn_add_n)
-
-L(ret):	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-L(bx0):	test	$2, R8(n)
-	jnz	L(b2)
-
-L(b0):
-L(otp0):lea	(n), i
-	mov	-8(mp,n,8), %rax
-	mul	q0
-	mov	%rdx, %r9
-	mov	-8(up,n,8), %rbp
-	add	%rax, %rbp
-	adc	$0, %r9
-	mov	(mp,n,8), %rax
-	mul	q0
-	mov	(up,n,8), %rbx
-	mov	%rdx, %r11
-	add	%rax, %rbx
-	mov	8(mp,n,8), %rax
-	adc	$0, %r11
-	mul	q0
-	mov	8(up,n,8), %rbp
-	add	%r9, %rbx
-	mov	%rdx, %r9
-	mov	%rbx, (up,n,8)
-	adc	$0, %r11
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e0)
-
-	ALIGNx
-L(tp0):	mul	q0
-	mov	-16(up,i,8), %r10
-	add	%r11, %rbp
-	mov	%rdx, %r11
-	adc	$0, %r9
-	mov	%rbp, -24(up,i,8)
-	add	%rax, %r10
-	mov	-8(mp,i,8), %rax
-	adc	$0, %r11
-	mul	q0
-	add	%r9, %r10
-	mov	%rdx, %r9
-	mov	-8(up,i,8), %rbp
-	adc	$0, %r11
-	mov	%r10, -16(up,i,8)
-	add	%rax, %rbp
-	adc	$0, %r9
-	mov	(mp,i,8), %rax
-	mul	q0
-	mov	(up,i,8), %r10
-	add	%r11, %rbp
-	mov	%rbp, -8(up,i,8)
-	adc	$0, %r9
-	mov	%rdx, %r11
-	add	%rax, %r10
-	mov	8(mp,i,8), %rax
-	adc	$0, %r11
-	mul	q0
-	mov	8(up,i,8), %rbp
-	add	%r9, %r10
-	mov	%rdx, %r9
-	mov	%r10, (up,i,8)
-	adc	$0, %r11
-L(e0):	add	%rax, %rbp
-	adc	$0, %r9
-	mov	16(mp,i,8), %rax
-	add	$4, i
-	jnc	L(tp0)
-
-L(ed0):	mul	q0
-	mov	I(-16(up),-16(up,i,8)), %r10
-	add	%r11, %rbp
-	adc	$0, %r9
-	mov	%rbp, I(-24(up),-24(up,i,8))
-	add	%rax, %r10
-	adc	$0, %rdx
-	add	%r9, %r10
-	adc	$0, %rdx
-	mov	%r10, I(-16(up),-16(up,i,8))
-	mov	%rdx, -8(up,n,8)	C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp0)
-	jmp	L(cj)
-
-L(b2):	cmp	$-2, R32(n)
-	jz	L(n2)
-
-L(otp2):lea	2(n), i
-	mov	-8(mp,n,8), %rax
-	mul	q0
-	mov	-8(up,n,8), %rbp
-	mov	%rdx, %r9
-	add	%rax, %rbp
-	adc	$0, %r9
-	mov	(mp,n,8), %rax
-	mul	q0
-	mov	(up,n,8), %rbx
-	mov	%rdx, %r11
-	add	%rax, %rbx
-	mov	8(mp,n,8), %rax
-	adc	$0, %r11
-	mul	q0
-	add	%r9, %rbx
-	mov	%rdx, %r9
-	mov	8(up,n,8), %rbp
-	adc	$0, %r11
-	mov	%rbx, (up,n,8)
-	imul	u0inv, %rbx		C next q limb
-	jmp	L(e2)
-
-	ALIGNx
-L(tp2):	mul	q0
-	mov	-16(up,i,8), %r10
-	add	%r11, %rbp
-	mov	%rdx, %r11
-	adc	$0, %r9
-	mov	%rbp, -24(up,i,8)
-	add	%rax, %r10
-	mov	-8(mp,i,8), %rax
-	adc	$0, %r11
-	mul	q0
-	add	%r9, %r10
-	mov	%rdx, %r9
-	mov	-8(up,i,8), %rbp
-	adc	$0, %r11
-	mov	%r10, -16(up,i,8)
-L(e2):	add	%rax, %rbp
-	adc	$0, %r9
-	mov	(mp,i,8), %rax
-	mul	q0
-	mov	(up,i,8), %r10
-	add	%r11, %rbp
-	mov	%rbp, -8(up,i,8)
-	adc	$0, %r9
-	mov	%rdx, %r11
-	add	%rax, %r10
-	mov	8(mp,i,8), %rax
-	adc	$0, %r11
-	mul	q0
-	mov	8(up,i,8), %rbp
-	add	%r9, %r10
-	mov	%rdx, %r9
-	mov	%r10, (up,i,8)
-	adc	$0, %r11
-	add	%rax, %rbp
-	adc	$0, %r9
-	mov	16(mp,i,8), %rax
-	add	$4, i
-	jnc	L(tp2)
-
-L(ed2):	mul	q0
-	mov	I(-16(up),-16(up,i,8)), %r10
-	add	%r11, %rbp
-	adc	$0, %r9
-	mov	%rbp, I(-24(up),-24(up,i,8))
-	add	%rax, %r10
-	adc	$0, %rdx
-	add	%r9, %r10
-	adc	$0, %rdx
-	mov	%r10, I(-16(up),-16(up,i,8))
-	mov	%rdx, -8(up,n,8)	C up[0]
-	mov	%rbx, q0		C previously computed q limb -> q0
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(otp2)
-	jmp	L(cj)
-
-L(n1):	mov	(mp_param), %rax
-	mul	q0
-	add	-16(up), %rax
-	adc	-8(up), %rdx
-	mov	%rdx, (rp)
-	mov	$0, R32(%rax)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-
-L(n2):	mov	(mp_param), %rax
-	mov	-24(up), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-16(mp), %rax
-	mov	-16(up), %r10
-	mul	q0
-	add	%rax, %r10
-	mov	%rdx, %r11
-	adc	$0, %r11
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	%r10, q0
-	imul	u0inv, q0		C next q0
-	mov	-24(mp), %rax
-	mul	q0
-	add	%rax, %r10
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-16(mp), %rax
-	mov	-8(up), %r14
-	mul	q0
-	add	%rax, %r14
-	adc	$0, %rdx
-	add	%r9, %r14
-	adc	$0, %rdx
-	xor	R32(%rax), R32(%rax)
-	add	%r11, %r14
-	adc	(up), %rdx
-	mov	%r14, (rp)
-	mov	%rdx, 8(rp)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-
-	ALIGNx
-L(n3):	mov	-32(mp), %rax
-	mov	-32(up), %r10
-	mul	q0
-	add	%rax, %r10
-	mov	-24(mp), %rax
-	mov	%rdx, %r11
-	adc	$0, %r11
-	mov	-24(up), %rbp
-	mul	q0
-	add	%rax, %rbp
-	mov	%rdx, %r9
-	adc	$0, %r9
-	mov	-16(mp), %rax
-	add	%r11, %rbp
-	mov	-16(up), %r10
-	adc	$0, %r9
-	mul	q0
-	mov	%rbp, q0
-	imul	u0inv, q0		C next q0
-	add	%rax, %r10
-	mov	%rdx, %r11
-	adc	$0, %r11
-	mov	%rbp, -24(up)
-	add	%r9, %r10
-	adc	$0, %r11
-	mov	%r10, -16(up)
-	mov	%r11, -32(up)		C up[0]
-	lea	8(up), up		C up++
-	dec	j
-	jnz	L(n3)
-	jmp	L(cj)
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/coreisbr/rsh1aors_n.asm b/gmp/mpn/x86_64/coreisbr/rsh1aors_n.asm
deleted file mode 100644
index fd2eaea7bb..0000000000
--- a/gmp/mpn/x86_64/coreisbr/rsh1aors_n.asm
+++ /dev/null
@@ -1,193 +0,0 @@
-dnl  X86-64 mpn_rsh1add_n, mpn_rsh1sub_n optimised for Intel Sandy Bridge.
-
-dnl  Copyright 2003, 2005, 2009-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 4.25
-C Intel P4	 21.5
-C Intel core2	 3.2
-C Intel NHM	 3.87
-C Intel SBR	 2.05
-C Intel atom	 ?
-C VIA nano	 44.9
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`vp', `%rdx')
-define(`n',  `%rcx')
-
-ifdef(`OPERATION_rsh1add_n', `
-	define(ADDSUB,	      add)
-	define(ADCSBB,	      adc)
-	define(func_n,	      mpn_rsh1add_n)
-	define(func_nc,	      mpn_rsh1add_nc)')
-ifdef(`OPERATION_rsh1sub_n', `
-	define(ADDSUB,	      sub)
-	define(ADCSBB,	      sbb)
-	define(func_n,	      mpn_rsh1sub_n)
-	define(func_nc,	      mpn_rsh1sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-
-	ALIGN(16)
-PROLOGUE(func_nc)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	push	%rbx
-	push	%rbp
-
-	neg	%r8			C set C flag from parameter
-	mov	(up), %rbp
-	ADCSBB	(vp), %rbp
-
-	jmp	L(ent)
-EPILOGUE()
-
-	ALIGN(16)
-PROLOGUE(func_n)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%rbp
-
-	mov	(up), %rbp
-	ADDSUB	(vp), %rbp
-L(ent):
-	sbb	R32(%rbx), R32(%rbx)	C save cy
-	mov	R32(%rbp), R32(%rax)
-	and	$1, R32(%rax)		C return value
-
-	mov	R32(n), R32(%r11)
-	and	$3, R32(%r11)
-
-	cmp	$1, R32(%r11)
-	je	L(do)			C jump if n = 1 5 9 ...
-
-L(n1):	cmp	$2, R32(%r11)
-	jne	L(n2)			C jump unless n = 2 6 10 ...
-	add	R32(%rbx), R32(%rbx)	C restore cy
-	mov	8(up), %r10
-	ADCSBB	8(vp), %r10
-	lea	8(up), up
-	lea	8(vp), vp
-	lea	8(rp), rp
-	sbb	R32(%rbx), R32(%rbx)	C save cy
-
-	shrd	$1, %r10, %rbp
-	mov	%rbp, -8(rp)
-	jmp	L(cj1)
-
-L(n2):	cmp	$3, R32(%r11)
-	jne	L(n3)			C jump unless n = 3 7 11 ...
-	add	R32(%rbx), R32(%rbx)	C restore cy
-	mov	8(up), %r9
-	mov	16(up), %r10
-	ADCSBB	8(vp), %r9
-	ADCSBB	16(vp), %r10
-	lea	16(up), up
-	lea	16(vp), vp
-	lea	16(rp), rp
-	sbb	R32(%rbx), R32(%rbx)	C save cy
-
-	shrd	$1, %r9, %rbp
-	mov	%rbp, -16(rp)
-	jmp	L(cj2)
-
-L(n3):	dec	n			C come here for n = 4 8 12 ...
-	add	R32(%rbx), R32(%rbx)	C restore cy
-	mov	8(up), %r8
-	mov	16(up), %r9
-	ADCSBB	8(vp), %r8
-	ADCSBB	16(vp), %r9
-	mov	24(up), %r10
-	ADCSBB	24(vp), %r10
-	lea	24(up), up
-	lea	24(vp), vp
-	lea	24(rp), rp
-	sbb	R32(%rbx), R32(%rbx)	C save cy
-
-	shrd	$1, %r8, %rbp
-	mov	%rbp, -24(rp)
-	shrd	$1, %r9, %r8
-	mov	%r8, -16(rp)
-L(cj2):	shrd	$1, %r10, %r9
-	mov	%r9, -8(rp)
-L(cj1):	mov	%r10, %rbp
-
-L(do):
-	shr	$2, n			C				4
-	je	L(end)			C				2
-	ALIGN(16)
-L(top):	add	R32(%rbx), R32(%rbx)		C restore cy
-
-	mov	8(up), %r8
-	mov	16(up), %r9
-	ADCSBB	8(vp), %r8
-	ADCSBB	16(vp), %r9
-	mov	24(up), %r10
-	mov	32(up), %r11
-	ADCSBB	24(vp), %r10
-	ADCSBB	32(vp), %r11
-
-	lea	32(up), up
-	lea	32(vp), vp
-
-	sbb	R32(%rbx), R32(%rbx)	C save cy
-
-	shrd	$1, %r8, %rbp
-	mov	%rbp, (rp)
-	shrd	$1, %r9, %r8
-	mov	%r8, 8(rp)
-	shrd	$1, %r10, %r9
-	mov	%r9, 16(rp)
-	shrd	$1, %r11, %r10
-	mov	%r10, 24(rp)
-
-	dec	n
-	mov	%r11, %rbp
-	lea	32(rp), rp
-	jne	L(top)
-
-L(end):	shrd	$1, %rbx, %rbp
-	mov	%rbp, (rp)
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/coreisbr/rshift.asm b/gmp/mpn/x86_64/coreisbr/rshift.asm
deleted file mode 100644
index 4c1c0d4cde..0000000000
--- a/gmp/mpn/x86_64/coreisbr/rshift.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_rshift optimised for Intel Sandy Bridge.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_rshift)
-include_mpn(`x86_64/fastsse/rshift-movdqu2.asm')
diff --git a/gmp/mpn/x86_64/coreisbr/sec_tabselect.asm b/gmp/mpn/x86_64/coreisbr/sec_tabselect.asm
deleted file mode 100644
index e4360341d9..0000000000
--- a/gmp/mpn/x86_64/coreisbr/sec_tabselect.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_sec_tabselect.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_sec_tabselect)
-include_mpn(`x86_64/fastsse/sec_tabselect.asm')
diff --git a/gmp/mpn/x86_64/coreisbr/sqr_basecase.asm b/gmp/mpn/x86_64/coreisbr/sqr_basecase.asm
deleted file mode 100644
index 46a36121fe..0000000000
--- a/gmp/mpn/x86_64/coreisbr/sqr_basecase.asm
+++ /dev/null
@@ -1,484 +0,0 @@
-dnl  AMD64 mpn_sqr_basecase optimised for Intel Sandy bridge and Ivy bridge.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2008, 2009, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb	mul_2		addmul_2	sqr_diag_addlsh1
-C AMD K8,K9	 ?		 ?			 ?
-C AMD K10	 ?		 ?			 ?
-C AMD bull	 ?		 ?			 ?
-C AMD pile	 ?		 ?			 ?
-C AMD steam	 ?		 ?			 ?
-C AMD bobcat	 ?		 ?			 ?
-C AMD jaguar	 ?		 ?			 ?
-C Intel P4	 ?		 ?			 ?
-C Intel core	 ?		 ?			 ?
-C Intel NHM	 ?		 ?			 ?
-C Intel SBR	 2.57		 2.93			 3.0
-C Intel IBR	 2.35		 2.66			 3.0
-C Intel HWL	 2.02		 2.5			 2.5
-C Intel BWL	 ?		 ?			 ?
-C Intel atom	 ?		 ?			 ?
-C VIA nano	 ?		 ?			 ?
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund, except
-C that the sqr_diag_addlsh1 loop was manually written.
-
-C TODO
-C  * Replace current unoptimised sqr_diag_addlsh1 loop, 2.5 c/l should be easy.
-C  * Streamline pointer updates.
-C  * Perhaps suppress a few more xor insns in feed-in code.
-C  * Make sure we write no dead registers in feed-in code.
-C  * We might use 32-bit size ops, since n >= 2^32 is non-terminating.  Watch
-C    out for negative sizes being zero-extended, though.
-C  * The straight-line code for n <= 3 comes from the K8 code, and might be
-C    quite sub-optimal here.  Write specific code, and add code for n = 4.
-C  * The mul_2 loop has a 10 insn common sequence in the loop start and the
-C    wind-down code.  Try re-rolling it.
-C  * This file has been the subject to just basic micro-optimisation.
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-define(`rp',	  `%rdi')
-define(`up',	  `%rsi')
-define(`un_param',`%rdx')
-
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_sqr_basecase)
-	FUNC_ENTRY(3)
-
-	cmp	$2, un_param
-	jae	L(gt1)
-
-	mov	(up), %rax
-	mul	%rax
-	mov	%rax, (rp)
-	mov	%rdx, 8(rp)
-	FUNC_EXIT()
-	ret
-
-L(gt1):	jne	L(gt2)
-
-	mov	(up), %rax
-	mov	%rax, %r8
-	mul	%rax
-	mov	8(up), %r11
-	mov	%rax, (rp)
-	mov	%r11, %rax
-	mov	%rdx, %r9
-	mul	%rax
-	mov	%rax, %r10
-	mov	%r11, %rax
-	mov	%rdx, %r11
-	mul	%r8
-	xor	%r8, %r8
-	add	%rax, %r9
-	adc	%rdx, %r10
-	adc	%r8, %r11
-	add	%rax, %r9
-	mov	%r9, 8(rp)
-	adc	%rdx, %r10
-	mov	%r10, 16(rp)
-	adc	%r8, %r11
-	mov	%r11, 24(rp)
-	FUNC_EXIT()
-	ret
-
-L(gt2):	cmp	$4, un_param
-	jae	L(gt3)
-define(`v0', `%r8')
-define(`v1', `%r9')
-define(`w0', `%r10')
-define(`w2', `%r11')
-
-	mov	(up), %rax
-	mov	%rax, %r10
-	mul	%rax
-	mov	8(up), %r11
-	mov	%rax, (rp)
-	mov	%r11, %rax
-	mov	%rdx, 8(rp)
-	mul	%rax
-	mov	16(up), %rcx
-	mov	%rax, 16(rp)
-	mov	%rcx, %rax
-	mov	%rdx, 24(rp)
-	mul	%rax
-	mov	%rax, 32(rp)
-	mov	%rdx, 40(rp)
-
-	mov	%r11, %rax
-	mul	%r10
-	mov	%rax, %r8
-	mov	%rcx, %rax
-	mov	%rdx, %r9
-	mul	%r10
-	xor	%r10, %r10
-	add	%rax, %r9
-	mov	%r11, %rax
-	mov	%r10, %r11
-	adc	%rdx, %r10
-
-	mul	%rcx
-	add	%rax, %r10
-	adc	%r11, %rdx
-	add	%r8, %r8
-	adc	%r9, %r9
-	adc	%r10, %r10
-	adc	%rdx, %rdx
-	adc	%r11, %r11
-	add	%r8, 8(rp)
-	adc	%r9, 16(rp)
-	adc	%r10, 24(rp)
-	adc	%rdx, 32(rp)
-	adc	%r11, 40(rp)
-	FUNC_EXIT()
-	ret
-
-L(gt3):
-
-define(`v0', `%r8')
-define(`v1', `%r9')
-define(`w0', `%r10')
-define(`w1', `%r11')
-define(`w2', `%rbx')
-define(`w3', `%rbp')
-define(`un', `%r12')
-define(`n',  `%rcx')
-
-define(`X0', `%r13')
-define(`X1', `%r14')
-
-L(do_mul_2):
-	mov	(up), v0
-	push	%rbx
-	lea	(rp,un_param,8), rp	C point rp at R[un]
-	mov	8(up), %rax
-	push	%rbp
-	lea	(up,un_param,8), up	C point up right after U's end
-	mov	%rax, v1
-	push	%r12
-	mov	$1, R32(un)		C free up rdx
-	push	%r13
-	sub	un_param, un
-	push	%r14
-	push	un
-	mul	v0
-	mov	%rax, (rp,un,8)
-	mov	8(up,un,8), %rax
-	test	$1, R8(un)
-	jnz	L(m2b1)
-
-L(m2b0):lea	2(un), n
-	xor	R32(w1), R32(w1)	C FIXME
-	xor	R32(w2), R32(w2)	C FIXME
-	mov	%rdx, w0
-	jmp	L(m2l0)
-
-L(m2b1):lea	1(un), n
-	xor	R32(w3), R32(w3)	C FIXME
-	xor	R32(w0), R32(w0)	C FIXME
-	mov	%rdx, w2
-	jmp	L(m2l1)
-
-	ALIGN(32)
-L(m2tp):
-L(m2l0):mul	v0
-	add	%rax, w0
-	mov	%rdx, w3
-	adc	$0, w3
-	mov	-8(up,n,8), %rax
-	mul	v1
-	add	w1, w0
-	adc	$0, w3
-	add	%rax, w2
-	mov	w0, -8(rp,n,8)
-	mov	%rdx, w0
-	adc	$0, w0
-	mov	(up,n,8), %rax
-L(m2l1):mul	v0
-	add	%rax, w2
-	mov	%rdx, w1
-	adc	$0, w1
-	add	w3, w2
-	mov	(up,n,8), %rax
-	adc	$0, w1
-	mul	v1
-	mov	w2, (rp,n,8)
-	add	%rax, w0
-	mov	%rdx, w2
-	mov	8(up,n,8), %rax
-	adc	$0, w2
-	add	$2, n
-	jnc	L(m2tp)
-
-L(m2ed):mul	v0
-	add	%rax, w0
-	mov	%rdx, w3
-	adc	$0, w3
-	mov	I(-8(up),-8(up,n,8)), %rax
-	mul	v1
-	add	w1, w0
-	adc	$0, w3
-	add	%rax, w2
-	mov	w0, I(-8(rp),-8(rp,n,8))
-	adc	$0, %rdx
-	add	w3, w2
-	mov	w2, I((rp),(rp,n,8))
-	adc	$0, %rdx
-	mov	%rdx, I(8(rp),8(rp,n,8))
-
-	add	$2, un			C decrease |un|
-
-L(do_addmul_2):
-L(outer):
-	lea	16(rp), rp
-	cmp	$-2, R32(un)		C jump if un C {-1,0}  FIXME jump if un C {-2,1}
-	jge	L(corner)		C FIXME: move to before the lea above
-
-	mov	-8(up,un,8), v0
-	mov	(up,un,8), %rax
-	mov	%rax, v1
-	mul	v0
-	test	$1, R8(un)
-	jnz	L(a1x1)
-
-L(a1x0):mov	(rp,un,8), X0
-	xor	w0, w0
-	mov	8(rp,un,8), X1
-	add	%rax, X0
-	mov	%rdx, w1
-	adc	$0, w1
-	xor	w2, w2
-	mov	X0, (rp,un,8)
-	mov	8(up,un,8), %rax
-	test	$2, R8(un)
-	jnz	L(a110)
-
-L(a100):lea	2(un), n		C un = 4, 8, 12, ...
-	jmp	L(lo0)
-
-L(a110):lea	(un), n			C un = 2, 6, 10, ...
-	jmp	L(lo2)
-
-L(a1x1):mov	(rp,un,8), X1
-	xor	w2, w2
-	mov	8(rp,un,8), X0
-	add	%rax, X1
-	mov	%rdx, w3
-	adc	$0, w3
-	xor	w0, w0
-	mov	8(up,un,8), %rax
-	test	$2, R8(un)
-	jz	L(a111)
-
-L(a101):lea	3(un), n		C un = 1, 5, 9, ...
-	jmp	L(lo1)
-
-L(a111):lea	1(un), n		C un = 3, 7, 11, ...
-	jmp	L(lo3)
-
-	ALIGN(32)
-L(top):	mul	v1
-	mov	%rdx, w0
-	add	%rax, X0
-	adc	$0, w0
-	add	w1, X1
-	adc	$0, w3
-	add	w2, X0
-	adc	$0, w0
-	mov	-16(up,n,8), %rax
-L(lo1):	mul	v0
-	add	%rax, X0
-	mov	%rdx, w1
-	adc	$0, w1
-	mov	-16(up,n,8), %rax
-	mul	v1
-	mov	X1, -24(rp,n,8)
-	mov	-8(rp,n,8), X1
-	add	w3, X0
-	adc	$0, w1
-	mov	%rdx, w2
-	mov	X0, -16(rp,n,8)
-	add	%rax, X1
-	adc	$0, w2
-	mov	-8(up,n,8), %rax
-	add	w0, X1
-	adc	$0, w2
-L(lo0):	mul	v0
-	add	%rax, X1
-	mov	%rdx, w3
-	adc	$0, w3
-	mov	-8(up,n,8), %rax
-	mul	v1
-	add	w1, X1
-	mov	(rp,n,8), X0
-	adc	$0, w3
-	mov	%rdx, w0
-	add	%rax, X0
-	adc	$0, w0
-	mov	(up,n,8), %rax
-L(lo3):	mul	v0
-	add	w2, X0
-	mov	X1, -8(rp,n,8)
-	mov	%rdx, w1
-	adc	$0, w0
-	add	%rax, X0
-	adc	$0, w1
-	mov	(up,n,8), %rax
-	add	w3, X0
-	adc	$0, w1
-	mul	v1
-	mov	8(rp,n,8), X1
-	add	%rax, X1
-	mov	%rdx, w2
-	adc	$0, w2
-	mov	8(up,n,8), %rax
-	mov	X0, (rp,n,8)
-L(lo2):	mul	v0
-	add	w0, X1
-	mov	%rdx, w3
-	adc	$0, w2
-	add	%rax, X1
-	mov	8(up,n,8), %rax
-	mov	16(rp,n,8), X0
-	adc	$0, w3
-	add	$4, n
-	jnc	L(top)
-
-L(end):	mul	v1
-	add	w1, X1
-	adc	$0, w3
-	add	w2, %rax
-	adc	$0, %rdx
-	mov	X1, I(-8(rp),-24(rp,n,8))
-	add	w3, %rax
-	adc	$0, %rdx
-	mov	%rax, I((rp),-16(rp,n,8))
-	mov	%rdx, I(8(rp),-8(rp,n,8))
-
-	add	$2, un			C decrease |un|
-	jmp	L(outer)		C loop until a small corner remains
-
-L(corner):
-	pop	n
-	jg	L(small_corner)
-
-	lea	8(rp), rp
-	mov	-24(up), v0
-	mov	-16(up), %rax
-	mov	%rax, v1
-	mul	v0
-	mov	-24(rp), X0
-	mov	-16(rp), X1
-	add	%rax, X0
-	mov	%rdx, w1
-	adc	$0, w1
-	xor	w2, w2
-	mov	X0, -24(rp)
-	mov	-8(up), %rax
-	mul	v0
-	add	$0, X1
-	mov	%rdx, w3
-	adc	$0, w2
-	add	%rax, X1
-	mov	-8(up), %rax
-	adc	$0, w3
-	mul	v1
-	add	w1, X1
-	adc	$0, w3
-	add	w2, %rax
-	adc	$0, %rdx
-	mov	X1, -16(rp)
-	jmp	L(com)
-
-L(small_corner):
-	mov	-8(rp), w3
-	mov	-16(up), v0
-	mov	-8(up), %rax
-	mul	v0
-L(com):	add	w3, %rax
-	adc	$0, %rdx
-	mov	%rax, -8(rp)
-	mov	%rdx, (rp)
-
-L(sqr_diag_addlsh1):
-	mov	-8(up,n,8), %rax
-	shl	n
-	mul	%rax
-	mov	%rax, (rp,n,8)
-
-	xor	R32(%rbx), R32(%rbx)
-	mov	8(rp,n,8), %r8
-	mov	16(rp,n,8), %r9
-	jmp	L(dm)
-
-	ALIGN(32)
-L(dtop):add	%r8, %r10
-	adc	%r9, %rax
-	mov	8(rp,n,8), %r8
-	mov	16(rp,n,8), %r9
-	mov	%r10, -8(rp,n,8)
-	mov	%rax, (rp,n,8)
-L(dm):	adc	%r8, %r8
-	adc	%r9, %r9
-	mov	(up,n,4), %rax
-	lea	(%rdx,%rbx), %r10
-	setc	R8(%rbx)
-	mul	%rax
-	add	$2, n
-	js	L(dtop)
-
-L(dend):add	%r8, %r10
-	adc	%r9, %rax
-	mov	%r10, I(-8(rp),-8(rp,n,8))
-	mov	%rax, I((rp),(rp,n,8))
-	adc	%rbx, %rdx
-	mov	%rdx, I(8(rp),8(rp,n,8))
-
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/darwin.m4 b/gmp/mpn/x86_64/darwin.m4
index 6f8ec7893d..9eb0f53723 100644
--- a/gmp/mpn/x86_64/darwin.m4
+++ b/gmp/mpn/x86_64/darwin.m4
@@ -1,41 +1,23 @@
 divert(-1)
-dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2008 Free Software Foundation, Inc.
 dnl
 dnl  This file is part of the GNU MP Library.
 dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 define(`DARWIN')
 
-define(`LEA',`dnl
-ifdef(`PIC',
-	`lea	$1(%rip), $2'
-,
-	`movabs	`$'$1, $2')
-')
-
 dnl  Usage: CALL(funcname)
 dnl
 dnl  Simply override the definition in x86_64-defs.m4.
@@ -43,39 +25,6 @@ dnl  Simply override the definition in x86_64-defs.m4.
 define(`CALL',`call	GSYM_PREFIX`'$1')
 
 
-dnl  Usage: JUMPTABSECT
-dnl
-dnl  CAUTION: Do not put anything sensible here, like RODATA.  That works with
-dnl  some Darwin tool chains, but silently breaks with other.  (Note that
-dnl  putting jump tables in the text segment is a really poor idea for PC many
-dnl  processors, since they cannot cache the same thing in both L1D and L2I.)
-
-define(`JUMPTABSECT', `.text')
-
-
-dnl  Usage: JMPENT(targlabel,tablabel)
-
-define(`JMPENT',`dnl
-ifdef(`PIC',
-	`.set	$1_tmp, $1-$2
-	.long	$1_tmp'
-,
-	`.quad	$1'
-)')
-
-dnl  Target ABI macros.  For Darwin we override IFELF (and leave default for
-dnl  IFDOS and IFSTD).
-
-define(`IFELF',   `')
-
-
-dnl  Usage: PROTECT(symbol)
-dnl
-dnl  Used for private GMP symbols that should never be overridden by users.
-dnl  This can save reloc entries and improve shlib sharing as well as
-dnl  application startup times
-
-define(`PROTECT',  `.private_extern $1')
-
+define(`JUMPTABSECT', `DATA')
 
 divert`'dnl
diff --git a/gmp/mpn/x86_64/div_qr_1n_pi1.asm b/gmp/mpn/x86_64/div_qr_1n_pi1.asm
deleted file mode 100644
index cb072e979d..0000000000
--- a/gmp/mpn/x86_64/div_qr_1n_pi1.asm
+++ /dev/null
@@ -1,247 +0,0 @@
-dnl  x86-64 mpn_div_qr_1n_pi1
-dnl  -- Divide an mpn number by a normalized single-limb number,
-dnl     using a single-limb inverse.
-
-dnl  Contributed to the GNU project by Niels Möller
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C		c/l
-C AMD K8,K9	13
-C AMD K10	13
-C AMD bull	16.5
-C AMD pile	15
-C AMD steam	 ?
-C AMD bobcat	16
-C AMD jaguar	 ?
-C Intel P4	47	poor
-C Intel core	19.25
-C Intel NHM	18
-C Intel SBR	15	poor
-C Intel IBR	13
-C Intel HWL	11.7
-C Intel BWL	 ?
-C Intel atom	52	very poor
-C VIA nano	19
-
-
-C INPUT Parameters
-define(`QP', `%rdi')
-define(`UP', `%rsi')
-define(`UN_INPUT', `%rdx')
-define(`U1', `%rcx')	C Also in %rax
-define(`D', `%r8')
-define(`DINV', `%r9')
-
-C Invariants
-define(`B2', `%rbp')
-define(`B2md', `%rbx')
-
-C Variables
-define(`UN', `%r8')	C Overlaps D input
-define(`T', `%r10')
-define(`U0', `%r11')
-define(`U2', `%r12')
-define(`Q0', `%r13')
-define(`Q1', `%r14')
-define(`Q2', `%r15')
-
-ABI_SUPPORT(STD64)
-
-	ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_div_qr_1n_pi1)
-	FUNC_ENTRY(6)
-IFDOS(`	mov	56(%rsp), %r8	')
-IFDOS(`	mov	64(%rsp), %r9	')
-	dec	UN_INPUT
-	jnz	L(first)
-
-	C Just a single 2/1 division.
-	C T, U0 are allocated in scratch registers
-	lea	1(U1), T
-	mov	U1, %rax
-	mul	DINV
-	mov	(UP), U0
-	add	U0, %rax
-	adc	T, %rdx
-	mov	%rdx, T
-	imul	D, %rdx
-	sub	%rdx, U0
-	cmp	U0, %rax
-	lea	(U0, D), %rax
-	cmovnc	U0, %rax
-	sbb	$0, T
-	cmp	D, %rax
-	jc	L(single_div_done)
-	sub	D, %rax
-	add	$1, T
-L(single_div_done):
-	mov	T, (QP)
-	FUNC_EXIT
-	ret
-L(first):
-	C FIXME: Could delay some of these until we enter the loop.
-	push	%r15
-	push	%r14
-	push	%r13
-	push	%r12
-	push	%rbx
-	push	%rbp
-
-	mov	D, B2
-	imul	DINV, B2
-	neg	B2
-	mov	B2, B2md
-	sub	D, B2md
-
-	C D not needed until final reduction
-	push	D
-	mov	UN_INPUT, UN	C Clobbers D
-
-	mov	DINV, %rax
-	mul	U1
-	mov	%rax, Q0
-	add	U1, %rdx
-	mov	%rdx, T
-
-	mov	B2, %rax
-	mul	U1
-	mov	-8(UP, UN, 8), U0
-	mov	(UP, UN, 8), U1
-	mov	T, (QP, UN, 8)
-	add	%rax, U0
-	adc	%rdx, U1
-	sbb	U2, U2
-	dec	UN
-	mov	U1, %rax
-	jz	L(final)
-
-	ALIGN(16)
-
-	C Loop is 28 instructions, 30 decoder slots, should run in 10 cycles.
-	C At entry, %rax holds an extra copy of U1
-L(loop):
-	C {Q2, Q1, Q0} <-- DINV * U1 + B (Q0 + U2 DINV) + B^2 U2
-	C Remains to add in B (U1 + c)
-	mov	DINV, Q1
-	mov	U2, Q2
-	and	U2, Q1
-	neg	Q2
-	mul	DINV
-	add	%rdx, Q1
-	adc	$0, Q2
-	add	Q0, Q1
-	mov	%rax, Q0
-	mov	B2, %rax
-	lea	(B2md, U0), T
-	adc	$0, Q2
-
-	C {U2, U1, U0} <-- (U0 + U2 B2 -c U) B + U1 B2 + u
-	mul	U1
-	and	B2, U2
-	add	U2, U0
-	cmovnc	U0, T
-
-	C {QP+UN, ...} <-- {QP+UN, ...} + {Q2, Q1} + U1 + c
-	adc	U1, Q1
-	mov	-8(UP, UN, 8), U0
-	adc	Q2, 8(QP, UN, 8)
-	jc	L(q_incr)
-L(q_incr_done):
-	add	%rax, U0
-	mov	T, %rax
-	adc	%rdx, %rax
-	mov	Q1, (QP, UN, 8)
-	sbb	U2, U2
-	dec	UN
-	mov	%rax, U1
-	jnz	L(loop)
-
-L(final):
-	pop	D
-
-	mov	U2, Q1
-	and	D, U2
-	sub	U2, %rax
-	neg	Q1
-
-	mov	%rax, U1
-	sub	D, %rax
-	cmovc	U1, %rax
-	sbb	$-1, Q1
-
-	lea	1(%rax), T
-	mul	DINV
-	add	U0, %rax
-	adc	T, %rdx
-	mov	%rdx, T
-	imul	D, %rdx
-	sub	%rdx, U0
-	cmp	U0, %rax
-	lea	(U0, D), %rax
-	cmovnc	U0, %rax
-	sbb	$0, T
-	cmp	D, %rax
-	jc	L(div_done)
-	sub	D, %rax
-	add	$1, T
-L(div_done):
-	add	T, Q0
-	mov	Q0, (QP)
-	adc	Q1, 8(QP)
-	jnc	L(done)
-L(final_q_incr):
-	addq	$1, 16(QP)
-	lea	8(QP), QP
-	jc	L(final_q_incr)
-
-L(done):
-	pop	%rbp
-	pop	%rbx
-	pop	%r12
-	pop	%r13
-	pop	%r14
-	pop	%r15
-	FUNC_EXIT
-	ret
-
-L(q_incr):
-	C U1 is not live, so use it for indexing
-	lea	16(QP, UN, 8), U1
-L(q_incr_loop):
-	addq	$1, (U1)
-	jnc	L(q_incr_done)
-	lea	8(U1), U1
-	jmp	L(q_incr_loop)
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/div_qr_2n_pi1.asm b/gmp/mpn/x86_64/div_qr_2n_pi1.asm
deleted file mode 100644
index 5e59a0ac5d..0000000000
--- a/gmp/mpn/x86_64/div_qr_2n_pi1.asm
+++ /dev/null
@@ -1,158 +0,0 @@
-dnl  x86-64 mpn_div_qr_2n_pi1
-dnl  -- Divide an mpn number by a normalized 2-limb number,
-dnl     using a single-limb inverse.
-
-dnl  Copyright 2007, 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C		c/l
-C INPUT PARAMETERS
-define(`qp',		`%rdi')
-define(`rp',		`%rsi')
-define(`up_param',	`%rdx')
-define(`un',		`%rcx')
-define(`d1',		`%r8')
-define(`d0',		`%r9')
-define(`di_param',	`8(%rsp)')
-
-define(`di',		`%r10')
-define(`up',		`%r11')
-define(`u2',		`%rbx')
-define(`u1',		`%r12')
-define(`t1',		`%r13')
-define(`t0',		`%r14')
-define(`md1',		`%r15')
-
-C TODO
-C * Store qh in the same stack slot as di_param, instead of pushing
-C   it. (we could put it in register %rbp, but then we would need to
-C   save and restore that instead, which doesn't seem like a win).
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_div_qr_2n_pi1)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-IFDOS(`	mov	64(%rsp), %r9	')
-IFDOS(`define(`di_param', `72(%rsp)')')
-	mov	di_param, di
-	mov	up_param, up
-	push	%r15
-	push	%r14
-	push	%r13
-	push	%r12
-	push	%rbx
-
-	mov	-16(up, un, 8), u1
-	mov	-8(up, un, 8), u2
-
-	mov	u1, t0
-	mov	u2, t1
-	sub	d0, t0
-	sbb	d1, t1
-	cmovnc  t0, u1
-	cmovnc	t1, u2
-	C push qh which is !carry
-	sbb	%rax, %rax
-	inc	%rax
-	push	%rax
-	lea	-2(un), un
-	mov	d1, md1
-	neg	md1
-
-	jmp	L(next)
-
-	ALIGN(16)
-L(loop):
-	C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
-	C Based on the optimized divrem_2.asm code.
-
-	mov	di, %rax
-	mul	u2
-	mov	u1, t0
-	add	%rax, t0	C q0 in t0
-	adc	u2, %rdx
-	mov	%rdx, t1	C q in t1
-	imul	md1, %rdx
-	mov	d0, %rax
-	lea	(%rdx, u1), u2
-	mul	t1
-	mov	(up, un, 8), u1
-	sub	d0, u1
-	sbb	d1, u2
-	sub	%rax, u1
-	sbb	%rdx, u2
-	xor	R32(%rax), R32(%rax)
-	xor	R32(%rdx), R32(%rdx)
-	cmp	t0, u2
-	cmovnc	d0, %rax
-	cmovnc	d1, %rdx
-	adc	$0, t1
-	nop
-	add	%rax, u1
-	adc	%rdx, u2
-	cmp	d1, u2
-	jae	L(fix)
-L(bck):
-	mov	t1, (qp, un, 8)
-L(next):
-	sub	$1, un
-	jnc	L(loop)
-L(end):
-	mov	u2, 8(rp)
-	mov	u1, (rp)
-
-	C qh on stack
-	pop	%rax
-
-	pop	%rbx
-	pop	%r12
-	pop	%r13
-	pop	%r14
-	pop	%r15
-	FUNC_EXIT()
-	ret
-
-L(fix):	C Unlikely update. u2 >= d1
-	seta	%dl
-	cmp	d0, u1
-	setae	%al
-	orb	%dl, %al		C "orb" form to placate Sun tools
-	je	L(bck)
-	inc	t1
-	sub	d0, u1
-	sbb	d1, u2
-	jmp	L(bck)
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/div_qr_2u_pi1.asm b/gmp/mpn/x86_64/div_qr_2u_pi1.asm
deleted file mode 100644
index 85af96fbf6..0000000000
--- a/gmp/mpn/x86_64/div_qr_2u_pi1.asm
+++ /dev/null
@@ -1,200 +0,0 @@
-dnl  x86-64 mpn_div_qr_2u_pi1
-dnl  -- Divide an mpn number by an unnormalized 2-limb number,
-dnl     using a single-limb inverse and shifting the dividend on the fly.
-
-dnl  Copyright 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C		c/l
-C INPUT PARAMETERS
-define(`qp',		`%rdi')
-define(`rp',		`%rsi')
-define(`up_param',	`%rdx')
-define(`un_param',	`%rcx') dnl %rcx needed for shift count
-define(`d1',		`%r8')
-define(`d0',		`%r9')
-define(`shift_param',	`FRAME+8(%rsp)')
-define(`di_param',	`FRAME+16(%rsp)')
-
-define(`di',		`%r10')
-define(`up',		`%r11')
-define(`un',		`%rbp')
-define(`u2',		`%rbx')
-define(`u1',		`%r12')
-define(`u0',		`%rsi') dnl Same as rp, which is saved and restored.
-define(`t1',		`%r13')
-define(`t0',		`%r14')
-define(`md1',		`%r15')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-deflit(`FRAME', 0)
-PROLOGUE(mpn_div_qr_2u_pi1)
-	mov	di_param, di
-	mov	up_param, up
-	push	%r15
-	push	%r14
-	push	%r13
-	push	%r12
-	push	%rbx
-	push	%rbp
-	push	rp
-deflit(`FRAME', 56)
-	lea	-2(un_param), un
-	mov	d1, md1
-	neg	md1
-
-	C int parameter, 32 bits only
-	movl	shift_param, R32(%rcx)
-
-	C FIXME: Different code for SHLD_SLOW
-
-	xor	R32(u2), R32(u2)
-	mov	8(up, un, 8), u1
-	shld	%cl, u1, u2
-	C Remains to read (up, un, 8) and shift u1, u0
-	C udiv_qr_3by2 (qh,u2,u1,u2,u1,n0, d1,d0,di)
-	mov	di, %rax
-	mul	u2
-	mov	(up, un, 8), u0
-	shld	%cl, u0, u1
-	mov	u1, t0
-	add	%rax, t0	C q0 in t0
-	adc	u2, %rdx
-	mov	%rdx, t1	C q in t1
-	imul	md1, %rdx
-	mov	d0, %rax
-	lea	(%rdx, u1), u2
-	mul	t1
-	mov	u0, u1
-	shl	%cl, u1
-	sub	d0, u1
-	sbb	d1, u2
-	sub	%rax, u1
-	sbb	%rdx, u2
-	xor	R32(%rax), R32(%rax)
-	xor	R32(%rdx), R32(%rdx)
-	cmp	t0, u2
-	cmovnc	d0, %rax
-	cmovnc	d1, %rdx
-	adc	$0, t1
-	nop
-	add	%rax, u1
-	adc	%rdx, u2
-	cmp	d1, u2
-	jae	L(fix_qh)
-L(bck_qh):
-	push	t1	C push qh on stack
-
-	jmp	L(next)
-
-	ALIGN(16)
-L(loop):
-	C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
-	C Based on the optimized divrem_2.asm code.
-
-	mov	di, %rax
-	mul	u2
-	mov	(up, un, 8), u0
-	xor	R32(t1), R32(t1)
-	shld	%cl, u0, t1
-	or	t1, u1
-	mov	u1, t0
-	add	%rax, t0	C q0 in t0
-	adc	u2, %rdx
-	mov	%rdx, t1	C q in t1
-	imul	md1, %rdx
-	mov	d0, %rax
-	lea	(%rdx, u1), u2
-	mul	t1
-	mov	u0, u1
-	shl	%cl, u1
-	sub	d0, u1
-	sbb	d1, u2
-	sub	%rax, u1
-	sbb	%rdx, u2
-	xor	R32(%rax), R32(%rax)
-	xor	R32(%rdx), R32(%rdx)
-	cmp	t0, u2
-	cmovnc	d0, %rax
-	cmovnc	d1, %rdx
-	adc	$0, t1
-	nop
-	add	%rax, u1
-	adc	%rdx, u2
-	cmp	d1, u2
-	jae	L(fix)
-L(bck):
-	mov	t1, (qp, un, 8)
-L(next):
-	sub	$1, un
-	jnc	L(loop)
-L(end):
-	C qh on stack
-	pop	%rax
-	pop	rp
-	shrd	%cl, u2, u1
-	shr	%cl, u2
-	mov	u2, 8(rp)
-	mov	u1, (rp)
-
-	pop	%rbp
-	pop	%rbx
-	pop	%r12
-	pop	%r13
-	pop	%r14
-	pop	%r15
-	ret
-
-L(fix):	C Unlikely update. u2 >= d1
-	seta	%dl
-	cmp	d0, u1
-	setae	%al
-	orb	%dl, %al		C "orb" form to placate Sun tools
-	je	L(bck)
-	inc	t1
-	sub	d0, u1
-	sbb	d1, u2
-	jmp	L(bck)
-
-C Duplicated, just jumping back to a different address.
-L(fix_qh):	C Unlikely update. u2 >= d1
-	seta	%dl
-	cmp	d0, u1
-	setae	%al
-	orb	%dl, %al		C "orb" form to placate Sun tools
-	je	L(bck_qh)
-	inc	t1
-	sub	d0, u1
-	sbb	d1, u2
-	jmp	L(bck_qh)
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/dive_1.asm b/gmp/mpn/x86_64/dive_1.asm
index 988bdab632..4889faccb5 100644
--- a/gmp/mpn/x86_64/dive_1.asm
+++ b/gmp/mpn/x86_64/dive_1.asm
@@ -1,44 +1,31 @@
 dnl  AMD64 mpn_divexact_1 -- mpn by limb exact division.
 
-dnl  Copyright 2001, 2002, 2004-2006, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2001, 2002, 2004, 2005, 2006 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	10
-C AMD K10	10
-C Intel P4	33
-C Intel core2	13.25
-C Intel corei	14
-C Intel atom	42
-C VIA nano	43
+C K8,K9:	10
+C K10:		10
+C P4:		33
+C P6-15 (Core2):13.25
+C P6-28 (Atom):	42
 
 C A quick adoption of the 32-bit K7 code.
 
@@ -49,66 +36,67 @@ C up		rsi
 C n		rdx
 C divisor	rcx
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_divexact_1)
-	FUNC_ENTRY(4)
-	push	%rbx
+	pushq	%rbx
 
-	mov	%rcx, %rax
-	xor	R32(%rcx), R32(%rcx)	C shift count
-	mov	%rdx, %r8
+	movq	%rcx, %rax
+	movl	$0, %ecx		C shift count
+	movq	%rdx, %r8
 
-	bt	$0, R32(%rax)
+	btl	$0, %eax
 	jnc	L(evn)			C skip bsfq unless divisor is even
 
-L(odd):	mov	%rax, %rbx
-	shr	R32(%rax)
-	and	$127, R32(%rax)		C d/2, 7 bits
+L(odd):	movq	%rax, %rbx
+	shrl	%eax
+	andl	$127, %eax		C d/2, 7 bits
 
-	LEA(	binvert_limb_table, %rdx)
+ifdef(`PIC',`
+	movq	binvert_limb_table@GOTPCREL(%rip), %rdx
+',`
+	movabsq	$binvert_limb_table, %rdx
+')
 
-	movzbl	(%rdx,%rax), R32(%rax)	C inv 8 bits
+	movzbl	(%rax,%rdx), %eax	C inv 8 bits
 
-	mov	%rbx, %r11		C d without twos
+	movq	%rbx, %r11		C d without twos
 
-	lea	(%rax,%rax), R32(%rdx)	C 2*inv
-	imul	R32(%rax), R32(%rax)	C inv*inv
-	imul	R32(%rbx), R32(%rax)	C inv*inv*d
-	sub	R32(%rax), R32(%rdx)	C inv = 2*inv - inv*inv*d, 16 bits
+	leal	(%rax,%rax), %edx	C 2*inv
+	imull	%eax, %eax		C inv*inv
+	imull	%ebx, %eax		C inv*inv*d
+	subl	%eax, %edx		C inv = 2*inv - inv*inv*d, 16 bits
 
-	lea	(%rdx,%rdx), R32(%rax)	C 2*inv
-	imul	R32(%rdx), R32(%rdx)	C inv*inv
-	imul	R32(%rbx), R32(%rdx)	C inv*inv*d
-	sub	R32(%rdx), R32(%rax)	C inv = 2*inv - inv*inv*d, 32 bits
+	leal	(%rdx,%rdx), %eax	C 2*inv
+	imull	%edx, %edx		C inv*inv
+	imull	%ebx, %edx		C inv*inv*d
+	subl	%edx, %eax		C inv = 2*inv - inv*inv*d, 32 bits
 
-	lea	(%rax,%rax), %r10	C 2*inv
-	imul	%rax, %rax		C inv*inv
-	imul	%rbx, %rax		C inv*inv*d
-	sub	%rax, %r10		C inv = 2*inv - inv*inv*d, 64 bits
+	leaq	(%rax,%rax), %rdx	C 2*inv
+	imulq	%rax, %rax		C inv*inv
+	imulq	%rbx, %rax		C inv*inv*d
+	subq	%rax, %rdx		C inv = 2*inv - inv*inv*d, 64 bits
 
-	lea	(%rsi,%r8,8), %rsi	C up end
-	lea	-8(%rdi,%r8,8), %rdi	C rp end
-	neg	%r8			C -n
+	leaq	(%rsi,%r8,8), %rsi	C up end
+	leaq	-8(%rdi,%r8,8), %rdi	C rp end
+	negq	%r8			C -n
 
-	mov	(%rsi,%r8,8), %rax	C up[0]
+	movq	%rdx, %r10		C final inverse
+	movq	(%rsi,%r8,8), %rax	C up[0]
 
-	inc	%r8
+	incq	%r8
 	jz	L(one)
 
-	mov	(%rsi,%r8,8), %rdx	C up[1]
+	movq	(%rsi,%r8,8), %rdx	C up[1]
 
-	shrd	R8(%rcx), %rdx, %rax
+	shrdq	%cl, %rdx, %rax
 
-	xor	R32(%rbx), R32(%rbx)
-	jmp	L(ent)
+	xorl	%ebx, %ebx
+	jmp	L(entry)
 
-L(evn):	bsf	%rax, %rcx
-	shr	R8(%rcx), %rax
+L(evn):	bsfq	%rax, %rcx
+	shrq	%cl, %rax
 	jmp	L(odd)
 
 	ALIGN(8)
@@ -120,39 +108,54 @@ L(top):
 	C rsi	up end
 	C rdi	rp end
 	C r8	counter, limbs, negative
-	C r10	d^(-1) mod 2^64
-	C r11	d, shifted down
-
-	mul	%r11			C carry limb in rdx	0 10
-	mov	-8(%rsi,%r8,8), %rax	C
-	mov	(%rsi,%r8,8), %r9	C
-	shrd	R8(%rcx), %r9, %rax	C
-	nop				C
-	sub	%rbx, %rax		C apply carry bit
-	setc	%bl			C
-	sub	%rdx, %rax		C apply carry limb	5
-	adc	$0, %rbx		C			6
-L(ent):	imul	%r10, %rax		C			6
-	mov	%rax, (%rdi,%r8,8)	C
-	inc	%r8			C
+
+	mulq	%r11			C carry limb in rdx
+
+	movq	-8(%rsi,%r8,8), %rax
+	movq	(%rsi,%r8,8), %r9
+
+	shrdq	%cl, %r9, %rax
+	nop
+
+	subq	%rbx, %rax		C apply carry bit
+	setc	%bl
+
+	subq	%rdx, %rax		C apply carry limb
+	adcq	$0, %rbx
+
+L(entry):
+	imulq	%r10, %rax
+
+	movq	%rax, (%rdi,%r8,8)
+	incq	%r8
 	jnz	L(top)
 
-	mul	%r11			C carry limb in rdx
-	mov	-8(%rsi), %rax		C up high limb
-	shr	R8(%rcx), %rax
-	sub	%rbx, %rax		C apply carry bit
-	sub	%rdx, %rax		C apply carry limb
-	imul	%r10, %rax
-	mov	%rax, (%rdi)
-	pop	%rbx
-	FUNC_EXIT()
+
+	mulq	%r11			C carry limb in rdx
+
+	movq	-8(%rsi), %rax		C up high limb
+	shrq	%cl, %rax
+
+	subq	%rbx, %rax		C apply carry bit
+
+	subq	%rdx, %rax		C apply carry limb
+
+	imulq	%r10, %rax
+
+	movq	%rax, (%rdi)
+
+	popq	%rbx
 	ret
 
-L(one):	shr	R8(%rcx), %rax
-	imul	%r10, %rax
-	mov	%rax, (%rdi)
-	pop	%rbx
-	FUNC_EXIT()
+
+L(one):
+	shrq	%cl, %rax
+
+	imulq	%r10, %rax
+
+	movq	%rax, (%rdi)
+
+	popq	%rbx
 	ret
 
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/divrem_1.asm b/gmp/mpn/x86_64/divrem_1.asm
index 91928d9aa3..2f3e95a839 100644
--- a/gmp/mpn/x86_64/divrem_1.asm
+++ b/gmp/mpn/x86_64/divrem_1.asm
@@ -1,44 +1,42 @@
 dnl  x86-64 mpn_divrem_1 -- mpn by limb division.
 
-dnl  Copyright 2004, 2005, 2007-2012 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C		norm	unorm	frac
-C AMD K8,K9	13	13	12
-C AMD K10	13	13	12
-C Intel P4	43	44	43
-C Intel core2	24.5	24.5	19.5
-C Intel corei	20.5	19.5	18
-C Intel atom	43	46	36
-C VIA nano	25.5	25.5	24
+C K8		13	13	12
+C P4		44.2	44.2	42.3
+C P6-15 (Core2)	24.5	24.5	19.3
+C P6-15 (Atom)	42	52	37
+
+C TODO
+C  * Compute the inverse without relying on the div instruction.
+C    Newton's method and mulq, or perhaps the faster fdiv.
+C  * Tune prologue.
+C  * Optimize for Core 2.
+
+C The code for unnormalized divisors works also for normalized divisors, but
+C for some reason it runs really slowly (on K8) for that case.  Use special
+C code until we can address this.  The Intel Atom is also affected, but
+C understandably (shld slowness).
+define(`SPECIAL_CODE_FOR_NORMALIZED_DIVISOR',1)
 
 C mp_limb_t
 C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
@@ -67,20 +65,11 @@ define(`un',		`%rbx')
 C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
 C         cnt         qp      d  dinv
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-IFSTD(`define(`CNTOFF',		`40($1)')')
-IFDOS(`define(`CNTOFF',		`104($1)')')
-
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_preinv_divrem_1)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-IFDOS(`	mov	64(%rsp), %r9	')
-	xor	R32(%rax), R32(%rax)
+	xor	%eax, %eax
 	push	%r13
 	push	%r12
 	push	%rbp
@@ -95,17 +84,14 @@ IFDOS(`	mov	64(%rsp), %r9	')
 
 	test	d, d
 	js	L(nent)
-
-	mov	CNTOFF(%rsp), R8(cnt)
+	mov	40(%rsp), R8(cnt)
 	shl	R8(cnt), d
 	jmp	L(uent)
 EPILOGUE()
 
 	ALIGN(16)
 PROLOGUE(mpn_divrem_1)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	xor	R32(%rax), R32(%rax)
+	xor	%eax, %eax
 	push	%r13
 	push	%r12
 	push	%rbp
@@ -120,6 +106,8 @@ IFDOS(`	mov	56(%rsp), %r8	')
 	lea	-8(qp,un_param,8), qp
 	xor	R32(%rbp), R32(%rbp)
 
+
+ifdef(`SPECIAL_CODE_FOR_NORMALIZED_DIVISOR',`
 	test	d, d
 	jns	L(unnormalized)
 
@@ -130,54 +118,50 @@ L(normalized):
 	dec	un
 	mov	%rbp, %rax
 	sub	d, %rbp
-	cmovc	%rax, %rbp
-	sbb	R32(%rax), R32(%rax)
-	inc	R32(%rax)
+	cmovb	%rax, %rbp
+	sbb	%eax, %eax
+	inc	%eax
 	mov	%rax, (qp)
 	lea	-8(qp), qp
 L(8):
-IFSTD(`	push	%rdi		')
-IFSTD(`	push	%rsi		')
-	push	%r8
-IFSTD(`	mov	d, %rdi		')
-IFDOS(`	mov	d, %rcx		')
-	CALL(	mpn_invert_limb)
-	pop	%r8
-IFSTD(`	pop	%rsi		')
-IFSTD(`	pop	%rdi		')
-
+	mov	d, %rdx
+	mov	$-1, %rax
+	not	%rdx
+	div	d			C FREE rax rdx rcx r9 r10 r11
 	mov	%rax, dinv
 	mov	%rbp, %rax
 	jmp	L(nent)
 
 	ALIGN(16)
-L(ntop):mov	(up,un,8), %r10		C	    K8-K10  P6-CNR P6-NHM  P4
-	mul	dinv			C	      0,13   0,20   0,18   0,45
-	add	%r10, %rax		C	      4      8      3     12
-	adc	%rbp, %rdx		C	      5      9     10     13
-	mov	%rax, %rbp		C	      5      9      4     13
-	mov	%rdx, %r13		C	      6     11     12     23
-	imul	d, %rdx			C	      6     11     11     23
-	sub	%rdx, %r10		C	     10     16     14     33
+L(nloop):				C		    cycK8  cycP6  cycP4
+	mov	(up,un,8), %r10		C
+	lea	1(%rax), %rbp		C
+	mul	dinv			C		     0,13   0,19  0,45
+	add	%r10, %rax		C		     4      8     12
+	adc	%rbp, %rdx		C		     5      9     13
+	mov	%rax, %rbp		C		     5      9     13
+	mov	%rdx, %r13		C		     6      11    23
+	imul	d, %rdx			C		     6      11    23
+	sub	%rdx, %r10		C		     10     16    33
 	mov	d, %rax			C
-	add	%r10, %rax		C	     11     17     15     34
-	cmp	%rbp, %r10		C	     11     17     15     34
-	cmovc	%r10, %rax		C	     12     18     16     35
+	add	%r10, %rax		C		     11     17    34
+	cmp	%rbp, %r10		C		     11     17    34
+	cmovb	%r10, %rax		C		     12     18    35
 	adc	$-1, %r13		C
 	cmp	d, %rax			C
 	jae	L(nfx)			C
 L(nok):	mov	%r13, (qp)		C
 	sub	$8, qp			C
-L(nent):lea	1(%rax), %rbp		C
-	dec	un			C
-	jns	L(ntop)			C
+L(nent):dec	un			C
+	jns	L(nloop)		C
 
-	xor	R32(%rcx), R32(%rcx)
+	xor	%ecx, %ecx
 	jmp	L(87)
 
 L(nfx):	sub	d, %rax
 	inc	%r13
 	jmp	L(nok)
+')
 
 L(unnormalized):
 	test	un, un
@@ -192,42 +176,30 @@ L(unnormalized):
 	dec	un
 L(44):
 	bsr	d, %rcx
-	not	R32(%rcx)
-	shl	R8(%rcx), d
-	shl	R8(%rcx), %rbp
-
-	push	%rcx
-IFSTD(`	push	%rdi		')
-IFSTD(`	push	%rsi		')
-	push	%r8
-IFSTD(`	mov	d, %rdi		')
-IFDOS(`	mov	d, %rcx		')
-	CALL(	mpn_invert_limb)
-	pop	%r8
-IFSTD(`	pop	%rsi		')
-IFSTD(`	pop	%rdi		')
-	pop	%rcx
-
+	not	%ecx
+	sal	%cl, d
+	sal	%cl, %rbp
+	mov	d, %rdx
+	mov	$-1, %rax
+	not	%rdx
+	div	d			C FREE rax rdx r9 r10 r11
+	test	un, un
 	mov	%rax, dinv
 	mov	%rbp, %rax
-	test	un, un
 	je	L(87)
-
-L(uent):dec	un
-	mov	(up,un,8), %rbp
-	neg	R32(%rcx)
-	shr	R8(%rcx), %rbp
-	neg	R32(%rcx)
-	or	%rbp, %rax
-	jmp	L(ent)
+L(uent):
+	mov	-8(up,un,8), %rbp
+	shr	%cl, %rax
+	shld	%cl, %rbp, %rax
+	sub	$2, un
+	js	L(ulast)
 
 	ALIGN(16)
-L(utop):mov	(up,un,8), %r10
-	shl	R8(%rcx), %rbp
-	neg	R32(%rcx)
-	shr	R8(%rcx), %r10
-	neg	R32(%rcx)
-	or	%r10, %rbp
+L(uloop):
+	nop
+	mov	(up,un,8), %r10
+	lea	1(%rax), %r11
+	shld	%cl, %r10, %rbp
 	mul	dinv
 	add	%rbp, %rax
 	adc	%r11, %rdx
@@ -238,18 +210,18 @@ L(utop):mov	(up,un,8), %r10
 	mov	d, %rax
 	add	%rbp, %rax
 	cmp	%r11, %rbp
-	cmovc	%rbp, %rax
+	cmovb	%rbp, %rax
 	adc	$-1, %r13
 	cmp	d, %rax
 	jae	L(ufx)
 L(uok):	mov	%r13, (qp)
 	sub	$8, qp
-L(ent):	mov	(up,un,8), %rbp
 	dec	un
+	mov	%r10, %rbp
+	jns	L(uloop)
+L(ulast):
 	lea	1(%rax), %r11
-	jns	L(utop)
-
-L(uend):shl	R8(%rcx), %rbp
+	sal	%cl, %rbp
 	mul	dinv
 	add	%rbp, %rax
 	adc	%r11, %rdx
@@ -260,47 +232,48 @@ L(uend):shl	R8(%rcx), %rbp
 	mov	d, %rax
 	add	%rbp, %rax
 	cmp	%r11, %rbp
-	cmovc	%rbp, %rax
+	cmovb	%rbp, %rax
 	adc	$-1, %r13
 	cmp	d, %rax
-	jae	L(efx)
-L(eok):	mov	%r13, (qp)
+	jae	L(93)
+L(69):	mov	%r13, (qp)
 	sub	$8, qp
 	jmp	L(87)
 
 L(ufx):	sub	d, %rax
 	inc	%r13
 	jmp	L(uok)
-L(efx):	sub	d, %rax
+
+L(93):	sub	d, %rax
 	inc	%r13
-	jmp	L(eok)
+	jmp	L(69)
 
 L(87):	mov	d, %rbp
 	neg	%rbp
-	jmp	L(fent)
-
-	ALIGN(16)			C	    K8-K10  P6-CNR P6-NHM  P4
-L(ftop):mul	dinv			C	      0,12   0,17   0,17
-	add	%r11, %rdx		C	      5      8     10
-	mov	%rax, %r11		C	      4      8      3
-	mov	%rdx, %r13		C	      6      9     11
-	imul	%rbp, %rdx		C	      6      9     11
+	jmp	L(87b)
+
+	ALIGN(16)
+L(floop):				C		    cycK8  cycP6  cycP4
+	lea	1(%rax), %r11		C
+	mul	dinv			C		     0,12
+	add	%r11, %rdx		C		     5
+	mov	%rax, %r11		C		     4
+	mov	%rdx, %r13		C		     6
+	imul	%rbp, %rdx		C		     6
 	mov	d, %rax			C
-	add	%rdx, %rax		C	     10     14     14
-	cmp	%r11, %rdx		C	     10     14     14
-	cmovc	%rdx, %rax		C	     11     15     15
+	add	%rdx, %rax		C		     10
+	cmp	%r11, %rdx		C		     10
+	cmovb	%rdx, %rax		C		     11
 	adc	$-1, %r13		C
 	mov	%r13, (qp)		C
 	sub	$8, qp			C
-L(fent):lea	1(%rax), %r11		C
-	dec	fn			C
-	jns	L(ftop)			C
+L(87b):	dec	fn			C
+	jns	L(floop)		C
 
-	shr	R8(%rcx), %rax
+	shr	%cl, %rax
 L(ret):	pop	%rbx
 	pop	%rbp
 	pop	%r12
 	pop	%r13
-	FUNC_EXIT()
 	ret
-EPILOGUE()
+EPILOGUE(mpn_divrem_1)
diff --git a/gmp/mpn/x86_64/divrem_2.asm b/gmp/mpn/x86_64/divrem_2.asm
index 66c2da1a05..37053ba88d 100644
--- a/gmp/mpn/x86_64/divrem_2.asm
+++ b/gmp/mpn/x86_64/divrem_2.asm
@@ -1,52 +1,37 @@
 dnl  x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
 
-dnl  Copyright 2007, 2008, 2010 Free Software Foundation, Inc.
+dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C	     cycles/limb	best
-C AMD K8,K9	18
-C AMD K10	18
-C AMD bull
-C AMD pile
-C AMD bobcat
-C AMD jaguar
-C Intel P4	68
-C Intel core	34
-C Intel NHM	30.25
-C Intel SBR	21.3
-C Intel IBR	21.4
-C Intel HWL	20.6
-C Intel BWL
-C Intel atom	73
-C VIA nano	33
+C		norm	frac
+C K8		20	20
+C P4		73	73
+C P6-15		37	37
+
+C TODO
+C  * Perhaps compute the inverse without relying on divq?  Could either use
+C    Newton's method and mulq, or perhaps the faster fdiv.
+C  * The loop has not been carefully tuned, nor analysed for critical path
+C    length.  It seems that 20 c/l is a bit long, compared to the 13 c/l for
+C    mpn_divrem_1.
+C  * Clean up.  This code is really crude.
 
 
 C INPUT PARAMETERS
@@ -56,117 +41,168 @@ define(`up_param',	`%rdx')
 define(`un_param',	`%rcx')
 define(`dp',		`%r8')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
+define(`dinv',		`%r9')
+
+
+C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
+C         cnt         qp      d  dinv
 
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_divrem_2)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
+
 	push	%r15
+	lea	(%rdx,%rcx,8), %rax
 	push	%r14
 	push	%r13
-	push	%r12
-	lea	-24(%rdx,%rcx,8), %r12	C r12 = &up[un-1]
 	mov	%rsi, %r13
+	push	%r12
+	lea	-24(%rax), %r12
 	push	%rbp
 	mov	%rdi, %rbp
 	push	%rbx
-	mov	8(%r8), %r11		C d1
-	mov	16(%r12), %rbx
-	mov	(%r8), %r8		C d0
-	mov	8(%r12), %r10
-
+	mov	8(%r8), %r11
+	mov	-8(%rax), %r9
+	mov	(%r8), %r8
+	mov	-16(%rax), %r10
 	xor	R32(%r15), R32(%r15)
-	cmp	%rbx, %r11
+	cmp	%r9, %r11
 	ja	L(2)
 	setb	%dl
 	cmp	%r10, %r8
 	setbe	%al
-	orb	%al, %dl		C "orb" form to placate Sun tools
-	je	L(2)
-	inc	R32(%r15)
-	sub	%r8, %r10
-	sbb	%r11, %rbx
+	or	%al, %dl
+	jne	L(23)
 L(2):
-	lea	-3(%rcx,%r13), %r14	C un + fn - 3
-	test	%r14, %r14
-	js	L(end)
-
-	push	%r8
-	push	%r10
-	push	%r11
-IFSTD(`	mov	%r11, %rdi	')
-IFDOS(`	mov	%r11, %rcx	')
-	CALL(	mpn_invert_limb)
-	pop	%r11
-	pop	%r10
-	pop	%r8
-
+	lea	-3(%rcx,%r13), %rbx	C un + fn - 3
+	test	%rbx, %rbx
+	js	L(6)
+	mov	%r11, %rdx
+	mov	$-1, %rax
+	not	%rdx
+	div	%r11
 	mov	%r11, %rdx
 	mov	%rax, %rdi
 	imul	%rax, %rdx
-	mov	%rdx, %r9
+	mov	%rdx, %r14
 	mul	%r8
-	xor	R32(%rcx), R32(%rcx)
-	add	%r8, %r9
-	adc	$-1, %rcx
-	add	%rdx, %r9
-	adc	$0, %rcx
-	js	2f
-1:	dec	%rdi
-	sub	%r11, %r9
-	sbb	$0, %rcx
-	jns	1b
-2:
-
-	lea	(%rbp,%r14,8), %rbp
+	mov	%rdx, %rcx
+	mov	$-1, %rdx
+	add	%r8, %r14
+	adc	$0, %rdx
+	add	%rcx, %r14
+	adc	$0, %rdx
+	js	L(8)
+L(18):
+	dec	%rdi
+	sub	%r11, %r14
+	sbb	$0, %rdx
+	jns	L(18)
+L(8):
+
+C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
+C n2      un      n1 dinv qp  d0        d1  up  fn      msl
+C     n2  un     -d1      n1    dinv XX              XX
+
+ifdef(`NEW',`
+	lea	(%rbp,%rbx,8), %rbp
+	mov	%rbx, %rcx		C un
+	mov	%r9, %rbx
+	mov	%rdi, %r9		C di
+	mov	%r10, %r14
 	mov	%r11, %rsi
 	neg	%rsi			C -d1
-
-C rax rbx rcx rdx rsi rdi  rbp r8 r9 r10 r11 r12 r13 r14 r15
-C     n2  un      -d1 dinv qp  d0 q0     d1  up  fn      msl
-
 	ALIGN(16)
-L(top):	mov	%rdi, %rax		C di		ncp
-	mul	%rbx			C		0, 17
-	mov	%r10, %rcx		C
-	add	%rax, %rcx		C		4
+L(loop):
+	mov	%r9, %rax		C di		ncp
+	mul	%rbx			C		0, 18
+	add	%r14, %rax		C		4
+	mov	%rax, %r10		C q0		5
 	adc	%rbx, %rdx		C		5
-	mov	%rdx, %r9		C q		6
+	mov	%rdx, %rdi		C q		6
 	imul	%rsi, %rdx		C		6
 	mov	%r8, %rax		C		ncp
-	lea	(%rdx, %r10), %rbx	C n1 -= ...	10
-	xor	R32(%r10), R32(%r10)	C
-	mul	%r9			C		7
-	cmp	%r14, %r13		C
+	lea	(%rdx, %r14), %rbx	C n1 -= ...	7
+	mul	%rdi			C		7
+	xor	R32(%r14), R32(%r14)	C
+	cmp	%rcx, %r13		C
 	jg	L(19)			C
-	mov	(%r12), %r10		C
+	mov	(%r12), %r14		C
 	sub	$8, %r12		C
-L(19):	sub	%r8, %r10		C		ncp
-	sbb	%r11, %rbx		C		11
-	sub	%rax, %r10		C		11
+L(19):	sub	%r8, %r14		C		ncp
+	sbb	%r11, %rbx		C		9
+	sub	%rax, %r14		C		11
 	sbb	%rdx, %rbx		C		12
-	xor	R32(%rax), R32(%rax)	C
+	inc	%rdi			C		7
 	xor	R32(%rdx), R32(%rdx)	C
-	cmp	%rcx, %rbx		C		13
-	cmovnc	%r8, %rax		C		14
-	cmovnc	%r11, %rdx		C		14
-	adc	$0, %r9			C adjust q	14
-	nop
-	add	%rax, %r10		C		15
+	cmp	%r10, %rbx		C		13
+	mov	%r8, %rax		C d0		ncp
+	adc	$-1, %rdx		C mask		14
+	add	%rdx, %rdi		C q--		15
+	and	%rdx, %rax		C d0 or 0	15
+	and	%r11, %rdx		C d1 or 0	15
+	add	%rax, %r14		C		16
 	adc	%rdx, %rbx		C		16
-	cmp	%r11, %rbx		C
+	cmp	%r11, %rbx		C		17
 	jae	L(fix)			C
-L(bck):	mov	%r9, (%rbp)		C
+L(bck):	mov	%rdi, (%rbp)		C
 	sub	$8, %rbp		C
-	dec	%r14
-	jns	L(top)
-
-L(end):	mov	%r10, 8(%r12)
-	mov	%rbx, 16(%r12)
+	dec	%rcx
+	jns	L(loop)
+
+	mov	%r14, %r10
+	mov	%rbx, %r9
+',`
+	lea	(%rbp,%rbx,8), %rbp
+	mov	%rbx, %rcx
+	mov	%r9, %rax
+	mov	%r10, %rsi
+	ALIGN(16)
+L(loop):
+	mov	%rax, %r14		C		0, 19
+	mul	%rdi			C		0
+	mov	%r11, %r9		C		1
+	add	%rsi, %rax		C		4
+	mov	%rax, %rbx		C q0		5
+	adc	%r14, %rdx		C q		5
+	lea	1(%rdx), %r10		C		6
+	mov	%rdx, %rax		C		6
+	imul	%rdx, %r9		C		6
+	sub	%r9, %rsi		C		10
+	xor	R32(%r9), R32(%r9)	C
+	mul	%r8			C		7
+	cmp	%rcx, %r13		C
+	jg	L(13)			C
+	mov	(%r12), %r9		C
+	sub	$8, %r12		C
+L(13):	sub	%r8, %r9		C		ncp
+	sbb	%r11, %rsi		C		11
+	sub	%rax, %r9		C		11
+	sbb	%rdx, %rsi		C		12
+	cmp	%rbx, %rsi		C		13
+	sbb	%rax, %rax		C		14
+	not	%rax			C		15
+	add	%rax, %r10		C		16
+	mov	%r8, %rbx		C		ncp
+	and	%rax, %rbx		C		16
+	and	%r11, %rax		C		16
+	add	%rbx, %r9		C		17
+	adc	%rsi, %rax		C		18
+	cmp	%rax, %r11		C		19
+	jbe	L(fix)			C
+L(bck):	mov	%r10, (%rbp)		C
+	sub	$8, %rbp		C
+	mov	%r9, %rsi		C		18
+	dec	%rcx
+	jns	L(loop)
+
+	mov	%rsi, %r10
+	mov	%rax, %r9
+')
+L(6):
+	mov	%r10, 8(%r12)
+	mov	%r9, 16(%r12)
 	pop	%rbx
 	pop	%rbp
 	pop	%r12
@@ -174,16 +210,30 @@ L(end):	mov	%r10, 8(%r12)
 	pop	%r14
 	mov	%r15, %rax
 	pop	%r15
-	FUNC_EXIT()
 	ret
 
+L(23):	inc	R32(%r15)
+	sub	%r8, %r10
+	sbb	%r11, %r9
+	jmp	L(2)
+
+ifdef(`NEW',`
 L(fix):	seta	%dl
-	cmp	%r8, %r10
+	cmp	%r8, %r14
 	setae	%al
-	orb	%dl, %al		C "orb" form to placate Sun tools
+	orb	%dl, %al
 	je	L(bck)
-	inc	%r9
-	sub	%r8, %r10
+	inc	%rdi
+	sub	%r8, %r14
 	sbb	%r11, %rbx
 	jmp	L(bck)
+',`
+L(fix):	jb	L(88)
+	cmp	%r8, %r9
+	jb	L(bck)
+L(88):	inc	%r10
+	sub	%r8, %r9
+	sbb	%r11, %rax
+	jmp	L(bck)
+')
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/dos64.m4 b/gmp/mpn/x86_64/dos64.m4
deleted file mode 100644
index 9414623b56..0000000000
--- a/gmp/mpn/x86_64/dos64.m4
+++ /dev/null
@@ -1,100 +0,0 @@
-divert(-1)
-dnl  Copyright 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-define(`HOST_DOS64')
-
-
-dnl  On DOS64 we always generate position-independent-code
-dnl
-
-define(`PIC')
-
-
-define(`LEA',`
-	lea	$1(%rip), $2
-')
-
-
-dnl  Usage: CALL(funcname)
-dnl
-dnl  Simply override the definition in x86_64-defs.m4.
-
-define(`CALL',`call	GSYM_PREFIX`'$1')
-
-
-dnl  Usage: JUMPTABSECT
-
-define(`JUMPTABSECT', `RODATA')
-
-
-dnl  Usage: JMPENT(targlabel,tablabel)
-
-define(`JMPENT', `.long	$1-$2')
-
-
-dnl  Usage: FUNC_ENTRY(nregparmas)
-dnl  Usage: FUNC_EXIT()
-
-dnl  FUNC_ENTRY and FUNC_EXIT provide an easy path for adoption of standard
-dnl  ABI assembly to the DOS64 ABI.
-
-define(`FUNC_ENTRY',
-	`push	%rdi
-	push	%rsi
-	mov	%rcx, %rdi
-ifelse(eval($1>=2),1,`dnl
-	mov	%rdx, %rsi
-ifelse(eval($1>=3),1,`dnl
-	mov	%r8, %rdx
-ifelse(eval($1>=4),1,`dnl
-	mov	%r9, %rcx
-')')')')
-
-define(`FUNC_EXIT',
-	`pop	%rsi
-	pop	%rdi')
-
-
-dnl  Target ABI macros.  For DOS64 we override the defaults.
-
-define(`IFDOS',   `$1')
-define(`IFSTD',   `')
-define(`IFELF',   `')
-
-
-dnl  Usage: PROTECT(symbol)
-dnl
-dnl  Used for private GMP symbols that should never be overridden by users.
-dnl  This can save reloc entries and improve shlib sharing as well as
-dnl  application startup times
-
-define(`PROTECT',  `')
-
-
-divert`'dnl
diff --git a/gmp/mpn/x86_64/fastavx/copyd.asm b/gmp/mpn/x86_64/fastavx/copyd.asm
deleted file mode 100644
index 41c55de5ca..0000000000
--- a/gmp/mpn/x86_64/fastavx/copyd.asm
+++ /dev/null
@@ -1,171 +0,0 @@
-dnl  AMD64 mpn_copyd optimised for CPUs with fast AVX.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003, 2005, 2007, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb aligned	      unaligned	      best seen	     for cpu?
-C AMD K8,K9	n/a
-C AMD K10	n/a
-C AMD bull	n/a
-C AMD pile	 4.87		 4.87				N
-C AMD steam	 ?		 ?
-C AMD bobcat	n/a
-C AMD jaguar	n/a
-C Intel P4	n/a
-C Intel core	n/a
-C Intel NHM	n/a
-C Intel SBR	 0.50		 0.91				N
-C Intel IBR	 ?
-C Intel HWL	 0.25		 0.30				Y
-C Intel BWL	 ?
-C Intel atom	n/a
-C VIA nano	n/a
-
-C We try to do as many 32-byte operations as possible.  The top-most and
-C bottom-most writes might need 8-byte operations.  For the bulk copying, we
-C write using aligned 32-byte operations, but we read with both aligned and
-C unaligned 32-byte operations.
-
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`n',  `%rdx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-dnl define(`vmovdqu', vlddqu)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_copyd)
-	FUNC_ENTRY(3)
-
-	lea	-32(rp,n,8), rp
-	lea	-32(up,n,8), up
-
-	cmp	$7, n			C basecase needed for correctness
-	jbe	L(bc)
-
-	test	$8, R8(rp)		C is rp 16-byte aligned?
-	jz	L(a2)			C jump if rp aligned
-	mov	24(up), %rax
-	lea	-8(up), up
-	mov	%rax, 24(rp)
-	lea	-8(rp), rp
-	dec	n
-L(a2):	test	$16, R8(rp)		C is rp 32-byte aligned?
-	jz	L(a3)			C jump if rp aligned
-	vmovdqu	16(up), %xmm0
-	lea	-16(up), up
-	vmovdqa	%xmm0, 16(rp)
-	lea	-16(rp), rp
-	sub	$2, n
-L(a3):	sub	$16, n
-	jc	L(sma)
-
-	ALIGN(16)
-L(top):	vmovdqu	(up), %ymm0
-	vmovdqu	-32(up), %ymm1
-	vmovdqu	-64(up), %ymm2
-	vmovdqu	-96(up), %ymm3
-	lea	-128(up), up
-	vmovdqa	%ymm0, (rp)
-	vmovdqa	%ymm1, -32(rp)
-	vmovdqa	%ymm2, -64(rp)
-	vmovdqa	%ymm3, -96(rp)
-	lea	-128(rp), rp
-L(ali):	sub	$16, n
-	jnc	L(top)
-
-L(sma):	test	$8, R8(n)
-	jz	1f
-	vmovdqu	(up), %ymm0
-	vmovdqu	-32(up), %ymm1
-	lea	-64(up), up
-	vmovdqa	%ymm0, (rp)
-	vmovdqa	%ymm1, -32(rp)
-	lea	-64(rp), rp
-1:
-	test	$4, R8(n)
-	jz	1f
-	vmovdqu	(up), %ymm0
-	lea	-32(up), up
-	vmovdqa	%ymm0, (rp)
-	lea	-32(rp), rp
-1:
-	test	$2, R8(n)
-	jz	1f
-	vmovdqu	16(up), %xmm0
-	lea	-16(up), up
-	vmovdqa	%xmm0, 16(rp)
-	lea	-16(rp), rp
-1:
-	test	$1, R8(n)
-	jz	1f
-	mov	24(up), %r8
-	mov	%r8, 24(rp)
-1:
-	FUNC_EXIT()
-	ret
-
-	ALIGN(16)
-L(bc):	test	$4, R8(n)
-	jz	1f
-	mov	24(up), %rax
-	mov	16(up), %rcx
-	mov	8(up), %r8
-	mov	(up), %r9
-	lea	-32(up), up
-	mov	%rax, 24(rp)
-	mov	%rcx, 16(rp)
-	mov	%r8, 8(rp)
-	mov	%r9, (rp)
-	lea	-32(rp), rp
-1:
-	test	$2, R8(n)
-	jz	1f
-	mov	24(up), %rax
-	mov	16(up), %rcx
-	lea	-16(up), up
-	mov	%rax, 24(rp)
-	mov	%rcx, 16(rp)
-	lea	-16(rp), rp
-1:
-	test	$1, R8(n)
-	jz	1f
-	mov	24(up), %rax
-	mov	%rax, 24(rp)
-1:
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fastavx/copyi.asm b/gmp/mpn/x86_64/fastavx/copyi.asm
deleted file mode 100644
index 97264ef837..0000000000
--- a/gmp/mpn/x86_64/fastavx/copyi.asm
+++ /dev/null
@@ -1,168 +0,0 @@
-dnl  AMD64 mpn_copyi optimised for CPUs with fast AVX.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003, 2005, 2007, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb aligned	      unaligned	      best seen	     for cpu?
-C AMD K8,K9	n/a
-C AMD K10	n/a
-C AMD bull	n/a
-C AMD pile	 4.87		 4.87				N
-C AMD steam	 ?		 ?
-C AMD bobcat	n/a
-C AMD jaguar	n/a
-C Intel P4	n/a
-C Intel core	n/a
-C Intel NHM	n/a
-C Intel SBR	 0.50		 0.91				N
-C Intel IBR	 ?
-C Intel HWL	 0.25		 0.30				Y
-C Intel BWL	 ?
-C Intel atom	n/a
-C VIA nano	n/a
-
-C We try to do as many 32-byte operations as possible.  The top-most and
-C bottom-most writes might need 8-byte operations.  For the bulk copying, we
-C write using aligned 32-byte operations, but we read with both aligned and
-C unaligned 32-byte operations.
-
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`n',  `%rdx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-dnl define(`vmovdqu', vlddqu)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_copyi)
-	FUNC_ENTRY(3)
-
-	cmp	$7, n
-	jbe	L(bc)
-
-	test	$8, R8(rp)		C is rp 16-byte aligned?
-	jz	L(a2)			C jump if rp aligned
-	mov	(up), %rax
-	lea	8(up), up
-	mov	%rax, (rp)
-	lea	8(rp), rp
-	dec	n
-L(a2):	test	$16, R8(rp)		C is rp 32-byte aligned?
-	jz	L(a3)			C jump if rp aligned
-	vmovdqu	(up), %xmm0
-	lea	16(up), up
-	vmovdqa	%xmm0, (rp)
-	lea	16(rp), rp
-	sub	$2, n
-L(a3):	sub	$16, n
-	jc	L(sma)
-
-	ALIGN(16)
-L(top):	vmovdqu	(up), %ymm0
-	vmovdqu	32(up), %ymm1
-	vmovdqu	64(up), %ymm2
-	vmovdqu	96(up), %ymm3
-	lea	128(up), up
-	vmovdqa	%ymm0, (rp)
-	vmovdqa	%ymm1, 32(rp)
-	vmovdqa	%ymm2, 64(rp)
-	vmovdqa	%ymm3, 96(rp)
-	lea	128(rp), rp
-L(ali):	sub	$16, n
-	jnc	L(top)
-
-L(sma):	test	$8, R8(n)
-	jz	1f
-	vmovdqu	(up), %ymm0
-	vmovdqu	32(up), %ymm1
-	lea	64(up), up
-	vmovdqa	%ymm0, (rp)
-	vmovdqa	%ymm1, 32(rp)
-	lea	64(rp), rp
-1:
-	test	$4, R8(n)
-	jz	1f
-	vmovdqu	(up), %ymm0
-	lea	32(up), up
-	vmovdqa	%ymm0, (rp)
-	lea	32(rp), rp
-1:
-	test	$2, R8(n)
-	jz	1f
-	vmovdqu	(up), %xmm0
-	lea	16(up), up
-	vmovdqa	%xmm0, (rp)
-	lea	16(rp), rp
-1:
-L(end):	test	$1, R8(n)
-	jz	1f
-	mov	(up), %r8
-	mov	%r8, (rp)
-1:
-	FUNC_EXIT()
-	ret
-
-	ALIGN(16)
-L(bc):	test	$4, R8(n)
-	jz	1f
-	mov	(up), %rax
-	mov	8(up), %rcx
-	mov	16(up), %r8
-	mov	24(up), %r9
-	lea	32(up), up
-	mov	%rax, (rp)
-	mov	%rcx, 8(rp)
-	mov	%r8, 16(rp)
-	mov	%r9, 24(rp)
-	lea	32(rp), rp
-1:
-	test	$2, R8(n)
-	jz	1f
-	mov	(up), %rax
-	mov	8(up), %rcx
-	lea	16(up), up
-	mov	%rax, (rp)
-	mov	%rcx, 8(rp)
-	lea	16(rp), rp
-1:
-	test	$1, R8(n)
-	jz	1f
-	mov	(up), %rax
-	mov	%rax, (rp)
-1:
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fastsse/README b/gmp/mpn/x86_64/fastsse/README
deleted file mode 100644
index 520551ed99..0000000000
--- a/gmp/mpn/x86_64/fastsse/README
+++ /dev/null
@@ -1,21 +0,0 @@
-This directory contains code for x86-64 processors with fast
-implementations of SSE operations, hence the name "fastsse".
-
-Current processors that might benefit from this code are:
-
-  AMD K10
-  AMD Bulldozer
-  Intel Nocona
-  Intel Nehalem/Westmere
-  Intel Sandybridge/Ivybridge
-  VIA Nano
-
-Current processors that do not benefit from this code are:
-
-  AMD K8
-  AMD Bobcat
-  Intel Atom
-
-Intel Conroe/Penryn is a border case; its handling of non-aligned
-128-bit memory operands is poor.  VIA Nano also have poor handling of
-non-aligned operands.
diff --git a/gmp/mpn/x86_64/fastsse/com-palignr.asm b/gmp/mpn/x86_64/fastsse/com-palignr.asm
deleted file mode 100644
index d9641e890d..0000000000
--- a/gmp/mpn/x86_64/fastsse/com-palignr.asm
+++ /dev/null
@@ -1,302 +0,0 @@
-dnl  AMD64 mpn_com optimised for CPUs with fast SSE copying and SSSE3.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb     cycles/limb     cycles/limb      good
-C              aligned	      unaligned	      best seen	     for cpu?
-C AMD K8,K9	 2.0		 illop		1.0/1.0		N
-C AMD K10	 0.85		 illop				Y/N
-C AMD bd1	 1.39		 ? 1.45				Y/N
-C AMD bobcat	 1.97		 ? 8.17		1.5/1.5		N
-C Intel P4	 2.26		 illop				Y/N
-C Intel core2	 0.52		 0.82		opt/0.74	Y
-C Intel NHM	 0.52		 0.65		opt/opt		Y
-C Intel SBR	 0.51		 0.55		opt/0.51	Y
-C Intel atom	 1.16		 1.70		opt/opt		Y
-C VIA nano	 1.09		 1.10		opt/opt		Y
-
-C We use only 16-byte operations, except for unaligned top-most and bottom-most
-C limbs.  We use the SSSE3 palignr instruction when rp - up = 8 (mod 16).  That
-C instruction is better adapted to mpn_copyd's needs, so we need to contort the
-C code to use it here.
-C
-C For operands of < COM_SSE_THRESHOLD limbs, we use a plain 64-bit loop, taken
-C from the x86_64 default code.
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`n',  `%rdx')
-
-C There are three instructions for loading an aligned 128-bit quantity.  We use
-C movaps, since it has the shortest coding.
-define(`movdqa', ``movaps'')
-
-ifdef(`COM_SSE_THRESHOLD',`',`define(`COM_SSE_THRESHOLD', 7)')
-
-ASM_START()
-	TEXT
-	ALIGN(64)
-PROLOGUE(mpn_com)
-	FUNC_ENTRY(3)
-
-	cmp	$COM_SSE_THRESHOLD, n
-	jbe	L(bc)
-
-	pcmpeqb	%xmm7, %xmm7		C set to 111...111
-
-	test	$8, R8(rp)		C is rp 16-byte aligned?
-	jz	L(rp_aligned)		C jump if rp aligned
-
-	mov	(up), %r8
-	lea	8(up), up
-	not	%r8
-	mov	%r8, (rp)
-	lea	8(rp), rp
-	dec	n
-
-L(rp_aligned):
-	test	$8, R8(up)
-	jnz	L(uent)
-
-ifelse(eval(COM_SSE_THRESHOLD >= 8),1,
-`	sub	$8, n',
-`	jmp	L(am)')
-
-	ALIGN(16)
-L(atop):movdqa	0(up), %xmm0
-	movdqa	16(up), %xmm1
-	movdqa	32(up), %xmm2
-	movdqa	48(up), %xmm3
-	lea	64(up), up
-	pxor	%xmm7, %xmm0
-	pxor	%xmm7, %xmm1
-	pxor	%xmm7, %xmm2
-	pxor	%xmm7, %xmm3
-	movdqa	%xmm0, (rp)
-	movdqa	%xmm1, 16(rp)
-	movdqa	%xmm2, 32(rp)
-	movdqa	%xmm3, 48(rp)
-	lea	64(rp), rp
-L(am):	sub	$8, n
-	jnc	L(atop)
-
-	test	$4, R8(n)
-	jz	1f
-	movdqa	(up), %xmm0
-	movdqa	16(up), %xmm1
-	lea	32(up), up
-	pxor	%xmm7, %xmm0
-	pxor	%xmm7, %xmm1
-	movdqa	%xmm0, (rp)
-	movdqa	%xmm1, 16(rp)
-	lea	32(rp), rp
-
-1:	test	$2, R8(n)
-	jz	1f
-	movdqa	(up), %xmm0
-	lea	16(up), up
-	pxor	%xmm7, %xmm0
-	movdqa	%xmm0, (rp)
-	lea	16(rp), rp
-
-1:	test	$1, R8(n)
-	jz	1f
-	mov	(up), %r8
-	not	%r8
-	mov	%r8, (rp)
-
-1:	FUNC_EXIT()
-	ret
-
-L(uent):
-C Code handling up - rp = 8 (mod 16)
-
-C FIXME: The code below only handles overlap if it is close to complete, or
-C quite separate: up-rp < 5 or up-rp > 15 limbs
-	lea	-40(up), %rax		C 40 = 5 * GMP_LIMB_BYTES
-	sub	rp, %rax
-	cmp	$80, %rax		C 80 = (15-5) * GMP_LIMB_BYTES
-	jbe	L(bc)			C deflect to plain loop
-
-	sub	$16, n
-	jc	L(uend)
-
-	movdqa	120(up), %xmm3
-
-	sub	$16, n
-	jmp	L(um)
-
-	ALIGN(16)
-L(utop):movdqa	120(up), %xmm3
-	pxor	%xmm7, %xmm0
-	movdqa	%xmm0, -128(rp)
-	sub	$16, n
-L(um):	movdqa	104(up), %xmm2
-	palignr($8, %xmm2, %xmm3)
-	movdqa	88(up), %xmm1
-	pxor	%xmm7, %xmm3
-	movdqa	%xmm3, 112(rp)
-	palignr($8, %xmm1, %xmm2)
-	movdqa	72(up), %xmm0
-	pxor	%xmm7, %xmm2
-	movdqa	%xmm2, 96(rp)
-	palignr($8, %xmm0, %xmm1)
-	movdqa	56(up), %xmm3
-	pxor	%xmm7, %xmm1
-	movdqa	%xmm1, 80(rp)
-	palignr($8, %xmm3, %xmm0)
-	movdqa	40(up), %xmm2
-	pxor	%xmm7, %xmm0
-	movdqa	%xmm0, 64(rp)
-	palignr($8, %xmm2, %xmm3)
-	movdqa	24(up), %xmm1
-	pxor	%xmm7, %xmm3
-	movdqa	%xmm3, 48(rp)
-	palignr($8, %xmm1, %xmm2)
-	movdqa	8(up), %xmm0
-	pxor	%xmm7, %xmm2
-	movdqa	%xmm2, 32(rp)
-	palignr($8, %xmm0, %xmm1)
-	movdqa	-8(up), %xmm3
-	pxor	%xmm7, %xmm1
-	movdqa	%xmm1, 16(rp)
-	palignr($8, %xmm3, %xmm0)
-	lea	128(up), up
-	lea	128(rp), rp
-	jnc	L(utop)
-
-	pxor	%xmm7, %xmm0
-	movdqa	%xmm0, -128(rp)
-
-L(uend):test	$8, R8(n)
-	jz	1f
-	movdqa	56(up), %xmm3
-	movdqa	40(up), %xmm2
-	palignr($8, %xmm2, %xmm3)
-	movdqa	24(up), %xmm1
-	pxor	%xmm7, %xmm3
-	movdqa	%xmm3, 48(rp)
-	palignr($8, %xmm1, %xmm2)
-	movdqa	8(up), %xmm0
-	pxor	%xmm7, %xmm2
-	movdqa	%xmm2, 32(rp)
-	palignr($8, %xmm0, %xmm1)
-	movdqa	-8(up), %xmm3
-	pxor	%xmm7, %xmm1
-	movdqa	%xmm1, 16(rp)
-	palignr($8, %xmm3, %xmm0)
-	lea	64(up), up
-	pxor	%xmm7, %xmm0
-	movdqa	%xmm0, (rp)
-	lea	64(rp), rp
-
-1:	test	$4, R8(n)
-	jz	1f
-	movdqa	24(up), %xmm1
-	movdqa	8(up), %xmm0
-	palignr($8, %xmm0, %xmm1)
-	movdqa	-8(up), %xmm3
-	pxor	%xmm7, %xmm1
-	movdqa	%xmm1, 16(rp)
-	palignr($8, %xmm3, %xmm0)
-	lea	32(up), up
-	pxor	%xmm7, %xmm0
-	movdqa	%xmm0, (rp)
-	lea	32(rp), rp
-
-1:	test	$2, R8(n)
-	jz	1f
-	movdqa	8(up), %xmm0
-	movdqa	-8(up), %xmm3
-	palignr($8, %xmm3, %xmm0)
-	lea	16(up), up
-	pxor	%xmm7, %xmm0
-	movdqa	%xmm0, (rp)
-	lea	16(rp), rp
-
-1:	test	$1, R8(n)
-	jz	1f
-	mov	(up), %r8
-	not	%r8
-	mov	%r8, (rp)
-
-1:	FUNC_EXIT()
-	ret
-
-C Basecase code.  Needed for good small operands speed, and also used by
-C L(uent) as the plain-loop fallback when 5 <= up-rp <= 15 limbs.
-
-L(bc):	lea	-8(rp), rp
-	sub	$4, R32(n)
-	jc	L(end)
-
-ifelse(eval(1 || COM_SSE_THRESHOLD >= 8),1,
-`	ALIGN(16)')
-L(top):	mov	(up), %r8
-	mov	8(up), %r9
-	lea	32(rp), rp
-	mov	16(up), %r10
-	mov	24(up), %r11
-	lea	32(up), up
-	not	%r8
-	not	%r9
-	not	%r10
-	not	%r11
-	mov	%r8, -24(rp)
-	mov	%r9, -16(rp)
-ifelse(eval(1 || COM_SSE_THRESHOLD >= 8),1,
-`	sub	$4, R32(n)')
-	mov	%r10, -8(rp)
-	mov	%r11, (rp)
-ifelse(eval(1 || COM_SSE_THRESHOLD >= 8),1,
-`	jnc	L(top)')
-
-L(end):	test	$1, R8(n)
-	jz	1f
-	mov	(up), %r8
-	not	%r8
-	mov	%r8, 8(rp)
-	lea	8(rp), rp
-	lea	8(up), up
-1:	test	$2, R8(n)
-	jz	1f
-	mov	(up), %r8
-	mov	8(up), %r9
-	not	%r8
-	not	%r9
-	mov	%r8, 8(rp)
-	mov	%r9, 16(rp)
-1:	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fastsse/com.asm b/gmp/mpn/x86_64/fastsse/com.asm
deleted file mode 100644
index 4abb076d3f..0000000000
--- a/gmp/mpn/x86_64/fastsse/com.asm
+++ /dev/null
@@ -1,161 +0,0 @@
-dnl  AMD64 mpn_com optimised for CPUs with fast SSE.
-
-dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb     cycles/limb     cycles/limb      good
-C              aligned	      unaligned	      best seen	     for cpu?
-C AMD K8,K9	 2.0		 2.0				N
-C AMD K10	 0.85		 1.3				Y/N
-C AMD bd1	 1.40		 1.40				Y
-C AMD bobcat	 3.1		 3.1				N
-C Intel P4	 2.28		 illop				Y
-C Intel core2	 1.02		 1.02				N
-C Intel NHM	 0.53		 0.68				Y
-C Intel SBR	 0.51		 0.75				Y
-C Intel atom	 3.68		 3.68				N
-C VIA nano	 1.17		 5.09				Y/N
-
-C We try to do as many 16-byte operations as possible.  The top-most and
-C bottom-most writes might need 8-byte operations.  We can always write using
-C aligned 16-byte operations, we read with both aligned and unaligned 16-byte
-C operations.
-
-C Instead of having separate loops for reading aligned and unaligned, we read
-C using MOVDQU.  This seems to work great except for core2; there performance
-C doubles when reading using MOVDQA (for aligned source).  It is unclear how to
-C best handle the unaligned case there.
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`n',  `%rdx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_com)
-	FUNC_ENTRY(3)
-
-	test	n, n
-	jz	L(don)
-
-	pcmpeqb	%xmm7, %xmm7		C set to 111...111
-
-	test	$8, R8(rp)		C is rp 16-byte aligned?
-	jz	L(ali)			C jump if rp aligned
-	mov	(up), %rax
-	lea	8(up), up
-	not	%rax
-	mov	%rax, (rp)
-	lea	8(rp), rp
-	dec	n
-
-	sub	$14, n
-	jc	L(sma)
-
-	ALIGN(16)
-L(top):	movdqu	(up), %xmm0
-	movdqu	16(up), %xmm1
-	movdqu	32(up), %xmm2
-	movdqu	48(up), %xmm3
-	movdqu	64(up), %xmm4
-	movdqu	80(up), %xmm5
-	movdqu	96(up), %xmm6
-	lea	112(up), up
-	pxor	%xmm7, %xmm0
-	pxor	%xmm7, %xmm1
-	pxor	%xmm7, %xmm2
-	pxor	%xmm7, %xmm3
-	pxor	%xmm7, %xmm4
-	pxor	%xmm7, %xmm5
-	pxor	%xmm7, %xmm6
-	movdqa	%xmm0, (rp)
-	movdqa	%xmm1, 16(rp)
-	movdqa	%xmm2, 32(rp)
-	movdqa	%xmm3, 48(rp)
-	movdqa	%xmm4, 64(rp)
-	movdqa	%xmm5, 80(rp)
-	movdqa	%xmm6, 96(rp)
-	lea	112(rp), rp
-L(ali):	sub	$14, n
-	jnc	L(top)
-
-L(sma):	add	$14, n
-	test	$8, R8(n)
-	jz	1f
-	movdqu	(up), %xmm0
-	movdqu	16(up), %xmm1
-	movdqu	32(up), %xmm2
-	movdqu	48(up), %xmm3
-	lea	64(up), up
-	pxor	%xmm7, %xmm0
-	pxor	%xmm7, %xmm1
-	pxor	%xmm7, %xmm2
-	pxor	%xmm7, %xmm3
-	movdqa	%xmm0, (rp)
-	movdqa	%xmm1, 16(rp)
-	movdqa	%xmm2, 32(rp)
-	movdqa	%xmm3, 48(rp)
-	lea	64(rp), rp
-1:
-	test	$4, R8(n)
-	jz	1f
-	movdqu	(up), %xmm0
-	movdqu	16(up), %xmm1
-	lea	32(up), up
-	pxor	%xmm7, %xmm0
-	pxor	%xmm7, %xmm1
-	movdqa	%xmm0, (rp)
-	movdqa	%xmm1, 16(rp)
-	lea	32(rp), rp
-1:
-	test	$2, R8(n)
-	jz	1f
-	movdqu	(up), %xmm0
-	lea	16(up), up
-	pxor	%xmm7, %xmm0
-	movdqa	%xmm0, (rp)
-	lea	16(rp), rp
-1:
-	test	$1, R8(n)
-	jz	1f
-	mov	(up), %rax
-	not	%rax
-	mov	%rax, (rp)
-1:
-L(don):	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fastsse/copyd-palignr.asm b/gmp/mpn/x86_64/fastsse/copyd-palignr.asm
deleted file mode 100644
index 7430cadc09..0000000000
--- a/gmp/mpn/x86_64/fastsse/copyd-palignr.asm
+++ /dev/null
@@ -1,251 +0,0 @@
-dnl  AMD64 mpn_copyd optimised for CPUs with fast SSE copying and SSSE3.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb     cycles/limb     cycles/limb      good
-C              aligned	      unaligned	      best seen	     for cpu?
-C AMD K8,K9	 2.0		 illop		1.0/1.0		N
-C AMD K10	 0.85		 illop				Y/N
-C AMD bull	 0.70		 0.70				Y
-C AMD pile	 0.68		 0.68				Y
-C AMD steam	 ?		 ?
-C AMD bobcat	 1.97		 8.24		1.5/1.5		N
-C AMD jaguar	 ?		 ?
-C Intel P4	 2.26		 illop				Y/N
-C Intel core	 0.52		0.68-0.80	opt/0.64	Y
-C Intel NHM	 0.52		 0.64		opt/opt		Y
-C Intel SBR	 0.51		 0.51		opt/0.51	Y
-C Intel IBR	 ?		 ?				Y
-C Intel HWL	 0.51		 0.51		0.25/0.25	N
-C Intel atom	 1.16		 1.66		opt/opt		Y
-C VIA nano	 1.08		 1.06		opt/opt		Y
-
-C We use only 16-byte operations, except for unaligned top-most and bottom-most
-C limbs.  We use the SSSE3 palignr instruction when rp - up = 8 (mod 16).
-C
-C For operands of < COPYD_SSE_THRESHOLD limbs, we use a plain 64-bit loop,
-C taken from the x86_64 default code.
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`n',  `%rdx')
-
-C There are three instructions for loading an aligned 128-bit quantity.  We use
-C movaps, since it has the shortest coding.
-define(`movdqa', ``movaps'')
-
-ifdef(`COPYD_SSE_THRESHOLD',`',`define(`COPYD_SSE_THRESHOLD', 7)')
-
-ASM_START()
-	TEXT
-	ALIGN(64)
-PROLOGUE(mpn_copyd)
-	FUNC_ENTRY(3)
-
-	lea	-8(up,n,8), up
-	lea	-8(rp,n,8), rp
-
-	cmp	$COPYD_SSE_THRESHOLD, n
-	jbe	L(bc)
-
-	test	$8, R8(rp)		C is rp 16-byte aligned?
-	jnz	L(rp_aligned)		C jump if rp aligned
-
-	mov	(up), %rax		C copy one limb
-	mov	%rax, (rp)
-	lea	-8(up), up
-	lea	-8(rp), rp
-	dec	n
-
-L(rp_aligned):
-	test	$8, R8(up)
-	jz	L(uent)
-
-ifelse(eval(COPYD_SSE_THRESHOLD >= 8),1,
-`	sub	$8, n',
-`	jmp	L(am)')
-
-	ALIGN(16)
-L(atop):movdqa	-8(up), %xmm0
-	movdqa	-24(up), %xmm1
-	movdqa	-40(up), %xmm2
-	movdqa	-56(up), %xmm3
-	lea	-64(up), up
-	movdqa	%xmm0, -8(rp)
-	movdqa	%xmm1, -24(rp)
-	movdqa	%xmm2, -40(rp)
-	movdqa	%xmm3, -56(rp)
-	lea	-64(rp), rp
-L(am):	sub	$8, n
-	jnc	L(atop)
-
-	test	$4, R8(n)
-	jz	1f
-	movdqa	-8(up), %xmm0
-	movdqa	-24(up), %xmm1
-	lea	-32(up), up
-	movdqa	%xmm0, -8(rp)
-	movdqa	%xmm1, -24(rp)
-	lea	-32(rp), rp
-
-1:	test	$2, R8(n)
-	jz	1f
-	movdqa	-8(up), %xmm0
-	lea	-16(up), up
-	movdqa	%xmm0, -8(rp)
-	lea	-16(rp), rp
-
-1:	test	$1, R8(n)
-	jz	1f
-	mov	(up), %r8
-	mov	%r8, (rp)
-
-1:	FUNC_EXIT()
-	ret
-
-L(uent):sub	$16, n
-	movdqa	(up), %xmm0
-	jc	L(uend)
-
-	ALIGN(16)
-L(utop):sub	$16, n
-	movdqa	-16(up), %xmm1
-	palignr($8, %xmm1, %xmm0)
-	movdqa	%xmm0, -8(rp)
-	movdqa	-32(up), %xmm2
-	palignr($8, %xmm2, %xmm1)
-	movdqa	%xmm1, -24(rp)
-	movdqa	-48(up), %xmm3
-	palignr($8, %xmm3, %xmm2)
-	movdqa	%xmm2, -40(rp)
-	movdqa	-64(up), %xmm0
-	palignr($8, %xmm0, %xmm3)
-	movdqa	%xmm3, -56(rp)
-	movdqa	-80(up), %xmm1
-	palignr($8, %xmm1, %xmm0)
-	movdqa	%xmm0, -72(rp)
-	movdqa	-96(up), %xmm2
-	palignr($8, %xmm2, %xmm1)
-	movdqa	%xmm1, -88(rp)
-	movdqa	-112(up), %xmm3
-	palignr($8, %xmm3, %xmm2)
-	movdqa	%xmm2, -104(rp)
-	movdqa	-128(up), %xmm0
-	palignr($8, %xmm0, %xmm3)
-	movdqa	%xmm3, -120(rp)
-	lea	-128(up), up
-	lea	-128(rp), rp
-	jnc	L(utop)
-
-L(uend):test	$8, R8(n)
-	jz	1f
-	movdqa	-16(up), %xmm1
-	palignr($8, %xmm1, %xmm0)
-	movdqa	%xmm0, -8(rp)
-	movdqa	-32(up), %xmm0
-	palignr($8, %xmm0, %xmm1)
-	movdqa	%xmm1, -24(rp)
-	movdqa	-48(up), %xmm1
-	palignr($8, %xmm1, %xmm0)
-	movdqa	%xmm0, -40(rp)
-	movdqa	-64(up), %xmm0
-	palignr($8, %xmm0, %xmm1)
-	movdqa	%xmm1, -56(rp)
-	lea	-64(up), up
-	lea	-64(rp), rp
-
-1:	test	$4, R8(n)
-	jz	1f
-	movdqa	-16(up), %xmm1
-	palignr($8, %xmm1, %xmm0)
-	movdqa	%xmm0, -8(rp)
-	movdqa	-32(up), %xmm0
-	palignr($8, %xmm0, %xmm1)
-	movdqa	%xmm1, -24(rp)
-	lea	-32(up), up
-	lea	-32(rp), rp
-
-1:	test	$2, R8(n)
-	jz	1f
-	movdqa	-16(up), %xmm1
-	palignr($8, %xmm1, %xmm0)
-	movdqa	%xmm0, -8(rp)
-	lea	-16(up), up
-	lea	-16(rp), rp
-
-1:	test	$1, R8(n)
-	jz	1f
-	mov	(up), %r8
-	mov	%r8, (rp)
-
-1:	FUNC_EXIT()
-	ret
-
-C Basecase code.  Needed for good small operands speed, not for
-C correctness as the above code is currently written.
-
-L(bc):	sub	$4, R32(n)
-	jc	L(end)
-
-	ALIGN(16)
-L(top):	mov	(up), %r8
-	mov	-8(up), %r9
-	lea	-32(rp), rp
-	mov	-16(up), %r10
-	mov	-24(up), %r11
-	lea	-32(up), up
-	mov	%r8, 32(rp)
-	mov	%r9, 24(rp)
-ifelse(eval(COPYD_SSE_THRESHOLD >= 8),1,
-`	sub	$4, R32(n)')
-	mov	%r10, 16(rp)
-	mov	%r11, 8(rp)
-ifelse(eval(COPYD_SSE_THRESHOLD >= 8),1,
-`	jnc	L(top)')
-
-L(end):	test	$1, R8(n)
-	jz	1f
-	mov	(up), %r8
-	mov	%r8, (rp)
-	lea	-8(rp), rp
-	lea	-8(up), up
-1:	test	$2, R8(n)
-	jz	1f
-	mov	(up), %r8
-	mov	-8(up), %r9
-	mov	%r8, (rp)
-	mov	%r9, -8(rp)
-1:	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fastsse/copyd.asm b/gmp/mpn/x86_64/fastsse/copyd.asm
deleted file mode 100644
index 5c6094c7e2..0000000000
--- a/gmp/mpn/x86_64/fastsse/copyd.asm
+++ /dev/null
@@ -1,145 +0,0 @@
-dnl  AMD64 mpn_copyd optimised for CPUs with fast SSE.
-
-dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	    cycles/limb		  good for cpu?
-C AMD K8,K9
-C AMD K10	 0.85			Y
-C AMD bd1	 0.8			Y
-C AMD bobcat
-C Intel P4	 2.28			Y
-C Intel core2	 1
-C Intel NHM	 0.5			Y
-C Intel SBR	 0.5			Y
-C Intel atom
-C VIA nano	 1.1			Y
-
-C We try to do as many 16-byte operations as possible.  The top-most and
-C bottom-most writes might need 8-byte operations.  We can always write using
-C aligned 16-byte operations, we read with both aligned and unaligned 16-byte
-C operations.
-
-C Instead of having separate loops for reading aligned and unaligned, we read
-C using MOVDQU.  This seems to work great except for core2; there performance
-C doubles when reading using MOVDQA (for aligned source).  It is unclear how to
-C best handle the unaligned case there.
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`n',  `%rdx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_copyd)
-	FUNC_ENTRY(3)
-
-	test	n, n
-	jz	L(don)
-
-	lea	-16(rp,n,8), rp
-	lea	-16(up,n,8), up
-
-	test	$8, R8(rp)		C is rp 16-byte aligned?
-	jz	L(ali)			C jump if rp aligned
-	mov	8(up), %rax
-	lea	-8(up), up
-	mov	%rax, 8(rp)
-	lea	-8(rp), rp
-	dec	n
-
-	sub	$16, n
-	jc	L(sma)
-
-	ALIGN(16)
-L(top):	movdqu	(up), %xmm0
-	movdqu	-16(up), %xmm1
-	movdqu	-32(up), %xmm2
-	movdqu	-48(up), %xmm3
-	movdqu	-64(up), %xmm4
-	movdqu	-80(up), %xmm5
-	movdqu	-96(up), %xmm6
-	movdqu	-112(up), %xmm7
-	lea	-128(up), up
-	movdqa	%xmm0, (rp)
-	movdqa	%xmm1, -16(rp)
-	movdqa	%xmm2, -32(rp)
-	movdqa	%xmm3, -48(rp)
-	movdqa	%xmm4, -64(rp)
-	movdqa	%xmm5, -80(rp)
-	movdqa	%xmm6, -96(rp)
-	movdqa	%xmm7, -112(rp)
-	lea	-128(rp), rp
-L(ali):	sub	$16, n
-	jnc	L(top)
-
-L(sma):	test	$8, R8(n)
-	jz	1f
-	movdqu	(up), %xmm0
-	movdqu	-16(up), %xmm1
-	movdqu	-32(up), %xmm2
-	movdqu	-48(up), %xmm3
-	lea	-64(up), up
-	movdqa	%xmm0, (rp)
-	movdqa	%xmm1, -16(rp)
-	movdqa	%xmm2, -32(rp)
-	movdqa	%xmm3, -48(rp)
-	lea	-64(rp), rp
-1:
-	test	$4, R8(n)
-	jz	1f
-	movdqu	(up), %xmm0
-	movdqu	-16(up), %xmm1
-	lea	-32(up), up
-	movdqa	%xmm0, (rp)
-	movdqa	%xmm1, -16(rp)
-	lea	-32(rp), rp
-1:
-	test	$2, R8(n)
-	jz	1f
-	movdqu	(up), %xmm0
-	lea	-16(up), up
-	movdqa	%xmm0, (rp)
-	lea	-16(rp), rp
-1:
-	test	$1, R8(n)
-	jz	1f
-	mov	8(up), %r8
-	mov	%r8, 8(rp)
-1:
-L(don):	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fastsse/copyi-palignr.asm b/gmp/mpn/x86_64/fastsse/copyi-palignr.asm
deleted file mode 100644
index fda3c3500f..0000000000
--- a/gmp/mpn/x86_64/fastsse/copyi-palignr.asm
+++ /dev/null
@@ -1,295 +0,0 @@
-dnl  AMD64 mpn_copyi optimised for CPUs with fast SSE copying and SSSE3.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb     cycles/limb     cycles/limb      good
-C              aligned	      unaligned	      best seen	     for cpu?
-C AMD K8,K9	 2.0		 illop		1.0/1.0		N
-C AMD K10	 0.85		 illop				Y/N
-C AMD bull	 0.70		 0.66				Y
-C AMD pile	 0.68		 0.66				Y
-C AMD steam	 ?		 ?
-C AMD bobcat	 1.97		 8.16		1.5/1.5		N
-C AMD jaguar	 ?		 ?
-C Intel P4	 2.26		 illop				Y/N
-C Intel core	 0.52		 0.64		opt/opt		Y
-C Intel NHM	 0.52		 0.71		opt/opt		Y
-C Intel SBR	 0.51		 0.54		opt/0.51	Y
-C Intel IBR	 ?		 ?				Y
-C Intel HWL	 0.51		 0.52		0.25/0.25	N
-C Intel atom	 1.16		 1.61		opt/opt		Y
-C VIA nano	 1.09		 1.08		opt/opt		Y
-
-C We use only 16-byte operations, except for unaligned top-most and bottom-most
-C limbs.  We use the SSSE3 palignr instruction when rp - up = 8 (mod 16).  That
-C instruction is better adapted to mpn_copyd's needs, so we need to contort the
-C code to use it here.
-C
-C For operands of < COPYI_SSE_THRESHOLD limbs, we use a plain 64-bit loop,
-C taken from the x86_64 default code.
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`n',  `%rdx')
-
-C There are three instructions for loading an aligned 128-bit quantity.  We use
-C movaps, since it has the shortest coding.
-dnl define(`movdqa', ``movaps'')
-
-ifdef(`COPYI_SSE_THRESHOLD',`',`define(`COPYI_SSE_THRESHOLD', 7)')
-
-ASM_START()
-	TEXT
-	ALIGN(64)
-PROLOGUE(mpn_copyi)
-	FUNC_ENTRY(3)
-
-	cmp	$COPYI_SSE_THRESHOLD, n
-	jbe	L(bc)
-
-	test	$8, R8(rp)		C is rp 16-byte aligned?
-	jz	L(rp_aligned)		C jump if rp aligned
-
-	movsq				C copy one limb
-	dec	n
-
-L(rp_aligned):
-	test	$8, R8(up)
-	jnz	L(uent)
-
-ifelse(eval(COPYI_SSE_THRESHOLD >= 8),1,
-`	sub	$8, n',
-`	jmp	L(am)')
-
-	ALIGN(16)
-L(atop):movdqa	0(up), %xmm0
-	movdqa	16(up), %xmm1
-	movdqa	32(up), %xmm2
-	movdqa	48(up), %xmm3
-	lea	64(up), up
-	movdqa	%xmm0, (rp)
-	movdqa	%xmm1, 16(rp)
-	movdqa	%xmm2, 32(rp)
-	movdqa	%xmm3, 48(rp)
-	lea	64(rp), rp
-L(am):	sub	$8, n
-	jnc	L(atop)
-
-	test	$4, R8(n)
-	jz	1f
-	movdqa	(up), %xmm0
-	movdqa	16(up), %xmm1
-	lea	32(up), up
-	movdqa	%xmm0, (rp)
-	movdqa	%xmm1, 16(rp)
-	lea	32(rp), rp
-
-1:	test	$2, R8(n)
-	jz	1f
-	movdqa	(up), %xmm0
-	lea	16(up), up
-	movdqa	%xmm0, (rp)
-	lea	16(rp), rp
-
-1:	test	$1, R8(n)
-	jz	1f
-	mov	(up), %r8
-	mov	%r8, (rp)
-
-1:	FUNC_EXIT()
-	ret
-
-L(uent):
-C Code handling up - rp = 8 (mod 16)
-
-	cmp	$16, n
-	jc	L(ued0)
-
-IFDOS(`	add	$-56, %rsp	')
-IFDOS(`	movdqa	%xmm6, (%rsp)	')
-IFDOS(`	movdqa	%xmm7, 16(%rsp)	')
-IFDOS(`	movdqa	%xmm8, 32(%rsp)	')
-
-	movaps	120(up), %xmm7
-	movaps	104(up), %xmm6
-	movaps	88(up), %xmm5
-	movaps	72(up), %xmm4
-	movaps	56(up), %xmm3
-	movaps	40(up), %xmm2
-	lea	128(up), up
-	sub	$32, n
-	jc	L(ued1)
-
-	ALIGN(16)
-L(utop):movaps	-104(up), %xmm1
-	sub	$16, n
-	movaps	-120(up), %xmm0
-	palignr($8, %xmm6, %xmm7)
-	movaps	-136(up), %xmm8
-	movdqa	%xmm7, 112(rp)
-	palignr($8, %xmm5, %xmm6)
-	movaps	120(up), %xmm7
-	movdqa	%xmm6, 96(rp)
-	palignr($8, %xmm4, %xmm5)
-	movaps	104(up), %xmm6
-	movdqa	%xmm5, 80(rp)
-	palignr($8, %xmm3, %xmm4)
-	movaps	88(up), %xmm5
-	movdqa	%xmm4, 64(rp)
-	palignr($8, %xmm2, %xmm3)
-	movaps	72(up), %xmm4
-	movdqa	%xmm3, 48(rp)
-	palignr($8, %xmm1, %xmm2)
-	movaps	56(up), %xmm3
-	movdqa	%xmm2, 32(rp)
-	palignr($8, %xmm0, %xmm1)
-	movaps	40(up), %xmm2
-	movdqa	%xmm1, 16(rp)
-	palignr($8, %xmm8, %xmm0)
-	lea	128(up), up
-	movdqa	%xmm0, (rp)
-	lea	128(rp), rp
-	jnc	L(utop)
-
-L(ued1):movaps	-104(up), %xmm1
-	movaps	-120(up), %xmm0
-	movaps	-136(up), %xmm8
-	palignr($8, %xmm6, %xmm7)
-	movdqa	%xmm7, 112(rp)
-	palignr($8, %xmm5, %xmm6)
-	movdqa	%xmm6, 96(rp)
-	palignr($8, %xmm4, %xmm5)
-	movdqa	%xmm5, 80(rp)
-	palignr($8, %xmm3, %xmm4)
-	movdqa	%xmm4, 64(rp)
-	palignr($8, %xmm2, %xmm3)
-	movdqa	%xmm3, 48(rp)
-	palignr($8, %xmm1, %xmm2)
-	movdqa	%xmm2, 32(rp)
-	palignr($8, %xmm0, %xmm1)
-	movdqa	%xmm1, 16(rp)
-	palignr($8, %xmm8, %xmm0)
-	movdqa	%xmm0, (rp)
-	lea	128(rp), rp
-
-IFDOS(`	movdqa	(%rsp), %xmm6	')
-IFDOS(`	movdqa	16(%rsp), %xmm7	')
-IFDOS(`	movdqa	32(%rsp), %xmm8	')
-IFDOS(`	add	$56, %rsp	')
-
-L(ued0):test	$8, R8(n)
-	jz	1f
-	movaps	56(up), %xmm3
-	movaps	40(up), %xmm2
-	movaps	24(up), %xmm1
-	movaps	8(up), %xmm0
-	movaps	-8(up), %xmm4
-	palignr($8, %xmm2, %xmm3)
-	movdqa	%xmm3, 48(rp)
-	palignr($8, %xmm1, %xmm2)
-	movdqa	%xmm2, 32(rp)
-	palignr($8, %xmm0, %xmm1)
-	movdqa	%xmm1, 16(rp)
-	palignr($8, %xmm4, %xmm0)
-	lea	64(up), up
-	movdqa	%xmm0, (rp)
-	lea	64(rp), rp
-
-1:	test	$4, R8(n)
-	jz	1f
-	movaps	24(up), %xmm1
-	movaps	8(up), %xmm0
-	palignr($8, %xmm0, %xmm1)
-	movaps	-8(up), %xmm3
-	movdqa	%xmm1, 16(rp)
-	palignr($8, %xmm3, %xmm0)
-	lea	32(up), up
-	movdqa	%xmm0, (rp)
-	lea	32(rp), rp
-
-1:	test	$2, R8(n)
-	jz	1f
-	movdqa	8(up), %xmm0
-	movdqa	-8(up), %xmm3
-	palignr($8, %xmm3, %xmm0)
-	lea	16(up), up
-	movdqa	%xmm0, (rp)
-	lea	16(rp), rp
-
-1:	test	$1, R8(n)
-	jz	1f
-	mov	(up), %r8
-	mov	%r8, (rp)
-
-1:	FUNC_EXIT()
-	ret
-
-C Basecase code.  Needed for good small operands speed, not for
-C correctness as the above code is currently written.
-
-L(bc):	lea	-8(rp), rp
-	sub	$4, R32(n)
-	jc	L(end)
-
-	ALIGN(16)
-L(top):	mov	(up), %r8
-	mov	8(up), %r9
-	lea	32(rp), rp
-	mov	16(up), %r10
-	mov	24(up), %r11
-	lea	32(up), up
-	mov	%r8, -24(rp)
-	mov	%r9, -16(rp)
-ifelse(eval(COPYI_SSE_THRESHOLD >= 8),1,
-`	sub	$4, R32(n)')
-	mov	%r10, -8(rp)
-	mov	%r11, (rp)
-ifelse(eval(COPYI_SSE_THRESHOLD >= 8),1,
-`	jnc	L(top)')
-
-L(end):	test	$1, R8(n)
-	jz	1f
-	mov	(up), %r8
-	mov	%r8, 8(rp)
-	lea	8(rp), rp
-	lea	8(up), up
-1:	test	$2, R8(n)
-	jz	1f
-	mov	(up), %r8
-	mov	8(up), %r9
-	mov	%r8, 8(rp)
-	mov	%r9, 16(rp)
-1:	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fastsse/copyi.asm b/gmp/mpn/x86_64/fastsse/copyi.asm
deleted file mode 100644
index a1a1c231dc..0000000000
--- a/gmp/mpn/x86_64/fastsse/copyi.asm
+++ /dev/null
@@ -1,166 +0,0 @@
-dnl  AMD64 mpn_copyi optimised for CPUs with fast SSE.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	    cycles/limb		  good for cpu?
-C AMD K8,K9
-C AMD K10	 0.85	 1.64		Y/N
-C AMD bd1	 1.4	 1.4		Y
-C AMD bobcat
-C Intel P4	 2.3	 2.3		Y
-C Intel core2	 1.0	 1.0
-C Intel NHM	 0.5	 0.67		Y
-C Intel SBR	 0.5	 0.75		Y
-C Intel atom
-C VIA nano	 1.16	 5.16		Y/N
-
-C We try to do as many 16-byte operations as possible.  The top-most and
-C bottom-most writes might need 8-byte operations.  We can always write using
-C aligned 16-byte operations, we read with both aligned and unaligned 16-byte
-C operations.
-
-C Instead of having separate loops for reading aligned and unaligned, we read
-C using MOVDQU.  This seems to work great except for core2; there performance
-C doubles when reading using MOVDQA (for aligned source).  It is unclear how to
-C best handle the unaligned case there.
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`n',  `%rdx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-dnl define(`movdqu', lddqu)
-
-ASM_START()
-	TEXT
-	ALIGN(64)
-PROLOGUE(mpn_copyi)
-	FUNC_ENTRY(3)
-
-	cmp	$3, n
-	jc	L(bc)
-
-	test	$8, R8(rp)		C is rp 16-byte aligned?
-	jz	L(ali)			C jump if rp aligned
-	movsq				C copy single limb
-	dec	n
-
-	sub	$16, n
-	jc	L(sma)
-
-	ALIGN(16)
-L(top):	movdqu	(up), %xmm0
-	movdqu	16(up), %xmm1
-	movdqu	32(up), %xmm2
-	movdqu	48(up), %xmm3
-	movdqu	64(up), %xmm4
-	movdqu	80(up), %xmm5
-	movdqu	96(up), %xmm6
-	movdqu	112(up), %xmm7
-	lea	128(up), up
-	movdqa	%xmm0, (rp)
-	movdqa	%xmm1, 16(rp)
-	movdqa	%xmm2, 32(rp)
-	movdqa	%xmm3, 48(rp)
-	movdqa	%xmm4, 64(rp)
-	movdqa	%xmm5, 80(rp)
-	movdqa	%xmm6, 96(rp)
-	movdqa	%xmm7, 112(rp)
-	lea	128(rp), rp
-L(ali):	sub	$16, n
-	jnc	L(top)
-
-L(sma):	test	$8, R8(n)
-	jz	1f
-	movdqu	(up), %xmm0
-	movdqu	16(up), %xmm1
-	movdqu	32(up), %xmm2
-	movdqu	48(up), %xmm3
-	lea	64(up), up
-	movdqa	%xmm0, (rp)
-	movdqa	%xmm1, 16(rp)
-	movdqa	%xmm2, 32(rp)
-	movdqa	%xmm3, 48(rp)
-	lea	64(rp), rp
-1:
-	test	$4, R8(n)
-	jz	1f
-	movdqu	(up), %xmm0
-	movdqu	16(up), %xmm1
-	lea	32(up), up
-	movdqa	%xmm0, (rp)
-	movdqa	%xmm1, 16(rp)
-	lea	32(rp), rp
-1:
-	test	$2, R8(n)
-	jz	1f
-	movdqu	(up), %xmm0
-	lea	16(up), up
-	movdqa	%xmm0, (rp)
-	lea	16(rp), rp
-	ALIGN(16)
-1:
-L(end):	test	$1, R8(n)
-	jz	1f
-	mov	(up), %r8
-	mov	%r8, (rp)
-1:
-	FUNC_EXIT()
-	ret
-
-C Basecase code.  Needed for good small operands speed, not for
-C correctness as the above code is currently written.
-
-L(bc):	sub	$2, n
-	jc	L(end)
-	ALIGN(16)
-1:	mov	(up), %rax
-	mov	8(up), %rcx
-	lea	16(up), up
-	mov	%rax, (rp)
-	mov	%rcx, 8(rp)
-	lea	16(rp), rp
-	sub	$2, n
-	jnc	1b
-
-	test	$1, R8(n)
-	jz	L(ret)
-	mov	(up), %rax
-	mov	%rax, (rp)
-L(ret):	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fastsse/lshift-movdqu2.asm b/gmp/mpn/x86_64/fastsse/lshift-movdqu2.asm
deleted file mode 100644
index a05e850a1f..0000000000
--- a/gmp/mpn/x86_64/fastsse/lshift-movdqu2.asm
+++ /dev/null
@@ -1,182 +0,0 @@
-dnl  AMD64 mpn_lshift optimised for CPUs with fast SSE including fast movdqu.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb     cycles/limb     cycles/limb    good
-C              aligned	      unaligned	      best seen	   for cpu?
-C AMD K8,K9	 3		 3		 2.35	  no, use shl/shr
-C AMD K10	 1.5-1.8	 1.5-1.8	 1.33	  yes
-C AMD bd1	 1.7-1.9	 1.7-1.9	 1.33	  yes
-C AMD bobcat	 3.17		 3.17			  yes, bad for n < 20
-C Intel P4	 4.67		 4.67		 2.7	  no, slow movdqu
-C Intel core2	 2.15		 2.15		 1.25	  no, use shld/shrd
-C Intel NHM	 1.66		 1.66		 1.25	  no, use shld/shrd
-C Intel SBR	 1.3		 1.3		 1.25	  yes, bad for n = 4-6
-C Intel atom	11.7		11.7		 4.5	  no
-C VIA nano	 5.7		 5.95		 2.0	  no, slow movdqu
-
-C We try to do as many aligned 16-byte operations as possible.  The top-most
-C and bottom-most writes might need 8-byte operations.
-C
-C This variant rely on fast load movdqu, and uses it even for aligned operands,
-C in order to avoid the need for two separate loops.
-C
-C TODO
-C  * Could 2-limb wind-down code be simplified?
-C  * Improve basecase code, using shld/shrd for SBR, discrete integer shifts
-C    for other affected CPUs.
-
-C INPUT PARAMETERS
-define(`rp',  `%rdi')
-define(`ap',  `%rsi')
-define(`n',   `%rdx')
-define(`cnt', `%rcx')
-
-ASM_START()
-	TEXT
-	ALIGN(64)
-PROLOGUE(mpn_lshift)
-	FUNC_ENTRY(4)
-	movd	R32(%rcx), %xmm4
-	mov	$64, R32(%rax)
-	sub	R32(%rcx), R32(%rax)
-	movd	R32(%rax), %xmm5
-
-	neg	R32(%rcx)
-	mov	-8(ap,n,8), %rax
-	shr	R8(%rcx), %rax
-
-	cmp	$3, n
-	jle	L(bc)
-
-	lea	(rp,n,8), R32(%rcx)
-	test	$8, R8(%rcx)
-	jz	L(rp_aligned)
-
-C Do one initial limb in order to make rp aligned
-	movq	-8(ap,n,8), %xmm0
-	movq	-16(ap,n,8), %xmm1
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	movq	%xmm0, -8(rp,n,8)
-	dec	n
-
-L(rp_aligned):
-	lea	1(n), %r8d
-
-	and	$6, R32(%r8)
-	jz	L(ba0)
-	cmp	$4, R32(%r8)
-	jz	L(ba4)
-	jc	L(ba2)
-L(ba6):	add	$-4, n
-	jmp	L(i56)
-L(ba0):	add	$-6, n
-	jmp	L(i70)
-L(ba4):	add	$-2, n
-	jmp	L(i34)
-L(ba2):	add	$-8, n
-	jle	L(end)
-
-	ALIGN(16)
-L(top):	movdqu	40(ap,n,8), %xmm1
-	movdqu	48(ap,n,8), %xmm0
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, 48(rp,n,8)
-L(i70):
-	movdqu	24(ap,n,8), %xmm1
-	movdqu	32(ap,n,8), %xmm0
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, 32(rp,n,8)
-L(i56):
-	movdqu	8(ap,n,8), %xmm1
-	movdqu	16(ap,n,8), %xmm0
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, 16(rp,n,8)
-L(i34):
-	movdqu	-8(ap,n,8), %xmm1
-	movdqu	(ap,n,8), %xmm0
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, (rp,n,8)
-	sub	$8, n
-	jg	L(top)
-
-L(end):	test	$1, R8(n)
-	jnz	L(end8)
-
-	movdqu	(ap), %xmm1
-	pxor	%xmm0, %xmm0
-	punpcklqdq  %xmm1, %xmm0
-	psllq	%xmm4, %xmm1
-	psrlq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, (rp)
-	FUNC_EXIT()
-	ret
-
-C Basecase
-	ALIGN(16)
-L(bc):	dec	R32(n)
-	jz	L(end8)
-
-	movq	(ap,n,8), %xmm1
-	movq	-8(ap,n,8), %xmm0
-	psllq	%xmm4, %xmm1
-	psrlq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	movq	%xmm0, (rp,n,8)
-	sub	$2, R32(n)
-	jl	L(end8)
-	movq	8(ap), %xmm1
-	movq	(ap), %xmm0
-	psllq	%xmm4, %xmm1
-	psrlq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	movq	%xmm0, 8(rp)
-
-L(end8):movq	(ap), %xmm0
-	psllq	%xmm4, %xmm0
-	movq	%xmm0, (rp)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fastsse/lshift.asm b/gmp/mpn/x86_64/fastsse/lshift.asm
deleted file mode 100644
index f76972a22f..0000000000
--- a/gmp/mpn/x86_64/fastsse/lshift.asm
+++ /dev/null
@@ -1,169 +0,0 @@
-dnl  AMD64 mpn_lshift optimised for CPUs with fast SSE.
-
-dnl  Contributed to the GNU project by David Harvey and Torbjorn Granlund.
-
-dnl  Copyright 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb	     cycles/limb	      good
-C          16-byte aligned         16-byte unaligned	    for cpu?
-C AMD K8,K9	 ?			 ?
-C AMD K10	 1.68  (1.45)		 1.75  (1.49)		Y
-C AMD bd1	 1.82  (1.75)		 1.82  (1.75)		Y
-C AMD bobcat	 4			 4
-C Intel P4	 3     (2.7)		 3     (2.7)		Y
-C Intel core2	 2.05  (1.67)		 2.55  (1.75)
-C Intel NHM	 2.05  (1.75)		 2.09  (2)
-C Intel SBR	 1.5   (1.3125)		 1.5   (1.4375)		Y
-C Intel atom	 ?			 ?
-C VIA nano	 2.25  (2)		 2.5   (2)		Y
-
-C We try to do as many 16-byte operations as possible.  The top-most and
-C bottom-most writes might need 8-byte operations.
-
-C There are two inner-loops, one for when rp = ap (mod 16) and one when this is
-C not true.  The aligned case reads 16+8 bytes, the unaligned case reads
-C 16+8+X bytes, where X is 8 or 16 depending on how punpcklqdq is implemented.
-
-C This is not yet great code:
-C   (1) The unaligned case makes many reads.
-C   (2) We should do some unrolling, at least 2-way.
-C With 2-way unrolling but no scheduling we reach 1.5 c/l on K10 and 2 c/l on
-C Nano.
-
-C INPUT PARAMETERS
-define(`rp',  `%rdi')
-define(`ap',  `%rsi')
-define(`n',   `%rdx')
-define(`cnt', `%rcx')
-
-ASM_START()
-	TEXT
-	ALIGN(64)
-PROLOGUE(mpn_lshift)
-	movd	R32(%rcx), %xmm4
-	mov	$64, R32(%rax)
-	sub	R32(%rcx), R32(%rax)
-	movd	R32(%rax), %xmm5
-
-	neg	R32(%rcx)
-	mov	-8(ap,n,8), %rax
-	shr	R8(%rcx), %rax
-
-	cmp	$2, n
-	jle	L(le2)
-
-	lea	(rp,n,8), R32(%rcx)
-	test	$8, R8(%rcx)
-	je	L(rp_aligned)
-
-C Do one initial limb in order to make rp aligned
-	movq	-8(ap,n,8), %xmm0
-	movq	-16(ap,n,8), %xmm1
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	movq	%xmm0, -8(rp,n,8)
-	dec	n
-
-L(rp_aligned):
-	lea	(ap,n,8), R32(%rcx)
-	test	$8, R8(%rcx)
-	je	L(aent)
-	jmp	L(uent)
-C *****************************************************************************
-
-C Handle the case when ap != rp (mod 16).
-
-	ALIGN(16)
-L(utop):movdqa	-8(ap,n,8), %xmm0
-	movq	(ap,n,8), %xmm1
-	punpcklqdq  8(ap,n,8), %xmm1
-	psllq	%xmm4, %xmm1
-	psrlq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, (rp,n,8)
-L(uent):sub	$2, n
-	ja	L(utop)
-
-	jne	L(end8)
-
-	movq	(ap), %xmm1
-	pxor	%xmm0, %xmm0
-	punpcklqdq  %xmm1, %xmm0
-	punpcklqdq  8(ap), %xmm1
-	psllq	%xmm4, %xmm1
-	psrlq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, (rp)
-	ret
-C *****************************************************************************
-
-C Handle the case when ap = rp (mod 16).
-
-	ALIGN(16)
-L(atop):movdqa	(ap,n,8), %xmm0		C xmm0 = B*ap[n-1] + ap[n-2]
-	movq	-8(ap,n,8), %xmm1	C xmm1 = ap[n-3]
-	punpcklqdq  %xmm0, %xmm1	C xmm1 = B*ap[n-2] + ap[n-3]
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, (rp,n,8)
-L(aent):
-	sub	$2, n
-	ja	L(atop)
-	jne	L(end8)
-
-	movdqa	(ap), %xmm1
-	pxor	%xmm0, %xmm0
-	punpcklqdq  %xmm1, %xmm0
-	psllq	%xmm4, %xmm1
-	psrlq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, (rp)
-	ret
-C *****************************************************************************
-
-	ALIGN(16)
-L(le2):	jne	L(end8)
-
-	movq	8(ap), %xmm0
-	movq	(ap), %xmm1
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	movq	%xmm0, 8(rp)
-
-L(end8):movq	(ap), %xmm0
-	psllq	%xmm4, %xmm0
-	movq	%xmm0, (rp)
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fastsse/lshiftc-movdqu2.asm b/gmp/mpn/x86_64/fastsse/lshiftc-movdqu2.asm
deleted file mode 100644
index 8250910c52..0000000000
--- a/gmp/mpn/x86_64/fastsse/lshiftc-movdqu2.asm
+++ /dev/null
@@ -1,193 +0,0 @@
-dnl  AMD64 mpn_lshiftc optimised for CPUs with fast SSE including fast movdqu.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb     cycles/limb     cycles/limb    good
-C              aligned	      unaligned	      best seen	   for cpu?
-C AMD K8,K9	 3		 3		 ?	  no, use shl/shr
-C AMD K10	 1.8-2.0	 1.8-2.0	 ?	  yes
-C AMD bd1	 1.9		 1.9		 ?	  yes
-C AMD bobcat	 3.67		 3.67			  yes, bad for n < 20
-C Intel P4	 4.75		 4.75		 ?	  no, slow movdqu
-C Intel core2	 2.27		 2.27		 ?	  no, use shld/shrd
-C Intel NHM	 2.15		 2.15		 ?	  no, use shld/shrd
-C Intel SBR	 1.45		 1.45		 ?	  yes, bad for n = 4-6
-C Intel atom	12.9		12.9		 ?	  no
-C VIA nano	 6.18		 6.44		 ?	  no, slow movdqu
-
-C We try to do as many aligned 16-byte operations as possible.  The top-most
-C and bottom-most writes might need 8-byte operations.
-C
-C This variant rely on fast load movdqu, and uses it even for aligned operands,
-C in order to avoid the need for two separate loops.
-C
-C TODO
-C  * Could 2-limb wind-down code be simplified?
-C  * Improve basecase code, using shld/shrd for SBR, discrete integer shifts
-C    for other affected CPUs.
-
-C INPUT PARAMETERS
-define(`rp',  `%rdi')
-define(`ap',  `%rsi')
-define(`n',   `%rdx')
-define(`cnt', `%rcx')
-
-ASM_START()
-	TEXT
-	ALIGN(64)
-PROLOGUE(mpn_lshiftc)
-	FUNC_ENTRY(4)
-	movd	R32(%rcx), %xmm4
-	mov	$64, R32(%rax)
-	sub	R32(%rcx), R32(%rax)
-	movd	R32(%rax), %xmm5
-
-	neg	R32(%rcx)
-	mov	-8(ap,n,8), %rax
-	shr	R8(%rcx), %rax
-
-	pcmpeqb	%xmm3, %xmm3		C set to 111...111
-
-	cmp	$3, n
-	jle	L(bc)
-
-	lea	(rp,n,8), R32(%rcx)
-	test	$8, R8(%rcx)
-	jz	L(rp_aligned)
-
-C Do one initial limb in order to make rp aligned
-	movq	-8(ap,n,8), %xmm0
-	movq	-16(ap,n,8), %xmm1
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	pxor	%xmm3, %xmm0
-	movq	%xmm0, -8(rp,n,8)
-	dec	n
-
-L(rp_aligned):
-	lea	1(n), %r8d
-
-	and	$6, R32(%r8)
-	jz	L(ba0)
-	cmp	$4, R32(%r8)
-	jz	L(ba4)
-	jc	L(ba2)
-L(ba6):	add	$-4, n
-	jmp	L(i56)
-L(ba0):	add	$-6, n
-	jmp	L(i70)
-L(ba4):	add	$-2, n
-	jmp	L(i34)
-L(ba2):	add	$-8, n
-	jle	L(end)
-
-	ALIGN(16)
-L(top):	movdqu	40(ap,n,8), %xmm1
-	movdqu	48(ap,n,8), %xmm0
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	pxor	%xmm3, %xmm0
-	movdqa	%xmm0, 48(rp,n,8)
-L(i70):
-	movdqu	24(ap,n,8), %xmm1
-	movdqu	32(ap,n,8), %xmm0
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	pxor	%xmm3, %xmm0
-	movdqa	%xmm0, 32(rp,n,8)
-L(i56):
-	movdqu	8(ap,n,8), %xmm1
-	movdqu	16(ap,n,8), %xmm0
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	pxor	%xmm3, %xmm0
-	movdqa	%xmm0, 16(rp,n,8)
-L(i34):
-	movdqu	-8(ap,n,8), %xmm1
-	movdqu	(ap,n,8), %xmm0
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	pxor	%xmm3, %xmm0
-	movdqa	%xmm0, (rp,n,8)
-	sub	$8, n
-	jg	L(top)
-
-L(end):	test	$1, R8(n)
-	jnz	L(end8)
-
-	movdqu	(ap), %xmm1
-	pxor	%xmm0, %xmm0
-	punpcklqdq  %xmm1, %xmm0
-	psllq	%xmm4, %xmm1
-	psrlq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	pxor	%xmm3, %xmm0
-	movdqa	%xmm0, (rp)
-	FUNC_EXIT()
-	ret
-
-C Basecase
-	ALIGN(16)
-L(bc):	dec	R32(n)
-	jz	L(end8)
-
-	movq	(ap,n,8), %xmm1
-	movq	-8(ap,n,8), %xmm0
-	psllq	%xmm4, %xmm1
-	psrlq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	pxor	%xmm3, %xmm0
-	movq	%xmm0, (rp,n,8)
-	sub	$2, R32(n)
-	jl	L(end8)
-	movq	8(ap), %xmm1
-	movq	(ap), %xmm0
-	psllq	%xmm4, %xmm1
-	psrlq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	pxor	%xmm3, %xmm0
-	movq	%xmm0, 8(rp)
-
-L(end8):movq	(ap), %xmm0
-	psllq	%xmm4, %xmm0
-	pxor	%xmm3, %xmm0
-	movq	%xmm0, (rp)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fastsse/lshiftc.asm b/gmp/mpn/x86_64/fastsse/lshiftc.asm
deleted file mode 100644
index d2520690e2..0000000000
--- a/gmp/mpn/x86_64/fastsse/lshiftc.asm
+++ /dev/null
@@ -1,179 +0,0 @@
-dnl  AMD64 mpn_lshiftc optimised for CPUs with fast SSE.
-
-dnl  Contributed to the GNU project by David Harvey and Torbjorn Granlund.
-
-dnl  Copyright 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb	     cycles/limb	      good
-C          16-byte aligned         16-byte unaligned	    for cpu?
-C AMD K8,K9	 ?			 ?
-C AMD K10	 1.85  (1.635)		 1.9   (1.67)		Y
-C AMD bd1	 1.82  (1.75)		 1.82  (1.75)		Y
-C AMD bobcat	 4.5			 4.5
-C Intel P4	 3.6   (3.125)		 3.6   (3.125)		Y
-C Intel core2	 2.05  (1.67)		 2.55  (1.75)
-C Intel NHM	 2.05  (1.875)		 2.6   (2.25)
-C Intel SBR	 1.55  (1.44)		 2     (1.57)		Y
-C Intel atom	 ?			 ?
-C VIA nano	 2.5   (2.5)		 2.5   (2.5)		Y
-
-C We try to do as many 16-byte operations as possible.  The top-most and
-C bottom-most writes might need 8-byte operations.  We always write using
-C 16-byte operations, we read with both 8-byte and 16-byte operations.
-
-C There are two inner-loops, one for when rp = ap (mod 16) and one when this is
-C not true.  The aligned case reads 16+8 bytes, the unaligned case reads
-C 16+8+X bytes, where X is 8 or 16 depending on how punpcklqdq is implemented.
-
-C This is not yet great code:
-C   (1) The unaligned case makes too many reads.
-C   (2) We should do some unrolling, at least 2-way.
-C With 2-way unrolling but no scheduling we reach 1.5 c/l on K10 and 2 c/l on
-C Nano.
-
-C INPUT PARAMETERS
-define(`rp',  `%rdi')
-define(`ap',  `%rsi')
-define(`n',   `%rdx')
-define(`cnt', `%rcx')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_lshiftc)
-	movd	R32(%rcx), %xmm4
-	mov	$64, R32(%rax)
-	sub	R32(%rcx), R32(%rax)
-	movd	R32(%rax), %xmm5
-
-	neg	R32(%rcx)
-	mov	-8(ap,n,8), %rax
-	shr	R8(%rcx), %rax
-
-	pcmpeqb	%xmm7, %xmm7		C set to 111...111
-
-	cmp	$2, n
-	jle	L(le2)
-
-	lea	(rp,n,8), R32(%rcx)
-	test	$8, R8(%rcx)
-	je	L(rp_aligned)
-
-C Do one initial limb in order to make rp aligned
-	movq	-8(ap,n,8), %xmm0
-	movq	-16(ap,n,8), %xmm1
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	pxor	%xmm7, %xmm0
-	movq	%xmm0, -8(rp,n,8)
-	dec	n
-
-L(rp_aligned):
-	lea	(ap,n,8), R32(%rcx)
-	test	$8, R8(%rcx)
-	je	L(aent)
-	jmp	L(uent)
-C *****************************************************************************
-
-C Handle the case when ap != rp (mod 16).
-
-	ALIGN(16)
-L(utop):movq	(ap,n,8), %xmm1
-	punpcklqdq  8(ap,n,8), %xmm1
-	movdqa	-8(ap,n,8), %xmm0
-	psllq	%xmm4, %xmm1
-	psrlq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	pxor	%xmm7, %xmm0
-	movdqa	%xmm0, (rp,n,8)
-L(uent):sub	$2, n
-	ja	L(utop)
-
-	jne	L(end8)
-
-	movq	(ap), %xmm1
-	pxor	%xmm0, %xmm0
-	punpcklqdq  %xmm1, %xmm0
-	punpcklqdq  8(ap), %xmm1
-	psllq	%xmm4, %xmm1
-	psrlq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	pxor	%xmm7, %xmm0
-	movdqa	%xmm0, (rp)
-	ret
-C *****************************************************************************
-
-C Handle the case when ap = rp (mod 16).
-
-	ALIGN(16)
-L(atop):movdqa	(ap,n,8), %xmm0		C xmm0 = B*ap[n-1] + ap[n-2]
-	movq	-8(ap,n,8), %xmm1	C xmm1 = ap[n-3]
-	punpcklqdq  %xmm0, %xmm1	C xmm1 = B*ap[n-2] + ap[n-3]
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	pxor	%xmm7, %xmm0
-	movdqa	%xmm0, (rp,n,8)
-L(aent):sub	$2, n
-	ja	L(atop)
-
-	jne	L(end8)
-
-	movdqa	(ap), %xmm0
-	pxor	%xmm1, %xmm1
-	punpcklqdq  %xmm0, %xmm1
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	pxor	%xmm7, %xmm0
-	movdqa	%xmm0, (rp)
-	ret
-C *****************************************************************************
-
-	ALIGN(16)
-L(le2):	jne	L(end8)
-
-	movq	8(ap), %xmm0
-	movq	(ap), %xmm1
-	psllq	%xmm4, %xmm0
-	psrlq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	pxor	%xmm7, %xmm0
-	movq	%xmm0, 8(rp)
-
-L(end8):movq	(ap), %xmm0
-	psllq	%xmm4, %xmm0
-	pxor	%xmm7, %xmm0
-	movq	%xmm0, (rp)
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fastsse/rshift-movdqu2.asm b/gmp/mpn/x86_64/fastsse/rshift-movdqu2.asm
deleted file mode 100644
index 1e270b13c3..0000000000
--- a/gmp/mpn/x86_64/fastsse/rshift-movdqu2.asm
+++ /dev/null
@@ -1,201 +0,0 @@
-dnl  AMD64 mpn_rshift optimised for CPUs with fast SSE including fast movdqu.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb     cycles/limb     cycles/limb    good
-C              aligned	      unaligned	      best seen	   for cpu?
-C AMD K8,K9	 3		 3		 2.35	  no, use shl/shr
-C AMD K10	 1.5-1.8	 1.5-1.8	 1.33	  yes
-C AMD bd1	 1.7-1.9	 1.7-1.9	 1.33	  yes
-C AMD bobcat	 3.17		 3.17			  yes, bad for n < 20
-C Intel P4	 4.67		 4.67		 2.7	  no, slow movdqu
-C Intel core2	 2.15		 2.15		 1.25	  no, use shld/shrd
-C Intel NHM	 1.66		 1.66		 1.25	  no, use shld/shrd
-C Intel SBR	 1.3		 1.3		 1.25	  yes, bad for n = 4-6
-C Intel atom	11.7		11.7		 4.5	  no
-C VIA nano	 5.7		 5.95		 2.0	  no, slow movdqu
-
-C We try to do as many aligned 16-byte operations as possible.  The top-most
-C and bottom-most writes might need 8-byte operations.
-C
-C This variant rely on fast load movdqu, and uses it even for aligned operands,
-C in order to avoid the need for two separate loops.
-C
-C TODO
-C  * Could 2-limb wind-down code be simplified?
-C  * Improve basecase code, using shld/shrd for SBR, discrete integer shifts
-C    for other affected CPUs.
-
-C INPUT PARAMETERS
-define(`rp',  `%rdi')
-define(`ap',  `%rsi')
-define(`n',   `%rdx')
-define(`cnt', `%rcx')
-
-ASM_START()
-	TEXT
-	ALIGN(64)
-PROLOGUE(mpn_rshift)
-	FUNC_ENTRY(4)
-	movd	R32(%rcx), %xmm4
-	mov	$64, R32(%rax)
-	sub	R32(%rcx), R32(%rax)
-	movd	R32(%rax), %xmm5
-
-	neg	R32(%rcx)
-	mov	(ap), %rax
-	shl	R8(%rcx), %rax
-
-	cmp	$3, n
-	jle	L(bc)
-
-	test	$8, R8(rp)
-	jz	L(rp_aligned)
-
-C Do one initial limb in order to make rp aligned
-	movq	(ap), %xmm0
-	movq	8(ap), %xmm1
-	psrlq	%xmm4, %xmm0
-	psllq	%xmm5, %xmm1
-	por	%xmm1, %xmm0
-	movq	%xmm0, (rp)
-	lea	8(ap), ap
-	lea	8(rp), rp
-	dec	n
-
-L(rp_aligned):
-	lea	1(n), %r8d
-	lea	(ap,n,8), ap
-	lea	(rp,n,8), rp
-	neg	n
-
-	and	$6, R32(%r8)
-	jz	L(bu0)
-	cmp	$4, R32(%r8)
-	jz	L(bu4)
-	jc	L(bu2)
-L(bu6):	add	$4, n
-	jmp	L(i56)
-L(bu0):	add	$6, n
-	jmp	L(i70)
-L(bu4):	add	$2, n
-	jmp	L(i34)
-L(bu2):	add	$8, n
-	jge	L(end)
-
-	ALIGN(16)
-L(top):	movdqu	-64(ap,n,8), %xmm1
-	movdqu	-56(ap,n,8), %xmm0
-	psllq	%xmm5, %xmm0
-	psrlq	%xmm4, %xmm1
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, -64(rp,n,8)
-L(i70):
-	movdqu	-48(ap,n,8), %xmm1
-	movdqu	-40(ap,n,8), %xmm0
-	psllq	%xmm5, %xmm0
-	psrlq	%xmm4, %xmm1
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, -48(rp,n,8)
-L(i56):
-	movdqu	-32(ap,n,8), %xmm1
-	movdqu	-24(ap,n,8), %xmm0
-	psllq	%xmm5, %xmm0
-	psrlq	%xmm4, %xmm1
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, -32(rp,n,8)
-L(i34):
-	movdqu	-16(ap,n,8), %xmm1
-	movdqu	-8(ap,n,8), %xmm0
-	psllq	%xmm5, %xmm0
-	psrlq	%xmm4, %xmm1
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, -16(rp,n,8)
-	add	$8, n
-	jl	L(top)
-
-L(end):	test	$1, R8(n)
-	jnz	L(e1)
-
-	movdqu	-16(ap), %xmm1
-	movq	-8(ap), %xmm0
-	psrlq	%xmm4, %xmm1
-	psllq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	movdqa	%xmm0, -16(rp)
-	FUNC_EXIT()
-	ret
-
-L(e1):	movq	-8(ap), %xmm0
-	psrlq	%xmm4, %xmm0
-	movq	%xmm0, -8(rp)
-	FUNC_EXIT()
-	ret
-
-C Basecase
-	ALIGN(16)
-L(bc):	dec	R32(n)
-	jnz	1f
-	movq	(ap), %xmm0
-	psrlq	%xmm4, %xmm0
-	movq	%xmm0, (rp)
-	FUNC_EXIT()
-	ret
-
-1:	movq	(ap), %xmm1
-	movq	8(ap), %xmm0
-	psrlq	%xmm4, %xmm1
-	psllq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	movq	%xmm0, (rp)
-	dec	R32(n)
-	jnz	1f
-	movq	8(ap), %xmm0
-	psrlq	%xmm4, %xmm0
-	movq	%xmm0, 8(rp)
-	FUNC_EXIT()
-	ret
-
-1:	movq	8(ap), %xmm1
-	movq	16(ap), %xmm0
-	psrlq	%xmm4, %xmm1
-	psllq	%xmm5, %xmm0
-	por	%xmm1, %xmm0
-	movq	%xmm0,	8(rp)
-	movq	16(ap), %xmm0
-	psrlq	%xmm4, %xmm0
-	movq	%xmm0, 16(rp)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fastsse/sec_tabselect.asm b/gmp/mpn/x86_64/fastsse/sec_tabselect.asm
deleted file mode 100644
index e3df110be4..0000000000
--- a/gmp/mpn/x86_64/fastsse/sec_tabselect.asm
+++ /dev/null
@@ -1,192 +0,0 @@
-dnl  AMD64 SSE mpn_sec_tabselect.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb     cycles/limb     cycles/limb
-C	      ali,evn n	     unal,evn n	      other cases
-C AMD K8,K9	 1.65		1.65		 1.8
-C AMD K10	 0.78		0.78		 0.85
-C AMD bd1	 0.80		0.91		 1.25
-C AMD bobcat	 2.15		2.15		 2.37
-C Intel P4	 2.5		2.5		 2.95
-C Intel core2	 1.17		1.25		 1.25
-C Intel NHM	 0.87		0.90		 0.90
-C Intel SBR	 0.63		0.79		 0.77
-C Intel atom	 4.3		 4.3		 4.3	slower than plain code
-C VIA nano	 1.4		 5.1		 3.14	too alignment dependent
-
-C NOTES
-C  * We only honour the least significant 32 bits of the `which' and `nents'
-C    arguments to allow efficient code using just SSE2.  We would need to
-C    either use the SSE4_1 pcmpeqq, or find some other SSE2 sequence.
-C  * We use movd for copying between xmm and plain registers, since old gas
-C    rejects movq.  But gas assembles movd as movq when given a 64-bit greg.
-
-define(`rp',     `%rdi')
-define(`tp',     `%rsi')
-define(`n',      `%rdx')
-define(`nents',  `%rcx')
-define(`which',  `%r8')
-
-define(`i',      `%r10')
-define(`j',      `%r9')
-
-C rax  rbx  rcx  rdx  rdi  rsi  rbp   r8   r9  r10  r11  r12  r13  r14  r15
-C          nents  n   rp   tab       which j    i   temp  *    *    *    *
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_sec_tabselect)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')
-
-	movd	which, %xmm8
-	pshufd	$0, %xmm8, %xmm8	C 4 `which' copies
-	mov	$1, R32(%rax)
-	movd	%rax, %xmm9
-	pshufd	$0, %xmm9, %xmm9	C 4 copies of 1
-
-	mov	n, j
-	add	$-8, j
-	js	L(outer_end)
-
-L(outer_top):
-	mov	nents, i
-	mov	tp, %r11
-	pxor	%xmm13, %xmm13
-	pxor	%xmm4, %xmm4
-	pxor	%xmm5, %xmm5
-	pxor	%xmm6, %xmm6
-	pxor	%xmm7, %xmm7
-	ALIGN(16)
-L(top):	movdqa	%xmm8, %xmm0
-	pcmpeqd	%xmm13, %xmm0
-	paddd	%xmm9, %xmm13
-	movdqu	0(tp), %xmm2
-	movdqu	16(tp), %xmm3
-	pand	%xmm0, %xmm2
-	pand	%xmm0, %xmm3
-	por	%xmm2, %xmm4
-	por	%xmm3, %xmm5
-	movdqu	32(tp), %xmm2
-	movdqu	48(tp), %xmm3
-	pand	%xmm0, %xmm2
-	pand	%xmm0, %xmm3
-	por	%xmm2, %xmm6
-	por	%xmm3, %xmm7
-	lea	(tp,n,8), tp
-	add	$-1, i
-	jne	L(top)
-
-	movdqu	%xmm4, 0(rp)
-	movdqu	%xmm5, 16(rp)
-	movdqu	%xmm6, 32(rp)
-	movdqu	%xmm7, 48(rp)
-
-	lea	64(%r11), tp
-	lea	64(rp), rp
-	add	$-8, j
-	jns	L(outer_top)
-L(outer_end):
-
-	test	$4, R8(n)
-	je	L(b0xx)
-L(b1xx):mov	nents, i
-	mov	tp, %r11
-	pxor	%xmm13, %xmm13
-	pxor	%xmm4, %xmm4
-	pxor	%xmm5, %xmm5
-	ALIGN(16)
-L(tp4):	movdqa	%xmm8, %xmm0
-	pcmpeqd	%xmm13, %xmm0
-	paddd	%xmm9, %xmm13
-	movdqu	0(tp), %xmm2
-	movdqu	16(tp), %xmm3
-	pand	%xmm0, %xmm2
-	pand	%xmm0, %xmm3
-	por	%xmm2, %xmm4
-	por	%xmm3, %xmm5
-	lea	(tp,n,8), tp
-	add	$-1, i
-	jne	L(tp4)
-	movdqu	%xmm4, 0(rp)
-	movdqu	%xmm5, 16(rp)
-	lea	32(%r11), tp
-	lea	32(rp), rp
-
-L(b0xx):test	$2, R8(n)
-	je	L(b00x)
-L(b01x):mov	nents, i
-	mov	tp, %r11
-	pxor	%xmm13, %xmm13
-	pxor	%xmm4, %xmm4
-	ALIGN(16)
-L(tp2):	movdqa	%xmm8, %xmm0
-	pcmpeqd	%xmm13, %xmm0
-	paddd	%xmm9, %xmm13
-	movdqu	0(tp), %xmm2
-	pand	%xmm0, %xmm2
-	por	%xmm2, %xmm4
-	lea	(tp,n,8), tp
-	add	$-1, i
-	jne	L(tp2)
-	movdqu	%xmm4, 0(rp)
-	lea	16(%r11), tp
-	lea	16(rp), rp
-
-L(b00x):test	$1, R8(n)
-	je	L(b000)
-L(b001):mov	nents, i
-	mov	tp, %r11
-	pxor	%xmm13, %xmm13
-	pxor	%xmm4, %xmm4
-	ALIGN(16)
-L(tp1):	movdqa	%xmm8, %xmm0
-	pcmpeqd	%xmm13, %xmm0
-	paddd	%xmm9, %xmm13
-	movq	0(tp), %xmm2
-	pand	%xmm0, %xmm2
-	por	%xmm2, %xmm4
-	lea	(tp,n,8), tp
-	add	$-1, i
-	jne	L(tp1)
-	movq	%xmm4, 0(rp)
-
-L(b000):FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fat/fat.c b/gmp/mpn/x86_64/fat/fat.c
deleted file mode 100644
index 1b3f4e48be..0000000000
--- a/gmp/mpn/x86_64/fat/fat.c
+++ /dev/null
@@ -1,368 +0,0 @@
-/* x86_64 fat binary initializers.
-
-   Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
-   Torbjorn Granlund (port to x86_64)
-
-   THE FUNCTIONS AND VARIABLES IN THIS FILE ARE FOR INTERNAL USE ONLY.
-   THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
-   COMPLETELY IN FUTURE GNU MP RELEASES.
-
-Copyright 2003, 2004, 2009, 2011-2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include <stdio.h>    /* for printf */
-#include <stdlib.h>   /* for getenv */
-#include <string.h>
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Change this to "#define TRACE(x) x" for some traces. */
-#define TRACE(x)
-
-
-/* fat_entry.asm */
-long __gmpn_cpuid (char [12], int);
-
-
-#if WANT_FAKE_CPUID
-/* The "name"s in the table are values for the GMP_CPU_TYPE environment
-   variable.  Anything can be used, but for now it's the canonical cpu types
-   as per config.guess/config.sub.  */
-
-#define __gmpn_cpuid            fake_cpuid
-
-#define MAKE_FMS(family, model)						\
-  ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20)			\
-   + (((model) & 0xf) << 4) + (((model)  &  0xf0) << 12))
-
-static struct {
-  const char  *name;
-  const char  vendor[13];
-  unsigned    fms;
-} fake_cpuid_table[] = {
-  { "core2",      "GenuineIntel", MAKE_FMS (6, 0xf) },
-  { "coreinhm",   "GenuineIntel", MAKE_FMS (6, 0x1a) },
-  { "coreiwsm",   "GenuineIntel", MAKE_FMS (6, 0x25) },
-  { "coreisbr",   "GenuineIntel", MAKE_FMS (6, 0x2a) },
-  { "coreihwl",   "GenuineIntel", MAKE_FMS (6, 0x3c) },
-  { "atom",       "GenuineIntel", MAKE_FMS (6, 0x1c) },
-  { "pentium4",   "GenuineIntel", MAKE_FMS (15, 3) },
-
-  { "k8",         "AuthenticAMD", MAKE_FMS (15, 0) },
-  { "k10",        "AuthenticAMD", MAKE_FMS (16, 0) },
-  { "bobcat",     "AuthenticAMD", MAKE_FMS (20, 1) },
-  { "bulldozer",  "AuthenticAMD", MAKE_FMS (21, 1) },
-  { "piledriver", "AuthenticAMD", MAKE_FMS (21, 2) },
-  { "steamroller","AuthenticAMD", MAKE_FMS (21, 0x30) },
-  { "excavator",  "AuthenticAMD", MAKE_FMS (21, 0x60) },
-  { "jaguar",     "AuthenticAMD", MAKE_FMS (22, 1) },
-
-  { "nano",       "CentaurHauls", MAKE_FMS (6, 15) },
-};
-
-static int
-fake_cpuid_lookup (void)
-{
-  char  *s;
-  int   i;
-
-  s = getenv ("GMP_CPU_TYPE");
-  if (s == NULL)
-    {
-      printf ("Need GMP_CPU_TYPE environment variable for fake cpuid\n");
-      abort ();
-    }
-
-  for (i = 0; i < numberof (fake_cpuid_table); i++)
-    if (strcmp (s, fake_cpuid_table[i].name) == 0)
-      return i;
-
-  printf ("GMP_CPU_TYPE=%s unknown\n", s);
-  abort ();
-}
-
-static long
-fake_cpuid (char dst[12], unsigned int id)
-{
-  int  i = fake_cpuid_lookup();
-
-  switch (id) {
-  case 0:
-    memcpy (dst, fake_cpuid_table[i].vendor, 12);
-    return 0;
-  case 1:
-    return fake_cpuid_table[i].fms;
-  case 7:
-    dst[0] = 0xff;				/* BMI1, AVX2, etc */
-    dst[1] = 0xff;				/* BMI2, etc */
-    return 0;
-  case 0x80000001:
-    dst[4 + 29 / 8] = (1 << (29 % 8));		/* "long" mode */
-    return 0;
-  default:
-    printf ("fake_cpuid(): oops, unknown id %d\n", id);
-    abort ();
-  }
-}
-#endif
-
-
-typedef DECL_preinv_divrem_1 ((*preinv_divrem_1_t));
-typedef DECL_preinv_mod_1    ((*preinv_mod_1_t));
-
-struct cpuvec_t __gmpn_cpuvec = {
-  __MPN(add_n_init),
-  __MPN(addlsh1_n_init),
-  __MPN(addlsh2_n_init),
-  __MPN(addmul_1_init),
-  __MPN(addmul_2_init),
-  __MPN(bdiv_dbm1c_init),
-  __MPN(cnd_add_n_init),
-  __MPN(cnd_sub_n_init),
-  __MPN(com_init),
-  __MPN(copyd_init),
-  __MPN(copyi_init),
-  __MPN(divexact_1_init),
-  __MPN(divrem_1_init),
-  __MPN(gcd_1_init),
-  __MPN(lshift_init),
-  __MPN(lshiftc_init),
-  __MPN(mod_1_init),
-  __MPN(mod_1_1p_init),
-  __MPN(mod_1_1p_cps_init),
-  __MPN(mod_1s_2p_init),
-  __MPN(mod_1s_2p_cps_init),
-  __MPN(mod_1s_4p_init),
-  __MPN(mod_1s_4p_cps_init),
-  __MPN(mod_34lsub1_init),
-  __MPN(modexact_1c_odd_init),
-  __MPN(mul_1_init),
-  __MPN(mul_basecase_init),
-  __MPN(mullo_basecase_init),
-  __MPN(preinv_divrem_1_init),
-  __MPN(preinv_mod_1_init),
-  __MPN(redc_1_init),
-  __MPN(redc_2_init),
-  __MPN(rshift_init),
-  __MPN(sqr_basecase_init),
-  __MPN(sub_n_init),
-  __MPN(sublsh1_n_init),
-  __MPN(submul_1_init),
-  0
-};
-
-int __gmpn_cpuvec_initialized = 0;
-
-/* The following setups start with generic x86, then overwrite with
-   specifics for a chip, and higher versions of that chip.
-
-   The arrangement of the setups here will normally be the same as the $path
-   selections in configure.in for the respective chips.
-
-   This code is reentrant and thread safe.  We always calculate the same
-   decided_cpuvec, so if two copies of the code are running it doesn't
-   matter which completes first, both write the same to __gmpn_cpuvec.
-
-   We need to go via decided_cpuvec because if one thread has completed
-   __gmpn_cpuvec then it may be making use of the threshold values in that
-   vector.  If another thread is still running __gmpn_cpuvec_init then we
-   don't want it to write different values to those fields since some of the
-   asm routines only operate correctly up to their own defined threshold,
-   not an arbitrary value.  */
-
-void
-__gmpn_cpuvec_init (void)
-{
-  struct cpuvec_t  decided_cpuvec;
-  char vendor_string[13];
-  char dummy_string[12];
-  long fms;
-  int family, model;
-
-  TRACE (printf ("__gmpn_cpuvec_init:\n"));
-
-  memset (&decided_cpuvec, '\0', sizeof (decided_cpuvec));
-
-  CPUVEC_SETUP_x86_64;
-  CPUVEC_SETUP_fat;
-
-  __gmpn_cpuid (vendor_string, 0);
-  vendor_string[12] = 0;
-
-  fms = __gmpn_cpuid (dummy_string, 1);
-  family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
-  model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
-
-  /* Check extended feature flags */
-  __gmpn_cpuid (dummy_string, 0x80000001);
-  if ((dummy_string[4 + 29 / 8] & (1 << (29 % 8))) == 0)
-    abort (); /* longmode-capable-bit turned off! */
-
-  /*********************************************************/
-  /*** WARNING: keep this list in sync with config.guess ***/
-  /*********************************************************/
-  if (strcmp (vendor_string, "GenuineIntel") == 0)
-    {
-      switch (family)
-	{
-	case 6:
-	  switch (model)
-	    {
-	    case 0x0f:		/* Conroe Merom Kentsfield Allendale */
-	    case 0x10:
-	    case 0x11:
-	    case 0x12:
-	    case 0x13:
-	    case 0x14:
-	    case 0x15:
-	    case 0x16:
-	    case 0x17:		/* PNR Wolfdale Yorkfield */
-	    case 0x18:
-	    case 0x19:
-	    case 0x1d:		/* PNR Dunnington */
-	      CPUVEC_SETUP_core2;
-	      break;
-
-	    case 0x1c:		/* Atom Silverthorne */
-	    case 0x26:		/* Atom Lincroft */
-	    case 0x27:		/* Atom Saltwell? */
-	    case 0x36:		/* Atom Cedarview/Saltwell */
-	      CPUVEC_SETUP_atom;
-	      break;
-
-	    case 0x1a:		/* NHM Gainestown */
-	    case 0x1b:
-	    case 0x1e:		/* NHM Lynnfield/Jasper */
-	    case 0x1f:
-	    case 0x20:
-	    case 0x21:
-	    case 0x22:
-	    case 0x23:
-	    case 0x24:
-	    case 0x25:		/* WSM Clarkdale/Arrandale */
-	    case 0x28:
-	    case 0x29:
-	    case 0x2b:
-	    case 0x2c:		/* WSM Gulftown */
-	    case 0x2e:		/* NHM Beckton */
-	    case 0x2f:		/* WSM Eagleton */
-	    case 0x37:		/* Atom Silvermont */
-	    case 0x4d:		/* Atom Silvermont/Avoton */
-	      CPUVEC_SETUP_core2;
-	      CPUVEC_SETUP_coreinhm;
-	      break;
-
-	    case 0x2a:		/* SB */
-	    case 0x2d:		/* SBC-EP */
-	    case 0x3a:		/* IBR */
-	    case 0x3e:		/* IBR Ivytown */
-	      CPUVEC_SETUP_core2;
-	      CPUVEC_SETUP_coreinhm;
-	      CPUVEC_SETUP_coreisbr;
-	      break;
-	    case 0x3c:		/* Haswell client */
-	    case 0x3d:		/* Broadwell */
-	    case 0x3f:		/* Haswell server */
-	    case 0x45:		/* Haswell ULT */
-	    case 0x46:		/* Crystal Well */
-	    case 0x4f:		/* Broadwell server */
-	    case 0x56:		/* Broadwell microserver */
-	      CPUVEC_SETUP_core2;
-	      CPUVEC_SETUP_coreinhm;
-	      CPUVEC_SETUP_coreisbr;
-	      /* Some Haswells lack BMI2.  Let them appear as Sandybridges for
-		 now.  */
-	      __gmpn_cpuid (dummy_string, 7);
-	      if ((dummy_string[0 + 8 / 8] & (1 << (8 % 8))) != 0)
-		CPUVEC_SETUP_coreihwl;
-	      break;
-	    }
-	  break;
-
-	case 15:
-	  CPUVEC_SETUP_pentium4;
-	  break;
-	}
-    }
-  else if (strcmp (vendor_string, "AuthenticAMD") == 0)
-    {
-      switch (family)
-	{
-	case 0x0f:		/* k8 */
-	case 0x11:		/* "fam 11h", mix of k8 and k10 */
-	case 0x13:
-	case 0x17:
-	  CPUVEC_SETUP_k8;
-	  break;
-
-	case 0x10:		/* k10 */
-	case 0x12:		/* k10 (llano) */
-	  CPUVEC_SETUP_k8;
-	  CPUVEC_SETUP_k10;
-	  break;
-
-	case 0x14:		/* bobcat */
-	case 0x16:		/* jaguar */
-	  CPUVEC_SETUP_k8;
-	  CPUVEC_SETUP_k10;
-	  CPUVEC_SETUP_bobcat;
-	  break;
-
-	case 0x15:	    /* bulldozer, piledriver, steamroller, excavator */
-	  CPUVEC_SETUP_k8;
-	  CPUVEC_SETUP_k10;
-	  CPUVEC_SETUP_bd1;
-	}
-    }
-  else if (strcmp (vendor_string, "CentaurHauls") == 0)
-    {
-      switch (family)
-	{
-	case 6:
-	  if (model >= 15)
-	    CPUVEC_SETUP_nano;
-	  break;
-	}
-    }
-
-  /* There's no x86 generic mpn_preinv_divrem_1 or mpn_preinv_mod_1.
-     Instead default to the plain versions from whichever CPU we detected.
-     The function arguments are compatible, no need for any glue code.  */
-  if (decided_cpuvec.preinv_divrem_1 == NULL)
-    decided_cpuvec.preinv_divrem_1 =(preinv_divrem_1_t)decided_cpuvec.divrem_1;
-  if (decided_cpuvec.preinv_mod_1 == NULL)
-    decided_cpuvec.preinv_mod_1    =(preinv_mod_1_t)   decided_cpuvec.mod_1;
-
-  ASSERT_CPUVEC (decided_cpuvec);
-  CPUVEC_INSTALL (decided_cpuvec);
-
-  /* Set this once the threshold fields are ready.
-     Use volatile to prevent it getting moved.  */
-  *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
-}
diff --git a/gmp/mpn/x86_64/fat/fat_entry.asm b/gmp/mpn/x86_64/fat/fat_entry.asm
deleted file mode 100644
index 764e3d82f2..0000000000
--- a/gmp/mpn/x86_64/fat/fat_entry.asm
+++ /dev/null
@@ -1,204 +0,0 @@
-dnl  x86 fat binary entrypoints.
-
-dnl  Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
-dnl  Torbjorn Granlund (port to x86_64)
-
-dnl  Copyright 2003, 2009, 2011-2014 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-dnl  Forcibly disable profiling.
-dnl
-dnl  The entrypoints and inits are small enough not to worry about, the real
-dnl  routines arrived at will have any profiling.  Also, the way the code
-dnl  here ends with a jump means we won't work properly with the
-dnl  "instrument" profiling scheme anyway.
-
-define(`WANT_PROFILING',no)
-
-
-dnl  We define PRETEND_PIC as a helper symbol, the use it for suppressing
-dnl  normal, fast call code, since that triggers problems on Darwin and
-dnl  OpenBSD.
-
-ifdef(`DARWIN',
-`define(`PRETEND_PIC')')
-ifdef(`OPENBSD',
-`define(`PRETEND_PIC')')
-ifdef(`PIC',
-`define(`PRETEND_PIC')')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-	TEXT
-
-dnl  Usage: FAT_ENTRY(name, offset)
-dnl
-dnl  Emit a fat binary entrypoint function of the given name.  This is the
-dnl  normal entry for applications, eg. __gmpn_add_n.
-dnl
-dnl  The code simply jumps through the function pointer in __gmpn_cpuvec at
-dnl  the given "offset" (in bytes).
-dnl
-dnl  For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be
-dnl  fine for all x86s.
-dnl
-dnl  For ELF/DARWIN PIC, the jumps are 20 bytes each, and are best aligned to
-dnl  16 to ensure at least the first two instructions don't cross a cache line
-dnl  boundary.
-dnl
-dnl  For DOS64, the jumps are 6 bytes.  The same form works also for GNU/Linux
-dnl  (at least with certain assembler/linkers) but FreeBSD 8.2 crashes.  Not
-dnl  tested on Darwin, Slowaris, NetBSD, etc.
-dnl
-dnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
-dnl  grepping in configure, stopping that code trying to eval something with
-dnl  $1 in it.
-
-define(FAT_ENTRY,
-m4_assert_numargs(2)
-`ifdef(`HOST_DOS64',
-`	ALIGN(8)
-`'PROLOGUE($1)
-	jmp	*$2+GSYM_PREFIX`'__gmpn_cpuvec(%rip)
-EPILOGUE()
-',
-`	ALIGN(ifdef(`PIC',16,8))
-`'PROLOGUE($1)
-ifdef(`PRETEND_PIC',
-`	LEA(	GSYM_PREFIX`'__gmpn_cpuvec, %rax)
-	jmp	*$2(%rax)
-',`dnl non-PIC
-	jmp	*GSYM_PREFIX`'__gmpn_cpuvec+$2
-')
-EPILOGUE()
-')')
-
-
-dnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST
-dnl
-
-define(`CPUVEC_offset',0)
-foreach(i,
-`FAT_ENTRY(MPN(i),CPUVEC_offset)
-define(`CPUVEC_offset',eval(CPUVEC_offset + 8))',
-CPUVEC_FUNCS_LIST)
-
-
-dnl  Usage: FAT_INIT(name, offset)
-dnl
-dnl  Emit a fat binary initializer function of the given name.  These
-dnl  functions are the initial values for the pointers in __gmpn_cpuvec.
-dnl
-dnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through
-dnl  the __gmpn_cpuvec pointer, at the given "offset" (in bytes).
-dnl  __gmpn_cpuvec_init will have stored the address of the selected
-dnl  implementation there.
-dnl
-dnl  Only one of these routines will be executed, and only once, since after
-dnl  that all the __gmpn_cpuvec pointers go to real routines.  So there's no
-dnl  need for anything special here, just something small and simple.  To
-dnl  keep code size down, "fat_init" is a shared bit of code, arrived at
-dnl  with the offset in %al.  %al is used since the movb instruction is 2
-dnl  bytes where %eax would be 4.
-dnl
-dnl  Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the
-dnl  HAVE_NATIVE grepping in configure, preventing that code trying to eval
-dnl  something with $1 in it.
-dnl
-dnl  We need to preserve parameter registers over the __gmpn_cpuvec_init call
-
-define(FAT_INIT,
-m4_assert_numargs(2)
-`PROLOGUE($1)
-	mov	$`'$2, %al
-	jmp	L(fat_init)
-EPILOGUE()
-')
-
-dnl  FAT_INIT for each CPUVEC_FUNCS_LIST
-dnl
-
-define(`CPUVEC_offset',0)
-foreach(i,
-`FAT_INIT(MPN(i`'_init),CPUVEC_offset)
-define(`CPUVEC_offset',eval(CPUVEC_offset + 1))',
-CPUVEC_FUNCS_LIST)
-
-L(fat_init):
-	C al	__gmpn_cpuvec byte offset
-
-	movzbl	%al, %eax
-IFSTD(`	push	%rdi	')
-IFSTD(`	push	%rsi	')
-	push	%rdx
-	push	%rcx
-	push	%r8
-	push	%r9
-	push	%rax
-	CALL(	__gmpn_cpuvec_init)
-	pop	%rax
-	pop	%r9
-	pop	%r8
-	pop	%rcx
-	pop	%rdx
-IFSTD(`	pop	%rsi	')
-IFSTD(`	pop	%rdi	')
-ifdef(`PRETEND_PIC',`
-	LEA(	GSYM_PREFIX`'__gmpn_cpuvec, %r10)
-	jmp	*(%r10,%rax,8)
-',`dnl non-PIC
-	jmp	*GSYM_PREFIX`'__gmpn_cpuvec(,%rax,8)
-')
-
-
-C long __gmpn_cpuid (char dst[12], int id);
-C
-C This is called only 3 times, so just something simple and compact is fine.
-C
-C The rcx/ecx zeroing here is needed for the BMI2 check.
-
-define(`rp',  `%rdi')
-define(`idx', `%rsi')
-
-PROLOGUE(__gmpn_cpuid)
-	FUNC_ENTRY(2)
-	mov	%rbx, %r8
-	mov	R32(idx), R32(%rax)
-	xor	%ecx, %ecx
-	cpuid
-	mov	%ebx, (rp)
-	mov	%edx, 4(rp)
-	mov	%ecx, 8(rp)
-	mov	%r8, %rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/fat/gmp-mparam.h b/gmp/mpn/x86_64/fat/gmp-mparam.h
deleted file mode 100644
index 005c893635..0000000000
--- a/gmp/mpn/x86_64/fat/gmp-mparam.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/* Fat binary x86_64 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2003, 2009, 2011 Free Software Foundation,
-Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-
-/* mpn_divexact_1 is faster than mpn_divrem_1 at all sizes.  The only time
-   this might not be true currently is for actual 80386 and 80486 chips,
-   where mpn/x86/dive_1.asm might be slower than mpn/x86/divrem_1.asm, but
-   that's not worth worrying about.  */
-#define DIVEXACT_1_THRESHOLD  0
-
-/* Only some of the x86s have an mpn_preinv_divrem_1, but we set
-   USE_PREINV_DIVREM_1 so that all callers use it, and then let the
-   __gmpn_cpuvec pointer go to plain mpn_divrem_1 if there's not an actual
-   preinv.  */
-#define USE_PREINV_DIVREM_1   1
-
-#define BMOD_1_TO_MOD_1_THRESHOLD           20
-
-/* mpn_sqr_basecase is faster than mpn_mul_basecase at all sizes, no need
-   for mpn_sqr to call the latter.  */
-#define SQR_BASECASE_THRESHOLD 0
-
-/* Sensible fallbacks for these, when not taken from a cpu-specific
-   gmp-mparam.h.  */
-#define MUL_TOOM22_THRESHOLD      20
-#define MUL_TOOM33_THRESHOLD     130
-#define SQR_TOOM2_THRESHOLD       30
-#define SQR_TOOM3_THRESHOLD      200
-
-/* These are values more or less in the middle of what the typical x86 chips
-   come out as.  For a fat binary it's necessary to have values for these,
-   since the defaults for MUL_FFT_TABLE and SQR_FFT_TABLE otherwise come out
-   as non-constant array initializers.  FIXME: Perhaps these should be done
-   in the cpuvec structure like other thresholds.  */
-#define MUL_FFT_TABLE  { 464, 928, 1920, 3584, 10240, 40960, 0 }
-#define MUL_FFT_MODF_THRESHOLD          400
-#define MUL_FFT_THRESHOLD              2000
-
-#define SQR_FFT_TABLE  { 528, 1184, 1920, 4608, 14336, 40960, 0 }
-#define SQR_FFT_MODF_THRESHOLD          500
-#define SQR_FFT_THRESHOLD              3000
diff --git a/gmp/mpn/x86_64/fat/mod_1.c b/gmp/mpn/x86_64/fat/mod_1.c
deleted file mode 100644
index 4f149cc353..0000000000
--- a/gmp/mpn/x86_64/fat/mod_1.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_mod_1.
-
-Copyright 2003, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/mod_1.c"
diff --git a/gmp/mpn/x86_64/fat/mul_basecase.c b/gmp/mpn/x86_64/fat/mul_basecase.c
deleted file mode 100644
index d9eb4718c2..0000000000
--- a/gmp/mpn/x86_64/fat/mul_basecase.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_mul_basecase.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/mul_basecase.c"
diff --git a/gmp/mpn/x86_64/fat/mullo_basecase.c b/gmp/mpn/x86_64/fat/mullo_basecase.c
deleted file mode 100644
index 7f86be64c5..0000000000
--- a/gmp/mpn/x86_64/fat/mullo_basecase.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_mullo_basecase.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/mullo_basecase.c"
diff --git a/gmp/mpn/x86_64/fat/redc_1.c b/gmp/mpn/x86_64/fat/redc_1.c
deleted file mode 100644
index 0025403353..0000000000
--- a/gmp/mpn/x86_64/fat/redc_1.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_redc_1.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/redc_1.c"
diff --git a/gmp/mpn/x86_64/fat/redc_2.c b/gmp/mpn/x86_64/fat/redc_2.c
deleted file mode 100644
index 1932d58323..0000000000
--- a/gmp/mpn/x86_64/fat/redc_2.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_redc_2.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/redc_2.c"
diff --git a/gmp/mpn/x86_64/fat/sqr_basecase.c b/gmp/mpn/x86_64/fat/sqr_basecase.c
deleted file mode 100644
index d1c5dcd2e0..0000000000
--- a/gmp/mpn/x86_64/fat/sqr_basecase.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_sqr_basecase.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-
-#include "mpn/generic/sqr_basecase.c"
diff --git a/gmp/mpn/x86_64/gcd_1.asm b/gmp/mpn/x86_64/gcd_1.asm
deleted file mode 100644
index 252d4174eb..0000000000
--- a/gmp/mpn/x86_64/gcd_1.asm
+++ /dev/null
@@ -1,163 +0,0 @@
-dnl  AMD64 mpn_gcd_1 -- mpn by 1 gcd.
-
-dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for AMD64 by Torbjorn
-dnl  Granlund.
-
-dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/bit (approx)
-C AMD K8,K9	 5.21                 (4.95)
-C AMD K10	 5.15                 (5.00)
-C AMD bd1	 5.42                 (5.14)
-C AMD bobcat	 6.71                 (6.56)
-C Intel P4	13.5                 (12.75)
-C Intel core2	 6.20                 (6.16)
-C Intel NHM	 6.49                 (6.25)
-C Intel SBR	 7.75                 (7.57)
-C Intel atom	 8.77                 (8.54)
-C VIA nano	 6.60                 (6.20)
-C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
-
-C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
-
-deflit(MAXSHIFT, 7)
-deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
-
-DEF_OBJECT(ctz_table,64)
-	.byte	MAXSHIFT
-forloop(i,1,MASK,
-`	.byte	m4_count_trailing_zeros(i)
-')
-END_OBJECT(ctz_table)
-
-C Threshold of when to call bmod when U is one limb.  Should be about
-C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
-define(`BMOD_THRES_LOG2', 8)
-
-C INPUT PARAMETERS
-define(`up',    `%rdi')
-define(`n',     `%rsi')
-define(`v0',    `%rdx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-IFDOS(`define(`STACK_ALLOC', 40)')
-IFSTD(`define(`STACK_ALLOC', 8)')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_gcd_1)
-	FUNC_ENTRY(3)
-	mov	(up), %rax		C U low limb
-	mov	$-1, R32(%rcx)
-	or	v0, %rax		C x | y
-
-L(twos):
-	inc	R32(%rcx)
-	shr	%rax
-	jnc	L(twos)
-
-	shr	R8(%rcx), v0
-	push	%rcx			C common twos
-
-L(divide_strip_y):
-	shr	v0
-	jnc	L(divide_strip_y)
-	adc	v0, v0
-
-	cmp	$1, n
-	jnz	L(reduce_nby1)
-
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
-	mov	(up), %r8
-	mov	%r8, %rax
-	shr	$BMOD_THRES_LOG2, %r8
-	cmp	%r8, v0
-	ja	L(noreduce)
-	push	v0
-	sub	$STACK_ALLOC, %rsp	C maintain ABI required rsp alignment
-
-L(bmod):
-IFDOS(`	mov	%rdx, %r8	')
-IFDOS(`	mov	%rsi, %rdx	')
-IFDOS(`	mov	%rdi, %rcx	')
-	CALL(	mpn_modexact_1_odd)
-
-L(reduced):
-	add	$STACK_ALLOC, %rsp
-	pop	%rdx
-
-L(noreduce):
-	LEA(	ctz_table, %rsi)
-	test	%rax, %rax
-	mov	%rax, %rcx
-	jnz	L(mid)
-	jmp	L(end)
-
-L(reduce_nby1):
-	push	v0
-	sub	$STACK_ALLOC, %rsp	C maintain ABI required rsp alignment
-
-	cmp	$BMOD_1_TO_MOD_1_THRESHOLD, n
-	jl	L(bmod)
-IFDOS(`	mov	%rdx, %r8	')
-IFDOS(`	mov	%rsi, %rdx	')
-IFDOS(`	mov	%rdi, %rcx	')
-	CALL(	mpn_mod_1)
-	jmp	L(reduced)
-
-	ALIGN(16)			C               K8    BC    P4    NHM   SBR
-L(top):	cmovc	%rcx, %rax		C if x-y < 0	0
-	cmovc	%rdi, %rdx		C use x,y-x	0
-L(mid):	and	$MASK, R32(%rcx)	C		0
-	movzbl	(%rsi,%rcx), R32(%rcx)	C		1
-	jz	L(shift_alot)		C		1
-	shr	R8(%rcx), %rax		C		3
-	mov	%rax, %rdi		C		4
-	mov	%rdx, %rcx		C		3
-	sub	%rax, %rcx		C		4
-	sub	%rdx, %rax		C		4
-	jnz	L(top)			C		5
-
-L(end):	pop	%rcx
-	mov	%rdx, %rax
-	shl	R8(%rcx), %rax
-	FUNC_EXIT()
-	ret
-
-L(shift_alot):
-	shr	$MAXSHIFT, %rax
-	mov	%rax, %rcx
-	jmp	L(mid)
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/gmp-mparam.h b/gmp/mpn/x86_64/gmp-mparam.h
index 0dea8c94cd..5e2ed40332 100644
--- a/gmp/mpn/x86_64/gmp-mparam.h
+++ b/gmp/mpn/x86_64/gmp-mparam.h
@@ -1,218 +1,79 @@
-/* AMD K8-K10 gmp-mparam.h -- Compiler/machine parameter header file.
+/* AMD K8 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 2000-2010, 2012 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
 
-or both in parallel, as here.
+/* 2200 MHz Opteron / rev A / 1024 Kibyte cache / socket 940 */
 
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        14
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        28
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      7
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           15
-
-#define MUL_TOOM22_THRESHOLD                27
-#define MUL_TOOM33_THRESHOLD                81
-#define MUL_TOOM44_THRESHOLD               234
-#define MUL_TOOM6H_THRESHOLD               418
-#define MUL_TOOM8H_THRESHOLD               466
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     160
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     145
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     175
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 36
-#define SQR_TOOM3_THRESHOLD                117
-#define SQR_TOOM4_THRESHOLD                327
-#define SQR_TOOM6_THRESHOLD                446
-#define SQR_TOOM8_THRESHOLD                547
-
-#define MULMID_TOOM42_THRESHOLD             36
-
-#define MULMOD_BNM1_THRESHOLD               17
-#define SQRMOD_BNM1_THRESHOLD               17
-
-#define POWM_SEC_TABLE  2,67,322,991
-
-#define MUL_FFT_MODF_THRESHOLD             570  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    570, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
-    {     31, 7}, {     25, 8}, {     13, 7}, {     29, 8}, \
-    {     15, 7}, {     31, 8}, {     17, 7}, {     35, 8}, \
-    {     19, 7}, {     39, 8}, {     21, 7}, {     43, 8}, \
-    {     23, 7}, {     47, 8}, {     25, 7}, {     51, 8}, \
-    {     29, 9}, {     15, 8}, {     37, 9}, {     19, 8}, \
-    {     43, 9}, {     23, 8}, {     51, 9}, {     27, 8}, \
-    {     55,10}, {     15, 9}, {     43,10}, {     23, 9}, \
-    {     55,10}, {     31, 9}, {     63, 5}, {   1023, 4}, \
-    {   2431, 5}, {   1279, 6}, {    671, 7}, {    367, 8}, \
-    {    189, 9}, {     95, 8}, {    195, 9}, {    111,11}, \
-    {     31, 9}, {    131,10}, {     71, 9}, {    155,10}, \
-    {     79, 9}, {    159,10}, {     87,11}, {     47,10}, \
-    {    111,11}, {     63,10}, {    135,11}, {     79,10}, \
-    {    167,11}, {     95,10}, {    191,11}, {    111,12}, \
-    {     63,11}, {    143,10}, {    287,11}, {    159,10}, \
-    {    319,11}, {    175,12}, {     95,11}, {    207,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    543,11}, \
-    {    287,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    335,10}, {    671,11}, {    351,10}, {    703,12}, \
-    {    191,11}, {    383,10}, {    767,11}, {    415,12}, \
-    {    223,13}, {    127,12}, {    255,11}, {    543,12}, \
-    {    287,11}, {    575,10}, {   1151,11}, {    607,12}, \
-    {    319,11}, {    639,10}, {   1279,11}, {    671,12}, \
-    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
-    {    767,12}, {    415,11}, {    831,12}, {    447,14}, \
-    {    127,13}, {    255,12}, {    543,11}, {   1087,12}, \
-    {    607,11}, {   1215,13}, {    319,12}, {    671,11}, \
-    {   1343,12}, {    735,13}, {    383,12}, {    767,11}, \
-    {   1535,12}, {    799,11}, {   1599,12}, {    831,13}, \
-    {    447,12}, {    895,11}, {   1791,12}, {    959,14}, \
-    {    255,13}, {    511,12}, {   1087,13}, {    575,12}, \
-    {   1215,13}, {    639,12}, {   1343,13}, {    703,12}, \
-    {   1407,14}, {    383,13}, {    767,12}, {   1599,13}, \
-    {    831,12}, {   1663,13}, {    895,12}, {   1791,13}, \
-    {    959,15}, {    255,14}, {    511,13}, {   1087,12}, \
-    {   2175,13}, {   1215,14}, {    639,13}, {   1471,14}, \
-    {    767,13}, {   1663,14}, {    895,13}, {   1855,15}, \
-    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
-    {   2431,14}, {   1279,13}, {   2687,14}, {   1407,15}, \
-    {    767,14}, {   1535,13}, {   3071,14}, {   1791,16}, \
-    {    511,15}, {   1023,14}, {   2431,15}, {   1279,14}, \
-    {   2815,15}, {   1535,14}, {   3199,15}, {   1791,14}, \
-    {   3583,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 185
-#define MUL_FFT_THRESHOLD                 7552
-
-#define SQR_FFT_MODF_THRESHOLD             460  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    460, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     12, 5}, {     25, 6}, {     27, 7}, {     14, 6}, \
-    {     29, 7}, {     15, 6}, {     31, 7}, {     29, 8}, \
-    {     15, 7}, {     32, 8}, {     17, 7}, {     35, 8}, \
-    {     19, 7}, {     39, 8}, {     21, 7}, {     43, 8}, \
-    {     25, 7}, {     51, 8}, {     29, 9}, {     15, 8}, \
-    {     35, 9}, {     19, 8}, {     43, 9}, {     23, 8}, \
-    {     51, 9}, {     27, 8}, {     55,10}, {     15, 9}, \
-    {     31, 8}, {     63, 9}, {     43,10}, {     23, 9}, \
-    {     55,11}, {     15,10}, {     31, 9}, {     71,10}, \
-    {     39, 9}, {     83,10}, {     47, 6}, {    767, 4}, \
-    {   3263, 5}, {   1727, 4}, {   3455, 5}, {   1791, 6}, \
-    {    927, 7}, {    479, 6}, {    959, 7}, {    511, 8}, \
-    {    271, 9}, {    147,10}, {     87,11}, {     47,10}, \
-    {     95,12}, {     31,11}, {     63,10}, {    135,11}, \
-    {     79,10}, {    167,11}, {     95,10}, {    191,11}, \
-    {    111,12}, {     63,11}, {    127,10}, {    255,11}, \
-    {    143,10}, {    287, 9}, {    575,10}, {    303,11}, \
-    {    159,12}, {     95,11}, {    191,10}, {    383, 9}, \
-    {    767,10}, {    399,11}, {    207,13}, {     63,12}, \
-    {    127,11}, {    255,10}, {    511,11}, {    271,10}, \
-    {    543,11}, {    287,10}, {    575,12}, {    159,11}, \
-    {    319,10}, {    639,11}, {    335,10}, {    671,11}, \
-    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
-    {    767,11}, {    415,10}, {    831,11}, {    447,13}, \
-    {    127,12}, {    255,11}, {    511,10}, {   1023,11}, \
-    {    543,12}, {    287,11}, {    575,10}, {   1151,11}, \
-    {    607,10}, {   1215,12}, {    319,11}, {    639,10}, \
-    {   1279,11}, {    671,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,12}, {    447,14}, {    127,13}, {    255,12}, \
-    {    511,11}, {   1023,12}, {    543,11}, {   1087,12}, \
-    {    575,11}, {   1151,12}, {    607,13}, {    319,12}, \
-    {    639,11}, {   1279,12}, {    671,11}, {   1343,12}, \
-    {    703,11}, {   1407,12}, {    735,13}, {    383,12}, \
-    {    767,11}, {   1535,12}, {    799,11}, {   1599,12}, \
-    {    831,13}, {    447,12}, {    959,14}, {    255,13}, \
-    {    511,12}, {   1087,13}, {    575,12}, {   1215,13}, \
-    {    639,12}, {   1343,13}, {    703,12}, {   1407,14}, \
-    {    383,13}, {    767,12}, {   1599,13}, {    831,12}, \
-    {   1663,13}, {    895,12}, {   1791,13}, {    959,15}, \
-    {    255,14}, {    511,13}, {   1087,12}, {   2175,13}, \
-    {   1215,14}, {    639,13}, {   1471,14}, {    767,13}, \
-    {   1663,14}, {    895,13}, {   1855,15}, {    511,14}, \
-    {   1023,13}, {   2175,14}, {   1151,13}, {   2303,14}, \
-    {   1279,13}, {   2559,14}, {   1407,15}, {    767,14}, \
-    {   1535,13}, {   3071,14}, {   1791,16}, {    511,15}, \
-    {   1023,14}, {   2303,15}, {   1279,14}, {   2687,15}, \
-    {   1535,14}, {   3199,15}, {   1791,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 203
-#define SQR_FFT_THRESHOLD                 5248
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  35
-#define MULLO_MUL_N_THRESHOLD            15604
-
-#define DC_DIV_QR_THRESHOLD                 56
-#define DC_DIVAPPR_Q_THRESHOLD             220
-#define DC_BDIV_QR_THRESHOLD                52
-#define DC_BDIV_Q_THRESHOLD                152
-
-#define INV_MULMOD_BNM1_THRESHOLD           54
-#define INV_NEWTON_THRESHOLD               226
-#define INV_APPR_THRESHOLD                 214
-
-#define BINV_NEWTON_THRESHOLD              327
-#define REDC_1_TO_REDC_2_THRESHOLD           4
-#define REDC_2_TO_REDC_N_THRESHOLD          79
-
-#define MU_DIV_QR_THRESHOLD               1895
-#define MU_DIVAPPR_Q_THRESHOLD            1895
-#define MUPI_DIV_QR_THRESHOLD              106
-#define MU_BDIV_QR_THRESHOLD              1589
-#define MU_BDIV_Q_THRESHOLD               1718
-
-#define MATRIX22_STRASSEN_THRESHOLD         16
-#define HGCD_THRESHOLD                     125
-#define HGCD_APPR_THRESHOLD                173
-#define HGCD_REDUCE_THRESHOLD             3524
-#define GCD_DC_THRESHOLD                   555
-#define GCDEXT_DC_THRESHOLD                478
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        28
-#define SET_STR_DC_THRESHOLD               248
-#define SET_STR_PRECOMPUTE_THRESHOLD      1648
-
-#define FAC_DSC_THRESHOLD                 1075
-#define FAC_ODD_THRESHOLD                    0  /* always */
+/* Generated by tuneup.c, 2009-01-14, gcc 3.4 */
+
+#define MUL_KARATSUBA_THRESHOLD          28
+#define MUL_TOOM3_THRESHOLD              97
+#define MUL_TOOM44_THRESHOLD            406
+
+#define SQR_BASECASE_THRESHOLD            0  /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD          38
+#define SQR_TOOM3_THRESHOLD             133
+#define SQR_TOOM4_THRESHOLD             547
+
+#define MULLOW_BASECASE_THRESHOLD        27
+#define MULLOW_DC_THRESHOLD              28
+#define MULLOW_MUL_N_THRESHOLD          199
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 74
+#define POWM_THRESHOLD                  146
+
+#define MATRIX22_STRASSEN_THRESHOLD      24
+#define HGCD_THRESHOLD                  143
+#define GCD_DC_THRESHOLD                529
+#define GCDEXT_DC_THRESHOLD             639
+#define JACOBI_BASE_METHOD                1
+
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1_THRESHOLD                 4
+#define MOD_1_2_THRESHOLD                 7
+#define MOD_1_4_THRESHOLD                64
+#define USE_PREINV_DIVREM_1               1  /* native */
+#define USE_PREINV_MOD_1                  1
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             18
+#define GET_STR_PRECOMPUTE_THRESHOLD     32
+#define SET_STR_DC_THRESHOLD            248
+#define SET_STR_PRECOMPUTE_THRESHOLD   2124
+
+#define MUL_FFT_TABLE  { 432, 928, 2624, 3840, 11264, 36864, 147456, 327680, 0 }
+#define MUL_FFT_MODF_THRESHOLD          656
+#define MUL_FFT_THRESHOLD              7936
+
+#define SQR_FFT_TABLE  { 432, 928, 2368, 4352, 11264, 28672, 114688, 327680, 0 }
+#define SQR_FFT_MODF_THRESHOLD          560
+#define SQR_FFT_THRESHOLD              7936
+
+#define MUL_FFT_TABLE2 {{1,4}, {337,5}, {673,6}, {1729,7}, {1793,6}, {2017,7}, {5633,8}, {11009,9}, {11777,8}, {14593,9}, {15873,8}, {16897,9}, {22017,10}, {23553,9}, {29697,10}, {31745,9}, {36353,10}, {39937,9}, {44545,10}, {48129,9}, {50689,10}, {56833,11}, {63489,10}, {78337,11}, {79873,10}, {86017,11}, {88065,10}, {92161,11}, {96257,10}, {106497,11}, {129025,10}, {141313,11}, {145409,10}, {146433,11}, {161793,10}, {167937,11}, {227329,12}, {258049,11}, {326657,12}, {389121,11}, {424961,13}, {516097,12}, {520193,11}, {528385,10}, {538625,11}, {547841,10}, {552961,11}, {587777,12}, {651265,11}, {718849,10}, {719873,12}, {782337,11}, {787457,10}, {791553,11}, {796673,10}, {802817,11}, {849921,10}, {850945,12}, {913409,11}, {915457,13}, {1040385,12}, {1044481,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,12}, {1437697,13}, {1564673,12}, {1568769,11}, {1581057,12}, {1585153,11}, {1595393,12}, {1597441,11}, {1630209,12}, {1699841,11}, {1761281,12}, {1830913,14}, {2080769,13}, {2088961,12}, {2486273,13}, {2613249,12}, {3010561,13}, {3137537,12}, {3534849,13}, {3661825,12}, {3928065,13}, {3964929,14}, {4014081,13}, {4046849,14}, {4136961,13}, {4186113,12}, {4452353,13}, {4710401,12}, {4976641,13}, {5234689,12}, {5238785,13}, {5349377,12}, {5353473,13}, {5758977,12}, {5763073,14}, {6275073,13}, {7856129,14}, {8372225,13}, {9953281,14}, {10469377,13}, {12050433,14}, {12566529,13}, {13623297,14}, {14663681,13}, {15196161,15}, {16744449,14}, {16760833,13}, {17293313,14}, {18857985,13}, {19394561,14}, {MP_SIZE_T_MAX,0}}
+
+#define SQR_FFT_TABLE2 {{1,4}, {305,5}, {609,6}, {1601,7}, {4737,8}, {4865,7}, {5121,8}, {11009,9}, {11777,8}, {13057,9}, {13825,10}, {15361,9}, {15873,8}, {16129,9}, {22017,10}, {23553,9}, {28161,10}, {31745,9}, {36353,10}, {39937,9}, {42497,10}, {56321,11}, {63489,10}, {89601,11}, {96257,10}, {107521,12}, {126977,11}, {129025,10}, {135169,11}, {137217,10}, {139265,11}, {163841,10}, {173057,11}, {195073,9}, {196097,11}, {196609,10}, {201729,11}, {212993,12}, {217089,11}, {221185,12}, {258049,11}, {260609,10}, {261121,9}, {261633,11}, {292865,10}, {296961,11}, {299009,10}, {302081,11}, {325633,12}, {389121,11}, {392193,9}, {392705,11}, {393217,13}, {401409,11}, {404481,13}, {421889,11}, {424961,13}, {516097,12}, {520193,11}, {526337,10}, {532481,11}, {542721,10}, {543745,11}, {593921,12}, {598017,11}, {608257,12}, {610305,11}, {616449,12}, {651265,11}, {653313,10}, {687617,11}, {718849,10}, {749569,12}, {782337,11}, {784385,10}, {788481,11}, {793601,10}, {800769,11}, {802817,10}, {813057,11}, {850945,12}, {913409,11}, {917505,13}, {1040385,12}, {1044481,11}, {1113089,12}, {1175553,11}, {1243137,12}, {1309697,11}, {1347585,12}, {1351681,11}, {1368065,12}, {1437697,11}, {1503233,13}, {1564673,12}, {1568769,11}, {1628161,12}, {1839105,14}, {1851393,12}, {1884161,14}, {2080769,13}, {2088961,12}, {2488321,13}, {2613249,12}, {3010561,13}, {3137537,12}, {3403777,13}, {3661825,12}, {3928065,14}, {4177921,13}, {4186113,12}, {4452353,13}, {4710401,12}, {4976641,13}, {5234689,12}, {5500929,13}, {5758977,12}, {5763073,14}, {6275073,13}, {6283265,12}, {6549505,13}, {7856129,15}, {8011777,14}, {8060929,15}, {8355841,14}, {8372225,13}, {9953281,14}, {10469377,13}, {12050433,14}, {12566529,13}, {13623297,14}, {14663681,13}, {15196161,15}, {16744449,14}, {16760833,13}, {17293313,14}, {23052289,15}, {25133057,14}, {29343745,16}, {MP_SIZE_T_MAX,0}}
 
+#define INV_NEWTON_THRESHOLD             47
+#define BINV_NEWTON_THRESHOLD            18
diff --git a/gmp/mpn/x86_64/invert_limb.asm b/gmp/mpn/x86_64/invert_limb.asm
index cc79b89a2b..44fb83bd10 100644
--- a/gmp/mpn/x86_64/invert_limb.asm
+++ b/gmp/mpn/x86_64/invert_limb.asm
@@ -1,115 +1,121 @@
 dnl  AMD64 mpn_invert_limb -- Invert a normalized limb.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
-
-dnl  Copyright 2004, 2007-2009, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb (approx)	div
-C AMD K8,K9	 48			 71
-C AMD K10	 48			 77
-C Intel P4	135			161
-C Intel core2	 69			116
-C Intel corei	 55			 89
-C Intel atom	129			191
-C VIA nano	 79			157
+C K8:		 40			 71
+C P4:		141			161
+C P6-15 (Core2): 63			116
+C P6-28 (Atom): 130			191
 
 C rax rcx rdx rdi rsi r8
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-PROTECT(`mpn_invert_limb_table')
 
 ASM_START()
 	TEXT
 	ALIGN(16)
-PROLOGUE(mpn_invert_limb)		C			Kn	C2	Ci
-	FUNC_ENTRY(1)
-	mov	%rdi, %rax		C			 0	 0	 0
-	shr	$55, %rax		C			 1	 1	 1
+PROLOGUE(mpn_invert_limb)
+	mov	%rdi, %rax
+	shr	$55, %rax
 ifdef(`PIC',`
 ifdef(`DARWIN',`
-	mov	mpn_invert_limb_table@GOTPCREL(%rip), %r8
+	mov	approx_tab@GOTPCREL(%rip), %r8
 	add	$-512, %r8
 ',`
-	lea	-512+mpn_invert_limb_table(%rip), %r8
+	lea	-512+approx_tab(%rip), %r8
 ')',`
-	movabs	$-512+mpn_invert_limb_table, %r8
+	movabs	$-512+approx_tab, %r8
 ')
-	movzwl	(%r8,%rax,2), R32(%rcx)	C	%rcx = v0
-
-	C v1 = (v0 << 11) - (v0*v0*d40 >> 40) - 1
-	mov	%rdi, %rsi		C			 0	 0	 0
-	mov	R32(%rcx), R32(%rax)	C			 4	 5	 5
-	imul	R32(%rcx), R32(%rcx)	C			 4	 5	 5
-	shr	$24, %rsi		C			 1	 1	 1
-	inc	%rsi			C	%rsi = d40
-	imul	%rsi, %rcx		C			 8	10	 8
-	shr	$40, %rcx		C			12	15	11
-	sal	$11, R32(%rax)		C			 5	 6	 6
-	dec	R32(%rax)
-	sub	R32(%rcx), R32(%rax)	C	%rax = v1
-
-	C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47)
-	mov	$0x1000000000000000, %rcx
-	imul	%rax, %rsi		C			14	17	13
-	sub	%rsi, %rcx
-	imul	%rax, %rcx
-	sal	$13, %rax
-	shr	$47, %rcx
-	add	%rax, %rcx		C	%rcx = v2
-
-	C v3 = (v2 << 31) + (v2 * (2^96 - v2 * d63 + ((v2 >> 1) & mask)) >> 65
-	mov	%rdi, %rsi		C			 0	 0	 0
-	shr	%rsi			C d/2
-	sbb	%rax, %rax		C -d0 = -(d mod 2)
-	sub	%rax, %rsi		C d63 = ceil(d/2)
-	imul	%rcx, %rsi		C v2 * d63
-	and	%rcx, %rax		C v2 * d0
-	shr	%rax			C (v2>>1) * d0
-	sub	%rsi, %rax		C (v2>>1) * d0 - v2 * d63
-	mul	%rcx
-	sal	$31, %rcx
-	shr	%rdx
-	add	%rdx, %rcx		C	%rcx = v3
-
-	mov	%rdi, %rax
-	mul	%rcx
-	add	%rdi, %rax
+	movzwl	(%r8,%rax,2), R32(%rcx)
+	mov	%rdi, %rsi
+	mov	R32(%rcx), R32(%rax)
+	imul	R32(%rcx), R32(%rcx)
+	shr	$32, %rsi
+	imul	%rsi, %rcx
+	shr	$31, %rcx
+	sal	$17, %rax
+	sub	%rcx, %rax
+	mov	%rax, %r8
+	imul	%rax, %rax
+	sal	$33, %r8
+	mul	%rdi
+	neg	%rdx
+	lea	(%r8,%rdx,2), %rax
+	mov	%rax, %r8
+	mul	%rax
+	mov	%rax, %rcx
+	mov	%rdx, %rax
+	mul	%rdi
+	mov	%rax, %rsi
 	mov	%rcx, %rax
-	adc	%rdi, %rdx
-	sub	%rdx, %rax
-
-	FUNC_EXIT()
+	mov	%rdx, %rcx
+	mul	%rdi
+	add	%rdx, %rsi
+	sbb	%rcx, %r8
+	shr	$62, %rsi
+	add	$1, %rsi
+	sal	$2, %r8
+	sub	%rsi, %r8
+	mov	%rdi, %rax
+	mul	%r8
+	add	%rdi, %rax		C xl += d
+	adc	%rdi, %rdx		C xh += d
+	mov	%r8, %rax
+	sub	%rdx, %rax		C return zh - xh
 	ret
 EPILOGUE()
+
+	RODATA
+	ALIGN(2)
+approx_tab:
+	.value	0xffc0,0xfec0,0xfdc0,0xfcc0,0xfbc0,0xfac0,0xfa00,0xf900
+	.value	0xf800,0xf700,0xf640,0xf540,0xf440,0xf380,0xf280,0xf180
+	.value	0xf0c0,0xefc0,0xef00,0xee00,0xed40,0xec40,0xeb80,0xeac0
+	.value	0xe9c0,0xe900,0xe840,0xe740,0xe680,0xe5c0,0xe500,0xe400
+	.value	0xe340,0xe280,0xe1c0,0xe100,0xe040,0xdf80,0xdec0,0xde00
+	.value	0xdd40,0xdc80,0xdbc0,0xdb00,0xda40,0xd980,0xd8c0,0xd800
+	.value	0xd740,0xd680,0xd600,0xd540,0xd480,0xd3c0,0xd340,0xd280
+	.value	0xd1c0,0xd140,0xd080,0xcfc0,0xcf40,0xce80,0xcdc0,0xcd40
+	.value	0xcc80,0xcc00,0xcb40,0xcac0,0xca00,0xc980,0xc8c0,0xc840
+	.value	0xc780,0xc700,0xc640,0xc5c0,0xc540,0xc480,0xc400,0xc380
+	.value	0xc2c0,0xc240,0xc1c0,0xc100,0xc080,0xc000,0xbf80,0xbec0
+	.value	0xbe40,0xbdc0,0xbd40,0xbc80,0xbc00,0xbb80,0xbb00,0xba80
+	.value	0xba00,0xb980,0xb900,0xb840,0xb7c0,0xb740,0xb6c0,0xb640
+	.value	0xb5c0,0xb540,0xb4c0,0xb440,0xb3c0,0xb340,0xb2c0,0xb240
+	.value	0xb1c0,0xb140,0xb0c0,0xb080,0xb000,0xaf80,0xaf00,0xae80
+	.value	0xae00,0xad80,0xad40,0xacc0,0xac40,0xabc0,0xab40,0xaac0
+	.value	0xaa80,0xaa00,0xa980,0xa900,0xa8c0,0xa840,0xa7c0,0xa740
+	.value	0xa700,0xa680,0xa600,0xa5c0,0xa540,0xa4c0,0xa480,0xa400
+	.value	0xa380,0xa340,0xa2c0,0xa240,0xa200,0xa180,0xa140,0xa0c0
+	.value	0xa080,0xa000,0x9f80,0x9f40,0x9ec0,0x9e80,0x9e00,0x9dc0
+	.value	0x9d40,0x9d00,0x9c80,0x9c40,0x9bc0,0x9b80,0x9b00,0x9ac0
+	.value	0x9a40,0x9a00,0x9980,0x9940,0x98c0,0x9880,0x9840,0x97c0
+	.value	0x9780,0x9700,0x96c0,0x9680,0x9600,0x95c0,0x9580,0x9500
+	.value	0x94c0,0x9440,0x9400,0x93c0,0x9340,0x9300,0x92c0,0x9240
+	.value	0x9200,0x91c0,0x9180,0x9100,0x90c0,0x9080,0x9000,0x8fc0
+	.value	0x8f80,0x8f40,0x8ec0,0x8e80,0x8e40,0x8e00,0x8d80,0x8d40
+	.value	0x8d00,0x8cc0,0x8c80,0x8c00,0x8bc0,0x8b80,0x8b40,0x8b00
+	.value	0x8a80,0x8a40,0x8a00,0x89c0,0x8980,0x8940,0x88c0,0x8880
+	.value	0x8840,0x8800,0x87c0,0x8780,0x8740,0x8700,0x8680,0x8640
+	.value	0x8600,0x85c0,0x8580,0x8540,0x8500,0x84c0,0x8480,0x8440
+	.value	0x8400,0x8380,0x8340,0x8300,0x82c0,0x8280,0x8240,0x8200
+	.value	0x81c0,0x8180,0x8140,0x8100,0x80c0,0x8080,0x8040,0x8000
 ASM_END()
diff --git a/gmp/mpn/x86_64/invert_limb_table.asm b/gmp/mpn/x86_64/invert_limb_table.asm
deleted file mode 100644
index 739d59e46c..0000000000
--- a/gmp/mpn/x86_64/invert_limb_table.asm
+++ /dev/null
@@ -1,50 +0,0 @@
-dnl  Table used for mpn_invert_limb
-
-dnl  Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
-
-dnl  Copyright 2004, 2007-2009, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-PROTECT(`mpn_invert_limb_table')
-
-ASM_START()
-C Table entry X contains floor (0x7fd00 / (0x100 + X))
-
-	RODATA
-	ALIGN(2)
-	GLOBL mpn_invert_limb_table
-mpn_invert_limb_table:
-forloop(i,256,512-1,dnl
-`	.value	eval(0x7fd00/i)
-')dnl
-ASM_END()
diff --git a/gmp/mpn/x86_64/k10/gcd_1.asm b/gmp/mpn/x86_64/k10/gcd_1.asm
deleted file mode 100644
index 3d8e5c7ab1..0000000000
--- a/gmp/mpn/x86_64/k10/gcd_1.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  AMD64 mpn_gcd_1.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_gcd_1)
-include_mpn(`x86_64/core2/gcd_1.asm')
diff --git a/gmp/mpn/x86_64/k10/gmp-mparam.h b/gmp/mpn/x86_64/k10/gmp-mparam.h
deleted file mode 100644
index 5881306a40..0000000000
--- a/gmp/mpn/x86_64/k10/gmp-mparam.h
+++ /dev/null
@@ -1,222 +0,0 @@
-/* AMD K10 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2012, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-#if 0
-#undef mpn_sublsh_n
-#define mpn_sublsh_n(rp,up,vp,n,c)					\
-  (((rp) == (up)) ? mpn_submul_1 (rp, vp, n, CNST_LIMB(1) << (c))	\
-   : MPN(mpn_sublsh_n)(rp,up,vp,n,c))
-#endif
-
-/* 3200 MHz K10 Thuban */
-/* FFT tuning limit = 100000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.2 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        17
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        28
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      7
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           15
-
-#define MUL_TOOM22_THRESHOLD                28
-#define MUL_TOOM33_THRESHOLD                81
-#define MUL_TOOM44_THRESHOLD               242
-#define MUL_TOOM6H_THRESHOLD               369
-#define MUL_TOOM8H_THRESHOLD               478
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     154
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     145
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     163
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     142
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 34
-#define SQR_TOOM3_THRESHOLD                114
-#define SQR_TOOM4_THRESHOLD                390
-#define SQR_TOOM6_THRESHOLD                446
-#define SQR_TOOM8_THRESHOLD                547
-
-#define MULMID_TOOM42_THRESHOLD             36
-
-#define MULMOD_BNM1_THRESHOLD               17
-#define SQRMOD_BNM1_THRESHOLD               17
-
-#define MUL_FFT_MODF_THRESHOLD             570  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    570, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     29, 7}, {     15, 6}, {     31, 7}, {     29, 8}, \
-    {     15, 7}, {     32, 8}, {     17, 7}, {     35, 8}, \
-    {     19, 7}, {     39, 8}, {     21, 7}, {     43, 8}, \
-    {     23, 7}, {     47, 8}, {     25, 7}, {     51, 8}, \
-    {     29, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
-    {     43, 9}, {     23, 8}, {     51, 9}, {     27, 8}, \
-    {     55,10}, {     15, 9}, {     31, 8}, {     63, 9}, \
-    {     43,10}, {     23, 9}, {     55,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     83,10}, {     47, 9}, \
-    {     95,10}, {     55,11}, {     31,10}, {     87,11}, \
-    {     47,10}, {    111,12}, {     31,11}, {     63,10}, \
-    {    135,11}, {     79,10}, {    167, 8}, {    671,11}, \
-    {    111,12}, {     63,11}, {    159,12}, {     95,11}, \
-    {    207,10}, {    415,13}, {     63,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    271,12}, {    159,11}, \
-    {    319,10}, {    639,11}, {    335,10}, {    671,11}, \
-    {    367,12}, {    191,11}, {    415,12}, {    223,13}, \
-    {    127,12}, {    255,11}, {    543,12}, {    287,11}, \
-    {    575,10}, {   1151,11}, {    607,10}, {   1215,12}, \
-    {    319,11}, {    671,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    383,11}, {    767,12}, {    415,11}, \
-    {    831,12}, {    447,14}, {    127,13}, {    255,12}, \
-    {    543,11}, {   1087,12}, {    607,13}, {    319,12}, \
-    {    671,11}, {   1343,12}, {    735,13}, {    383,12}, \
-    {    799,11}, {   1599,12}, {    831,13}, {    447,12}, \
-    {    959,13}, {    511,12}, {   1087,13}, {    575,12}, \
-    {   1215,13}, {    639,12}, {   1343,13}, {    703,12}, \
-    {   1407,14}, {    383,13}, {    767,12}, {   1599,13}, \
-    {    831,12}, {   1663,13}, {    895,12}, {   1791,13}, \
-    {    959,15}, {    255,14}, {    511,13}, {   1087,12}, \
-    {   2175,13}, {   1215,14}, {    639,13}, {   1471,14}, \
-    {    767,13}, {   1727,14}, {    895,13}, {   1855,15}, \
-    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
-    {   2431,14}, {   1279,13}, {   2559,14}, {   1407,15}, \
-    {    767,14}, {   1535,13}, {   3071,14}, {   1791,16}, \
-    {    511,15}, {   1023,14}, {   2431,15}, {   1279,14}, \
-    {   2815,15}, {   1535,14}, {   3199,15}, {   1791,14}, \
-    {   3583,16}, {   1023,15}, {   2047,14}, {   4223,15}, \
-    {   2303,14}, {   4863,15}, {   2559,14}, {   5247,15}, \
-    {   2815,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 169
-#define MUL_FFT_THRESHOLD                 7808
-
-#define SQR_FFT_MODF_THRESHOLD             448  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    448, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     12, 5}, {     25, 6}, {     29, 7}, {     15, 6}, \
-    {     31, 7}, {     29, 8}, {     15, 7}, {     32, 8}, \
-    {     17, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
-    {     21, 7}, {     43, 8}, {     25, 7}, {     51, 8}, \
-    {     29, 9}, {     15, 8}, {     35, 9}, {     19, 8}, \
-    {     43, 9}, {     23, 8}, {     51, 9}, {     27, 8}, \
-    {     55,10}, {     15, 9}, {     31, 8}, {     65, 9}, \
-    {     43,10}, {     23, 9}, {     55,11}, {     15,10}, \
-    {     31, 9}, {     67,10}, {     39, 9}, {     83,10}, \
-    {     47, 9}, {     95,10}, {     55,11}, {     31,10}, \
-    {     79,11}, {     47,10}, {    103,12}, {     31,11}, \
-    {     63,10}, {    135,11}, {     79,10}, {    159,11}, \
-    {     95,10}, {    191,11}, {    111,12}, {     63,11}, \
-    {    127,10}, {    255,11}, {    143, 9}, {    575,10}, \
-    {    303, 9}, {    607,12}, {     95,11}, {    191, 9}, \
-    {    767,10}, {    399,11}, {    207,13}, {     63,12}, \
-    {    127,11}, {    255,10}, {    543, 9}, {   1087,10}, \
-    {    575,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    335,10}, {    671,11}, {    351,10}, {    703, 9}, \
-    {   1407,12}, {    191,11}, {    415,10}, {    831,12}, \
-    {    223,11}, {    447,13}, {    127,12}, {    255,11}, \
-    {    543,10}, {   1087,12}, {    287,11}, {    607,12}, \
-    {    319,11}, {    671,12}, {    351,11}, {    703,13}, \
-    {    191,12}, {    383,11}, {    767,10}, {   1535,12}, \
-    {    415,11}, {    863,12}, {    447,14}, {    127,13}, \
-    {    255,12}, {    511,11}, {   1023,12}, {    543,11}, \
-    {   1087,12}, {    575,11}, {   1151,12}, {    607,13}, \
-    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
-    {   1343,12}, {    703,11}, {   1407,12}, {    735,13}, \
-    {    383,12}, {    799,11}, {   1599,12}, {    863,13}, \
-    {    447,12}, {    927,14}, {    255,13}, {    511,12}, \
-    {   1087,13}, {    575,12}, {   1215,13}, {    639,12}, \
-    {   1343,13}, {    703,12}, {   1407,14}, {    383,13}, \
-    {    767,12}, {   1535,13}, {    831,12}, {   1727,13}, \
-    {    895,12}, {   1791,13}, {    959,15}, {    255,14}, \
-    {    511,13}, {   1087,12}, {   2175,13}, {   1215,14}, \
-    {    639,13}, {   1471,14}, {    767,13}, {   1663,14}, \
-    {    895,13}, {   1791,15}, {    511,14}, {   1023,13}, \
-    {   2175,14}, {   1151,13}, {   2303,14}, {   1407,15}, \
-    {    767,14}, {   1791,16}, {    511,15}, {   1023,14}, \
-    {   2303,15}, {   1279,14}, {   2687,15}, {   1535,14}, \
-    {   3199,15}, {   1791,16}, {   1023,15}, {   2047,14}, \
-    {   4223,15}, {   2303,14}, {   4863,15}, {   2559,14}, \
-    {   5247,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 185
-#define SQR_FFT_THRESHOLD                 5568
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  61
-#define MULLO_MUL_N_THRESHOLD            15604
-
-#define DC_DIV_QR_THRESHOLD                 56
-#define DC_DIVAPPR_Q_THRESHOLD             218
-#define DC_BDIV_QR_THRESHOLD                52
-#define DC_BDIV_Q_THRESHOLD                 42
-
-#define INV_MULMOD_BNM1_THRESHOLD           62
-#define INV_NEWTON_THRESHOLD               226
-#define INV_APPR_THRESHOLD                 220
-
-#define BINV_NEWTON_THRESHOLD              327
-#define REDC_1_TO_REDC_2_THRESHOLD          51
-#define REDC_2_TO_REDC_N_THRESHOLD          66
-
-#define MU_DIV_QR_THRESHOLD               1752
-#define MU_DIVAPPR_Q_THRESHOLD            1718
-#define MUPI_DIV_QR_THRESHOLD              102
-#define MU_BDIV_QR_THRESHOLD              1528
-#define MU_BDIV_Q_THRESHOLD               1718
-
-#define POWM_SEC_TABLE  1,22,110,624,1985
-
-#define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                     147
-#define HGCD_APPR_THRESHOLD                181
-#define HGCD_REDUCE_THRESHOLD             3524
-#define GCD_DC_THRESHOLD                   622
-#define GCDEXT_DC_THRESHOLD                487
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                14
-#define GET_STR_PRECOMPUTE_THRESHOLD        29
-#define SET_STR_DC_THRESHOLD               268
-#define SET_STR_PRECOMPUTE_THRESHOLD      1718
-
-#define FAC_DSC_THRESHOLD                 1075
-#define FAC_ODD_THRESHOLD                   23
diff --git a/gmp/mpn/x86_64/k10/hamdist.asm b/gmp/mpn/x86_64/k10/hamdist.asm
deleted file mode 100644
index 44b67b5e4e..0000000000
--- a/gmp/mpn/x86_64/k10/hamdist.asm
+++ /dev/null
@@ -1,103 +0,0 @@
-dnl  AMD64 mpn_hamdist -- hamming distance.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		    cycles/limb
-C AMD K8,K9		 n/a
-C AMD K10		 2
-C Intel P4		 n/a
-C Intel core2		 n/a
-C Intel corei		 2.05
-C Intel atom		 n/a
-C VIA nano		 n/a
-
-C This is very straightforward 2-way unrolled code.
-
-C TODO
-C  * Write something less basic.  It should not be hard to reach 1.5 c/l with
-C    4-way unrolling.
-
-define(`ap',		`%rdi')
-define(`bp',		`%rsi')
-define(`n',		`%rdx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_hamdist)
-	FUNC_ENTRY(3)
-	mov	(ap), %r8
-	xor	(bp), %r8
-
-	lea	(ap,n,8), ap			C point at A operand end
-	lea	(bp,n,8), bp			C point at B operand end
-	neg	n
-
-	bt	$0, R32(n)
-	jnc	L(2)
-
-L(1):	.byte	0xf3,0x49,0x0f,0xb8,0xc0	C popcnt %r8, %rax
-	xor	R32(%r10), R32(%r10)
-	add	$1, n
-	js	L(top)
-	FUNC_EXIT()
-	ret
-
-	ALIGN(16)
-L(2):	mov	8(ap,n,8), %r9
-	.byte	0xf3,0x49,0x0f,0xb8,0xc0	C popcnt %r8, %rax
-	xor	8(bp,n,8), %r9
-	.byte	0xf3,0x4d,0x0f,0xb8,0xd1	C popcnt %r9, %r10
-	add	$2, n
-	js	L(top)
-	lea	(%r10, %rax), %rax
-	FUNC_EXIT()
-	ret
-
-	ALIGN(16)
-L(top):	mov	(ap,n,8), %r8
-	lea	(%r10, %rax), %rax
-	mov	8(ap,n,8), %r9
-	xor	(bp,n,8), %r8
-	xor	8(bp,n,8), %r9
-	.byte	0xf3,0x49,0x0f,0xb8,0xc8	C popcnt %r8, %rcx
-	lea	(%rcx, %rax), %rax
-	.byte	0xf3,0x4d,0x0f,0xb8,0xd1	C popcnt %r9, %r10
-	add	$2, n
-	js	L(top)
-
-	lea	(%r10, %rax), %rax
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/k10/lshift.asm b/gmp/mpn/x86_64/k10/lshift.asm
deleted file mode 100644
index a1cbc31f61..0000000000
--- a/gmp/mpn/x86_64/k10/lshift.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_lshift optimised for Intel Sandy Bridge.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_lshift)
-include_mpn(`x86_64/fastsse/lshift-movdqu2.asm')
diff --git a/gmp/mpn/x86_64/k10/lshiftc.asm b/gmp/mpn/x86_64/k10/lshiftc.asm
deleted file mode 100644
index ac90edb76b..0000000000
--- a/gmp/mpn/x86_64/k10/lshiftc.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_lshiftc optimised for Intel Sandy Bridge.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_lshiftc)
-include_mpn(`x86_64/fastsse/lshiftc-movdqu2.asm')
diff --git a/gmp/mpn/x86_64/k10/popcount.asm b/gmp/mpn/x86_64/k10/popcount.asm
deleted file mode 100644
index 3814aeabf4..0000000000
--- a/gmp/mpn/x86_64/k10/popcount.asm
+++ /dev/null
@@ -1,138 +0,0 @@
-dnl  AMD64 mpn_popcount -- population count.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		    cycles/limb
-C AMD K8,K9		 n/a
-C AMD K10		 1.125
-C Intel P4		 n/a
-C Intel core2		 n/a
-C Intel corei		 1.25
-C Intel atom		 n/a
-C VIA nano		 n/a
-
-C * The zero-offset of popcount is misassembled to the offset-less form, which
-C   is one byte shorter and therefore will mess up the switching code.
-C * The outdated gas used in FreeBSD and NetBSD cannot handle the POPCNT insn,
-C   which is the main reason for our usage of '.byte'.
-
-C TODO
-C  * Improve switching code, the current code sucks.
-
-define(`up',		`%rdi')
-define(`n',		`%rsi')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_popcount)
-	FUNC_ENTRY(2)
-
-ifelse(1,1,`
-	lea	(up,n,8), up
-
-C	mov	R32(n), R32(%rcx)
-C	neg	R32(%rcx)
-	imul	$-1, R32(n), R32(%rcx)
-	and	$8-1, R32(%rcx)
-
-	neg	n
-
-	mov	R32(%rcx), R32(%rax)
-	neg	%rax
-	lea	(up,%rax,8),up
-
-	xor	R32(%rax), R32(%rax)
-
-	lea	(%rcx,%rcx,4), %rcx
-
-	lea	L(top)(%rip), %rdx
-	lea	(%rdx,%rcx,2), %rdx
-	jmp	*%rdx
-',`
-	lea	(up,n,8), up
-
-	mov	R32(n), R32(%rcx)
-	neg	R32(%rcx)
-	and	$8-1, R32(%rcx)
-
-	neg	n
-
-	mov	R32(%rcx), R32(%rax)
-	shl	$3, R32(%rax)
-	sub	%rax, up
-
-	xor	R32(%rax), R32(%rax)
-
-C	add	R32(%rcx), R32(%rcx)	C 2x
-C	lea	(%rcx,%rcx,4), %rcx	C 10x
-	imul	$10, R32(%rcx)
-
-	lea	L(top)(%rip), %rdx
-	add	%rcx, %rdx
-	jmp	*%rdx
-')
-
-	ALIGN(32)
-L(top):
-C 0 = n mod 8
-	.byte	0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x00	C popcnt 0(up,n,8), %r8
-	add	%r8, %rax
-C 7 = n mod 8
-	.byte	0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x08	C popcnt 8(up,n,8), %r9
-	add	%r9, %rax
-C 6 = n mod 8
-	.byte	0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x10	C popcnt 16(up,n,8), %r8
-	add	%r8, %rax
-C 5 = n mod 8
-	.byte	0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x18	C popcnt 24(up,n,8), %r9
-	add	%r9, %rax
-C 4 = n mod 8
-	.byte	0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x20	C popcnt 32(up,n,8), %r8
-	add	%r8, %rax
-C 3 = n mod 8
-	.byte	0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x28	C popcnt 40(up,n,8), %r9
-	add	%r9, %rax
-C 2 = n mod 8
-	.byte	0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x30	C popcnt 48(up,n,8), %r8
-	add	%r8, %rax
-C 1 = n mod 8
-	.byte	0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x38	C popcnt 56(up,n,8), %r9
-	add	%r9, %rax
-
-	add	$8, n
-	js	L(top)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/k10/rshift.asm b/gmp/mpn/x86_64/k10/rshift.asm
deleted file mode 100644
index 4c1c0d4cde..0000000000
--- a/gmp/mpn/x86_64/k10/rshift.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_rshift optimised for Intel Sandy Bridge.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_rshift)
-include_mpn(`x86_64/fastsse/rshift-movdqu2.asm')
diff --git a/gmp/mpn/x86_64/k10/sec_tabselect.asm b/gmp/mpn/x86_64/k10/sec_tabselect.asm
deleted file mode 100644
index e4360341d9..0000000000
--- a/gmp/mpn/x86_64/k10/sec_tabselect.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_sec_tabselect.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_sec_tabselect)
-include_mpn(`x86_64/fastsse/sec_tabselect.asm')
diff --git a/gmp/mpn/x86_64/k8/aorrlsh_n.asm b/gmp/mpn/x86_64/k8/aorrlsh_n.asm
deleted file mode 100644
index ff3a1842fd..0000000000
--- a/gmp/mpn/x86_64/k8/aorrlsh_n.asm
+++ /dev/null
@@ -1,217 +0,0 @@
-dnl  AMD64 mpn_addlsh_n and mpn_rsblsh_n.  R = V2^k +- U.
-
-dnl  Copyright 2006, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 2.87	< 3.85 for lshift + add_n
-C AMD K10	 2.75	< 3.85 for lshift + add_n
-C Intel P4	22	> 7.33 for lshift + add_n
-C Intel core2	 4.1	> 3.27 for lshift + add_n
-C Intel NHM	 4.4	> 3.75 for lshift + add_n
-C Intel SBR	 3.17	< 3.46 for lshift + add_n
-C Intel atom	 ?	? 8.75 for lshift + add_n
-C VIA nano	 4.7	< 6.25 for lshift + add_n
-
-C TODO
-C  * Can we propagate carry into rdx instead of using a special carry register?
-C    That could save enough insns to get to 10 cycles/iteration.
-
-define(`rp',       `%rdi')
-define(`up',       `%rsi')
-define(`vp_param', `%rdx')
-define(`n_param',  `%rcx')
-define(`cnt',      `%r8')
-
-define(`vp',    `%r12')
-define(`n',     `%rbp')
-
-ifdef(`OPERATION_addlsh_n',`
-  define(ADDSUB,       `add')
-  define(ADCSBB,       `adc')
-  define(func, mpn_addlsh_n)
-')
-ifdef(`OPERATION_rsblsh_n',`
-  define(ADDSUB,       `sub')
-  define(ADCSBB,       `sbb')
-  define(func, mpn_rsblsh_n)
-')
-
-MULFUNC_PROLOGUE(mpn_addlsh_n mpn_rsblsh_n)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')
-	push	%r12
-	push	%rbp
-	push	%rbx
-
-	mov	(vp_param), %rax	C load first V limb early
-
-	mov	$0, R32(n)
-	sub	n_param, n
-
-	lea	-16(up,n_param,8), up
-	lea	-16(rp,n_param,8), rp
-	lea	16(vp_param,n_param,8), vp
-
-	mov	n_param, %r9
-
-	mov	%r8, %rcx
-	mov	$1, R32(%r8)
-	shl	R8(%rcx), %r8
-
-	mul	%r8			C initial multiply
-
-	and	$3, R32(%r9)
-	jz	L(b0)
-	cmp	$2, R32(%r9)
-	jc	L(b1)
-	jz	L(b2)
-
-L(b3):	mov	%rax, %r11
-	ADDSUB	16(up,n,8), %r11
-	mov	-8(vp,n,8), %rax
-	sbb	R32(%rcx), R32(%rcx)
-	mov	%rdx, %rbx
-	mul	%r8
-	or	%rax, %rbx
-	mov	(vp,n,8), %rax
-	mov	%rdx, %r9
-	mul	%r8
-	or	%rax, %r9
-	add	$3, n
-	jnz	L(lo3)
-	jmp	L(cj3)
-
-L(b2):	mov	%rax, %rbx
-	mov	-8(vp,n,8), %rax
-	mov	%rdx, %r9
-	mul	%r8
-	or	%rax, %r9
-	add	$2, n
-	jz	L(cj2)
-	mov	%rdx, %r10
-	mov	-16(vp,n,8), %rax
-	mul	%r8
-	or	%rax, %r10
-	xor	R32(%rcx), R32(%rcx)	C clear carry register
-	jmp	L(lo2)
-
-L(b1):	mov	%rax, %r9
-	mov	%rdx, %r10
-	add	$1, n
-	jnz	L(gt1)
-	ADDSUB	8(up,n,8), %r9
-	jmp	L(cj1)
-L(gt1):	mov	-16(vp,n,8), %rax
-	mul	%r8
-	or	%rax, %r10
-	mov	%rdx, %r11
-	mov	-8(vp,n,8), %rax
-	mul	%r8
-	or	%rax, %r11
-	ADDSUB	8(up,n,8), %r9
-	ADCSBB	16(up,n,8), %r10
-	ADCSBB	24(up,n,8), %r11
-	mov	(vp,n,8), %rax
-	sbb	R32(%rcx), R32(%rcx)
-	jmp	L(lo1)
-
-L(b0):	mov	%rax, %r10
-	mov	%rdx, %r11
-	mov	-8(vp,n,8), %rax
-	mul	%r8
-	or	%rax, %r11
-	ADDSUB	16(up,n,8), %r10
-	ADCSBB	24(up,n,8), %r11
-	mov	(vp,n,8), %rax
-	sbb	R32(%rcx), R32(%rcx)
-	mov	%rdx, %rbx
-	mul	%r8
-	or	%rax, %rbx
-	mov	8(vp,n,8), %rax
-	add	$4, n
-	jz	L(end)
-
-	ALIGN(8)
-L(top):	mov	%rdx, %r9
-	mul	%r8
-	or	%rax, %r9
-	mov	%r10, -16(rp,n,8)
-L(lo3):	mov	%rdx, %r10
-	mov	-16(vp,n,8), %rax
-	mul	%r8
-	or	%rax, %r10
-	mov	%r11, -8(rp,n,8)
-L(lo2):	mov	%rdx, %r11
-	mov	-8(vp,n,8), %rax
-	mul	%r8
-	or	%rax, %r11
-	add	R32(%rcx), R32(%rcx)
-	ADCSBB	(up,n,8), %rbx
-	ADCSBB	8(up,n,8), %r9
-	ADCSBB	16(up,n,8), %r10
-	ADCSBB	24(up,n,8), %r11
-	mov	(vp,n,8), %rax
-	sbb	R32(%rcx), R32(%rcx)
-	mov	%rbx, (rp,n,8)
-L(lo1):	mov	%rdx, %rbx
-	mul	%r8
-	or	%rax, %rbx
-	mov	%r9, 8(rp,n,8)
-L(lo0):	mov	8(vp,n,8), %rax
-	add	$4, n
-	jnz	L(top)
-
-L(end):	mov	%rdx, %r9
-	mul	%r8
-	or	%rax, %r9
-	mov	%r10, -16(rp,n,8)
-L(cj3):	mov	%r11, -8(rp,n,8)
-L(cj2):	add	R32(%rcx), R32(%rcx)
-	ADCSBB	(up,n,8), %rbx
-	ADCSBB	8(up,n,8), %r9
-	mov	%rbx, (rp,n,8)
-L(cj1):	mov	%r9, 8(rp,n,8)
-	mov	%rdx, %rax
-	ADCSBB	$0, %rax
-	pop	%rbx
-	pop	%rbp
-	pop	%r12
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/k8/div_qr_1n_pi1.asm b/gmp/mpn/x86_64/k8/div_qr_1n_pi1.asm
deleted file mode 100644
index 861402b222..0000000000
--- a/gmp/mpn/x86_64/k8/div_qr_1n_pi1.asm
+++ /dev/null
@@ -1,249 +0,0 @@
-dnl  x86-64 mpn_div_qr_1n_pi1
-dnl  -- Divide an mpn number by a normalized single-limb number,
-dnl     using a single-limb inverse.
-
-dnl  Contributed to the GNU project by Niels Möller
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C		c/l
-C AMD K8,K9	11
-C AMD K10	11
-C AMD bull	16
-C AMD pile	14.25
-C AMD steam	 ?
-C AMD bobcat	16
-C AMD jaguar	 ?
-C Intel P4	47.5	poor
-C Intel core	28.5	very poor
-C Intel NHM	29	very poor
-C Intel SBR	16	poor
-C Intel IBR	13.5
-C Intel HWL	12
-C Intel BWL	 ?
-C Intel atom	53	very poor
-C VIA nano	19
-
-
-C INPUT Parameters
-define(`QP', `%rdi')
-define(`UP', `%rsi')
-define(`UN_INPUT', `%rdx')
-define(`U1', `%rcx')	C Also in %rax
-define(`D', `%r8')
-define(`DINV', `%r9')
-
-C Invariants
-define(`B2', `%rbp')
-define(`B2md', `%rbx')
-
-C Variables
-define(`UN', `%r8')	C Overlaps D input
-define(`T', `%r10')
-define(`U0', `%r11')
-define(`U2', `%r12')
-define(`Q0', `%r13')
-define(`Q1', `%r14')
-define(`Q2', `%r15')
-
-ABI_SUPPORT(STD64)
-
-	ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_div_qr_1n_pi1)
-	FUNC_ENTRY(6)
-IFDOS(`	mov	56(%rsp), %r8	')
-IFDOS(`	mov	64(%rsp), %r9	')
-	dec	UN_INPUT
-	jnz	L(first)
-
-	C Just a single 2/1 division.
-	C T, U0 are allocated in scratch registers
-	lea	1(U1), T
-	mov	U1, %rax
-	mul	DINV
-	mov	(UP), U0
-	add	U0, %rax
-	adc	T, %rdx
-	mov	%rdx, T
-	imul	D, %rdx
-	sub	%rdx, U0
-	cmp	U0, %rax
-	lea	(U0, D), %rax
-	cmovnc	U0, %rax
-	sbb	$0, T
-	cmp	D, %rax
-	jc	L(single_div_done)
-	sub	D, %rax
-	add	$1, T
-L(single_div_done):
-	mov	T, (QP)
-	FUNC_EXIT
-	ret
-L(first):
-	C FIXME: Could delay some of these until we enter the loop.
-	push	%r15
-	push	%r14
-	push	%r13
-	push	%r12
-	push	%rbx
-	push	%rbp
-
-	mov	D, B2
-	imul	DINV, B2
-	neg	B2
-	mov	B2, B2md
-	sub	D, B2md
-
-	C D not needed until final reduction
-	push	D
-	mov	UN_INPUT, UN	C Clobbers D
-
-	mov	DINV, %rax
-	mul	U1
-	mov	%rax, Q0
-	add	U1, %rdx
-	mov	%rdx, T
-
-	mov	B2, %rax
-	mul	U1
-	mov	-8(UP, UN, 8), U0
-	mov	(UP, UN, 8), U1
-	mov	T, (QP, UN, 8)
-	add	%rax, U0
-	adc	%rdx, U1
-	sbb	U2, U2
-	dec	UN
-	mov	U1, %rax
-	jz	L(final)
-	mov	$0, R32(Q1)
-
-	ALIGN(16)
-
-	C Loop is 28 instructions, 30 K8/K10 decoder slots, should run
-	C in 10 cycles. At entry, %rax holds an extra copy of U1, Q1
-	C is zero, and carry holds an extra copy of U2.
-L(loop):
-	C {Q2, Q1, Q0} <-- DINV * U1 + B (Q0 + U2 DINV) + B^2 U2
-	C Remains to add in B (U1 + c)
-	cmovc	DINV, Q1
-	mov	U2, Q2
-	neg	Q2
-	mul	DINV
-	add	%rdx, Q1
-	adc	$0, Q2
-	add	Q0, Q1
-	mov	%rax, Q0
-	mov	B2, %rax
-	lea	(B2md, U0), T
-	adc	$0, Q2
-
-	C {U2, U1, U0} <-- (U0 + U2 B2 -c U) B + U1 B2 + u
-	mul	U1
-	and	B2, U2
-	add	U2, U0
-	cmovnc	U0, T
-
-	C {QP+UN, ...} <-- {QP+UN, ...} + {Q2, Q1} + U1 + c
-	adc	U1, Q1
-	mov	-8(UP, UN, 8), U0
-	adc	Q2, 8(QP, UN, 8)
-	jc	L(q_incr)
-L(q_incr_done):
-	add	%rax, U0
-	mov	T, %rax
-	adc	%rdx, %rax
-	mov	Q1, (QP, UN, 8)
-	mov	$0, R32(Q1)
-	sbb	U2, U2
-	dec	UN
-	mov	%rax, U1
-	jnz	L(loop)
-
-L(final):
-	pop	D
-
-	mov	U2, Q1
-	and	D, U2
-	sub	U2, %rax
-	neg	Q1
-
-	mov	%rax, U1
-	sub	D, %rax
-	cmovc	U1, %rax
-	sbb	$-1, Q1
-
-	lea	1(%rax), T
-	mul	DINV
-	add	U0, %rax
-	adc	T, %rdx
-	mov	%rdx, T
-	imul	D, %rdx
-	sub	%rdx, U0
-	cmp	U0, %rax
-	lea	(U0, D), %rax
-	cmovnc	U0, %rax
-	sbb	$0, T
-	cmp	D, %rax
-	jc	L(div_done)
-	sub	D, %rax
-	add	$1, T
-L(div_done):
-	add	T, Q0
-	mov	Q0, (QP)
-	adc	Q1, 8(QP)
-	jnc	L(done)
-L(final_q_incr):
-	addq	$1, 16(QP)
-	lea	8(QP), QP
-	jc	L(final_q_incr)
-
-L(done):
-	pop	%rbp
-	pop	%rbx
-	pop	%r12
-	pop	%r13
-	pop	%r14
-	pop	%r15
-	FUNC_EXIT
-	ret
-
-L(q_incr):
-	C U1 is not live, so use it for indexing
-	lea	16(QP, UN, 8), U1
-L(q_incr_loop):
-	addq	$1, (U1)
-	jnc	L(q_incr_done)
-	lea	8(U1), U1
-	jmp	L(q_incr_loop)
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/k8/gmp-mparam.h b/gmp/mpn/x86_64/k8/gmp-mparam.h
deleted file mode 100644
index df78c38923..0000000000
--- a/gmp/mpn/x86_64/k8/gmp-mparam.h
+++ /dev/null
@@ -1,236 +0,0 @@
-/* AMD K8 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2012, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-// #undef mpn_sublsh_n
-// #define mpn_sublsh_n(rp,up,vp,n,c)					\
-//    (((rp) == (up)) ? mpn_submul_1 (rp, vp, n, CNST_LIMB(1) << (c))	\
-//      : MPN(mpn_sublsh_n)(rp,up,vp,n,c))
-
-/* 2500 MHz K8 Brisbane */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        13
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        35
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      9
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           16
-
-#define MUL_TOOM22_THRESHOLD                28
-#define MUL_TOOM33_THRESHOLD                81
-#define MUL_TOOM44_THRESHOLD               242
-#define MUL_TOOM6H_THRESHOLD               345
-#define MUL_TOOM8H_THRESHOLD               482
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     153
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     161
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     175
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     166
-
-#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
-#define SQR_TOOM2_THRESHOLD                 34
-#define SQR_TOOM3_THRESHOLD                129
-#define SQR_TOOM4_THRESHOLD                527
-#define SQR_TOOM6_THRESHOLD                562
-#define SQR_TOOM8_THRESHOLD                  0  /* always */
-
-#define MULMID_TOOM42_THRESHOLD             36
-
-#define MULMOD_BNM1_THRESHOLD               18
-#define SQRMOD_BNM1_THRESHOLD               22
-
-#define MUL_FFT_MODF_THRESHOLD             654  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    654, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     12, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     15, 5}, {     31, 6}, {     27, 7}, {     15, 6}, \
-    {     31, 7}, {     19, 6}, {     39, 7}, {     29, 8}, \
-    {     15, 7}, {     33, 8}, {     17, 7}, {     37, 8}, \
-    {     19, 7}, {     39, 8}, {     23, 7}, {     47, 8}, \
-    {     25, 7}, {     51, 8}, {     43, 9}, {     23, 8}, \
-    {     51, 9}, {     27, 8}, {     57, 9}, {     31, 8}, \
-    {     65, 9}, {     35, 8}, {     71, 9}, {     39, 8}, \
-    {     79, 9}, {     43,10}, {     23, 9}, {     59, 8}, \
-    {    119,10}, {     31, 8}, {    125, 9}, {     71,10}, \
-    {     39, 9}, {     87,10}, {     47, 9}, {     99,10}, \
-    {     55, 9}, {    123,11}, {     31,10}, {     63, 9}, \
-    {    131,10}, {     71, 9}, {    143,10}, {     79, 9}, \
-    {    159,10}, {     87,11}, {     47,10}, {    119,11}, \
-    {     63,10}, {    143,11}, {     79,10}, {    175,11}, \
-    {     95,10}, {    199,11}, {    111,10}, {    223,12}, \
-    {     63,11}, {    143,10}, {    287, 9}, {    575,10}, \
-    {    295,11}, {    159,10}, {    319,11}, {    175,12}, \
-    {     95,11}, {    191,10}, {    383,11}, {    207,10}, \
-    {    415, 9}, {    831,11}, {    223,10}, {    447,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    271,10}, {    543, 9}, {   1087,11}, {    287,10}, \
-    {    575, 9}, {   1151,12}, {    159,11}, {    319,10}, \
-    {    639,11}, {    335,10}, {    671,11}, {    351,10}, \
-    {    703,11}, {    367,12}, {    191,11}, {    383,10}, \
-    {    767,11}, {    399,10}, {    799,11}, {    415,10}, \
-    {    831,12}, {    223,11}, {    447,10}, {    895,11}, \
-    {    479,13}, {    127,12}, {    255,11}, {    511,10}, \
-    {   1023,11}, {    543,10}, {   1087,12}, {    287,11}, \
-    {    575,10}, {   1151,11}, {    607,12}, {    319,11}, \
-    {    639,10}, {   1279,11}, {    671,12}, {    351,11}, \
-    {    703,10}, {   1407,13}, {    191,12}, {    383,11}, \
-    {    767,12}, {    415,11}, {    831,12}, {    447,11}, \
-    {    895,12}, {    479,11}, {    959,14}, {    127,13}, \
-    {    255,12}, {    511,11}, {   1023,12}, {    543,11}, \
-    {   1087,12}, {    575,11}, {   1151,12}, {    607,11}, \
-    {   1215,13}, {    319,12}, {    671,11}, {   1343,12}, \
-    {    735,13}, {    383,12}, {    799,11}, {   1599,12}, \
-    {    863,13}, {    447,12}, {    895,11}, {   1791,12}, \
-    {    991,13}, {    511,12}, {   1087,13}, {    575,12}, \
-    {   1215,13}, {    639,12}, {   1343,13}, {    703,12}, \
-    {   1471,14}, {    383,13}, {    767,12}, {   1599,13}, \
-    {    831,12}, {   1663,13}, {    895,12}, {   1791,13}, \
-    {    959,12}, {   1919,14}, {    511,13}, {   1087,12}, \
-    {   2175,13}, {   1215,14}, {    639,13}, {   1471,14}, \
-    {    767,13}, {   1663,14}, {    895,13}, {   1919,15}, \
-    {    511,14}, {   1023,13}, {   2175,14}, {   1151,13}, \
-    {   2367,14}, {   1279,13}, {   2559,14}, {   1407,15}, \
-    {  32768,16}, {  65536,17}, { 131072,18}, { 262144,19}, \
-    { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
-    {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 205
-#define MUL_FFT_THRESHOLD                11520
-
-#define SQR_FFT_MODF_THRESHOLD             570  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    570, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
-    {     12, 5}, {     25, 6}, {     13, 5}, {     27, 6}, \
-    {     14, 5}, {     29, 6}, {     16, 5}, {     33, 6}, \
-    {     29, 7}, {     15, 6}, {     31, 7}, {     16, 6}, \
-    {     33, 7}, {     17, 6}, {     35, 7}, {     33, 8}, \
-    {     17, 7}, {     37, 8}, {     19, 7}, {     40, 8}, \
-    {     21, 7}, {     43, 8}, {     23, 7}, {     47, 8}, \
-    {     25, 7}, {     51, 8}, {     29, 9}, {     15, 8}, \
-    {     37, 9}, {     19, 8}, {     43, 9}, {     23, 8}, \
-    {     51, 9}, {     27, 8}, {     55, 9}, {     31, 8}, \
-    {     63, 9}, {     35, 8}, {     71, 9}, {     39, 8}, \
-    {     79, 9}, {     43,10}, {     23, 9}, {     55,10}, \
-    {     31, 9}, {     71,10}, {     39, 9}, {     83,10}, \
-    {     47, 9}, {     99,10}, {     55, 9}, {    123,11}, \
-    {     31,10}, {     63, 9}, {    127,10}, {     71, 9}, \
-    {    143,10}, {     87,11}, {     47,10}, {    111,12}, \
-    {     31,11}, {     63,10}, {    143,11}, {     79,10}, \
-    {    167,11}, {     95,10}, {    199,11}, {    111,12}, \
-    {     63,11}, {    127, 9}, {    511,11}, {    143,10}, \
-    {    287, 9}, {    575, 8}, {   1151,11}, {    159,10}, \
-    {    319, 9}, {    639,11}, {    175,12}, {     95,11}, \
-    {    191,10}, {    383, 9}, {    767,11}, {    207,10}, \
-    {    415, 9}, {    831,10}, {    431,11}, {    223,10}, \
-    {    447,13}, {     63,12}, {    127,10}, {    511, 9}, \
-    {   1023,10}, {    543, 9}, {   1087,11}, {    287,10}, \
-    {    575, 9}, {   1151,12}, {    159,11}, {    319,10}, \
-    {    639, 9}, {   1279,11}, {    335,10}, {    671,11}, \
-    {    351,10}, {    703,11}, {    367,12}, {    191,11}, \
-    {    383,10}, {    767,11}, {    399,10}, {    799,11}, \
-    {    415,10}, {    831,11}, {    431,12}, {    223,11}, \
-    {    447,10}, {    895,11}, {    463,13}, {    127,11}, \
-    {    511,10}, {   1023,11}, {    543,10}, {   1087,12}, \
-    {    287,11}, {    575,10}, {   1151,11}, {    607,10}, \
-    {   1215,12}, {    319,11}, {    639,10}, {   1279,11}, \
-    {    671,12}, {    351,11}, {    703,10}, {   1407,13}, \
-    {    191,12}, {    383,11}, {    767,10}, {   1535,11}, \
-    {    799,12}, {    415,11}, {    831,12}, {    447,11}, \
-    {    895,12}, {    479,14}, {    127,12}, {    511,11}, \
-    {   1023,12}, {    543,11}, {   1087,12}, {    575,11}, \
-    {   1151,12}, {    607,11}, {   1215,13}, {    319,12}, \
-    {    639,11}, {   1279,12}, {    671,11}, {   1343,12}, \
-    {    703,11}, {   1407,12}, {    735,13}, {    383,12}, \
-    {    799,11}, {   1599,12}, {    863,13}, {    447,12}, \
-    {    959,13}, {    511,12}, {   1087,13}, {    575,12}, \
-    {   1215,13}, {    639,12}, {   1343,13}, {    703,12}, \
-    {   1471,14}, {    383,13}, {    767,12}, {   1599,13}, \
-    {    831,12}, {   1663,13}, {    895,12}, {   1791,13}, \
-    {    959,14}, {    511,13}, {   1023,12}, {   2047,13}, \
-    {   1215,14}, {    639,13}, {   1471,14}, {    767,13}, \
-    {   1663,14}, {    895,13}, {   1855,15}, {    511,14}, \
-    {   1023,13}, {   2111,14}, {   1151,13}, {   2303,14}, \
-    {   1407,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 214
-#define SQR_FFT_THRESHOLD                 5760
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  61
-#define MULLO_MUL_N_THRESHOLD            22906
-
-#define DC_DIV_QR_THRESHOLD                 51
-#define DC_DIVAPPR_Q_THRESHOLD             264
-#define DC_BDIV_QR_THRESHOLD                38
-#define DC_BDIV_Q_THRESHOLD                170
-
-#define INV_MULMOD_BNM1_THRESHOLD           67
-#define INV_NEWTON_THRESHOLD               246
-#define INV_APPR_THRESHOLD                 244
-
-#define BINV_NEWTON_THRESHOLD              252
-#define REDC_1_TO_REDC_2_THRESHOLD          35
-#define REDC_2_TO_REDC_N_THRESHOLD          84
-
-#define MU_DIV_QR_THRESHOLD               2089
-#define MU_DIVAPPR_Q_THRESHOLD            1752
-#define MUPI_DIV_QR_THRESHOLD               93
-#define MU_BDIV_QR_THRESHOLD              1718
-#define MU_BDIV_Q_THRESHOLD               1895
-
-#define POWM_SEC_TABLE  2,16,194,904,2177
-
-#define MATRIX22_STRASSEN_THRESHOLD         21
-#define HGCD_THRESHOLD                     148
-#define HGCD_APPR_THRESHOLD                185
-#define HGCD_REDUCE_THRESHOLD             4120
-#define GCD_DC_THRESHOLD                   562
-#define GCDEXT_DC_THRESHOLD                501
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                17
-#define GET_STR_PRECOMPUTE_THRESHOLD        29
-#define SET_STR_DC_THRESHOLD               268
-#define SET_STR_PRECOMPUTE_THRESHOLD      1787
-
-#define FAC_DSC_THRESHOLD                 1240
-#define FAC_ODD_THRESHOLD                    0  /* always */
diff --git a/gmp/mpn/x86_64/k8/mullo_basecase.asm b/gmp/mpn/x86_64/k8/mullo_basecase.asm
deleted file mode 100644
index fa00f4234a..0000000000
--- a/gmp/mpn/x86_64/k8/mullo_basecase.asm
+++ /dev/null
@@ -1,436 +0,0 @@
-dnl  AMD64 mpn_mullo_basecase.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjorn Granlund.
-
-C NOTES
-C   * There is a major stupidity in that we call mpn_mul_1 initially, for a
-C     large trip count.  Instead, we should start with mul_2 for any operand
-C     size congruence class.
-C   * Stop iterating addmul_2 earlier, falling into straight-line triangle code
-C     for the last 2-3 iterations.
-C   * Perhaps implement n=4 special code.
-C   * The reload of the outer loop jump address hurts branch prediction.
-C   * The addmul_2 loop ends with an MUL whose high part is not used upon loop
-C     exit.
-
-C INPUT PARAMETERS
-define(`rp',	   `%rdi')
-define(`up',	   `%rsi')
-define(`vp_param', `%rdx')
-define(`n',	   `%rcx')
-
-define(`vp',	`%r11')
-define(`outer_addr', `%r8')
-define(`j',	`%r9')
-define(`v0',	`%r13')
-define(`v1',	`%r14')
-define(`w0',	`%rbx')
-define(`w1',	`%r15')
-define(`w2',	`%rbp')
-define(`w3',	`%r10')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mullo_basecase)
-	FUNC_ENTRY(4)
-	cmp	$4, n
-	jge	L(gen)
-	mov	(up), %rax		C u0
-	mov	(vp_param), %r8		C v0
-
-	lea	L(tab)(%rip), %r9
-ifdef(`PIC',
-`	movslq	(%r9,%rcx,4), %r10
-	add	%r10, %r9
-	jmp	*%r9
-',`
-	jmp	*(%r9,n,8)
-')
-	JUMPTABSECT
-	ALIGN(8)
-L(tab):	JMPENT(	L(tab), L(tab))			C not allowed
-	JMPENT(	L(1), L(tab))			C 1
-	JMPENT(	L(2), L(tab))			C 2
-	JMPENT(	L(3), L(tab))			C 3
-dnl	JMPENT(	L(0m4), L(tab))			C 4
-dnl	JMPENT(	L(1m4), L(tab))			C 5
-dnl	JMPENT(	L(2m4), L(tab))			C 6
-dnl	JMPENT(	L(3m4), L(tab))			C 7
-dnl	JMPENT(	L(0m4), L(tab))			C 8
-dnl	JMPENT(	L(1m4), L(tab))			C 9
-dnl	JMPENT(	L(2m4), L(tab))			C 10
-dnl	JMPENT(	L(3m4), L(tab))			C 11
-	TEXT
-
-L(1):	imul	%r8, %rax
-	mov	%rax, (rp)
-	FUNC_EXIT()
-	ret
-
-L(2):	mov	8(vp_param), %r11
-	imul	%rax, %r11		C u0 x v1
-	mul	%r8			C u0 x v0
-	mov	%rax, (rp)
-	imul	8(up), %r8		C u1 x v0
-	lea	(%r11, %rdx), %rax
-	add	%r8, %rax
-	mov	%rax, 8(rp)
-	FUNC_EXIT()
-	ret
-
-L(3):	mov	8(vp_param), %r9	C v1
-	mov	16(vp_param), %r11
-	mul	%r8			C u0 x v0 -> <r1,r0>
-	mov	%rax, (rp)		C r0
-	mov	(up), %rax		C u0
-	mov	%rdx, %rcx		C r1
-	mul	%r9			C u0 x v1 -> <r2,r1>
-	imul	8(up), %r9		C u1 x v1 -> r2
-	mov	16(up), %r10
-	imul	%r8, %r10		C u2 x v0 -> r2
-	add	%rax, %rcx
-	adc	%rdx, %r9
-	add	%r10, %r9
-	mov	8(up), %rax		C u1
-	mul	%r8			C u1 x v0 -> <r2,r1>
-	add	%rax, %rcx
-	adc	%rdx, %r9
-	mov	%r11, %rax
-	imul	(up), %rax		C u0 x v2 -> r2
-	add	%rax, %r9
-	mov	%rcx, 8(rp)
-	mov	%r9, 16(rp)
-	FUNC_EXIT()
-	ret
-
-L(0m4):
-L(1m4):
-L(2m4):
-L(3m4):
-L(gen):	push	%rbx
-	push	%rbp
-	push	%r13
-	push	%r14
-	push	%r15
-
-	mov	(up), %rax
-	mov	(vp_param), v0
-	mov	vp_param, vp
-
-	lea	(rp,n,8), rp
-	lea	(up,n,8), up
-	neg	n
-
-	mul	v0
-
-	test	$1, R8(n)
-	jz	L(mul_2)
-
-L(mul_1):
-	lea	-8(rp), rp
-	lea	-8(up), up
-	test	$2, R8(n)
-	jnz	L(mul_1_prologue_3)
-
-L(mul_1_prologue_2):		C n = 7, 11, 15, ...
-	lea	-1(n), j
-	lea	L(addmul_outer_1)(%rip), outer_addr
-	mov	%rax, w0
-	mov	%rdx, w1
-	xor	R32(w2), R32(w2)
-	xor	R32(w3), R32(w3)
-	mov	16(up,n,8), %rax
-	jmp	L(mul_1_entry_2)
-
-L(mul_1_prologue_3):		C n = 5, 9, 13, ...
-	lea	1(n), j
-	lea	L(addmul_outer_3)(%rip), outer_addr
-	mov	%rax, w2
-	mov	%rdx, w3
-	xor	R32(w0), R32(w0)
-	jmp	L(mul_1_entry_0)
-
-	ALIGN(16)
-L(mul_1_top):
-	mov	w0, -16(rp,j,8)
-	add	%rax, w1
-	mov	(up,j,8), %rax
-	adc	%rdx, w2
-	xor	R32(w0), R32(w0)
-	mul	v0
-	mov	w1, -8(rp,j,8)
-	add	%rax, w2
-	adc	%rdx, w3
-L(mul_1_entry_0):
-	mov	8(up,j,8), %rax
-	mul	v0
-	mov	w2, (rp,j,8)
-	add	%rax, w3
-	adc	%rdx, w0
-	mov	16(up,j,8), %rax
-	mul	v0
-	mov	w3, 8(rp,j,8)
-	xor	R32(w2), R32(w2)	C zero
-	mov	w2, w3			C zero
-	add	%rax, w0
-	mov	24(up,j,8), %rax
-	mov	w2, w1			C zero
-	adc	%rdx, w1
-L(mul_1_entry_2):
-	mul	v0
-	add	$4, j
-	js	L(mul_1_top)
-
-	mov	w0, -16(rp)
-	add	%rax, w1
-	mov	w1, -8(rp)
-	adc	%rdx, w2
-
-	imul	(up), v0
-	add	v0, w2
-	mov	w2, (rp)
-
-	add	$1, n
-	jz	L(ret)
-
-	mov	8(vp), v0
-	mov	16(vp), v1
-
-	lea	16(up), up
-	lea	8(vp), vp
-	lea	24(rp), rp
-
-	jmp	*outer_addr
-
-
-L(mul_2):
-	mov	8(vp), v1
-	test	$2, R8(n)
-	jz	L(mul_2_prologue_3)
-
-	ALIGN(16)
-L(mul_2_prologue_1):
-	lea	0(n), j
-	mov	%rax, w3
-	mov	%rdx, w0
-	xor	R32(w1), R32(w1)
-	mov	(up,n,8), %rax
-	lea	L(addmul_outer_3)(%rip), outer_addr
-	jmp	L(mul_2_entry_1)
-
-	ALIGN(16)
-L(mul_2_prologue_3):
-	lea	2(n), j
-	mov	$0, R32(w3)
-	mov	%rax, w1
-	mov	(up,n,8), %rax
-	mov	%rdx, w2
-	lea	L(addmul_outer_1)(%rip), outer_addr
-	jmp	L(mul_2_entry_3)
-
-	ALIGN(16)
-L(mul_2_top):
-	mov	-32(up,j,8), %rax
-	mul	v1
-	add	%rax, w0
-	adc	%rdx, w1
-	mov	-24(up,j,8), %rax
-	xor	R32(w2), R32(w2)
-	mul	v0
-	add	%rax, w0
-	mov	-24(up,j,8), %rax
-	adc	%rdx, w1
-	adc	$0, R32(w2)
-	mul	v1
-	add	%rax, w1
-	mov	w0, -24(rp,j,8)
-	adc	%rdx, w2
-	mov	-16(up,j,8), %rax
-	mul	v0
-	mov	$0, R32(w3)
-	add	%rax, w1
-	adc	%rdx, w2
-	mov	-16(up,j,8), %rax
-	adc	$0, R32(w3)
-L(mul_2_entry_3):
-	mov	$0, R32(w0)
-	mov	w1, -16(rp,j,8)
-	mul	v1
-	add	%rax, w2
-	mov	-8(up,j,8), %rax
-	adc	%rdx, w3
-	mov	$0, R32(w1)
-	mul	v0
-	add	%rax, w2
-	mov	-8(up,j,8), %rax
-	adc	%rdx, w3
-	adc	R32(w1), R32(w0)
-	mul	v1
-	add	%rax, w3
-	mov	w2, -8(rp,j,8)
-	adc	%rdx, w0
-	mov	(up,j,8), %rax
-	mul	v0
-	add	%rax, w3
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-L(mul_2_entry_1):
-	add	$4, j
-	mov	w3, -32(rp,j,8)
-	js	L(mul_2_top)
-
-	imul	-16(up), v1
-	add	v1, w0
-	imul	-8(up), v0
-	add	v0, w0
-	mov	w0, -8(rp)
-
-	add	$2, n
-	jz	L(ret)
-
-	mov	16(vp), v0
-	mov	24(vp), v1
-
-	lea	16(vp), vp
-	lea	16(rp), rp
-
-	jmp	*outer_addr
-
-
-L(addmul_outer_1):
-	lea	-2(n), j
-	mov	-16(up,n,8), %rax
-	mul	v0
-	mov	%rax, w3
-	mov	-16(up,n,8), %rax
-	mov	%rdx, w0
-	xor	R32(w1), R32(w1)
-	lea	L(addmul_outer_3)(%rip), outer_addr
-	jmp	L(addmul_entry_1)
-
-L(addmul_outer_3):
-	lea	0(n), j
-	mov	-16(up,n,8), %rax
-	xor	R32(w3), R32(w3)
-	mul	v0
-	mov	%rax, w1
-	mov	-16(up,n,8), %rax
-	mov	%rdx, w2
-	lea	L(addmul_outer_1)(%rip), outer_addr
-	jmp	L(addmul_entry_3)
-
-	ALIGN(16)
-L(addmul_top):
-	add	w3, -32(rp,j,8)
-	adc	%rax, w0
-	mov	-24(up,j,8), %rax
-	adc	%rdx, w1
-	xor	R32(w2), R32(w2)
-	mul	v0
-	add	%rax, w0
-	mov	-24(up,j,8), %rax
-	adc	%rdx, w1
-	adc	R32(w2), R32(w2)
-	mul	v1
-	xor	R32(w3), R32(w3)
-	add	w0, -24(rp,j,8)
-	adc	%rax, w1
-	mov	-16(up,j,8), %rax
-	adc	%rdx, w2
-	mul	v0
-	add	%rax, w1
-	mov	-16(up,j,8), %rax
-	adc	%rdx, w2
-	adc	$0, R32(w3)
-L(addmul_entry_3):
-	mul	v1
-	add	w1, -16(rp,j,8)
-	adc	%rax, w2
-	mov	-8(up,j,8), %rax
-	adc	%rdx, w3
-	mul	v0
-	xor	R32(w0), R32(w0)
-	add	%rax, w2
-	adc	%rdx, w3
-	mov	$0, R32(w1)
-	mov	-8(up,j,8), %rax
-	adc	R32(w1), R32(w0)
-	mul	v1
-	add	w2, -8(rp,j,8)
-	adc	%rax, w3
-	adc	%rdx, w0
-	mov	(up,j,8), %rax
-	mul	v0
-	add	%rax, w3
-	mov	(up,j,8), %rax
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-L(addmul_entry_1):
-	mul	v1
-	add	$4, j
-	js	L(addmul_top)
-
-	add	w3, -32(rp)
-	adc	%rax, w0
-
-	imul	-24(up), v0
-	add	v0, w0
-	add	w0, -24(rp)
-
-	add	$2, n
-	jns	L(ret)
-
-	lea	16(vp), vp
-
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	lea	-16(up), up
-
-	jmp	*outer_addr
-
-L(ret):	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/k8/mulmid_basecase.asm b/gmp/mpn/x86_64/k8/mulmid_basecase.asm
deleted file mode 100644
index 86f1414ed8..0000000000
--- a/gmp/mpn/x86_64/k8/mulmid_basecase.asm
+++ /dev/null
@@ -1,559 +0,0 @@
-dnl  AMD64 mpn_mulmid_basecase
-
-dnl  Contributed by David Harvey.
-
-dnl  Copyright 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C	     cycles/limb
-C K8,K9:	 2.375  (2.5 when un - vn is "small")
-C K10:		 ?
-C P4:		 ?
-C P6-15:	 ?
-
-C INPUT PARAMETERS
-define(`rp',      `%rdi')
-define(`up',      `%rsi')
-define(`un_param',`%rdx')
-define(`vp_param',`%rcx')
-define(`vn',      `%r8')
-
-define(`v0', `%r12')
-define(`v1', `%r9')
-
-define(`w0', `%rbx')
-define(`w1', `%rcx')
-define(`w2', `%rbp')
-define(`w3', `%r10')
-
-define(`n',  `%r11')
-define(`outer_addr', `%r14')
-define(`un',  `%r13')
-define(`vp',  `%r15')
-
-define(`vp_inner', `%r10')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mulmid_basecase)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	mov	vp_param, vp
-
-	C use un for row length (= un_param - vn + 1)
-	lea	1(un_param), un
-	sub	vn, un
-
-	lea	(rp,un,8), rp
-
-	cmp	$4, un		C TODO: needs tuning
-	jc	L(diagonal)
-
-	lea	(up,un_param,8), up
-
-	test	$1, vn
-	jz	L(mul_2)
-
-C ===========================================================
-C     mul_1 for vp[0] if vn is odd
-
-L(mul_1):
-	mov	R32(un), R32(w0)
-
-	neg	un
-	mov	(up,un,8), %rax
-	mov	(vp), v0
-	mul	v0
-
-	and	$-4, un		C round down to multiple of 4
-	mov	un, n
-
-	and	$3, R32(w0)
-	jz	L(mul_1_prologue_0)
-	cmp	$2, R32(w0)
-	jc	L(mul_1_prologue_1)
-	jz	L(mul_1_prologue_2)
-
-L(mul_1_prologue_3):
-	mov	%rax, w3
-	mov	%rdx, w0
-	lea	L(addmul_prologue_3)(%rip), outer_addr
-	jmp	L(mul_1_entry_3)
-
-	ALIGN(16)
-L(mul_1_prologue_0):
-	mov	%rax, w2
-	mov	%rdx, w3		C note already w0 == 0
-	lea	L(addmul_prologue_0)(%rip), outer_addr
-	jmp	L(mul_1_entry_0)
-
-	ALIGN(16)
-L(mul_1_prologue_1):
-	add	$4, n
-	mov	%rax, w1
-	mov	%rdx, w2
-	mov	$0, R32(w3)
-	mov	(up,n,8), %rax
-	lea	L(addmul_prologue_1)(%rip), outer_addr
-	jmp	L(mul_1_entry_1)
-
-	ALIGN(16)
-L(mul_1_prologue_2):
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	24(up,n,8), %rax
-	mov	$0, R32(w2)
-	mov	$0, R32(w3)
-	lea	L(addmul_prologue_2)(%rip), outer_addr
-	jmp	L(mul_1_entry_2)
-
-
-	C this loop is 10 c/loop = 2.5 c/l on K8
-
-	ALIGN(16)
-L(mul_1_top):
-	mov	w0, -16(rp,n,8)
-	add	%rax, w1
-	mov	(up,n,8), %rax
-	adc	%rdx, w2
-L(mul_1_entry_1):
-	mov	$0, R32(w0)
-	mul	v0
-	mov	w1, -8(rp,n,8)
-	add	%rax, w2
-	adc	%rdx, w3
-L(mul_1_entry_0):
-	mov	8(up,n,8), %rax
-	mul	v0
-	mov	w2, (rp,n,8)
-	add	%rax, w3
-	adc	%rdx, w0
-L(mul_1_entry_3):
-	mov	16(up,n,8), %rax
-	mul	v0
-	mov	w3, 8(rp,n,8)
-	mov	$0, R32(w2)		C zero
-	mov	w2, w3			C zero
-	add	%rax, w0
-	mov	24(up,n,8), %rax
-	mov	w2, w1			C zero
-	adc	%rdx, w1
-L(mul_1_entry_2):
-	mul	v0
-	add	$4, n
-	js	L(mul_1_top)
-
-	mov	w0, -16(rp)
-	add	%rax, w1
-	mov	w1, -8(rp)
-	mov	w2, 8(rp)		C zero last limb of output
-	adc	%rdx, w2
-	mov	w2, (rp)
-
-	dec	vn
-	jz	L(ret)
-
-	lea	-8(up), up
-	lea	8(vp), vp
-
-	mov	un, n
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	jmp	*outer_addr
-
-C ===========================================================
-C     mul_2 for vp[0], vp[1] if vn is even
-
-	ALIGN(16)
-L(mul_2):
-	mov	R32(un), R32(w0)
-
-	neg	un
-	mov	-8(up,un,8), %rax
-	mov	(vp), v0
-	mov	8(vp), v1
-	mul	v1
-
-	and	$-4, un		C round down to multiple of 4
-	mov	un, n
-
-	and	$3, R32(w0)
-	jz	L(mul_2_prologue_0)
-	cmp	$2, R32(w0)
-	jc	L(mul_2_prologue_1)
-	jz	L(mul_2_prologue_2)
-
-L(mul_2_prologue_3):
-	mov	%rax, w1
-	mov	%rdx, w2
-	lea	L(addmul_prologue_3)(%rip), outer_addr
-	jmp	L(mul_2_entry_3)
-
-	ALIGN(16)
-L(mul_2_prologue_0):
-	mov	%rax, w0
-	mov	%rdx, w1
-	lea	L(addmul_prologue_0)(%rip), outer_addr
-	jmp	L(mul_2_entry_0)
-
-	ALIGN(16)
-L(mul_2_prologue_1):
-	mov	%rax, w3
-	mov	%rdx, w0
-	mov	$0, R32(w1)
-	lea	L(addmul_prologue_1)(%rip), outer_addr
-	jmp	L(mul_2_entry_1)
-
-	ALIGN(16)
-L(mul_2_prologue_2):
-	mov	%rax, w2
-	mov	%rdx, w3
-	mov	$0, R32(w0)
-	mov	16(up,n,8), %rax
-	lea	L(addmul_prologue_2)(%rip), outer_addr
-	jmp	L(mul_2_entry_2)
-
-
-	C this loop is 18 c/loop = 2.25 c/l on K8
-
-	ALIGN(16)
-L(mul_2_top):
-	mov     -8(up,n,8), %rax
-	mul     v1
-	add     %rax, w0
-	adc     %rdx, w1
-L(mul_2_entry_0):
-	mov     $0, R32(w2)
-	mov     (up,n,8), %rax
-	mul     v0
-	add     %rax, w0
-	mov     (up,n,8), %rax
-	adc     %rdx, w1
-	adc     $0, R32(w2)
-	mul     v1
-	add     %rax, w1
-	mov     w0, (rp,n,8)
-	adc     %rdx, w2
-L(mul_2_entry_3):
-	mov     8(up,n,8), %rax
-	mul     v0
-	mov     $0, R32(w3)
-	add     %rax, w1
-	adc     %rdx, w2
-	mov     $0, R32(w0)
-	adc     $0, R32(w3)
-	mov     8(up,n,8), %rax
-	mov     w1, 8(rp,n,8)
-	mul     v1
-	add     %rax, w2
-	mov     16(up,n,8), %rax
-	adc     %rdx, w3
-L(mul_2_entry_2):
-	mov     $0, R32(w1)
-	mul     v0
-	add     %rax, w2
-	mov     16(up,n,8), %rax
-	adc     %rdx, w3
-	adc     $0, R32(w0)
-	mul     v1
-	add     %rax, w3
-	mov     w2, 16(rp,n,8)
-	adc     %rdx, w0
-L(mul_2_entry_1):
-	mov     24(up,n,8), %rax
-	mul     v0
-	add     %rax, w3
-	adc     %rdx, w0
-	adc     $0, R32(w1)
-	add     $4, n
-	mov     w3, -8(rp,n,8)
-	jnz     L(mul_2_top)
-
-	mov	w0, (rp)
-	mov	w1, 8(rp)
-
-	sub	$2, vn
-	jz	L(ret)
-
-	lea	16(vp), vp
-	lea	-16(up), up
-
-	mov	un, n
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	jmp	*outer_addr
-
-C ===========================================================
-C     addmul_2 for remaining vp's
-
-	ALIGN(16)
-L(addmul_prologue_0):
-	mov	-8(up,n,8), %rax
-	mul	v1
-	mov	%rax, w1
-	mov	%rdx, w2
-	mov	$0, R32(w3)
-	jmp	L(addmul_entry_0)
-
-	ALIGN(16)
-L(addmul_prologue_1):
-	mov	16(up,n,8), %rax
-	mul	v1
-	mov	%rax, w0
-	mov	%rdx, w1
-	mov	$0, R32(w2)
-	mov	24(up,n,8), %rax
-	jmp	L(addmul_entry_1)
-
-	ALIGN(16)
-L(addmul_prologue_2):
-	mov	8(up,n,8), %rax
-	mul	v1
-	mov	%rax, w3
-	mov	%rdx, w0
-	mov	$0, R32(w1)
-	jmp	L(addmul_entry_2)
-
-	ALIGN(16)
-L(addmul_prologue_3):
-	mov	(up,n,8), %rax
-	mul	v1
-	mov	%rax, w2
-	mov	%rdx, w3
-	mov	$0, R32(w0)
-	mov	$0, R32(w1)
-	jmp	L(addmul_entry_3)
-
-	C this loop is 19 c/loop = 2.375 c/l on K8
-
-	ALIGN(16)
-L(addmul_top):
-	mov	$0, R32(w3)
-	add	%rax, w0
-	mov	-8(up,n,8), %rax
-	adc	%rdx, w1
-	adc	$0, R32(w2)
-	mul	v1
-	add	w0, -8(rp,n,8)
-	adc	%rax, w1
-	adc	%rdx, w2
-L(addmul_entry_0):
-	mov	(up,n,8), %rax
-	mul	v0
-	add	%rax, w1
-	mov	(up,n,8), %rax
-	adc	%rdx, w2
-	adc	$0, R32(w3)
-	mul	v1
-	add	w1, (rp,n,8)
-	mov	$0, R32(w1)
-	adc	%rax, w2
-	mov	$0, R32(w0)
-	adc	%rdx, w3
-L(addmul_entry_3):
-	mov	8(up,n,8), %rax
-	mul	v0
-	add	%rax, w2
-	mov	8(up,n,8), %rax
-	adc	%rdx, w3
-	adc	$0, R32(w0)
-	mul	v1
-	add	w2, 8(rp,n,8)
-	adc	%rax, w3
-	adc	%rdx, w0
-L(addmul_entry_2):
-	mov	16(up,n,8), %rax
-	mul	v0
-	add	%rax, w3
-	mov	16(up,n,8), %rax
-	adc	%rdx, w0
-	adc	$0, R32(w1)
-	mul	v1
-	add	w3, 16(rp,n,8)
-	nop			C don't ask...
-	adc	%rax, w0
-	mov	$0, R32(w2)
-	mov	24(up,n,8), %rax
-	adc	%rdx, w1
-L(addmul_entry_1):
-	mul	v0
-	add	$4, n
-	jnz	L(addmul_top)
-
-	add	%rax, w0
-	adc	%rdx, w1
-	adc	$0, R32(w2)
-
-	add	w0, -8(rp)
-	adc	w1, (rp)
-	adc	w2, 8(rp)
-
-	sub	$2, vn
-	jz	L(ret)
-
-	lea	16(vp), vp
-	lea	-16(up), up
-
-	mov	un, n
-	mov	(vp), v0
-	mov	8(vp), v1
-
-	jmp	*outer_addr
-
-C ===========================================================
-C     accumulate along diagonals if un - vn is small
-
-	ALIGN(16)
-L(diagonal):
-	xor	R32(w0), R32(w0)
-	xor	R32(w1), R32(w1)
-	xor	R32(w2), R32(w2)
-
-	neg	un
-
-	mov	R32(vn), %eax
-	and	$3, %eax
-	jz	L(diag_prologue_0)
-	cmp	$2, %eax
-	jc	L(diag_prologue_1)
-	jz	L(diag_prologue_2)
-
-L(diag_prologue_3):
-	lea	-8(vp), vp
-	mov	vp, vp_inner
-	add	$1, vn
-	mov	vn, n
-	lea	L(diag_entry_3)(%rip), outer_addr
-	jmp	L(diag_entry_3)
-
-L(diag_prologue_0):
-	mov	vp, vp_inner
-	mov	vn, n
-	lea	0(%rip), outer_addr
-	mov     -8(up,n,8), %rax
-	jmp	L(diag_entry_0)
-
-L(diag_prologue_1):
-	lea	8(vp), vp
-	mov	vp, vp_inner
-	add	$3, vn
-	mov	vn, n
-	lea	0(%rip), outer_addr
-	mov     -8(vp_inner), %rax
-	jmp	L(diag_entry_1)
-
-L(diag_prologue_2):
-	lea	-16(vp), vp
-	mov	vp, vp_inner
-	add	$2, vn
-	mov	vn, n
-	lea	0(%rip), outer_addr
-	mov	16(vp_inner), %rax
-	jmp	L(diag_entry_2)
-
-
-	C this loop is 10 c/loop = 2.5 c/l on K8
-
-	ALIGN(16)
-L(diag_top):
-	add     %rax, w0
-	adc     %rdx, w1
-	mov     -8(up,n,8), %rax
-	adc     $0, w2
-L(diag_entry_0):
-	mulq    (vp_inner)
-	add     %rax, w0
-	adc     %rdx, w1
-	adc     $0, w2
-L(diag_entry_3):
-	mov     -16(up,n,8), %rax
-	mulq    8(vp_inner)
-	add     %rax, w0
-	mov     16(vp_inner), %rax
-	adc     %rdx, w1
-	adc     $0, w2
-L(diag_entry_2):
-	mulq    -24(up,n,8)
-	add     %rax, w0
-	mov     24(vp_inner), %rax
-	adc     %rdx, w1
-	lea     32(vp_inner), vp_inner
-	adc     $0, w2
-L(diag_entry_1):
-	mulq    -32(up,n,8)
-	sub     $4, n
-	jnz	L(diag_top)
-
-	add	%rax, w0
-	adc	%rdx, w1
-	adc	$0, w2
-
-	mov	w0, (rp,un,8)
-
-	inc	un
-	jz	L(diag_end)
-
-	mov	vn, n
-	mov	vp, vp_inner
-
-	lea	8(up), up
-	mov	w1, w0
-	mov	w2, w1
-	xor	R32(w2), R32(w2)
-
-	jmp	*outer_addr
-
-L(diag_end):
-	mov	w1, (rp)
-	mov	w2, 8(rp)
-
-L(ret):	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/k8/redc_1.asm b/gmp/mpn/x86_64/k8/redc_1.asm
deleted file mode 100644
index 74538986f9..0000000000
--- a/gmp/mpn/x86_64/k8/redc_1.asm
+++ /dev/null
@@ -1,590 +0,0 @@
-dnl  X86-64 mpn_redc_1 optimised for AMD K8-K10.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2004, 2008, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C AMD bull	 ?
-C AMD pile	 ?
-C AMD steam	 ?
-C AMD bobcat	 ?
-C AMD jaguar	 ?
-C Intel P4	 ?
-C Intel core	 ?
-C Intel NHM	 ?
-C Intel SBR	 ?
-C Intel IBR	 ?
-C Intel HWL	 ?
-C Intel BWL	 ?
-C Intel atom	 ?
-C VIA nano	 ?
-
-C The inner loops of this code are the result of running a code generation and
-C optimisation tool suite written by David Harvey and Torbjörn Granlund.
-
-C TODO
-C  * Micro-optimise, none performed thus far.
-C  * This looks different from other current redc_1.asm variants.  Consider
-C    adapting this to the mainstream style.
-C  * Is this code really faster than more approaches which compute q0 later?
-C    Is the use of a jump jump table faster?  Or is the edge of this due to the
-C    inlined add_n code?
-C  * Put initial m[0] x q0 computation in header.
-C  * Put basecases at the file's end, single them out before the pushes.
-
-define(`rp',          `%rdi')   C rcx
-define(`up',          `%rsi')   C rdx
-define(`mp_param',    `%rdx')   C r8
-define(`n',           `%rcx')   C r9
-define(`u0inv',       `%r8')    C stack
-
-define(`i',           `%r11')
-define(`nneg',        `%r12')
-define(`mp',          `%r13')
-define(`q0',          `%rbp')
-define(`vp',          `%rdx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_redc_1)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	push	%rbp
-	mov	(up), q0		C up[0]
-	push	%rbx
-	imul	u0inv, q0		C first q0, for all execution paths
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	mov	n, nneg
-	neg	nneg
-	lea	(mp_param,n,8), mp	C mp += n
-	lea	-16(up,n,8), up		C up += n
-
-	mov	R32(n), R32(%rax)
-	and	$3, R32(%rax)
-	lea	4(%rax), %r9
-	cmp	$4, R32(n)
-	cmovg	%r9, %rax
-	lea	L(tab)(%rip), %r9
-ifdef(`PIC',`
-	movslq	(%r9,%rax,4), %rax
-	add	%r9, %rax
-	jmp	*%rax
-',`
-	jmp	*(%r9,%rax,8)
-')
-
-	JUMPTABSECT
-	ALIGN(8)
-L(tab):	JMPENT(	L(0m4), L(tab))
-	JMPENT(	L(1), L(tab))
-	JMPENT(	L(2), L(tab))
-	JMPENT(	L(3), L(tab))
-	JMPENT(	L(0m4), L(tab))
-	JMPENT(	L(1m4), L(tab))
-	JMPENT(	L(2m4), L(tab))
-	JMPENT(	L(3m4), L(tab))
-	TEXT
-
-	ALIGN(16)
-L(1):	mov	(mp_param), %rax
-	mul	q0
-	add	8(up), %rax
-	adc	16(up), %rdx
-	mov	%rdx, (rp)
-	mov	$0, R32(%rax)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-
-
-	ALIGN(16)
-L(2):	mov	(mp_param), %rax
-	mul	q0
-	xor	R32(%r14), R32(%r14)
-	mov	%rax, %r10
-	mov	-8(mp), %rax
-	mov	%rdx, %r9
-	mul	q0
-	add	(up), %r10
-	adc	%rax, %r9
-	adc	%rdx, %r14
-	add	8(up), %r9
-	adc	$0, %r14
-	mov	%r9, q0
-	imul	u0inv, q0
-	mov	-16(mp), %rax
-	mul	q0
-	xor	R32(%rbx), R32(%rbx)
-	mov	%rax, %r10
-	mov	-8(mp), %rax
-	mov	%rdx, %r11
-	mul	q0
-	add	%r9, %r10
-	adc	%rax, %r11
-	adc	%rdx, %rbx
-	add	16(up), %r11
-	adc	$0, %rbx
-	xor	R32(%rax), R32(%rax)
-	add	%r11, %r14
-	adc	24(up), %rbx
-	mov	%r14, (rp)
-	mov	%rbx, 8(rp)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-
-
-L(3):	mov	(mp_param), %rax
-	mul	q0
-	mov	%rax, %rbx
-	mov	%rdx, %r10
-	mov	-16(mp), %rax
-	mul	q0
-	xor	R32(%r9), R32(%r9)
-	xor	R32(%r14), R32(%r14)
-	add	-8(up), %rbx
-	adc	%rax, %r10
-	mov	-8(mp), %rax
-	adc	%rdx, %r9
-	mul	q0
-	add	(up), %r10
-	mov	%r10, (up)
-	adc	%rax, %r9
-	adc	%rdx, %r14
-	mov	%r10, q0
-	imul	u0inv, q0
-	add	%r9, 8(up)
-	adc	$0, %r14
-	mov	%r14, -8(up)
-
-	mov	-24(mp), %rax
-	mul	q0
-	mov	%rax, %rbx
-	mov	%rdx, %r10
-	mov	-16(mp), %rax
-	mul	q0
-	xor	R32(%r9), R32(%r9)
-	xor	R32(%r14), R32(%r14)
-	add	(up), %rbx
-	adc	%rax, %r10
-	mov	-8(mp), %rax
-	adc	%rdx, %r9
-	mul	q0
-	add	8(up), %r10
-	mov	%r10, 8(up)
-	adc	%rax, %r9
-	adc	%rdx, %r14
-	mov	%r10, q0
-	imul	u0inv, q0
-	add	%r9, 16(up)
-	adc	$0, %r14
-	mov	%r14, (up)
-
-	mov	-24(mp), %rax
-	mul	q0
-	mov	%rax, %rbx
-	mov	%rdx, %r10
-	mov	-16(mp), %rax
-	mul	q0
-	xor	R32(%r9), R32(%r9)
-	xor	R32(%r14), R32(%r14)
-	add	8(up), %rbx
-	adc	%rax, %r10
-	mov	-8(mp), %rax
-	adc	%rdx, %r9
-	mul	q0
-	add	16(up), %r10
-	adc	%rax, %r9
-	adc	%rdx, %r14
-	add	24(up), %r9
-	adc	$0, %r14
-
-	xor	R32(%rax), R32(%rax)
-	add	-8(up), %r10
-	adc	(up), %r9
-	adc	32(up), %r14
-	mov	%r10, (rp)
-	mov	%r9, 8(rp)
-	mov	%r14, 16(rp)
-	adc	R32(%rax), R32(%rax)
-	jmp	L(ret)
-
-
-	ALIGN(16)
-L(2m4):
-L(lo2):	mov	(mp,nneg,8), %rax
-	mul	q0
-	xor	R32(%r14), R32(%r14)
-	xor	R32(%rbx), R32(%rbx)
-	mov	%rax, %r10
-	mov	8(mp,nneg,8), %rax
-	mov	24(up,nneg,8), %r15
-	mov	%rdx, %r9
-	mul	q0
-	add	16(up,nneg,8), %r10
-	adc	%rax, %r9
-	mov	16(mp,nneg,8), %rax
-	adc	%rdx, %r14
-	mul	q0
-	mov	$0, R32(%r10)		C xor?
-	lea	2(nneg), i
-	add	%r9, %r15
-	imul	u0inv, %r15
-	jmp	 L(e2)
-
-	ALIGN(16)
-L(li2):	add	%r10, (up,i,8)
-	adc	%rax, %r9
-	mov	(mp,i,8), %rax
-	adc	%rdx, %r14
-	xor	R32(%r10), R32(%r10)
-	mul	q0
-L(e2):	add	%r9, 8(up,i,8)
-	adc	%rax, %r14
-	adc	%rdx, %rbx
-	mov	8(mp,i,8), %rax
-	mul	q0
-	add	%r14, 16(up,i,8)
-	adc	%rax, %rbx
-	adc	%rdx, %r10
-	mov	16(mp,i,8), %rax
-	mul	q0
-	add	%rbx, 24(up,i,8)
-	mov	$0, R32(%r14)		C zero
-	mov	%r14, %rbx		C zero
-	adc	%rax, %r10
-	mov	24(mp,i,8), %rax
-	mov	%r14, %r9		C zero
-	adc	%rdx, %r9
-	mul	q0
-	add	$4, i
-	js	 L(li2)
-
-L(le2):	add	%r10, (up)
-	adc	%rax, %r9
-	adc	%r14, %rdx
-	add	%r9, 8(up)
-	adc	$0, %rdx
-	mov	%rdx, 16(up,nneg,8)	C up[0]
-	add	$8, up
-	mov	%r15, q0
-	dec	n
-	jnz	L(lo2)
-
-	mov	nneg, n
-	sar	$2, n
-	lea	32(up,nneg,8), up
-	lea	(up,nneg,8), vp
-
-	mov	-16(up), %r8
-	mov	-8(up), %r9
-	add	-16(vp), %r8
-	adc	-8(vp), %r9
-	mov	%r8, (rp)
-	mov	%r9, 8(rp)
-	lea	16(rp), rp
-	jmp	L(addx)
-
-
-	ALIGN(16)
-L(1m4):
-L(lo1):	mov	(mp,nneg,8), %rax
-	xor	%r9, %r9
-	xor	R32(%rbx), R32(%rbx)
-	mul	q0
-	mov	%rax, %r9
-	mov	8(mp,nneg,8), %rax
-	mov	24(up,nneg,8), %r15
-	mov	%rdx, %r14
-	mov	$0, R32(%r10)		C xor?
-	mul	q0
-	add	16(up,nneg,8), %r9
-	adc	%rax, %r14
-	adc	%rdx, %rbx
-	mov	16(mp,nneg,8), %rax
-	mul	q0
-	lea	1(nneg), i
-	add	%r14, %r15
-	imul	u0inv, %r15
-	jmp	 L(e1)
-
-	ALIGN(16)
-L(li1):	add	%r10, (up,i,8)
-	adc	%rax, %r9
-	mov	(mp,i,8), %rax
-	adc	%rdx, %r14
-	xor	R32(%r10), R32(%r10)
-	mul	q0
-	add	%r9, 8(up,i,8)
-	adc	%rax, %r14
-	adc	%rdx, %rbx
-	mov	8(mp,i,8), %rax
-	mul	q0
-L(e1):	add	%r14, 16(up,i,8)
-	adc	%rax, %rbx
-	adc	%rdx, %r10
-	mov	16(mp,i,8), %rax
-	mul	q0
-	add	%rbx, 24(up,i,8)
-	mov	$0, R32(%r14)		C zero
-	mov	%r14, %rbx		C zero
-	adc	%rax, %r10
-	mov	24(mp,i,8), %rax
-	mov	%r14, %r9		C zero
-	adc	%rdx, %r9
-	mul	q0
-	add	$4, i
-	js	 L(li1)
-
-L(le1):	add	%r10, (up)
-	adc	%rax, %r9
-	adc	%r14, %rdx
-	add	%r9, 8(up)
-	adc	$0, %rdx
-	mov	%rdx, 16(up,nneg,8)	C up[0]
-	add	$8, up
-	mov	%r15, q0
-	dec	n
-	jnz	L(lo1)
-
-	mov	nneg, n
-	sar	$2, n
-	lea	24(up,nneg,8), up
-	lea	(up,nneg,8), vp
-
-	mov	-8(up), %r8
-	add	-8(vp), %r8
-	mov	%r8, (rp)
-	lea	8(rp), rp
-	jmp	L(addx)
-
-
-	ALIGN(16)
-L(0m4):
-L(lo0):	mov	(mp,nneg,8), %rax
-	mov	nneg, i
-	mul	q0
-	xor	R32(%r10), R32(%r10)
-	mov	%rax, %r14
-	mov	%rdx, %rbx
-	mov	8(mp,nneg,8), %rax
-	mov	24(up,nneg,8), %r15
-	mul	q0
-	add	16(up,nneg,8), %r14
-	adc	%rax, %rbx
-	adc	%rdx, %r10
-	add	%rbx, %r15
-	imul	u0inv, %r15
-	jmp	L(e0)
-
-	ALIGN(16)
-L(li0):	add	%r10, (up,i,8)
-	adc	%rax, %r9
-	mov	(mp,i,8), %rax
-	adc	%rdx, %r14
-	xor	R32(%r10), R32(%r10)
-	mul	q0
-	add	%r9, 8(up,i,8)
-	adc	%rax, %r14
-	adc	%rdx, %rbx
-	mov	8(mp,i,8), %rax
-	mul	q0
-	add	%r14, 16(up,i,8)
-	adc	%rax, %rbx
-	adc	%rdx, %r10
-L(e0):	mov	16(mp,i,8), %rax
-	mul	q0
-	add	%rbx, 24(up,i,8)
-	mov	$0, R32(%r14)		C zero
-	mov	%r14, %rbx		C zero
-	adc	%rax, %r10
-	mov	24(mp,i,8), %rax
-	mov	%r14, %r9		C zero
-	adc	%rdx, %r9
-	mul	q0
-	add	$4, i
-	js	 L(li0)
-
-L(le0):	add	%r10, (up)
-	adc	%rax, %r9
-	adc	%r14, %rdx
-	add	%r9, 8(up)
-	adc	$0, %rdx
-	mov	%rdx, 16(up,nneg,8)	C up[0]
-	add	$8, up
-	mov	%r15, q0
-	dec	n
-	jnz	L(lo0)
-
-	mov	nneg, n
-	sar	$2, n
-	clc
-	lea	16(up,nneg,8), up
-	lea	(up,nneg,8), vp
-	jmp	L(addy)
-
-
-	ALIGN(16)
-L(3m4):
-L(lo3):	mov	(mp,nneg,8), %rax
-	mul	q0
-	mov	%rax, %rbx
-	mov	%rdx, %r10
-	mov	8(mp,nneg,8), %rax
-	mov	24(up,nneg,8), %r15
-	mul	q0
-	add	16(up,nneg,8), %rbx	C result is zero, might carry
-	mov	$0, R32(%rbx)		C zero
-	mov	%rbx, %r14		C zero
-	adc	%rax, %r10
-	mov	16(mp,nneg,8), %rax
-	mov	%r14, %r9		C zero
-	adc	%rdx, %r9
-	add	%r10, %r15
-	mul	q0
-	lea	3(nneg), i
-	imul	u0inv, %r15
-C	jmp	L(li3)
-
-	ALIGN(16)
-L(li3):	add	%r10, (up,i,8)
-	adc	%rax, %r9
-	mov	(mp,i,8), %rax
-	adc	%rdx, %r14
-	xor	R32(%r10), R32(%r10)
-	mul	q0
-	add	%r9, 8(up,i,8)
-	adc	%rax, %r14
-	adc	%rdx, %rbx
-	mov	8(mp,i,8), %rax
-	mul	q0
-	add	%r14, 16(up,i,8)
-	adc	%rax, %rbx
-	adc	%rdx, %r10
-	mov	16(mp,i,8), %rax
-	mul	q0
-	add	%rbx, 24(up,i,8)
-	mov	$0, R32(%r14)		C zero
-	mov	%r14, %rbx		C zero
-	adc	%rax, %r10
-	mov	24(mp,i,8), %rax
-	mov	%r14, %r9		C zero
-	adc	%rdx, %r9
-	mul	q0
-	add	$4, i
-	js	 L(li3)
-
-L(le3):	add	%r10, (up)
-	adc	%rax, %r9
-	adc	%r14, %rdx
-	add	%r9, 8(up)
-	adc	$0, %rdx
-	mov	%rdx, 16(up,nneg,8)	C up[0]
-	mov	%r15, q0
-	lea	8(up), up
-	dec	n
-	jnz	L(lo3)
-
-
-C ==== Addition code ====
-	mov	nneg, n
-	sar	$2, n
-	lea	40(up,nneg,8), up
-	lea	(up,nneg,8), vp
-
-	mov	-24(up), %r8
-	mov	-16(up), %r9
-	mov	-8(up), %r10
-	add	-24(vp), %r8
-	adc	-16(vp), %r9
-	adc	-8(vp), %r10
-	mov	%r8, (rp)
-	mov	%r9, 8(rp)
-	mov	%r10, 16(rp)
-	lea	24(rp), rp
-
-L(addx):inc	n
-	jz	L(ad3)
-
-L(addy):mov	(up), %r8
-	mov	8(up), %r9
-	inc	n
-	jmp	L(mid)
-
-C	ALIGN(16)
-L(al3):	adc	(vp), %r8
-	adc	8(vp), %r9
-	adc	16(vp), %r10
-	adc	24(vp), %r11
-	mov	%r8, (rp)
-	lea	32(up), up
-	mov	%r9, 8(rp)
-	mov	%r10, 16(rp)
-	inc	n
-	mov	%r11, 24(rp)
-	lea	32(vp), vp
-	mov	(up), %r8
-	mov	8(up), %r9
-	lea	32(rp), rp
-L(mid):	mov	16(up), %r10
-	mov	24(up), %r11
-	jnz	L(al3)
-
-L(ae3):	adc	(vp), %r8
-	adc	8(vp), %r9
-	adc	16(vp), %r10
-	adc	24(vp), %r11
-	mov	%r8, (rp)
-	mov	%r9, 8(rp)
-	mov	%r10, 16(rp)
-	mov	%r11, 24(rp)
-
-L(ad3):	mov	R32(n), R32(%rax)	C zero
-	adc	R32(%rax), R32(%rax)
-
-L(ret):	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbx
-	pop	%rbp
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/logops_n.asm b/gmp/mpn/x86_64/logops_n.asm
index b277f58962..1022b61376 100644
--- a/gmp/mpn/x86_64/logops_n.asm
+++ b/gmp/mpn/x86_64/logops_n.asm
@@ -1,45 +1,30 @@
 dnl  AMD64 logops.
 
-dnl  Copyright 2004-2006, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2005, 2006 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	 1.5	with fluctuations for variant 2 and 3
-C AMD K10	 1.5	with fluctuations for all variants
-C Intel P4	 2.8/3.35/3.60 (variant1/variant2/variant3)
-C Intel core2	 2
-C Intel NHM	 2
-C Intel SBR	 1.5/1.75/1.75
-C Intel atom	 3.75
-C VIA nano	 3.25
+C K8,K9:	 1.5
+C K10:		 1.75-2 (fluctuating)
+C P4:		 2.8/3.35/3.60 (variant1/variant2/variant3)
+C P6-15:	 2.0
 
 ifdef(`OPERATION_and_n',`
   define(`func',`mpn_and_n')
@@ -83,8 +68,6 @@ define(`up',`%rsi')
 define(`vp',`%rdx')
 define(`n',`%rcx')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
 
 ASM_START()
 
@@ -92,16 +75,15 @@ ifdef(`VARIANT_1',`
 	TEXT
 	ALIGN(32)
 PROLOGUE(func)
-	FUNC_ENTRY(4)
 	movq	(vp), %r8
-	movl	R32(%rcx), R32(%rax)
+	movl	%ecx, %eax
 	leaq	(vp,n,8), vp
 	leaq	(up,n,8), up
 	leaq	(rp,n,8), rp
 	negq	n
-	andl	$3, R32(%rax)
+	andl	$3, %eax
 	je	L(b00)
-	cmpl	$2, R32(%rax)
+	cmpl	$2, %eax
 	jc	L(b01)
 	je	L(b10)
 
@@ -131,8 +113,7 @@ L(e10):	movq	24(vp,n,8), %r9
 	movq	%r9, 24(rp,n,8)
 	addq	$4, n
 	jnc	L(oop)
-L(ret):	FUNC_EXIT()
-	ret
+L(ret):	ret
 EPILOGUE()
 ')
 
@@ -140,17 +121,16 @@ ifdef(`VARIANT_2',`
 	TEXT
 	ALIGN(32)
 PROLOGUE(func)
-	FUNC_ENTRY(4)
 	movq	(vp), %r8
 	notq	%r8
-	movl	R32(%rcx), R32(%rax)
+	movl	%ecx, %eax
 	leaq	(vp,n,8), vp
 	leaq	(up,n,8), up
 	leaq	(rp,n,8), rp
 	negq	n
-	andl	$3, R32(%rax)
+	andl	$3, %eax
 	je	L(b00)
-	cmpl	$2, R32(%rax)
+	cmpl	$2, %eax
 	jc	L(b01)
 	je	L(b10)
 
@@ -184,8 +164,7 @@ L(e10):	movq	24(vp,n,8), %r9
 	movq	%r9, 24(rp,n,8)
 	addq	$4, n
 	jnc	L(oop)
-L(ret):	FUNC_EXIT()
-	ret
+L(ret):	ret
 EPILOGUE()
 ')
 
@@ -193,16 +172,15 @@ ifdef(`VARIANT_3',`
 	TEXT
 	ALIGN(32)
 PROLOGUE(func)
-	FUNC_ENTRY(4)
 	movq	(vp), %r8
-	movl	R32(%rcx), R32(%rax)
+	movl	%ecx, %eax
 	leaq	(vp,n,8), vp
 	leaq	(up,n,8), up
 	leaq	(rp,n,8), rp
 	negq	n
-	andl	$3, R32(%rax)
+	andl	$3, %eax
 	je	L(b00)
-	cmpl	$2, R32(%rax)
+	cmpl	$2, %eax
 	jc	L(b01)
 	je	L(b10)
 
@@ -238,7 +216,6 @@ L(e10):	movq	24(vp,n,8), %r9
 	movq	%r9, 24(rp,n,8)
 	addq	$4, n
 	jnc	L(oop)
-L(ret):	FUNC_EXIT()
-	ret
+L(ret):	ret
 EPILOGUE()
 ')
diff --git a/gmp/mpn/x86_64/lshift.asm b/gmp/mpn/x86_64/lshift.asm
index f368944b85..11fe59c24e 100644
--- a/gmp/mpn/x86_64/lshift.asm
+++ b/gmp/mpn/x86_64/lshift.asm
@@ -1,45 +1,31 @@
 dnl  AMD64 mpn_lshift -- mpn left shift.
 
-dnl  Copyright 2003, 2005, 2007, 2009, 2011, 2012 Free Software Foundation,
-dnl  Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb   cycles/limb cnt=1
-C AMD K8,K9	 2.375		 1.375
-C AMD K10	 2.375		 1.375
-C Intel P4	 8		10.5
-C Intel core2	 2.11		 4.28
-C Intel corei	 ?		 ?
-C Intel atom	 5.75		 3.5
-C VIA nano	 3.5		 2.25
+C K8,K9:	 2.375		 1.375
+C K10:		 2.375		 1.375
+C P4:		 8		10.5
+C P6-15 (Core2): 2.11		 4.28
+C P6-28 (Atom):	 5.75		 3.5
 
 
 C INPUT PARAMETERS
@@ -48,19 +34,15 @@ define(`up',	`%rsi')
 define(`n',	`%rdx')
 define(`cnt',	`%rcx')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
 	TEXT
 	ALIGN(32)
 PROLOGUE(mpn_lshift)
-	FUNC_ENTRY(4)
-	cmp	$1, R8(%rcx)
+	cmp	$1, %cl
 	jne	L(gen)
 
 C For cnt=1 we want to work from lowest limb towards higher limbs.
-C Check for bad overlap (up=rp is OK!) up=rp+1..rp+n-1 is bad.
+C Check for bad overlap (up=rp is OK!) up=rp+1..rp+n-1 is bad.
 C FIXME: this could surely be done more cleverly.
 
 	mov    rp, %rax
@@ -95,30 +77,27 @@ L(t1):	mov	(up), %r8
 	dec	n
 	jne	L(t1)
 
-	inc	R32(%rax)
-	dec	R32(%rax)
+	inc	%eax
+	dec	%eax
 	jne	L(n00)
-	adc	R32(%rax), R32(%rax)
-	FUNC_EXIT()
+	adc	%eax, %eax
 	ret
-L(e1):	test	R32(%rax), R32(%rax)	C clear cy
+L(e1):	test	%eax, %eax			C clear cy
 L(n00):	mov	(up), %r8
-	dec	R32(%rax)
+	dec	%eax
 	jne	L(n01)
 	adc	%r8, %r8
 	mov	%r8, (rp)
-L(ret):	adc	R32(%rax), R32(%rax)
-	FUNC_EXIT()
+L(ret):	adc	%eax, %eax
 	ret
-L(n01):	dec	R32(%rax)
+L(n01):	dec	%eax
 	mov	8(up), %r9
 	jne	L(n10)
 	adc	%r8, %r8
 	adc	%r9, %r9
 	mov	%r8, (rp)
 	mov	%r9, 8(rp)
-	adc	R32(%rax), R32(%rax)
-	FUNC_EXIT()
+	adc	%eax, %eax
 	ret
 L(n10):	mov	16(up), %r10
 	adc	%r8, %r8
@@ -127,15 +106,14 @@ L(n10):	mov	16(up), %r10
 	mov	%r8, (rp)
 	mov	%r9, 8(rp)
 	mov	%r10, 16(rp)
-	adc	$-1, R32(%rax)
-	FUNC_EXIT()
+	adc	$-1, %eax
 	ret
 
-L(gen):	neg	R32(%rcx)		C put rsh count in cl
+L(gen):	neg	%ecx			C put rsh count in cl
 	mov	-8(up,n,8), %rax
-	shr	R8(%rcx), %rax		C function return value
+	shr	%cl, %rax		C function return value
 
-	neg	R32(%rcx)		C put lsh count in cl
+	neg	%ecx			C put lsh count in cl
 	lea	1(n), R32(%r8)
 	and	$3, R32(%r8)
 	je	L(rlx)			C jump for n = 3, 7, 11, ...
@@ -144,10 +122,10 @@ L(gen):	neg	R32(%rcx)		C put rsh count in cl
 	jne	L(1)
 C	n = 4, 8, 12, ...
 	mov	-8(up,n,8), %r10
-	shl	R8(%rcx), %r10
-	neg	R32(%rcx)		C put rsh count in cl
+	shl	%cl, %r10
+	neg	%ecx			C put rsh count in cl
 	mov	-16(up,n,8), %r8
-	shr	R8(%rcx), %r8
+	shr	%cl, %r8
 	or	%r8, %r10
 	mov	%r10, -8(rp,n,8)
 	dec	n
@@ -157,91 +135,90 @@ L(1):	dec	R32(%r8)
 	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
 C	n = 2, 6, 10, 16, ...
 	mov	-8(up,n,8), %r10
-	shl	R8(%rcx), %r10
-	neg	R32(%rcx)		C put rsh count in cl
+	shl	%cl, %r10
+	neg	%ecx			C put rsh count in cl
 	mov	-16(up,n,8), %r8
-	shr	R8(%rcx), %r8
+	shr	%cl, %r8
 	or	%r8, %r10
 	mov	%r10, -8(rp,n,8)
 	dec	n
-	neg	R32(%rcx)		C put lsh count in cl
+	neg	%ecx			C put lsh count in cl
 L(1x):
 	cmp	$1, n
 	je	L(ast)
 	mov	-8(up,n,8), %r10
-	shl	R8(%rcx), %r10
+	shl	%cl, %r10
 	mov	-16(up,n,8), %r11
-	shl	R8(%rcx), %r11
-	neg	R32(%rcx)		C put rsh count in cl
+	shl	%cl, %r11
+	neg	%ecx			C put rsh count in cl
 	mov	-16(up,n,8), %r8
 	mov	-24(up,n,8), %r9
-	shr	R8(%rcx), %r8
+	shr	%cl, %r8
 	or	%r8, %r10
-	shr	R8(%rcx), %r9
+	shr	%cl, %r9
 	or	%r9, %r11
 	mov	%r10, -8(rp,n,8)
 	mov	%r11, -16(rp,n,8)
 	sub	$2, n
 
-L(rll):	neg	R32(%rcx)		C put lsh count in cl
+L(rll):	neg	%ecx			C put lsh count in cl
 L(rlx):	mov	-8(up,n,8), %r10
-	shl	R8(%rcx), %r10
+	shl	%cl, %r10
 	mov	-16(up,n,8), %r11
-	shl	R8(%rcx), %r11
+	shl	%cl, %r11
 
 	sub	$4, n			C				      4
 	jb	L(end)			C				      2
 	ALIGN(16)
 L(top):
 	C finish stuff from lsh block
-	neg	R32(%rcx)		C put rsh count in cl
+	neg	%ecx			C put rsh count in cl
 	mov	16(up,n,8), %r8
 	mov	8(up,n,8), %r9
-	shr	R8(%rcx), %r8
+	shr	%cl, %r8
 	or	%r8, %r10
-	shr	R8(%rcx), %r9
+	shr	%cl, %r9
 	or	%r9, %r11
 	mov	%r10, 24(rp,n,8)
 	mov	%r11, 16(rp,n,8)
 	C start two new rsh
 	mov	0(up,n,8), %r8
 	mov	-8(up,n,8), %r9
-	shr	R8(%rcx), %r8
-	shr	R8(%rcx), %r9
+	shr	%cl, %r8
+	shr	%cl, %r9
 
 	C finish stuff from rsh block
-	neg	R32(%rcx)		C put lsh count in cl
+	neg	%ecx			C put lsh count in cl
 	mov	8(up,n,8), %r10
 	mov	0(up,n,8), %r11
-	shl	R8(%rcx), %r10
+	shl	%cl, %r10
 	or	%r10, %r8
-	shl	R8(%rcx), %r11
+	shl	%cl, %r11
 	or	%r11, %r9
 	mov	%r8, 8(rp,n,8)
 	mov	%r9, 0(rp,n,8)
 	C start two new lsh
 	mov	-8(up,n,8), %r10
 	mov	-16(up,n,8), %r11
-	shl	R8(%rcx), %r10
-	shl	R8(%rcx), %r11
+	shl	%cl, %r10
+	shl	%cl, %r11
 
 	sub	$4, n
 	jae	L(top)			C				      2
 L(end):
-	neg	R32(%rcx)		C put rsh count in cl
-	mov	8(up), %r8
-	shr	R8(%rcx), %r8
+	neg	%ecx			C put rsh count in cl
+	mov	16(up,n,8), %r8
+	shr	%cl, %r8
 	or	%r8, %r10
-	mov	(up), %r9
-	shr	R8(%rcx), %r9
+	mov	8(up,n,8), %r9
+	shr	%cl, %r9
 	or	%r9, %r11
-	mov	%r10, 16(rp)
-	mov	%r11, 8(rp)
+	mov	%r10, 24(rp,n,8)
+	mov	%r11, 16(rp,n,8)
 
-	neg	R32(%rcx)		C put lsh count in cl
+	neg	%ecx			C put lsh count in cl
 L(ast):	mov	(up), %r10
-	shl	R8(%rcx), %r10
+	shl	%cl, %r10
 	mov	%r10, (rp)
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/lshiftc.asm b/gmp/mpn/x86_64/lshiftc.asm
deleted file mode 100644
index c4ba04a173..0000000000
--- a/gmp/mpn/x86_64/lshiftc.asm
+++ /dev/null
@@ -1,182 +0,0 @@
-dnl  AMD64 mpn_lshiftc -- mpn left shift with complement.
-
-dnl  Copyright 2003, 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb
-C AMD K8,K9	 2.75
-C AMD K10	 2.75
-C Intel P4	 ?
-C Intel core2	 ?
-C Intel corei	 ?
-C Intel atom	 ?
-C VIA nano	 3.75
-
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`n',	`%rdx')
-define(`cnt',	`%rcx')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_lshiftc)
-	FUNC_ENTRY(4)
-	neg	R32(%rcx)		C put rsh count in cl
-	mov	-8(up,n,8), %rax
-	shr	R8(%rcx), %rax		C function return value
-
-	neg	R32(%rcx)		C put lsh count in cl
-	lea	1(n), R32(%r8)
-	and	$3, R32(%r8)
-	je	L(rlx)			C jump for n = 3, 7, 11, ...
-
-	dec	R32(%r8)
-	jne	L(1)
-C	n = 4, 8, 12, ...
-	mov	-8(up,n,8), %r10
-	shl	R8(%rcx), %r10
-	neg	R32(%rcx)		C put rsh count in cl
-	mov	-16(up,n,8), %r8
-	shr	R8(%rcx), %r8
-	or	%r8, %r10
-	not	%r10
-	mov	%r10, -8(rp,n,8)
-	dec	n
-	jmp	L(rll)
-
-L(1):	dec	R32(%r8)
-	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
-C	n = 2, 6, 10, 16, ...
-	mov	-8(up,n,8), %r10
-	shl	R8(%rcx), %r10
-	neg	R32(%rcx)		C put rsh count in cl
-	mov	-16(up,n,8), %r8
-	shr	R8(%rcx), %r8
-	or	%r8, %r10
-	not	%r10
-	mov	%r10, -8(rp,n,8)
-	dec	n
-	neg	R32(%rcx)		C put lsh count in cl
-L(1x):
-	cmp	$1, n
-	je	L(ast)
-	mov	-8(up,n,8), %r10
-	shl	R8(%rcx), %r10
-	mov	-16(up,n,8), %r11
-	shl	R8(%rcx), %r11
-	neg	R32(%rcx)		C put rsh count in cl
-	mov	-16(up,n,8), %r8
-	mov	-24(up,n,8), %r9
-	shr	R8(%rcx), %r8
-	or	%r8, %r10
-	shr	R8(%rcx), %r9
-	or	%r9, %r11
-	not	%r10
-	not	%r11
-	mov	%r10, -8(rp,n,8)
-	mov	%r11, -16(rp,n,8)
-	sub	$2, n
-
-L(rll):	neg	R32(%rcx)		C put lsh count in cl
-L(rlx):	mov	-8(up,n,8), %r10
-	shl	R8(%rcx), %r10
-	mov	-16(up,n,8), %r11
-	shl	R8(%rcx), %r11
-
-	sub	$4, n			C				      4
-	jb	L(end)			C				      2
-	ALIGN(16)
-L(top):
-	C finish stuff from lsh block
-	neg	R32(%rcx)		C put rsh count in cl
-	mov	16(up,n,8), %r8
-	mov	8(up,n,8), %r9
-	shr	R8(%rcx), %r8
-	or	%r8, %r10
-	shr	R8(%rcx), %r9
-	or	%r9, %r11
-	not	%r10
-	not	%r11
-	mov	%r10, 24(rp,n,8)
-	mov	%r11, 16(rp,n,8)
-	C start two new rsh
-	mov	0(up,n,8), %r8
-	mov	-8(up,n,8), %r9
-	shr	R8(%rcx), %r8
-	shr	R8(%rcx), %r9
-
-	C finish stuff from rsh block
-	neg	R32(%rcx)		C put lsh count in cl
-	mov	8(up,n,8), %r10
-	mov	0(up,n,8), %r11
-	shl	R8(%rcx), %r10
-	or	%r10, %r8
-	shl	R8(%rcx), %r11
-	or	%r11, %r9
-	not	%r8
-	not	%r9
-	mov	%r8, 8(rp,n,8)
-	mov	%r9, 0(rp,n,8)
-	C start two new lsh
-	mov	-8(up,n,8), %r10
-	mov	-16(up,n,8), %r11
-	shl	R8(%rcx), %r10
-	shl	R8(%rcx), %r11
-
-	sub	$4, n
-	jae	L(top)			C				      2
-L(end):
-	neg	R32(%rcx)		C put rsh count in cl
-	mov	8(up), %r8
-	shr	R8(%rcx), %r8
-	or	%r8, %r10
-	mov	(up), %r9
-	shr	R8(%rcx), %r9
-	or	%r9, %r11
-	not	%r10
-	not	%r11
-	mov	%r10, 16(rp)
-	mov	%r11, 8(rp)
-
-	neg	R32(%rcx)		C put lsh count in cl
-L(ast):	mov	(up), %r10
-	shl	R8(%rcx), %r10
-	not	%r10
-	mov	%r10, (rp)
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/lshsub_n.asm b/gmp/mpn/x86_64/lshsub_n.asm
index 4d428c0bd2..dc8576b220 100644
--- a/gmp/mpn/x86_64/lshsub_n.asm
+++ b/gmp/mpn/x86_64/lshsub_n.asm
@@ -1,44 +1,30 @@
 dnl  AMD64 mpn_lshsub_n.  R = 2^k(U - V).
 
-dnl  Copyright 2006, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2006 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	 3.15	(mpn_sub_n + mpn_lshift costs about 4 c/l)
-C AMD K10	 3.15	(mpn_sub_n + mpn_lshift costs about 4 c/l)
-C Intel P4	16.5
-C Intel core2	 4.35
-C Intel corei	 ?
-C Intel atom	 ?
-C VIA nano	 ?
+C K8,K9:	 3.15	(mpn_sub_n + mpn_lshift costs about 4 c/l)
+C K10:		 3.15	(mpn_sub_n + mpn_lshift costs about 4 c/l)
+C P4:		16.5
+C P6-15:	 4.35
 
 C This was written quickly and not optimized at all, but it runs very well on
 C K8.  But perhaps one could get under 3 c/l.  Ideas:
@@ -53,17 +39,12 @@ define(`rp',	`%rdi')
 define(`up',	`%rsi')
 define(`vp',	`%rdx')
 define(`n',	`%rcx')
-define(`cnt',	`%r8')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
+define(`cnt',	`%r8')	C shift count
 
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_lshsub_n)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')
 
 	push	%r12
 	push	%r13
@@ -72,32 +53,32 @@ IFDOS(`	mov	56(%rsp), %r8d	')
 	push	%rbx
 
 	mov	n, %rax
-	xor	R32(%rbx), R32(%rbx)	C clear carry save register
-	mov	R32(%r8), R32(%rcx)	C shift count
-	xor	R32(%r15), R32(%r15)	C limb carry
+	xor	%ebx, %ebx		C clear carry save register
+	mov	%r8d, %ecx		C shift count
+	xor	%r15d, %r15d		C limb carry
 
-	mov	R32(%rax), R32(%r11)
-	and	$3, R32(%r11)
+	mov	%eax, %r11d
+	and	$3, %r11d
 	je	L(4)
-	sub	$1, R32(%r11)
+	sub	$1, %r11d
 
 L(oopette):
-	add	R32(%rbx), R32(%rbx)	C restore carry flag
+	add	%ebx, %ebx		C restore carry flag
 	mov	0(up), %r8
 	lea	8(up), up
 	sbb	0(vp), %r8
 	mov	%r8, %r12
-	sbb	R32(%rbx), R32(%rbx)	C save carry flag
-	shl	R8(%rcx), %r8
+	sbb	%ebx, %ebx		C save carry flag
+	shl	%cl, %r8
 	or	%r15, %r8
 	mov	%r12, %r15
 	lea	8(vp), vp
-	neg	R8(%rcx)
-	shr	R8(%rcx), %r15
-	neg	R8(%rcx)
+	neg	%cl
+	shr	%cl, %r15
+	neg	%cl
 	mov	%r8, 0(rp)
 	lea	8(rp), rp
-	sub	$1, R32(%r11)
+	sub	$1, %r11d
 	jnc	L(oopette)
 
 L(4):
@@ -106,7 +87,7 @@ L(4):
 
 	ALIGN(16)
 L(oop):
-	add	R32(%rbx), R32(%rbx)	C restore carry flag
+	add	%ebx, %ebx		C restore carry flag
 
 	mov	0(up), %r8
 	mov	8(up), %r9
@@ -123,29 +104,29 @@ L(oop):
 	mov	%r10, %r14
 	sbb	24(vp), %r11
 
-	sbb	R32(%rbx), R32(%rbx)	C save carry flag
+	sbb	%ebx, %ebx		C save carry flag
 
-	shl	R8(%rcx), %r8
-	shl	R8(%rcx), %r9
-	shl	R8(%rcx), %r10
+	shl	%cl, %r8
+	shl	%cl, %r9
+	shl	%cl, %r10
 	or	%r15, %r8
 	mov	%r11, %r15
-	shl	R8(%rcx), %r11
+	shl	%cl, %r11
 
 	lea	32(vp), vp
 
-	neg	R8(%rcx)
+	neg	%cl
 
-	shr	R8(%rcx), %r12
-	shr	R8(%rcx), %r13
-	shr	R8(%rcx), %r14
-	shr	R8(%rcx), %r15		C used next loop
+	shr	%cl, %r12
+	shr	%cl, %r13
+	shr	%cl, %r14
+	shr	%cl, %r15		C used next loop
 
 	or	%r12, %r9
 	or	%r13, %r10
 	or	%r14, %r11
 
-	neg	R8(%rcx)
+	neg	%cl
 
 	mov	%r8, 0(rp)
 	mov	%r9, 8(rp)
@@ -157,8 +138,8 @@ L(oop):
 	sub	$4, %rax
 	jnc	L(oop)
 L(end):
-	neg	R32(%rbx)
-	shl	R8(%rcx), %rbx
+	neg	%ebx
+	shl	%cl, %rbx
 	adc	%r15, %rbx
 	mov	%rbx, %rax
 	pop	%rbx
@@ -167,6 +148,5 @@ L(end):
 	pop	%r13
 	pop	%r12
 
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/missing-call.m4 b/gmp/mpn/x86_64/missing-call.m4
deleted file mode 100644
index c024f0ed77..0000000000
--- a/gmp/mpn/x86_64/missing-call.m4
+++ /dev/null
@@ -1,53 +0,0 @@
-dnl  AMD64 MULX/ADX simulation support, function call version.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-define(`adox',`
-	push	$1
-	push	$2
-	call	__gmp_adox
-	pop	$2
-')
-
-define(`adcx',`
-	push	$1
-	push	$2
-	call	__gmp_adcx
-	pop	$2
-')
-
-define(`mulx',`
-	push	$1
-	call	__gmp_mulx
-	pop	$2
-	pop	$3
-')
diff --git a/gmp/mpn/x86_64/missing-inline.m4 b/gmp/mpn/x86_64/missing-inline.m4
deleted file mode 100644
index bd1df1313f..0000000000
--- a/gmp/mpn/x86_64/missing-inline.m4
+++ /dev/null
@@ -1,100 +0,0 @@
-dnl  AMD64 MULX/ADX simulation support, inline version.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-define(`adox',`
-	push	$2
-	push	%rcx
-	push	%rbx
-	push	%rax
-	mov	$1, %rcx
-	pushfq
-	pushfq
-C copy 0(%rsp):11 to 0(%rsp):0
-	mov	(%rsp), %rbx
-	shr	%rbx
-	bt	$`'10, %rbx
-	adc	%rbx, %rbx
-	mov	%rbx, (%rsp)
-C put manipulated flags into eflags, execute a plain adc
-	popfq
-	adc	%rcx, 32(%rsp)
-C copy CF to 0(%rsp):11
-	mov	(%rsp), %rbx
-	sbb	R32(%rax), R32(%rax)
-	and	$`'0x800, R32(%rax)
-	and	$`'0xfffffffffffff7ff, %rbx
-	or	%rax, %rbx
-	mov	%rbx, (%rsp)
-C put manipulated flags into eflags
-	popfq
-	pop	%rax
-	pop	%rbx
-	pop	%rcx
-	pop	$2
-')
-
-define(`adcx',`
-	push	$2
-	push	%rcx
-	push	%rbx
-	push	%rax
-	mov	$1, %rcx
-	pushfq
-	adc	%rcx, 32(%rsp)
-	mov	(%rsp), %rbx
-	sbb	R32(%rax), R32(%rax)
-	and	$`'0xfffffffffffffffe, %rbx
-	sub	%rax, %rbx
-	mov	%rbx, (%rsp)
-	popfq
-	pop	%rax
-	pop	%rbx
-	pop	%rcx
-	pop	$2
-')
-
-define(`mulx',`
-	lea	-16(%rsp), %rsp
-	push	%rax
-	push	%rdx
-	pushfq			C preserve all flags
-	mov	$1, %rax
-	mul	%rdx
-	mov	%rax, 24(%rsp)
-	mov	%rdx, 32(%rsp)
-	popfq			C restore eflags
-	pop	%rdx
-	pop	%rax
-	pop	$2
-	pop	$3
-')
diff --git a/gmp/mpn/x86_64/missing.asm b/gmp/mpn/x86_64/missing.asm
deleted file mode 100644
index 9b65c89dd4..0000000000
--- a/gmp/mpn/x86_64/missing.asm
+++ /dev/null
@@ -1,130 +0,0 @@
-
-	dnl  AMD64 MULX/ADX simulation support.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-ASM_START()
-
-C Fake the MULX instruction
-C
-C Accept the single explicit parameter on the stack, return the two result
-C words on the stack.  This calling convention means that we need to move the
-C return address up.
-C
-PROLOGUE(__gmp_mulx)
-	lea	-8(%rsp), %rsp
-	push	%rax
-	push	%rdx
-	pushfq				C preserve all flags
-	mov	32(%rsp), %rax		C move retaddr...
-	mov	%rax, 24(%rsp)		C ...up the stack
-	mov	40(%rsp), %rax		C input parameter
-	mul	%rdx
-	mov	%rax, 32(%rsp)
-	mov	%rdx, 40(%rsp)
-	popfq				C restore eflags
-	pop	%rdx
-	pop	%rax
-	ret
-EPILOGUE()
-PROTECT(__gmp_mulx)
-
-
-C Fake the ADOX instruction
-C
-C Accept the two parameters on the stack, return the result word on the stack.
-C This calling convention means that we need to move the return address down.
-C
-PROLOGUE(__gmp_adox)
-	push	%rcx
-	push	%rbx
-	push	%rax
-	mov	32(%rsp), %rcx		C src2
-	mov	24(%rsp), %rax		C move retaddr...
-	mov	%rax, 32(%rsp)		C ...down the stack
-	pushfq
-C copy 0(%rsp):11 to 0(%rsp):0
-	mov	(%rsp), %rbx
-	shr	%rbx
-	bt	$10, %rbx
-	adc	%rbx, %rbx
-	push	%rbx
-C put manipulated flags into eflags, execute a plain adc
-	popfq
-	adc	%rcx, 48(%rsp)
-C copy CF to 0(%rsp):11
-	pop	%rbx
-	sbb	R32(%rax), R32(%rax)
-	and	$0x800, R32(%rax)
-	and	$0xfffffffffffff7ff, %rbx
-	or	%rax, %rbx
-	push	%rbx
-C put manipulated flags into eflags
-	popfq
-	pop	%rax
-	pop	%rbx
-	pop	%rcx
-	lea	8(%rsp), %rsp
-	ret
-EPILOGUE()
-PROTECT(__gmp_adox)
-
-
-C Fake the ADCX instruction
-C
-C Accept the two parameters on the stack, return the result word on the stack.
-C This calling convention means that we need to move the return address down.
-C
-PROLOGUE(__gmp_adcx)
-	push	%rcx
-	push	%rbx
-	push	%rax
-	mov	32(%rsp), %rcx		C src2
-	mov	24(%rsp), %rax		C move retaddr...
-	mov	%rax, 32(%rsp)		C ...down the stack
-	pushfq
-	adc	%rcx, 48(%rsp)
-	pop	%rbx
-	sbb	R32(%rax), R32(%rax)
-	and	$`'0xfffffffffffffffe, %rbx
-	sub	%rax, %rbx
-	push	%rbx
-	popfq
-	pop	%rax
-	pop	%rbx
-	pop	%rcx
-	lea	8(%rsp), %rsp
-	ret
-EPILOGUE()
-PROTECT(__gmp_adcx)
diff --git a/gmp/mpn/x86_64/mod_1_1.asm b/gmp/mpn/x86_64/mod_1_1.asm
deleted file mode 100644
index 4a7c45a58b..0000000000
--- a/gmp/mpn/x86_64/mod_1_1.asm
+++ /dev/null
@@ -1,235 +0,0 @@
-dnl  AMD64 mpn_mod_1_1p
-
-dnl  Contributed to the GNU project by Torbjörn Granlund and Niels Möller.
-
-dnl  Copyright 2009-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 6
-C AMD K10	 6
-C Intel P4	26
-C Intel core2	12.5
-C Intel NHM	11.3
-C Intel SBR	 8.4	(slowdown, old code took 8.0)
-C Intel atom	26
-C VIA nano	13
-
-define(`B2mb',   `%r10')
-define(`B2modb', `%r11')
-define(`ap',     `%rdi')
-define(`n',      `%rsi')
-define(`pre',    `%r8')
-define(`b',      `%rbx')
-
-define(`r0',     `%rbp') C r1 kept in %rax
-define(`r2',	 `%rcx')  C kept negated. Also used as shift count
-define(`t0',     `%r9')
-
-C mp_limb_t
-C mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
-C                       %rdi         %rsi         %rdx                %rcx
-C The pre array contains bi, cnt, B1modb, B2modb
-C Note: This implementation needs B1modb only when cnt > 0
-
-C The iteration is almost as follows,
-C
-C   r_2 B^3 + r_1 B^2 + r_0 B + u = r_1 B2modb + (r_0 + r_2 B2mod) B + u
-C
-C where r2 is a single bit represented as a mask. But to make sure that the
-C result fits in two limbs and a bit, carry from the addition
-C
-C   r_0 + r_2 B2mod
-C
-C is handled specially. On carry, we subtract b to cancel the carry,
-C and we use instead the value
-C
-C   r_0 + B2mb (mod B)
-C
-C This addition can be issued early since it doesn't depend on r2, and it is
-C the source of the cmov in the loop.
-C
-C We have the invariant that r_2 B^2 + r_1 B + r_0 < B^2 + B b
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mod_1_1p)
-	FUNC_ENTRY(4)
-	push	%rbp
-	push	%rbx
-	mov	%rdx, b
-	mov	%rcx, pre
-
-	mov	-8(ap, n, 8), %rax
-	cmp	$3, n
-	jnc	L(first)
-	mov	-16(ap, n, 8), r0
-	jmp	L(reduce_two)
-
-L(first):
-	C First iteration, no r2
-	mov	24(pre), B2modb
-	mul	B2modb
-	mov	-24(ap, n, 8), r0
-	add	%rax, r0
-	mov	-16(ap, n, 8), %rax
-	adc	%rdx, %rax
-	sbb	r2, r2
-	sub	$4, n
-	jc	L(reduce_three)
-
-	mov	B2modb, B2mb
-	sub	b, B2mb
-
-	ALIGN(16)
-L(top):	and	B2modb, r2
-	lea	(B2mb, r0), t0
-	mul	B2modb
-	add	r0, r2
-	mov	(ap, n, 8), r0
-	cmovc	t0, r2
-	add	%rax, r0
-	mov	r2, %rax
-	adc	%rdx, %rax
-	sbb	r2, r2
-	sub	$1, n
-	jnc	L(top)
-
-L(reduce_three):
-	C Eliminate r2
-	and	b, r2
-	sub	r2, %rax
-
-L(reduce_two):
-	mov	8(pre), R32(%rcx)
-	test	R32(%rcx), R32(%rcx)
-	jz	L(normalized)
-
-	C Unnormalized, use B1modb to reduce to size < B (b+1)
-	mulq	16(pre)
-	xor	t0, t0
-	add	%rax, r0
-	adc	%rdx, t0
-	mov	t0, %rax
-
-	C Left-shift to normalize
-ifdef(`SHLD_SLOW',`
-	shl	R8(%rcx), %rax
-	mov	r0, t0
-	neg	R32(%rcx)
-	shr	R8(%rcx), t0
-	or	t0, %rax
-	neg	R32(%rcx)
-',`
-	shld	R8(%rcx), r0, %rax
-')
-	shl	R8(%rcx), r0
-	jmp	L(udiv)
-
-L(normalized):
-	mov	%rax, t0
-	sub	b, t0
-	cmovnc	t0, %rax
-
-L(udiv):
-	lea	1(%rax), t0
-	mulq	(pre)
-	add	r0, %rax
-	adc	t0, %rdx
-	imul	b, %rdx
-	sub	%rdx, r0
-	cmp	r0, %rax
-	lea	(b, r0), %rax
-	cmovnc	r0, %rax
-	cmp	b, %rax
-	jnc	L(fix)
-L(ok):	shr	R8(%rcx), %rax
-
-	pop	%rbx
-	pop	%rbp
-	FUNC_EXIT()
-	ret
-L(fix):	sub	b, %rax
-	jmp	L(ok)
-EPILOGUE()
-
-	ALIGN(16)
-PROLOGUE(mpn_mod_1_1p_cps)
-	FUNC_ENTRY(2)
-	push	%rbp
-	bsr	%rsi, %rcx
-	push	%rbx
-	mov	%rdi, %rbx
-	push	%r12
-	xor	$63, R32(%rcx)
-	mov	%rsi, %r12
-	mov	R32(%rcx), R32(%rbp)
-	sal	R8(%rcx), %r12
-IFSTD(`	mov	%r12, %rdi	')	C pass parameter
-IFDOS(`	mov	%r12, %rcx	')	C pass parameter
-	CALL(	mpn_invert_limb)
-	neg	%r12
-	mov	%r12, %r8
-	mov	%rax, (%rbx)		C store bi
-	mov	%rbp, 8(%rbx)		C store cnt
-	imul	%rax, %r12
-	mov	%r12, 24(%rbx)		C store B2modb
-	mov	R32(%rbp), R32(%rcx)
-	test	R32(%rcx), R32(%rcx)
-	jz	L(z)
-
-	mov	$1, R32(%rdx)
-ifdef(`SHLD_SLOW',`
-	C Destroys %rax, unlike shld. Otherwise, we could do B1modb
-	C before B2modb, and get rid of the move %r12, %r8 above.
-
-	shl	R8(%rcx), %rdx
-	neg	R32(%rcx)
-	shr	R8(%rcx), %rax
-	or	%rax, %rdx
-	neg	R32(%rcx)
-',`
-	shld	R8(%rcx), %rax, %rdx
-')
-	imul	%rdx, %r8
-	shr	R8(%rcx), %r8
-	mov	%r8, 16(%rbx)		C store B1modb
-L(z):
-	pop	%r12
-	pop	%rbx
-	pop	%rbp
-	FUNC_EXIT()
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/mod_1_2.asm b/gmp/mpn/x86_64/mod_1_2.asm
deleted file mode 100644
index 02dd917791..0000000000
--- a/gmp/mpn/x86_64/mod_1_2.asm
+++ /dev/null
@@ -1,238 +0,0 @@
-dnl  AMD64 mpn_mod_1s_2p
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2009-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 4
-C AMD K10	 4
-C Intel P4	19
-C Intel core2	 8
-C Intel NHM	 6.5
-C Intel SBR	 4.5
-C Intel atom	28
-C VIA nano	 8
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mod_1s_2p)
-	FUNC_ENTRY(4)
-	push	%r14
-	test	$1, R8(%rsi)
-	mov	%rdx, %r14
-	push	%r13
-	mov	%rcx, %r13
-	push	%r12
-	push	%rbp
-	push	%rbx
-	mov	16(%rcx), %r10
-	mov	24(%rcx), %rbx
-	mov	32(%rcx), %rbp
-	je	L(b0)
-	dec	%rsi
-	je	L(one)
-	mov	-8(%rdi,%rsi,8), %rax
-	mul	%r10
-	mov	%rax, %r9
-	mov	%rdx, %r8
-	mov	(%rdi,%rsi,8), %rax
-	add	-16(%rdi,%rsi,8), %r9
-	adc	$0, %r8
-	mul	%rbx
-	add	%rax, %r9
-	adc	%rdx, %r8
-	jmp	L(11)
-
-L(b0):	mov	-8(%rdi,%rsi,8), %r8
-	mov	-16(%rdi,%rsi,8), %r9
-
-L(11):	sub	$4, %rsi
-	jb	L(ed2)
-	lea	40(%rdi,%rsi,8), %rdi
-	mov	-40(%rdi), %r11
-	mov	-32(%rdi), %rax
-	jmp	L(m0)
-
-	ALIGN(16)
-L(top):	mov	-24(%rdi), %r9
-	add	%rax, %r11
-	mov	-16(%rdi), %rax
-	adc	%rdx, %r12
-	mul	%r10
-	add	%rax, %r9
-	mov	%r11, %rax
-	mov	%rdx, %r8
-	adc	$0, %r8
-	mul	%rbx
-	add	%rax, %r9
-	mov	%r12, %rax
-	adc	%rdx, %r8
-	mul	%rbp
-	sub	$2, %rsi
-	jb	L(ed1)
-	mov	-40(%rdi), %r11
-	add	%rax, %r9
-	mov	-32(%rdi), %rax
-	adc	%rdx, %r8
-L(m0):	mul	%r10
-	add	%rax, %r11
-	mov	%r9, %rax
-	mov	%rdx, %r12
-	adc	$0, %r12
-	mul	%rbx
-	add	%rax, %r11
-	lea	-32(%rdi), %rdi		C ap -= 4
-	mov	%r8, %rax
-	adc	%rdx, %r12
-	mul	%rbp
-	sub	$2, %rsi
-	jae	L(top)
-
-L(ed0):	mov	%r11, %r9
-	mov	%r12, %r8
-L(ed1):	add	%rax, %r9
-	adc	%rdx, %r8
-L(ed2):	mov	8(%r13), R32(%rdi)		C cnt
-	mov	%r8, %rax
-	mov	%r9, %r8
-	mul	%r10
-	add	%rax, %r8
-	adc	$0, %rdx
-L(1):	xor	R32(%rcx), R32(%rcx)
-	mov	%r8, %r9
-	sub	R32(%rdi), R32(%rcx)
-	shr	R8(%rcx), %r9
-	mov	R32(%rdi), R32(%rcx)
-	sal	R8(%rcx), %rdx
-	or	%rdx, %r9
-	sal	R8(%rcx), %r8
-	mov	%r9, %rax
-	mulq	(%r13)
-	mov	%rax, %rsi
-	inc	%r9
-	add	%r8, %rsi
-	adc	%r9, %rdx
-	imul	%r14, %rdx
-	sub	%rdx, %r8
-	lea	(%r8,%r14), %rax
-	cmp	%r8, %rsi
-	cmovc	%rax, %r8
-	mov	%r8, %rax
-	sub	%r14, %rax
-	cmovc	%r8, %rax
-	mov	R32(%rdi), R32(%rcx)
-	shr	R8(%rcx), %rax
-	pop	%rbx
-	pop	%rbp
-	pop	%r12
-	pop	%r13
-	pop	%r14
-	FUNC_EXIT()
-	ret
-L(one):
-	mov	(%rdi), %r8
-	mov	8(%rcx), R32(%rdi)
-	xor	%rdx, %rdx
-	jmp	L(1)
-EPILOGUE()
-
-	ALIGN(16)
-PROLOGUE(mpn_mod_1s_2p_cps)
-	FUNC_ENTRY(2)
-	push	%rbp
-	bsr	%rsi, %rcx
-	push	%rbx
-	mov	%rdi, %rbx
-	push	%r12
-	xor	$63, R32(%rcx)
-	mov	%rsi, %r12
-	mov	R32(%rcx), R32(%rbp)	C preserve cnt over call
-	sal	R8(%rcx), %r12		C b << cnt
-IFSTD(`	mov	%r12, %rdi	')	C pass parameter
-IFDOS(`	mov	%r12, %rcx	')	C pass parameter
-	CALL(	mpn_invert_limb)
-	mov	%r12, %r8
-	mov	%rax, %r11
-	mov	%rax, (%rbx)		C store bi
-	mov	%rbp, 8(%rbx)		C store cnt
-	neg	%r8
-	mov	R32(%rbp), R32(%rcx)
-	mov	$1, R32(%rsi)
-ifdef(`SHLD_SLOW',`
-	shl	R8(%rcx), %rsi
-	neg	R32(%rcx)
-	mov	%rax, %rbp
-	shr	R8(%rcx), %rax
-	or	%rax, %rsi
-	mov	%rbp, %rax
-	neg	R32(%rcx)
-',`
-	shld	R8(%rcx), %rax, %rsi	C FIXME: Slow on Atom and Nano
-')
-	imul	%r8, %rsi
-	mul	%rsi
-
-	add	%rsi, %rdx
-	shr	R8(%rcx), %rsi
-	mov	%rsi, 16(%rbx)		C store B1modb
-
-	not	%rdx
-	imul	%r12, %rdx
-	lea	(%rdx,%r12), %rsi
-	cmp	%rdx, %rax
-	cmovnc	%rdx, %rsi
-	mov	%r11, %rax
-	mul	%rsi
-
-	add	%rsi, %rdx
-	shr	R8(%rcx), %rsi
-	mov	%rsi, 24(%rbx)		C store B2modb
-
-	not	%rdx
-	imul	%r12, %rdx
-	add	%rdx, %r12
-	cmp	%rdx, %rax
-	cmovnc	%rdx, %r12
-
-	shr	R8(%rcx), %r12
-	mov	%r12, 32(%rbx)		C store B3modb
-
-	pop	%r12
-	pop	%rbx
-	pop	%rbp
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/mod_1_4.asm b/gmp/mpn/x86_64/mod_1_4.asm
deleted file mode 100644
index 3ce83dc42e..0000000000
--- a/gmp/mpn/x86_64/mod_1_4.asm
+++ /dev/null
@@ -1,269 +0,0 @@
-dnl  AMD64 mpn_mod_1s_4p
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2009-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 3
-C AMD K10	 3
-C Intel P4	15.5
-C Intel core2	 5
-C Intel corei	 4
-C Intel atom	23
-C VIA nano	 4.75
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mod_1s_4p)
-	FUNC_ENTRY(4)
-	push	%r15
-	push	%r14
-	push	%r13
-	push	%r12
-	push	%rbp
-	push	%rbx
-
-	mov	%rdx, %r15
-	mov	%rcx, %r14
-	mov	16(%rcx), %r11		C B1modb
-	mov	24(%rcx), %rbx		C B2modb
-	mov	32(%rcx), %rbp		C B3modb
-	mov	40(%rcx), %r13		C B4modb
-	mov	48(%rcx), %r12		C B5modb
-	xor	R32(%r8), R32(%r8)
-	mov	R32(%rsi), R32(%rdx)
-	and	$3, R32(%rdx)
-	je	L(b0)
-	cmp	$2, R32(%rdx)
-	jc	L(b1)
-	je	L(b2)
-
-L(b3):	lea	-24(%rdi,%rsi,8), %rdi
-	mov	8(%rdi), %rax
-	mul	%r11
-	mov	(%rdi), %r9
-	add	%rax, %r9
-	adc	%rdx, %r8
-	mov	16(%rdi), %rax
-	mul	%rbx
-	jmp	L(m0)
-
-	ALIGN(8)
-L(b0):	lea	-32(%rdi,%rsi,8), %rdi
-	mov	8(%rdi), %rax
-	mul	%r11
-	mov	(%rdi), %r9
-	add	%rax, %r9
-	adc	%rdx, %r8
-	mov	16(%rdi), %rax
-	mul	%rbx
-	add	%rax, %r9
-	adc	%rdx, %r8
-	mov	24(%rdi), %rax
-	mul	%rbp
-	jmp	L(m0)
-
-	ALIGN(8)
-L(b1):	lea	-8(%rdi,%rsi,8), %rdi
-	mov	(%rdi), %r9
-	jmp	L(m1)
-
-	ALIGN(8)
-L(b2):	lea	-16(%rdi,%rsi,8), %rdi
-	mov	8(%rdi), %r8
-	mov	(%rdi), %r9
-	jmp	L(m1)
-
-	ALIGN(16)
-L(top):	mov	-24(%rdi), %rax
-	mov	-32(%rdi), %r10
-	mul	%r11			C up[1] * B1modb
-	add	%rax, %r10
-	mov	-16(%rdi), %rax
-	mov	$0, R32(%rcx)
-	adc	%rdx, %rcx
-	mul	%rbx			C up[2] * B2modb
-	add	%rax, %r10
-	mov	-8(%rdi), %rax
-	adc	%rdx, %rcx
-	sub	$32, %rdi
-	mul	%rbp			C up[3] * B3modb
-	add	%rax, %r10
-	mov	%r13, %rax
-	adc	%rdx, %rcx
-	mul	%r9			C rl * B4modb
-	add	%rax, %r10
-	mov	%r12, %rax
-	adc	%rdx, %rcx
-	mul	%r8			C rh * B5modb
-	mov	%r10, %r9
-	mov	%rcx, %r8
-L(m0):	add	%rax, %r9
-	adc	%rdx, %r8
-L(m1):	sub	$4, %rsi
-	ja	L(top)
-
-L(end):	mov	8(%r14), R32(%rsi)
-	mov	%r8, %rax
-	mul	%r11
-	mov	%rax, %r8
-	add	%r9, %r8
-	adc	$0, %rdx
-	xor	R32(%rcx), R32(%rcx)
-	sub	R32(%rsi), R32(%rcx)
-	mov	%r8, %rdi
-	shr	R8(%rcx), %rdi
-	mov	R32(%rsi), R32(%rcx)
-	sal	R8(%rcx), %rdx
-	or	%rdx, %rdi
-	mov	%rdi, %rax
-	mulq	(%r14)
-	mov	%r15, %rbx
-	mov	%rax, %r9
-	sal	R8(%rcx), %r8
-	inc	%rdi
-	add	%r8, %r9
-	adc	%rdi, %rdx
-	imul	%rbx, %rdx
-	sub	%rdx, %r8
-	lea	(%r8,%rbx), %rax
-	cmp	%r8, %r9
-	cmovc	%rax, %r8
-	mov	%r8, %rax
-	sub	%rbx, %rax
-	cmovc	%r8, %rax
-	shr	R8(%rcx), %rax
-	pop	%rbx
-	pop	%rbp
-	pop	%r12
-	pop	%r13
-	pop	%r14
-	pop	%r15
-	FUNC_EXIT()
-	ret
-EPILOGUE()
-
-	ALIGN(16)
-PROLOGUE(mpn_mod_1s_4p_cps)
-	FUNC_ENTRY(2)
-	push	%rbp
-	bsr	%rsi, %rcx
-	push	%rbx
-	mov	%rdi, %rbx
-	push	%r12
-	xor	$63, R32(%rcx)
-	mov	%rsi, %r12
-	mov	R32(%rcx), R32(%rbp)	C preserve cnt over call
-	sal	R8(%rcx), %r12		C b << cnt
-IFSTD(`	mov	%r12, %rdi	')	C pass parameter
-IFDOS(`	mov	%r12, %rcx	')	C pass parameter
-	CALL(	mpn_invert_limb)
-	mov	%r12, %r8
-	mov	%rax, %r11
-	mov	%rax, (%rbx)		C store bi
-	mov	%rbp, 8(%rbx)		C store cnt
-	neg	%r8
-	mov	R32(%rbp), R32(%rcx)
-	mov	$1, R32(%rsi)
-ifdef(`SHLD_SLOW',`
-	shl	R8(%rcx), %rsi
-	neg	R32(%rcx)
-	mov	%rax, %rbp
-	shr	R8(%rcx), %rax
-	or	%rax, %rsi
-	mov	%rbp, %rax
-	neg	R32(%rcx)
-',`
-	shld	R8(%rcx), %rax, %rsi	C FIXME: Slow on Atom and Nano
-')
-	imul	%r8, %rsi
-	mul	%rsi
-
-	add	%rsi, %rdx
-	shr	R8(%rcx), %rsi
-	mov	%rsi, 16(%rbx)		C store B1modb
-
-	not	%rdx
-	imul	%r12, %rdx
-	lea	(%rdx,%r12), %rsi
-	cmp	%rdx, %rax
-	cmovnc	%rdx, %rsi
-	mov	%r11, %rax
-	mul	%rsi
-
-	add	%rsi, %rdx
-	shr	R8(%rcx), %rsi
-	mov	%rsi, 24(%rbx)		C store B2modb
-
-	not	%rdx
-	imul	%r12, %rdx
-	lea	(%rdx,%r12), %rsi
-	cmp	%rdx, %rax
-	cmovnc	%rdx, %rsi
-	mov	%r11, %rax
-	mul	%rsi
-
-	add	%rsi, %rdx
-	shr	R8(%rcx), %rsi
-	mov	%rsi, 32(%rbx)		C store B3modb
-
-	not	%rdx
-	imul	%r12, %rdx
-	lea	(%rdx,%r12), %rsi
-	cmp	%rdx, %rax
-	cmovnc	%rdx, %rsi
-	mov	%r11, %rax
-	mul	%rsi
-
-	add	%rsi, %rdx
-	shr	R8(%rcx), %rsi
-	mov	%rsi, 40(%rbx)		C store B4modb
-
-	not	%rdx
-	imul	%r12, %rdx
-	add	%rdx, %r12
-	cmp	%rdx, %rax
-	cmovnc	%rdx, %r12
-
-	shr	R8(%rcx), %r12
-	mov	%r12, 48(%rbx)		C store B5modb
-
-	pop	%r12
-	pop	%rbx
-	pop	%rbp
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/mod_34lsub1.asm b/gmp/mpn/x86_64/mod_34lsub1.asm
index 62bdcfac69..34df5bb5b7 100644
--- a/gmp/mpn/x86_64/mod_34lsub1.asm
+++ b/gmp/mpn/x86_64/mod_34lsub1.asm
@@ -1,194 +1,155 @@
 dnl  AMD64 mpn_mod_34lsub1 -- remainder modulo 2^48-1.
 
-dnl  Copyright 2000-2002, 2004, 2005, 2007, 2009-2012 Free Software Foundation,
+dnl  Copyright 2000, 2001, 2002, 2004, 2005, 2007 Free Software Foundation,
 dnl  Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C	    cycles/limb
-C AMD K8,K9	 0.67	   0.583 is possible with zero-reg instead of $0, 4-way
-C AMD K10	 0.67	   this seems hard to beat
-C AMD bd1	 1
-C AMD bobcat	 1.07
-C Intel P4	 7.35	   terrible, use old code
-C Intel core2	 1.25	   1+epsilon with huge unrolling
-C Intel NHM	 1.15	   this seems hard to beat
-C Intel SBR	 0.93
-C Intel atom	 2.5
-C VIA nano	 1.25	   this seems hard to beat
+C	     cycles/limb
+C K8,K9:	 1.0
+C K10:		 1.12
+C P4:		 3.25
+C P6-15 (Core2): 1.5
+C P6-28 (Atom):	 2.5
+
 
 C INPUT PARAMETERS
-define(`ap',	%rdi)
-define(`n',	%rsi)
+C up	rdi
+C n	rsi
 
 C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
 
 C TODO
-C  * Review feed-in and wind-down code.
+C  * Apply the movzwl tricks to the x86/k7 code
+C  * Review feed-in and wind-down code.  In particular, try to avoid adcq and
+C    sbbq to placate Pentium4.
+C  * More unrolling and/or index addressing could bring time to under 1 c/l
+C    for Athlon64, approaching 0.67 c/l seems possible.
+C  * There are recurrencies on the carry registers (r8, r9, r10) that might
+C    be the limiting factor for the Pentium4 speed.  Splitting these into 6
+C    registers would help.
+C  * For ultimate Athlon64 performance, a sequence like this might be best.
+C    It should reach 0.5 c/l (limited by L1 cache bandwidth).
+C
+C	addq	(%rdi), %rax
+C	adcq	8(%rdi), %rcx
+C	adcq	16(%rdi), %rdx
+C	adcq	$0, %r8
+C	addq	24(%rdi), %rax
+C	adcq	32(%rdi), %rcx
+C	adcq	40(%rdi), %rdx
+C	adcq	$0, %r8
+C	...
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
 
 ASM_START()
 	TEXT
 	ALIGN(32)
 PROLOGUE(mpn_mod_34lsub1)
-	FUNC_ENTRY(2)
 
 	mov	$0x0000FFFFFFFFFFFF, %r11
 
-	mov	(ap), %rax
-
-	cmp	$2, %rsi
+	sub	$2, %rsi
 	ja	L(gt2)
 
-	jb	L(one)
+	mov	(%rdi), %rax
+	nop
+	jb	L(1)
 
-	mov	8(ap), %rsi
+	mov	8(%rdi), %rsi
 	mov	%rax, %rdx
 	shr	$48, %rax		C src[0] high
 
 	and	%r11, %rdx		C src[0] low
 	add	%rdx, %rax
-	mov	R32(%rsi), R32(%rdx)
+	mov	%esi, %edx
 
 	shr	$32, %rsi		C src[1] high
 	add	%rsi, %rax
 
 	shl	$16, %rdx		C src[1] low
 	add	%rdx, %rax
-L(one):	FUNC_EXIT()
-	ret
 
+L(1):	ret
 
-C Don't change this, the wind-down code is not able to handle greater values
-define(UNROLL,3)
 
-L(gt2):	mov	8(ap), %rcx
-	mov	16(ap), %rdx
-	xor	%r9, %r9
-	add	$24, ap
-	sub	$eval(UNROLL*3+3), %rsi
-	jc	L(end)
 	ALIGN(16)
-L(top):
-	add	(ap), %rax
-	adc	8(ap), %rcx
-	adc	16(ap), %rdx
-	adc	$0, %r9
-forloop(i,1,UNROLL-1,`dnl
-	add	eval(i*24)(ap), %rax
-	adc	eval(i*24+8)(ap), %rcx
-	adc	eval(i*24+16)(ap), %rdx
-	adc	$0, %r9
-')dnl
-	add	$eval(UNROLL*24), ap
-	sub	$eval(UNROLL*3), %rsi
-	jnc	L(top)
-
-L(end):
-	lea	L(tab)(%rip), %r8
-ifdef(`PIC',
-`	movslq	36(%r8,%rsi,4), %r10
-	add	%r10, %r8
-	jmp	*%r8
-',`
-	jmp	*72(%r8,%rsi,8)
-')
-	JUMPTABSECT
-	ALIGN(8)
-L(tab):	JMPENT(	L(0), L(tab))
-	JMPENT(	L(1), L(tab))
-	JMPENT(	L(2), L(tab))
-	JMPENT(	L(3), L(tab))
-	JMPENT(	L(4), L(tab))
-	JMPENT(	L(5), L(tab))
-	JMPENT(	L(6), L(tab))
-	JMPENT(	L(7), L(tab))
-	JMPENT(	L(8), L(tab))
-	TEXT
+L(gt2):	xor	%eax, %eax
+	xor	%ecx, %ecx
+	xor	%edx, %edx
+	xor	%r8, %r8
+	xor	%r9, %r9
+	xor	%r10, %r10
 
-L(6):	add	(ap), %rax
-	adc	8(ap), %rcx
-	adc	16(ap), %rdx
-	adc	$0, %r9
-	add	$24, ap
-L(3):	add	(ap), %rax
-	adc	8(ap), %rcx
-	adc	16(ap), %rdx
-	jmp	L(cj1)
-
-L(7):	add	(ap), %rax
-	adc	8(ap), %rcx
-	adc	16(ap), %rdx
+L(top):	add	(%rdi), %rax
+	adc	$0, %r10
+	add	8(%rdi), %rcx
+	adc	$0, %r8
+	add	16(%rdi), %rdx
 	adc	$0, %r9
-	add	$24, ap
-L(4):	add	(ap), %rax
-	adc	8(ap), %rcx
-	adc	16(ap), %rdx
-	adc	$0, %r9
-	add	$24, ap
-L(1):	add	(ap), %rax
-	adc	$0, %rcx
-	jmp	L(cj2)
-
-L(8):	add	(ap), %rax
-	adc	8(ap), %rcx
-	adc	16(ap), %rdx
-	adc	$0, %r9
-	add	$24, ap
-L(5):	add	(ap), %rax
-	adc	8(ap), %rcx
-	adc	16(ap), %rdx
+
+	sub	$3,%rsi
+	jng	L(end)
+
+	add	24(%rdi), %rax
+	adc	$0, %r10
+	add	32(%rdi), %rcx
+	adc	$0, %r8
+	add	40(%rdi), %rdx
+	lea	48(%rdi), %rdi
 	adc	$0, %r9
-	add	$24, ap
-L(2):	add	(ap), %rax
-	adc	8(ap), %rcx
 
-L(cj2):	adc	$0, %rdx
-L(cj1):	adc	$0, %r9
-L(0):	add	%r9, %rax
-	adc	$0, %rcx
-	adc	$0, %rdx
-	adc	$0, %rax
+	sub	$3,%rsi
+	jg	L(top)
+
+
+	add	$-24, %rdi
+L(end):	add	%r9, %rax
+	adc	%r10, %rcx
+	adc	%r8, %rdx
+
+	inc	%rsi
+	mov	$0x1, %r10d
+	js	L(combine)
+
+	mov	$0x10000, %r10d
+	adc	24(%rdi), %rax
+	dec	%rsi
+	js	L(combine)
+
+	adc	32(%rdi), %rcx
+	mov	$0x100000000, %r10
 
+L(combine):
+	sbb	%rsi, %rsi		C carry
 	mov	%rax, %rdi		C 0mod3
 	shr	$48, %rax		C 0mod3 high
 
+	and	%r10, %rsi		C carry masked
 	and	%r11, %rdi		C 0mod3 low
-	mov	R32(%rcx), R32(%r10)	C 1mod3
+	mov	%ecx, %r10d		C 1mod3
 
+	add	%rsi, %rax		C apply carry
 	shr	$32, %rcx		C 1mod3 high
 
 	add	%rdi, %rax		C apply 0mod3 low
-	movzwl	%dx, R32(%rdi)		C 2mod3
+	movzwl	%dx, %edi		C 2mod3
 	shl	$16, %r10		C 1mod3 low
 
 	add	%rcx, %rax		C apply 1mod3 high
@@ -200,6 +161,5 @@ L(0):	add	%r9, %rax
 	add	%rdx, %rax		C apply 2mod3 high
 	add	%rdi, %rax		C apply 2mod3 low
 
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/mode1o.asm b/gmp/mpn/x86_64/mode1o.asm
index 2cd2b08848..c5f2bc7990 100644
--- a/gmp/mpn/x86_64/mode1o.asm
+++ b/gmp/mpn/x86_64/mode1o.asm
@@ -1,131 +1,139 @@
-dnl  AMD64 mpn_modexact_1_odd -- Hensel norm remainder.
+dnl  AMD64 mpn_modexact_1_odd -- exact division style remainder.
 
-dnl  Copyright 2000-2006, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
+dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
+dnl  Foundation, Inc.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	10
-C AMD K10	10
-C Intel P4	33
-C Intel core2	13
-C Intel corei	14.5
-C Intel atom	35
-C VIA nano	 ?
+C K8,K9:	10
+C K10:		10
+C P4:		33
+C P6-15 (Core2):13
+C P6-28 (Atom):	35
 
 
+C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
+C                               mp_limb_t divisor);
+C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
+C                                mp_limb_t divisor, mp_limb_t carry);
+C
+C
 C The dependent chain in the main loop is
 C
 C                            cycles
-C	sub	%rdx, %rax	1
-C	imul	%r9, %rax	4
-C	mul	%r8		5
+C	subq	%rdx, %rax	1
+C	imulq	%r9, %rax	4
+C	mulq	%r8		5
 C			      ----
 C       total		       10
 C
-C The mov load from src seems to need to be scheduled back before the jz to
-C achieve this speed, out-of-order execution apparently can't completely hide
-C the latency otherwise.
+C The movq load from src seems to need to be scheduled back before the jz to
+C achieve this speed, out-of-order execution apparently can't completely
+C hide the latency otherwise.
 C
-C The l=src[i]-cbit step is rotated back too, since that allows us to avoid it
-C for the first iteration (where there's no cbit).
+C The l=src[i]-cbit step is rotated back too, since that allows us to avoid
+C it for the first iteration (where there's no cbit).
 C
-C The code alignment used (32-byte) for the loop also seems necessary.  Without
-C that the non-PIC case has adc crossing the 0x60 offset, apparently making it
-C run at 11 cycles instead of 10.
-
+C The code alignment used (32-byte) for the loop also seems necessary.
+C Without that the non-PIC case has adcq crossing the 0x60 offset,
+C apparently making it run at 11 cycles instead of 10.
+C
+C Not done:
+C
+C divq for size==1 was measured at about 79 cycles, compared to the inverse
+C at about 25 cycles (both including function call overheads), so that's not
+C used.
+C
+C Enhancements:
+C
+C For PIC, we shouldn't really need the GOT fetch for binvert_limb_table,
+C it'll be in rodata or text in libgmp.so and can be accessed directly %rip
+C relative.  This would be for small model only (something we don't
+C presently detect, but which is all that gcc 3.3.3 supports), since 8-byte
+C PC-relative relocations are apparently not available.  Some rough
+C experiments with binutils 2.13 looked worryingly like it might come out
+C with an unwanted text segment relocation though, even with ".protected".
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
 
 ASM_START()
 	TEXT
 	ALIGN(32)
 PROLOGUE(mpn_modexact_1_odd)
-	FUNC_ENTRY(3)
-	mov	$0, R32(%rcx)
-IFDOS(`	jmp	L(ent)		')
+
+	movl	$0, %ecx
 
 PROLOGUE(mpn_modexact_1c_odd)
-	FUNC_ENTRY(4)
-L(ent):
+
 	C rdi	src
 	C rsi	size
 	C rdx	divisor
 	C rcx	carry
 
-	mov	%rdx, %r8		C d
-	shr	R32(%rdx)		C d/2
-
-	LEA(	binvert_limb_table, %r9)
+	movq	%rdx, %r8		C d
+	shrl	%edx			C d/2
+ifdef(`PIC',`
+	movq	binvert_limb_table@GOTPCREL(%rip), %r9
+',`
+	movabsq	$binvert_limb_table, %r9
+')
 
-	and	$127, R32(%rdx)
-	mov	%rcx, %r10		C initial carry
+	andl	$127, %edx
+	movq	%rcx, %r10		C initial carry
 
-	movzbl	(%r9,%rdx), R32(%rdx)	C inv 8 bits
+	movzbl	(%r9,%rdx), %edx	C inv 8 bits
 
-	mov	(%rdi), %rax		C src[0]
-	lea	(%rdi,%rsi,8), %r11	C src end
-	mov	%r8, %rdi		C d, made available to imull
+	movq	(%rdi), %rax		C src[0]
+	leaq	(%rdi,%rsi,8), %r11	C src end
+	movq	%r8, %rdi		C d, made available to imull
 
-	lea	(%rdx,%rdx), R32(%rcx)	C 2*inv
-	imul	R32(%rdx), R32(%rdx)	C inv*inv
+	leal	(%rdx,%rdx), %ecx	C 2*inv
+	imull	%edx, %edx		C inv*inv
 
-	neg	%rsi			C -size
+	negq	%rsi			C -size
 
-	imul	R32(%rdi), R32(%rdx)	C inv*inv*d
+	imull	%edi, %edx		C inv*inv*d
 
-	sub	R32(%rdx), R32(%rcx)	C inv = 2*inv - inv*inv*d, 16 bits
+	subl	%edx, %ecx		C inv = 2*inv - inv*inv*d, 16 bits
 
-	lea	(%rcx,%rcx), R32(%rdx)	C 2*inv
-	imul	R32(%rcx), R32(%rcx)	C inv*inv
+	leal	(%rcx,%rcx), %edx	C 2*inv
+	imull	%ecx, %ecx		C inv*inv
 
-	imul	R32(%rdi), R32(%rcx)	C inv*inv*d
+	imull	%edi, %ecx		C inv*inv*d
 
-	sub	R32(%rcx), R32(%rdx)	C inv = 2*inv - inv*inv*d, 32 bits
-	xor	R32(%rcx), R32(%rcx)	C initial cbit
+	subl	%ecx, %edx		C inv = 2*inv - inv*inv*d, 32 bits
+	xorl	%ecx, %ecx		C initial cbit
 
-	lea	(%rdx,%rdx), %r9	C 2*inv
-	imul	%rdx, %rdx		C inv*inv
+	leaq	(%rdx,%rdx), %r9	C 2*inv
+	imulq	%rdx, %rdx		C inv*inv
 
-	imul	%r8, %rdx		C inv*inv*d
+	imulq	%r8, %rdx		C inv*inv*d
 
-	sub	%rdx, %r9		C inv = 2*inv - inv*inv*d, 64 bits
-	mov	%r10, %rdx		C initial climb
+	subq	%rdx, %r9		C inv = 2*inv - inv*inv*d, 64 bits
+	movq	%r10, %rdx		C initial climb
 
 	ASSERT(e,`	C d*inv == 1 mod 2^64
-	mov	%r8, %r10
-	imul	%r9, %r10
-	cmp	$1, %r10')
+	movq	%r8, %r10
+	imulq	%r9, %r10
+	cmpq	$1, %r10')
 
-	inc	%rsi
+	incq	%rsi
 	jz	L(one)
 
 
@@ -140,31 +148,30 @@ L(top):
 	C r9	inverse
 	C r11	src end ptr
 
-	sub	%rdx, %rax		C l = src[i]-cbit - climb
+	subq	%rdx, %rax		C l = src[i]-cbit - climb
 
-	adc	$0, %rcx		C more cbit
-	imul	%r9, %rax		C q = l * inverse
+	adcq	$0, %rcx		C more cbit
+	imulq	%r9, %rax		C q = l * inverse
 
-	mul	%r8			C climb = high (q * d)
+	mulq	%r8			C climb = high (q * d)
 
-	mov	(%r11,%rsi,8), %rax	C src[i+1]
-	sub	%rcx, %rax		C next l = src[i+1] - cbit
-	setc	R8(%rcx)		C new cbit
+	movq	(%r11,%rsi,8), %rax	C src[i+1]
+	subq	%rcx, %rax		C next l = src[i+1] - cbit
+	setc	%cl			C new cbit
 
-	inc	%rsi
+	incq	%rsi
 	jnz	L(top)
 
 
 L(one):
-	sub	%rdx, %rax		C l = src[i]-cbit - climb
+	subq	%rdx, %rax		C l = src[i]-cbit - climb
 
-	adc	$0, %rcx		C more cbit
-	imul	%r9, %rax		C q = l * inverse
+	adcq	$0, %rcx		C more cbit
+	imulq	%r9, %rax		C q = l * inverse
 
-	mul	%r8			C climb = high (q * d)
+	mulq	%r8			C climb = high (q * d)
 
-	lea	(%rcx,%rdx), %rax	C climb+cbit
-	FUNC_EXIT()
+	leaq	(%rcx,%rdx), %rax	C climb+cbit
 	ret
 
 EPILOGUE(mpn_modexact_1c_odd)
diff --git a/gmp/mpn/x86_64/mul_1.asm b/gmp/mpn/x86_64/mul_1.asm
index b032afc9dd..da96a14c76 100644
--- a/gmp/mpn/x86_64/mul_1.asm
+++ b/gmp/mpn/x86_64/mul_1.asm
@@ -1,97 +1,64 @@
 dnl  AMD64 mpn_mul_1.
 
-dnl  Copyright 2003-2005, 2007, 2008, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9	 2.5
-C AMD K10	 2.5
-C AMD bd1	 5.0
-C AMD bobcat	 5.5
-C Intel P4	12.3
-C Intel core2	 4.0
-C Intel NHM	 3.75
-C Intel SBR	 2.95
-C Intel atom	19.8
-C VIA nano	 4.25
-
-C The loop of this code is the result of running a code generation and
+C K8,K9:	 2.5
+C K10:		 2.5
+C P4:		 12.3
+C P6-15:	 4.0
+C P6-15 (Core2): 4.0
+C P6-28 (Atom):	19.8
+
+C The inner loop of this code is the result of running a code generation and
 C optimization tool suite written by David Harvey and Torbjorn Granlund.
 
-C TODO
-C  * The loop is great, but the prologue and epilogue code was quickly written.
-C    Tune it!
+C TODO:
+C  * The inner loop is great, but the prologue and epilogue code was
+C    quickly written.  Tune it!
 
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`vl',      `%rcx')   C r9
+C INPUT PARAMETERS
+define(`rp',	 `%rdi')
+define(`up',	 `%rsi')
+define(`n_param',`%rdx')
+define(`vl',	 `%rcx')
 
-define(`n',       `%r11')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-IFDOS(`	define(`up', ``%rsi'')	') dnl
-IFDOS(`	define(`rp', ``%rcx'')	') dnl
-IFDOS(`	define(`vl', ``%r9'')	') dnl
-IFDOS(`	define(`r9', ``rdi'')	') dnl
-IFDOS(`	define(`n',  ``%r8'')	') dnl
-IFDOS(`	define(`r8', ``r11'')	') dnl
+define(`n',	`%r11')
 
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_mul_1c)
-IFDOS(``push	%rsi		'')
-IFDOS(``push	%rdi		'')
-IFDOS(``mov	%rdx, %rsi	'')
 	push	%rbx
-IFSTD(`	mov	%r8, %r10')
-IFDOS(`	mov	64(%rsp), %r10')	C 40 + 3*8  (3 push insns)
+	mov	%r8, %r10
 	jmp	L(common)
 EPILOGUE()
 
 PROLOGUE(mpn_mul_1)
-IFDOS(``push	%rsi		'')
-IFDOS(``push	%rdi		'')
-IFDOS(``mov	%rdx, %rsi	'')
-
 	push	%rbx
 	xor	%r10, %r10
 L(common):
 	mov	(up), %rax		C read first u limb early
-IFSTD(`	mov	n_param, %rbx   ')	C move away n from rdx, mul uses it
-IFDOS(`	mov	n, %rbx         ')
+	mov	n_param, %rbx		C move away n from rdx, mul uses it
 	mul	vl
-IFSTD(`	mov	%rbx, n         ')
+	mov	%rbx, %r11
 
 	add	%r10, %rax
 	adc	$0, %rdx
@@ -146,7 +113,7 @@ L(top):	mov	%r10, (rp,n,8)
 	add	%rax, %r9
 	mov	(up,n,8), %rax
 	adc	%rdx, %r8
-	mov	$0, R32(%r10)
+	mov	$0, %r10d
 L(L1):	mul	vl
 	mov	%r9, 8(rp,n,8)
 	add	%rax, %r8
@@ -159,11 +126,11 @@ L(L0):	mov	8(up,n,8), %rax
 L(L3):	mov	16(up,n,8), %rax
 	mul	vl
 	mov	%rbx, 24(rp,n,8)
-	mov	$0, R32(%r8)		C zero
-	mov	%r8, %rbx		C zero
+	mov	$0, %r8d		# zero
+	mov	%r8, %rbx		# zero
 	add	%rax, %r10
 	mov	24(up,n,8), %rax
-	mov	%r8, %r9		C zero
+	mov	%r8, %r9		# zero
 	adc	%rdx, %r9
 L(L2):	mul	vl
 	add	$4, n
@@ -177,7 +144,5 @@ L(L2):	mul	vl
 L(ret):	mov	%rdx, %rax
 
 	pop	%rbx
-IFDOS(``pop	%rdi		'')
-IFDOS(``pop	%rsi		'')
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/mul_2.asm b/gmp/mpn/x86_64/mul_2.asm
index f408c52250..a8ad00069f 100644
--- a/gmp/mpn/x86_64/mul_2.asm
+++ b/gmp/mpn/x86_64/mul_2.asm
@@ -1,44 +1,30 @@
 dnl  AMD64 mpn_mul_2 -- Multiply an n-limb vector with a 2-limb vector and
 dnl  store the result in a third limb vector.
 
-dnl  Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9	 2.275
-C AMD K10	 2.275
-C Intel P4	13.5
-C Intel core2	 4.0
-C Intel corei	 3.8
-C Intel atom	 ?
-C VIA nano	 ?
+C K8,K9:	 2.275
+C K10:		 2.275
+C P4:		 ?
+C P6-15:	 4.0
 
 C This code is the result of running a code generation and optimization tool
 C suite written by David Harvey and Torbjorn Granlund.
@@ -64,14 +50,10 @@ define(`w2', `%rbp')
 define(`w3', `%r10')
 define(`n',  `%r11')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_mul_2)
-	FUNC_ENTRY(4)
 	push	%rbx
 	push	%rbp
 
@@ -187,6 +169,5 @@ L(m22):	mul	v1
 
 	pop	%rbp
 	pop	%rbx
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/k8/mul_basecase.asm b/gmp/mpn/x86_64/mul_basecase.asm
index ca2efb9b2f..09ec4d14ae 100644
--- a/gmp/mpn/x86_64/k8/mul_basecase.asm
+++ b/gmp/mpn/x86_64/mul_basecase.asm
@@ -2,44 +2,30 @@ dnl  AMD64 mpn_mul_basecase.
 
 dnl  Contributed to the GNU project by Torbjorn Granlund and David Harvey.
 
-dnl  Copyright 2008, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 C	     cycles/limb
-C AMD K8,K9	 2.375
-C AMD K10	 2.375
-C Intel P4	15-16
-C Intel core2	 4.45
-C Intel corei	 4.35
-C Intel atom	 ?
-C VIA nano	 4.5
+C K8,K9:	 2.375
+C K10:		 2.375
+C P4:		 ?
+C P6-15:	 4.45
 
 C The inner loops of this code are the result of running a code generation and
 C optimization tool suite written by David Harvey and Torbjorn Granlund.
@@ -47,6 +33,7 @@ C optimization tool suite written by David Harvey and Torbjorn Granlund.
 C TODO
 C  * Use fewer registers.  (how??? I can't see it -- david)
 C  * Avoid some "mov $0,r" and instead use "xor r,r".
+C  * Don't align loops to 32-byte boundaries.
 C  * Can the top of each L(addmul_outer_n) prologue be folded into the
 C    mul_1/mul_2 prologues, saving a LEA (%rip)? It would slow down the
 C    case where vn = 1 or 2; is it worth it?
@@ -70,15 +57,10 @@ define(`n',  `%r11')
 define(`outer_addr', `%r14')
 define(`un',  `%r13')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_mul_basecase)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')
 	push	%rbx
 	push	%rbp
 	push	%r12
@@ -111,13 +93,7 @@ L(mul_1):
 	cmp	$2, R32(w0)
 	jc	L(mul_1_prologue_1)
 	jz	L(mul_1_prologue_2)
-
-L(mul_1_prologue_3):
-	add	$-1, n
-	lea	L(addmul_outer_3)(%rip), outer_addr
-	mov	%rax, w3
-	mov	%rdx, w0
-	jmp	L(mul_1_entry_3)
+	jmp	L(mul_1_prologue_3)
 
 L(mul_1_prologue_0):
 	mov	%rax, w2
@@ -149,6 +125,13 @@ L(mul_1_prologue_2):
 	xor	R32(w3), R32(w3)
 	jmp	L(mul_1_entry_2)
 
+L(mul_1_prologue_3):
+	add	$-1, n
+	lea	L(addmul_outer_3)(%rip), outer_addr
+	mov	%rax, w3
+	mov	%rdx, w0
+	jmp	L(mul_1_entry_3)
+
 
 	C this loop is 10 c/loop = 2.5 c/l on K8, for all up/rp alignments
 
@@ -309,7 +292,7 @@ L(mul_2_entry_1):
 	mov	w3, -32(rp,n,8)
 	js	L(mul_2_top)
 
-	mov	-32(up,n,8), %rax	C FIXME: n is constant
+	mov	-32(up,n,8), %rax
 	mul	v1
 	add	%rax, w0
 	mov	w0, (rp)
@@ -463,7 +446,6 @@ L(ret):	pop	%r15
 	pop	%r12
 	pop	%rbp
 	pop	%rbx
-	FUNC_EXIT()
 	ret
 
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/mulx/adx/addmul_1.asm b/gmp/mpn/x86_64/mulx/adx/addmul_1.asm
deleted file mode 100644
index ea607899a4..0000000000
--- a/gmp/mpn/x86_64/mulx/adx/addmul_1.asm
+++ /dev/null
@@ -1,149 +0,0 @@
-dnl  AMD64 mpn_addmul_1 for CPUs with mulx and adx.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 -
-C AMD K10	 -
-C AMD bd1	 -
-C AMD bobcat	 -
-C Intel P4	 -
-C Intel PNR	 -
-C Intel NHM	 -
-C Intel SBR	 -
-C Intel HWL	 -
-C Intel BWL	 ?
-C Intel atom	 -
-C VIA nano	 -
-
-define(`rp',      `%rdi')	dnl rcx
-define(`up',      `%rsi')	dnl rdx
-define(`n_param', `%rdx')	dnl r8
-define(`v0_param',`%rcx')	dnl r9
-
-define(`n',       `%rcx')	dnl
-define(`v0',      `%rdx')	dnl
-
-C Testing mechanism for running this on older AMD64 processors
-ifelse(FAKE_MULXADX,1,`
-  include(CONFIG_TOP_SRCDIR`/mpn/x86_64/missing-call.m4')
-',`
-  define(`adox',	``adox'	$1, $2')
-  define(`adcx',	``adcx'	$1, $2')
-  define(`mulx',	``mulx'	$1, $2, $3')
-')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_addmul_1)
-	mov	(up), %r8
-
-	push	%rbx
-	push	%r12
-	push	%r13
-
-	lea	(up,n_param,8), up
-	lea	-16(rp,n_param,8), rp
-	mov	R32(n_param), R32(%rax)
-	xchg	v0_param, v0		C FIXME: is this insn fast?
-
-	neg	n
-
-	and	$3, R8(%rax)
-	jz	L(b0)
-	cmp	$2, R8(%rax)
-	jl	L(b1)
-	jz	L(b2)
-
-L(b3):	mulx(	(up,n,8), %r11, %r10)
-	mulx(	8(up,n,8), %r13, %r12)
-	mulx(	16(up,n,8), %rbx, %rax)
-	dec	n
-	jmp	L(lo3)
-
-L(b0):	mulx(	(up,n,8), %r9, %r8)
-	mulx(	8(up,n,8), %r11, %r10)
-	mulx(	16(up,n,8), %r13, %r12)
-	jmp	L(lo0)
-
-L(b2):	mulx(	(up,n,8), %r13, %r12)
-	mulx(	8(up,n,8), %rbx, %rax)
-	lea	2(n), n
-	jrcxz	L(wd2)
-L(gt2):	mulx(	(up,n,8), %r9, %r8)
-	jmp	L(lo2)
-
-L(b1):	and	R8(%rax), R8(%rax)
-	mulx(	(up,n,8), %rbx, %rax)
-	lea	1(n), n
-	jrcxz	L(wd1)
-	mulx(	(up,n,8), %r9, %r8)
-	mulx(	8(up,n,8), %r11, %r10)
-	jmp	L(lo1)
-
-L(end):	adcx(	%r10, %r13)
-	mov	%r11, -8(rp)
-L(wd2):	adox(	(rp), %r13)
-	adcx(	%r12, %rbx)
-	mov	%r13, (rp)
-L(wd1):	adox(	8(rp), %rbx)
-	adcx(	%rcx, %rax)
-	adox(	%rcx, %rax)
-	mov	%rbx, 8(rp)
-	pop	%r13
-	pop	%r12
-	pop	%rbx
-	ret
-
-L(top):	jrcxz	L(end)
-	mulx(	(up,n,8), %r9, %r8)
-	adcx(	%r10, %r13)
-	mov	%r11, -8(rp,n,8)
-L(lo2):	adox(	(rp,n,8), %r13)
-	mulx(	8(up,n,8), %r11, %r10)
-	adcx(	%r12, %rbx)
-	mov	%r13, (rp,n,8)
-L(lo1):	adox(	8(rp,n,8), %rbx)
-	mulx(	16(up,n,8), %r13, %r12)
-	adcx(	%rax, %r9)
-	mov	%rbx, 8(rp,n,8)
-L(lo0):	adox(	16(rp,n,8), %r9)
-	mulx(	24(up,n,8), %rbx, %rax)
-	adcx(	%r8, %r11)
-	mov	%r9, 16(rp,n,8)
-L(lo3):	adox(	24(rp,n,8), %r11)
-	lea	4(n), n
-	jmp	L(top)
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/mulx/aorsmul_1.asm b/gmp/mpn/x86_64/mulx/aorsmul_1.asm
deleted file mode 100644
index 285c07335e..0000000000
--- a/gmp/mpn/x86_64/mulx/aorsmul_1.asm
+++ /dev/null
@@ -1,161 +0,0 @@
-dnl  AMD64 mpn_addmul_1 and mpn_submul_1 for CPUs with mulx.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 -
-C AMD K10	 -
-C AMD bd1	 -
-C AMD bd2	 ?
-C AMD bobcat	 -
-C AMD jaguar	 ?
-C Intel P4	 -
-C Intel PNR	 -
-C Intel NHM	 -
-C Intel SBR	 -
-C Intel HWL	 ?
-C Intel BWL	 ?
-C Intel atom	 -
-C VIA nano	 -
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`v0_param',`%rcx')   C r9
-
-define(`n',       `%rcx')
-define(`v0',      `%rdx')
-
-ifdef(`OPERATION_addmul_1',`
-      define(`ADDSUB',        `add')
-      define(`ADCSBB',        `adc')
-      define(`func',  `mpn_addmul_1')
-')
-ifdef(`OPERATION_submul_1',`
-      define(`ADDSUB',        `sub')
-      define(`ADCSBB',        `sbb')
-      define(`func',  `mpn_submul_1')
-')
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
-
-IFDOS(`	define(`up', ``%rsi'')	') dnl
-IFDOS(`	define(`rp', ``%rcx'')	') dnl
-IFDOS(`	define(`vl', ``%r9'')	') dnl
-IFDOS(`	define(`r9', ``rdi'')	') dnl
-IFDOS(`	define(`n',  ``%r8'')	') dnl
-IFDOS(`	define(`r8', ``r11'')	') dnl
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	mov	(up), %r8
-
-	push	%rbx
-	push	%r12
-	push	%r13
-
-	lea	(up,n_param,8), up
-	lea	-32(rp,n_param,8), rp
-	mov	R32(n_param), R32(%rax)
-	xchg	v0_param, v0		C FIXME: is this insn fast?
-
-	neg	n
-
-	and	$3, R8(%rax)
-	jz	L(b0)
-	cmp	$2, R8(%rax)
-	jz	L(b2)
-	jg	L(b3)
-
-L(b1):	mulx	%r8, %rbx, %rax
-	sub	$-1, n
-	jz	L(wd1)
-	mulx	(up,n,8), %r9, %r8
-	mulx	8(up,n,8), %r11, %r10
-	test	R32(%rax), R32(%rax)		C clear cy
-	jmp	L(lo1)
-
-L(b0):	mulx	%r8, %r9, %r8
-	mulx	8(up,n,8), %r11, %r10
-	mulx	16(up,n,8), %r13, %r12
-	xor	R32(%rax), R32(%rax)
-	jmp	L(lo0)
-
-L(b3):	mulx	%r8, %r11, %r10
-	mulx	8(up,n,8), %r13, %r12
-	mulx	16(up,n,8), %rbx, %rax
-	add	%r10, %r13
-	adc	%r12, %rbx
-	adc	$0, %rax
-	sub	$-3, n
-	jz	L(wd3)
-	test	R32(%rax), R32(%rax)		C clear cy
-	jmp	L(lo3)
-
-L(b2):	mulx	%r8, %r13, %r12
-	mulx	8(up,n,8), %rbx, %rax
-	add	%r12, %rbx
-	adc	$0, %rax
-	sub	$-2, n
-	jz	L(wd2)
-	mulx	(up,n,8), %r9, %r8
-	test	R32(%rax), R32(%rax)		C clear cy
-	jmp	L(lo2)
-
-L(top):	ADDSUB	%r9, (rp,n,8)
-L(lo3):	mulx	(up,n,8), %r9, %r8
-	ADCSBB	%r11, 8(rp,n,8)
-L(lo2):	mulx	8(up,n,8), %r11, %r10
-	ADCSBB	%r13, 16(rp,n,8)
-L(lo1):	mulx	16(up,n,8), %r13, %r12
-	ADCSBB	%rbx, 24(rp,n,8)
-	adc	%rax, %r9
-L(lo0):	mulx	24(up,n,8), %rbx, %rax
-	adc	%r8, %r11
-	adc	%r10, %r13
-	adc	%r12, %rbx
-	adc	$0, %rax		C rax = carry limb
-	add	$4, n
-	js	L(top)
-
-L(end):	ADDSUB	%r9, (rp)
-L(wd3):	ADCSBB	%r11, 8(rp)
-L(wd2):	ADCSBB	%r13, 16(rp)
-L(wd1):	ADCSBB	%rbx, 24(rp)
-	adc	n, %rax
-	pop	%r13
-	pop	%r12
-	pop	%rbx
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/mulx/mul_1.asm b/gmp/mpn/x86_64/mulx/mul_1.asm
deleted file mode 100644
index 34a044dcdc..0000000000
--- a/gmp/mpn/x86_64/mulx/mul_1.asm
+++ /dev/null
@@ -1,154 +0,0 @@
-dnl  AMD64 mpn_mul_1 for CPUs with mulx.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 -
-C AMD K10	 -
-C AMD bd1	 -
-C AMD bd2	 ?
-C AMD bobcat	 -
-C AMD jaguar	 ?
-C Intel P4	 -
-C Intel PNR	 -
-C Intel NHM	 -
-C Intel SBR	 -
-C Intel HWL	 ?
-C Intel BWL	 ?
-C Intel atom	 -
-C VIA nano	 -
-
-define(`rp',      `%rdi')   C rcx
-define(`up',      `%rsi')   C rdx
-define(`n_param', `%rdx')   C r8
-define(`v0_param',`%rcx')   C r9
-
-define(`n',       `%rcx')
-define(`v0',      `%rdx')
-
-IFDOS(`	define(`up', ``%rsi'')	') dnl
-IFDOS(`	define(`rp', ``%rcx'')	') dnl
-IFDOS(`	define(`v0', ``%r9'')	') dnl
-IFDOS(`	define(`r9', ``rdi'')	') dnl
-IFDOS(`	define(`n',  ``%r8'')	') dnl
-IFDOS(`	define(`r8', ``r11'')	') dnl
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_mul_1c)
-	jmp	L(ent)
-EPILOGUE()
-PROLOGUE(mpn_mul_1)
-	xor	R32(%r8), R32(%r8)	C carry-in limb
-L(ent):	mov	(up), %r9
-
-	push	%rbx
-	push	%r12
-	push	%r13
-
-	lea	(up,n_param,8), up
-	lea	-32(rp,n_param,8), rp
-	mov	R32(n_param), R32(%rax)
-	xchg	v0_param, v0		C FIXME: is this insn fast?
-
-	neg	n
-
-	and	$3, R8(%rax)
-	jz	L(b0)
-	cmp	$2, R8(%rax)
-	jz	L(b2)
-	jg	L(b3)
-
-L(b1):	mov	%r8, %r12
-	mulx	%r9, %rbx, %rax
-	sub	$-1, n
-	jz	L(wd1)
-	mulx	(up,n,8), %r9, %r8
-	mulx	8(up,n,8), %r11, %r10
-	add	%r12, %rbx
-	jmp	L(lo1)
-
-L(b3):	mulx	%r9, %r11, %r10
-	mulx	8(up,n,8), %r13, %r12
-	mulx	16(up,n,8), %rbx, %rax
-	sub	$-3, n
-	jz	L(wd3)
-	add	%r8, %r11
-	jmp	L(lo3)
-
-L(b2):	mov	%r8, %r10		C carry-in limb
-	mulx	%r9, %r13, %r12
-	mulx	8(up,n,8), %rbx, %rax
-	sub	$-2, n
-	jz	L(wd2)
-	mulx	(up,n,8), %r9, %r8
-	add	%r10, %r13
-	jmp	L(lo2)
-
-L(b0):	mov	%r8, %rax		C carry-in limb
-	mulx	%r9, %r9, %r8
-	mulx	8(up,n,8), %r11, %r10
-	mulx	16(up,n,8), %r13, %r12
-	add	%rax, %r9
-	jmp	L(lo0)
-
-L(top):	jrcxz	L(end)
-	adc	%r8, %r11
-	mov	%r9, (rp,n,8)
-L(lo3):	mulx	(up,n,8), %r9, %r8
-	adc	%r10, %r13
-	mov	%r11, 8(rp,n,8)
-L(lo2):	mulx	8(up,n,8), %r11, %r10
-	adc	%r12, %rbx
-	mov	%r13, 16(rp,n,8)
-L(lo1):	mulx	16(up,n,8), %r13, %r12
-	adc	%rax, %r9
-	mov	%rbx, 24(rp,n,8)
-L(lo0):	mulx	24(up,n,8), %rbx, %rax
-	lea	4(n), n
-	jmp	L(top)
-
-L(end):	mov	%r9, (rp)
-L(wd3):	adc	%r8, %r11
-	mov	%r11, 8(rp)
-L(wd2):	adc	%r10, %r13
-	mov	%r13, 16(rp)
-L(wd1):	adc	%r12, %rbx
-	adc	n, %rax
-	mov	%rbx, 24(rp)
-
-	pop	%r13
-	pop	%r12
-	pop	%rbx
-	ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/nano/copyd.asm b/gmp/mpn/x86_64/nano/copyd.asm
deleted file mode 100644
index f0dc54a55e..0000000000
--- a/gmp/mpn/x86_64/nano/copyd.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_copyd optimised for Intel Sandy Bridge.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_copyd)
-include_mpn(`x86_64/fastsse/copyd-palignr.asm')
diff --git a/gmp/mpn/x86_64/nano/copyi.asm b/gmp/mpn/x86_64/nano/copyi.asm
deleted file mode 100644
index 9c26e00c52..0000000000
--- a/gmp/mpn/x86_64/nano/copyi.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_copyi optimised for Intel Sandy Bridge.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_copyi)
-include_mpn(`x86_64/fastsse/copyi-palignr.asm')
diff --git a/gmp/mpn/x86_64/nano/dive_1.asm b/gmp/mpn/x86_64/nano/dive_1.asm
deleted file mode 100644
index e9a07631c4..0000000000
--- a/gmp/mpn/x86_64/nano/dive_1.asm
+++ /dev/null
@@ -1,166 +0,0 @@
-dnl  AMD64 mpn_divexact_1 -- mpn by limb exact division.
-
-dnl  Copyright 2001, 2002, 2004-2006, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb
-C	       norm	       unorm
-C AMD K8,K9	11		11
-C AMD K10	11		11
-C Intel P4	 ?
-C Intel core2	13.5		13.25
-C Intel corei	14.25
-C Intel atom	34		36
-C VIA nano	19.25		19.25
-
-
-C INPUT PARAMETERS
-C rp		rdi
-C up		rsi
-C n		rdx
-C divisor	rcx
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_divexact_1)
-	FUNC_ENTRY(4)
-	push	%rbx
-
-	mov	%rcx, %rax
-	xor	R32(%rcx), R32(%rcx)	C shift count
-	mov	%rdx, %r8
-
-	bt	$0, R32(%rax)
-	jc	L(odd)			C skip bsfq unless divisor is even
-	bsf	%rax, %rcx
-	shr	R8(%rcx), %rax
-L(odd):	mov	%rax, %rbx
-	shr	R32(%rax)
-	and	$127, R32(%rax)		C d/2, 7 bits
-
-	LEA(	binvert_limb_table, %rdx)
-
-	movzbl	(%rdx,%rax), R32(%rax)	C inv 8 bits
-
-	mov	%rbx, %r11		C d without twos
-
-	lea	(%rax,%rax), R32(%rdx)	C 2*inv
-	imul	R32(%rax), R32(%rax)	C inv*inv
-	imul	R32(%rbx), R32(%rax)	C inv*inv*d
-	sub	R32(%rax), R32(%rdx)	C inv = 2*inv - inv*inv*d, 16 bits
-
-	lea	(%rdx,%rdx), R32(%rax)	C 2*inv
-	imul	R32(%rdx), R32(%rdx)	C inv*inv
-	imul	R32(%rbx), R32(%rdx)	C inv*inv*d
-	sub	R32(%rdx), R32(%rax)	C inv = 2*inv - inv*inv*d, 32 bits
-
-	lea	(%rax,%rax), %r10	C 2*inv
-	imul	%rax, %rax		C inv*inv
-	imul	%rbx, %rax		C inv*inv*d
-	sub	%rax, %r10		C inv = 2*inv - inv*inv*d, 64 bits
-
-	lea	(%rsi,%r8,8), %rsi	C up end
-	lea	-8(%rdi,%r8,8), %rdi	C rp end
-	neg	%r8			C -n
-
-	mov	(%rsi,%r8,8), %rax	C up[0]
-
-	inc	%r8
-	jz	L(one)
-
-	test	R32(%rcx), R32(%rcx)
-	jnz	L(unorm)		C branch if count != 0
-	xor	R32(%rbx), R32(%rbx)
-	jmp	L(nent)
-
-	ALIGN(8)
-L(ntop):mul	%r11			C carry limb in rdx	0 10
-	mov	-8(%rsi,%r8,8), %rax	C
-	sub	%rbx, %rax		C apply carry bit
-	setc	%bl			C
-	sub	%rdx, %rax		C apply carry limb	5
-	adc	$0, %rbx		C			6
-L(nent):imul	%r10, %rax		C			6
-	mov	%rax, (%rdi,%r8,8)	C
-	inc	%r8			C
-	jnz	L(ntop)
-
-	mov	-8(%rsi), %r9		C up high limb
-	jmp	L(com)
-
-L(unorm):
-	mov	(%rsi,%r8,8), %r9	C up[1]
-	shr	R8(%rcx), %rax		C
-	neg	R32(%rcx)
-	shl	R8(%rcx), %r9		C
-	neg	R32(%rcx)
-	or	%r9, %rax
-	xor	R32(%rbx), R32(%rbx)
-	jmp	L(uent)
-
-	ALIGN(8)
-L(utop):mul	%r11			C carry limb in rdx	0 10
-	mov	(%rsi,%r8,8), %rax	C
-	shl	R8(%rcx), %rax		C
-	neg	R32(%rcx)
-	or	%r9, %rax
-	sub	%rbx, %rax		C apply carry bit
-	setc	%bl			C
-	sub	%rdx, %rax		C apply carry limb	5
-	adc	$0, %rbx		C			6
-L(uent):imul	%r10, %rax		C			6
-	mov	(%rsi,%r8,8), %r9	C
-	shr	R8(%rcx), %r9		C
-	neg	R32(%rcx)
-	mov	%rax, (%rdi,%r8,8)	C
-	inc	%r8			C
-	jnz	L(utop)
-
-L(com):	mul	%r11			C carry limb in rdx
-	sub	%rbx, %r9		C apply carry bit
-	sub	%rdx, %r9		C apply carry limb
-	imul	%r10, %r9
-	mov	%r9, (%rdi)
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-
-L(one):	shr	R8(%rcx), %rax
-	imul	%r10, %rax
-	mov	%rax, (%rdi)
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/nano/gcd_1.asm b/gmp/mpn/x86_64/nano/gcd_1.asm
deleted file mode 100644
index 3d8e5c7ab1..0000000000
--- a/gmp/mpn/x86_64/nano/gcd_1.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  AMD64 mpn_gcd_1.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_gcd_1)
-include_mpn(`x86_64/core2/gcd_1.asm')
diff --git a/gmp/mpn/x86_64/nano/gmp-mparam.h b/gmp/mpn/x86_64/nano/gmp-mparam.h
deleted file mode 100644
index fde69dbb7f..0000000000
--- a/gmp/mpn/x86_64/nano/gmp-mparam.h
+++ /dev/null
@@ -1,243 +0,0 @@
-/* VIA Nano gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2010, 2012, 2014 Free Software Foundation,
-Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
-
-#define SHLD_SLOW 1
-#define SHRD_SLOW 1
-
-/* 1600 MHz Nano 2xxx */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.2 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          2
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        18
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        20
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD      8
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           22
-
-#define MUL_TOOM22_THRESHOLD                27
-#define MUL_TOOM33_THRESHOLD                38
-#define MUL_TOOM44_THRESHOLD               324
-#define MUL_TOOM6H_THRESHOLD               450
-#define MUL_TOOM8H_THRESHOLD               632
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD     207
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD     211
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD     219
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     315
-
-#define SQR_BASECASE_THRESHOLD              10
-#define SQR_TOOM2_THRESHOLD                 52
-#define SQR_TOOM3_THRESHOLD                 73
-#define SQR_TOOM4_THRESHOLD                387
-#define SQR_TOOM6_THRESHOLD                662
-#define SQR_TOOM8_THRESHOLD                781
-
-#define MULMID_TOOM42_THRESHOLD             32
-
-#define MULMOD_BNM1_THRESHOLD               14
-#define SQRMOD_BNM1_THRESHOLD               15
-
-#define MUL_FFT_MODF_THRESHOLD             376  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    376, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     23, 7}, {     12, 6}, {     25, 7}, {     21, 8}, \
-    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     31, 8}, {     19, 7}, {     39, 8}, \
-    {     21, 9}, {     11, 8}, {     27, 9}, {     15, 8}, \
-    {     35, 9}, {     19, 8}, {     41, 9}, {     23, 8}, \
-    {     49, 9}, {     27,10}, {     15, 9}, {     43,10}, \
-    {     23, 9}, {     55,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     83,10}, {     47, 9}, \
-    {     95,10}, {     79,11}, {     47,10}, {    103,12}, \
-    {     31,11}, {     63,10}, {    143,11}, {     79,10}, \
-    {    159, 9}, {    319,10}, {    175,11}, {     95, 9}, \
-    {    383, 8}, {    767,10}, {    207,11}, {    111,12}, \
-    {     63,11}, {    127,10}, {    255,11}, {    143, 9}, \
-    {    575, 8}, {   1151,10}, {    303,11}, {    159,10}, \
-    {    319, 9}, {    639, 8}, {   1279,10}, {    335,12}, \
-    {     95,11}, {    191,10}, {    383, 9}, {    767,11}, \
-    {    207,10}, {    415, 9}, {    831, 8}, {   1663,10}, \
-    {    447,13}, {     63,12}, {    127,11}, {    255,10}, \
-    {    511, 9}, {   1023,11}, {    271,10}, {    543, 9}, \
-    {   1087,10}, {    575, 9}, {   1215,12}, {    159,11}, \
-    {    319,10}, {    639, 9}, {   1279,11}, {    335,10}, \
-    {    671, 9}, {   1343,11}, {    351,10}, {    703, 9}, \
-    {   1407,12}, {    191,11}, {    383,10}, {    767, 9}, \
-    {   1535,10}, {    831, 9}, {   1663,12}, {    223,11}, \
-    {    447,10}, {    895,13}, {    127,12}, {    255,11}, \
-    {    511,10}, {   1023,11}, {    543,10}, {   1087,12}, \
-    {    287,11}, {    575,10}, {   1151,11}, {    607,10}, \
-    {   1215,12}, {    319,11}, {    639,10}, {   1279,11}, \
-    {    671,10}, {   1343,12}, {    351,11}, {    703,10}, \
-    {   1407,13}, {    191,12}, {    383,11}, {    767,10}, \
-    {   1535,12}, {    415,11}, {    831,10}, {   1663,12}, \
-    {    447,11}, {    895,10}, {   1791,14}, {    127,13}, \
-    {    255,12}, {    511,11}, {   1023,12}, {    543,11}, \
-    {   1087,12}, {    575,11}, {   1151,12}, {    607,11}, \
-    {   1215,13}, {    319,12}, {    639,11}, {   1279,12}, \
-    {    671,11}, {   1343,12}, {    703,11}, {   1407,13}, \
-    {    383,12}, {    767,11}, {   1535,12}, {    831,11}, \
-    {   1663,13}, {    447,12}, {    895,11}, {   1791,13}, \
-    {    511,12}, {   1023,11}, {   2047,12}, {   1087,13}, \
-    {    575,12}, {   1151,11}, {   2303,12}, {   1215,13}, \
-    {    639,12}, {   1279,11}, {   2559,12}, {   1343,13}, \
-    {    703,12}, {   1407,14}, {    383,13}, {    767,12}, \
-    {   1535,13}, {    831,12}, {   1663,13}, {    895,12}, \
-    {   1791,13}, {    959,14}, {    511,13}, {   1023,12}, \
-    {   2047,13}, {   1087,12}, {   2175,13}, {   1151,12}, \
-    {   2303,13}, {   1215,14}, {    639,13}, {   1279,12}, \
-    {   2559,13}, {   1407,12}, {   2815,13}, {   1471,14}, \
-    {    767,13}, {   1535,12}, {   3071,13}, {   1663,14}, \
-    {    895,13}, {   1791,12}, {   3583,13}, {   1919,15}, \
-    {    511,14}, {   1023,13}, {   2047,12}, {   4095,13}, \
-    {   2175,14}, {   1151,13}, {   2303,12}, {   4607,13}, \
-    {   2431,14}, {   1279,13}, {   2559,12}, {   5119,14}, \
-    {   1407,13}, {   2815,12}, {   5631,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 224
-#define MUL_FFT_THRESHOLD                 3520
-
-#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    340, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
-    {     21, 7}, {     11, 6}, {     23, 7}, {     21, 8}, \
-    {     11, 7}, {     24, 8}, {     13, 7}, {     27, 8}, \
-    {     15, 7}, {     31, 8}, {     21, 9}, {     11, 8}, \
-    {     27, 9}, {     15, 8}, {     33, 9}, {     19, 8}, \
-    {     41, 9}, {     23, 8}, {     47, 9}, {     27,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     51,11}, \
-    {     15,10}, {     31, 9}, {     67,10}, {     39, 9}, \
-    {     79,10}, {     47, 9}, {     95,10}, {     55,11}, \
-    {     31,10}, {     63, 9}, {    127,10}, {     71, 9}, \
-    {    143,10}, {     79,11}, {     47,10}, {     95, 9}, \
-    {    191,10}, {    103,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255,10}, {    135, 7}, {   1087, 9}, \
-    {    287,11}, {     79, 9}, {    319, 8}, {    639,10}, \
-    {    167,11}, {     95,10}, {    191, 9}, {    383, 8}, \
-    {    767,11}, {    111,12}, {     63,11}, {    127,10}, \
-    {    255, 9}, {    511, 8}, {   1023,10}, {    271, 9}, \
-    {    543, 8}, {   1087,11}, {    143, 9}, {    575, 8}, \
-    {   1151,10}, {    303, 9}, {    639, 8}, {   1279,10}, \
-    {    335, 9}, {    671,10}, {    351, 9}, {    703,12}, \
-    {     95,11}, {    191,10}, {    383, 9}, {    767,11}, \
-    {    207,10}, {    415, 9}, {    831,13}, {     63,12}, \
-    {    127,11}, {    255,10}, {    511, 9}, {   1023,11}, \
-    {    271,10}, {    543, 9}, {   1087,10}, {    575, 9}, \
-    {   1151,11}, {    303,10}, {    607, 9}, {   1215,12}, \
-    {    159,11}, {    319,10}, {    639, 9}, {   1279,10}, \
-    {    671, 9}, {   1343,11}, {    351,10}, {    703, 9}, \
-    {   1407,12}, {    191,11}, {    383,10}, {    767, 9}, \
-    {   1535,11}, {    415,10}, {    831, 9}, {   1663,12}, \
-    {    223,11}, {    447,10}, {    959,13}, {    127,12}, \
-    {    255,11}, {    511,10}, {   1023,11}, {    543,10}, \
-    {   1087,11}, {    575,10}, {   1215,12}, {    319,11}, \
-    {    639,10}, {   1279,11}, {    671,10}, {   1343,12}, \
-    {    351,11}, {    703,10}, {   1407,13}, {    191,12}, \
-    {    383,11}, {    767,10}, {   1535,12}, {    415,11}, \
-    {    831,10}, {   1663,12}, {    447,11}, {    895,10}, \
-    {   1791,12}, {    479,11}, {    959,14}, {    127,12}, \
-    {    511,11}, {   1023,12}, {    543,11}, {   1087,12}, \
-    {    575,11}, {   1151,12}, {    607,11}, {   1215,13}, \
-    {    319,12}, {    639,11}, {   1279,12}, {    671,11}, \
-    {   1343,12}, {    703,11}, {   1407,13}, {    383,12}, \
-    {    767,11}, {   1535,12}, {    831,11}, {   1663,13}, \
-    {    447,12}, {    895,11}, {   1791,12}, {    959,13}, \
-    {    511,12}, {   1023,11}, {   2047,12}, {   1087,13}, \
-    {    575,12}, {   1215,13}, {    639,12}, {   1343,13}, \
-    {    703,12}, {   1407,11}, {   2815,13}, {    767,12}, \
-    {   1535,13}, {    831,12}, {   1663,13}, {    895,12}, \
-    {   1791,13}, {    959,14}, {    511,13}, {   1023,12}, \
-    {   2047,13}, {   1087,12}, {   2175,13}, {   1215,14}, \
-    {    639,13}, {   1279,12}, {   2559,13}, {   1407,12}, \
-    {   2815,14}, {    767,13}, {   1535,12}, {   3071,13}, \
-    {   1663,14}, {    895,13}, {   1791,12}, {   3583,13}, \
-    {   1919,15}, {    511,14}, {   1023,13}, {   2047,12}, \
-    {   4095,13}, {   2175,14}, {   1151,13}, {   2303,12}, \
-    {   4607,14}, {   1279,13}, {   2559,14}, {   1407,13}, \
-    {   2815,15}, {  32768,16}, {  65536,17}, { 131072,18}, \
-    { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
-    {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 230
-#define SQR_FFT_THRESHOLD                 2496
-
-#define MULLO_BASECASE_THRESHOLD            13
-#define MULLO_DC_THRESHOLD                  38
-#define MULLO_MUL_N_THRESHOLD             6633
-
-#define DC_DIV_QR_THRESHOLD                 56
-#define DC_DIVAPPR_Q_THRESHOLD             173
-#define DC_BDIV_QR_THRESHOLD                55
-#define DC_BDIV_Q_THRESHOLD                 96
-
-#define INV_MULMOD_BNM1_THRESHOLD           54
-#define INV_NEWTON_THRESHOLD               202
-#define INV_APPR_THRESHOLD                 166
-
-#define BINV_NEWTON_THRESHOLD              246
-#define REDC_1_TO_REDC_2_THRESHOLD           7
-#define REDC_2_TO_REDC_N_THRESHOLD          85
-
-#define MU_DIV_QR_THRESHOLD               1499
-#define MU_DIVAPPR_Q_THRESHOLD            1652
-#define MUPI_DIV_QR_THRESHOLD               83
-#define MU_BDIV_QR_THRESHOLD              1210
-#define MU_BDIV_Q_THRESHOLD               1499
-
-#define POWM_SEC_TABLE  1,28,129,642,2387
-
-#define MATRIX22_STRASSEN_THRESHOLD         15
-#define HGCD_THRESHOLD                     127
-#define HGCD_APPR_THRESHOLD                214
-#define HGCD_REDUCE_THRESHOLD             2479
-#define GCD_DC_THRESHOLD                   487
-#define GCDEXT_DC_THRESHOLD                505
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        24
-#define SET_STR_DC_THRESHOLD               802
-#define SET_STR_PRECOMPUTE_THRESHOLD      2042
-
-#define FAC_DSC_THRESHOLD                 1737
-#define FAC_ODD_THRESHOLD                   44
diff --git a/gmp/mpn/x86_64/nano/popcount.asm b/gmp/mpn/x86_64/nano/popcount.asm
deleted file mode 100644
index fb14dd3d31..0000000000
--- a/gmp/mpn/x86_64/nano/popcount.asm
+++ /dev/null
@@ -1,35 +0,0 @@
-dnl  x86-64 mpn_popcount.
-
-dnl  Copyright 2007, 2011 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_popcount)
-include_mpn(`x86/pentium4/sse2/popcount.asm')
diff --git a/gmp/mpn/x86_64/pentium4/aors_n.asm b/gmp/mpn/x86_64/pentium4/aors_n.asm
index 8e6ee1bae6..90f5a219b9 100644
--- a/gmp/mpn/x86_64/pentium4/aors_n.asm
+++ b/gmp/mpn/x86_64/pentium4/aors_n.asm
@@ -1,46 +1,30 @@
 dnl  x86-64 mpn_add_n/mpn_sub_n optimized for Pentium 4.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2007, 2008, 2010-2012 Free Software Foundation, Inc.
+dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	 2.8
-C AMD K10	 2.8
-C Intel P4	 4
-C Intel core2	 3.6-5	(fluctuating)
-C Intel corei	 ?
-C Intel atom	 ?
-C VIA nano	 ?
+C K8,K9:	 2.8
+C K10:		 2.8
+C P4:		 4
+C P6-15:	 3.6-5	(fluctuating)
 
 
 C INPUT PARAMETERS
@@ -59,20 +43,19 @@ ifdef(`OPERATION_sub_n', `
 	define(func,	      mpn_sub_n)
 	define(func_nc,	      mpn_sub_nc)')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
 ASM_START()
+
 	TEXT
+	ALIGN(16)
+
+PROLOGUE(func_nc)
+	jmp	L(ent)
+EPILOGUE()
+
 PROLOGUE(func)
-	FUNC_ENTRY(4)
 	xor	%r8, %r8
-IFDOS(`	jmp	L(ent)		')
-EPILOGUE()
-PROLOGUE(func_nc)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
 L(ent):	push	%rbx
 	push	%r12
 
@@ -191,6 +174,5 @@ L(1):	mov	%r11, 8(rp)
 L(ret):	mov	R32(%rbx), R32(%rax)
 	pop	%r12
 	pop	%rbx
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/pentium4/aorslsh1_n.asm b/gmp/mpn/x86_64/pentium4/aorslsh1_n.asm
index 66937d3267..0723f3e6ca 100644
--- a/gmp/mpn/x86_64/pentium4/aorslsh1_n.asm
+++ b/gmp/mpn/x86_64/pentium4/aorslsh1_n.asm
@@ -1,50 +1,192 @@
 dnl  AMD64 mpn_addlsh1_n, mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1),
-dnl  optimised for Pentium 4.
+dnl  optimized for Pentium 4.
 
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
+dnl  Copyright 2008 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
-define(LSH, 1)
-define(RSH, 31)			C 31, not 63, since we use 32-bit ops
+C	     cycles/limb
+C K8,K9:	 3.8
+C K10:		 4.8
+C P4:		 5.8
+C P6-15:	 ?
+
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n', `%rcx')
 
 ifdef(`OPERATION_addlsh1_n', `
-  define(ADDSUB,	add)
-  define(func,		mpn_addlsh1_n)')
+	define(ADDSUB,	      add)
+	define(func,	      mpn_addlsh1_n)')
 ifdef(`OPERATION_sublsh1_n', `
-  define(ADDSUB,	sub)
-  define(func,		mpn_sublsh1_n)')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
+	define(ADDSUB,	      sub)
+	define(func,	      mpn_sublsh1_n)')
 
 MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
-include_mpn(`x86_64/pentium4/aorslshC_n.asm')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(func)
+	push	%rbx
+	push	%r12
+	push	%rbp
+
+	mov	(vp), %r9
+	shl	%r9
+	mov	4(vp), R32(%rbp)
+
+	xor	R32(%rbx), R32(%rbx)
+
+	mov	R32(n), R32(%rax)
+	and	$3, R32(%rax)
+	jne	L(n00)		C n = 0, 4, 8, ...
+
+	mov	(up), %r8
+	mov	8(up), %r10
+	shr	$31, R32(%rbp)
+	ADDSUB	%r9, %r8
+	mov	8(vp), %r9
+	lea	(%rbp,%r9,2), %r9
+	setc	R8(%rax)
+	mov	12(vp), R32(%rbp)
+	lea	-16(rp), rp
+	jmp	L(L00)
+
+L(n00):	cmp	$2, R32(%rax)
+	jnc	L(n01)		C n = 1, 5, 9, ...
+	mov	(up), %r11
+	lea	-8(rp), rp
+	shr	$31, R32(%rbp)
+	ADDSUB	%r9, %r11
+	setc	R8(%rbx)
+	dec	n
+	jz	L(1)		C jump for n = 1
+	mov	8(up), %r8
+	mov	8(vp), %r9
+	lea	(%rbp,%r9,2), %r9
+	mov	12(vp), R32(%rbp)
+	lea	8(up), up
+	lea	8(vp), vp
+	jmp	L(L01)
+
+L(n01):	jne	L(n10)		C n = 2, 6, 10, ...
+	mov	(up), %r12
+	mov	8(up), %r11
+	shr	$31, R32(%rbp)
+	ADDSUB	%r9, %r12
+	mov	8(vp), %r9
+	lea	(%rbp,%r9,2), %r9
+	setc	R8(%rax)
+	mov	12(vp), R32(%rbp)
+	lea	16(up), up
+	lea	16(vp), vp
+	jmp	L(L10)
+
+L(n10):	mov	(up), %r10
+	mov	8(up), %r12
+	shr	$31, R32(%rbp)
+	ADDSUB	%r9, %r10
+	mov	8(vp), %r9
+	lea	(%rbp,%r9,2), %r9
+	setc	R8(%rbx)
+	mov	12(vp), R32(%rbp)
+	lea	-24(rp), rp
+	lea	-8(up), up
+	lea	-8(vp), vp
+	jmp	L(L11)
+
+L(c0):	mov	$1, R8(%rbx)
+	jmp	L(rc0)
+L(c1):	mov	$1, R8(%rax)
+	jmp	L(rc1)
+L(c2):	mov	$1, R8(%rbx)
+	jmp	L(rc2)
+
+	ALIGN(16)
+L(top):	mov	(up), %r8	C not on critical path
+	shr	$31, R32(%rbp)
+	ADDSUB	%r9, %r11	C not on critical path
+	mov	(vp), %r9
+	lea	(%rbp,%r9,2), %r9
+	setc	R8(%rbx)	C save carry out
+	mov	4(vp), R32(%rbp)
+	mov	%r12, (rp)
+	ADDSUB	%rax, %r11	C apply previous carry out
+	jc	L(c0)		C jump if ripple
+L(rc0):
+L(L01):	mov	8(up), %r10
+	shr	$31, R32(%rbp)
+	ADDSUB	%r9, %r8
+	mov	8(vp), %r9
+	lea	(%rbp,%r9,2), %r9
+	setc	R8(%rax)
+	mov	12(vp), R32(%rbp)
+	mov	%r11, 8(rp)
+	ADDSUB	%rbx, %r8
+	jc	L(c1)
+L(rc1):
+L(L00):	mov	16(up), %r12
+	shr	$31, R32(%rbp)
+	ADDSUB	%r9, %r10
+	mov	16(vp), %r9
+	lea	(%rbp,%r9,2), %r9
+	setc	R8(%rbx)
+	mov	20(vp), R32(%rbp)
+	mov	%r8, 16(rp)
+	ADDSUB	%rax, %r10
+	jc	L(c2)
+L(rc2):
+L(L11):	mov	24(up), %r11
+	shr	$31, R32(%rbp)
+	ADDSUB	%r9, %r12
+	mov	24(vp), %r9
+	lea	(%rbp,%r9,2), %r9
+	lea	32(up), up
+	lea	32(vp), vp
+	setc	R8(%rax)
+	mov	-4(vp), R32(%rbp)
+	mov	%r10, 24(rp)
+	ADDSUB	%rbx, %r12
+	jc	L(c3)
+L(rc3):	lea	32(rp), rp
+L(L10):	sub	$4, n
+	ja	L(top)
+
+L(end):
+	shr	$31, R32(%rbp)
+	ADDSUB	%r9, %r11
+	setc	R8(%rbx)
+	mov	%r12, (rp)
+	ADDSUB	%rax, %r11
+	jnc	L(1)
+	mov	$1, R8(%rbx)
+L(1):	mov	%r11, 8(rp)
+	lea	(%rbx,%rbp), R32(%rax)
+	pop	%rbp
+	pop	%r12
+	pop	%rbx
+	emms
+	ret
+L(c3):	mov	$1, R8(%rax)
+	jmp	L(rc3)
+EPILOGUE()
+ASM_END()
diff --git a/gmp/mpn/x86_64/pentium4/aorslsh2_n.asm b/gmp/mpn/x86_64/pentium4/aorslsh2_n.asm
deleted file mode 100644
index 001f0ac5bf..0000000000
--- a/gmp/mpn/x86_64/pentium4/aorslsh2_n.asm
+++ /dev/null
@@ -1,50 +0,0 @@
-dnl  AMD64 mpn_addlsh2_n, mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2),
-dnl  optimised for Pentium 4.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 2)
-define(RSH, 30)			C 30, not 62, since we use 32-bit ops
-
-ifdef(`OPERATION_addlsh2_n', `
-  define(ADDSUB,	add)
-  define(func,		mpn_addlsh2_n)')
-ifdef(`OPERATION_sublsh2_n', `
-  define(ADDSUB,	sub)
-  define(func,		mpn_sublsh2_n)')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n)
-include_mpn(`x86_64/pentium4/aorslshC_n.asm')
diff --git a/gmp/mpn/x86_64/pentium4/aorslshC_n.asm b/gmp/mpn/x86_64/pentium4/aorslshC_n.asm
deleted file mode 100644
index d03c6a3f30..0000000000
--- a/gmp/mpn/x86_64/pentium4/aorslshC_n.asm
+++ /dev/null
@@ -1,203 +0,0 @@
-dnl  AMD64 mpn_addlshC_n, mpn_sublshC_n -- rp[] = up[] +- (vp[] << C), where
-dnl  C is 1, 2, 3.  Optimized for Pentium 4.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-C	     cycles/limb
-C AMD K8,K9	 3.8
-C AMD K10	 3.8
-C Intel P4	 5.8
-C Intel core2	 4.75
-C Intel corei	 4.75
-C Intel atom	 ?
-C VIA nano	 4.75
-
-
-C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`vp',`%rdx')
-define(`n', `%rcx')
-
-define(M, eval(m4_lshift(1,LSH)))
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func)
-	FUNC_ENTRY(4)
-	push	%rbx
-	push	%r12
-	push	%rbp
-
-	mov	(vp), %r9
-	shl	$LSH, %r9
-	mov	4(vp), R32(%rbp)
-
-	xor	R32(%rbx), R32(%rbx)
-
-	mov	R32(n), R32(%rax)
-	and	$3, R32(%rax)
-	jne	L(n00)		C n = 0, 4, 8, ...
-
-	mov	(up), %r8
-	mov	8(up), %r10
-	shr	$RSH, R32(%rbp)
-	ADDSUB	%r9, %r8
-	mov	8(vp), %r9
-	lea	(%rbp,%r9,M), %r9
-	setc	R8(%rax)
-	mov	12(vp), R32(%rbp)
-	lea	-16(rp), rp
-	jmp	L(L00)
-
-L(n00):	cmp	$2, R32(%rax)
-	jnc	L(n01)		C n = 1, 5, 9, ...
-	mov	(up), %r11
-	lea	-8(rp), rp
-	shr	$RSH, R32(%rbp)
-	ADDSUB	%r9, %r11
-	setc	R8(%rbx)
-	dec	n
-	jz	L(1)		C jump for n = 1
-	mov	8(up), %r8
-	mov	8(vp), %r9
-	lea	(%rbp,%r9,M), %r9
-	mov	12(vp), R32(%rbp)
-	lea	8(up), up
-	lea	8(vp), vp
-	jmp	L(L01)
-
-L(n01):	jne	L(n10)		C n = 2, 6, 10, ...
-	mov	(up), %r12
-	mov	8(up), %r11
-	shr	$RSH, R32(%rbp)
-	ADDSUB	%r9, %r12
-	mov	8(vp), %r9
-	lea	(%rbp,%r9,M), %r9
-	setc	R8(%rax)
-	mov	12(vp), R32(%rbp)
-	lea	16(up), up
-	lea	16(vp), vp
-	jmp	L(L10)
-
-L(n10):	mov	(up), %r10
-	mov	8(up), %r12
-	shr	$RSH, R32(%rbp)
-	ADDSUB	%r9, %r10
-	mov	8(vp), %r9
-	lea	(%rbp,%r9,M), %r9
-	setc	R8(%rbx)
-	mov	12(vp), R32(%rbp)
-	lea	-24(rp), rp
-	lea	-8(up), up
-	lea	-8(vp), vp
-	jmp	L(L11)
-
-L(c0):	mov	$1, R8(%rbx)
-	jmp	L(rc0)
-L(c1):	mov	$1, R8(%rax)
-	jmp	L(rc1)
-L(c2):	mov	$1, R8(%rbx)
-	jmp	L(rc2)
-
-	ALIGN(16)
-L(top):	mov	(up), %r8	C not on critical path
-	shr	$RSH, R32(%rbp)
-	ADDSUB	%r9, %r11	C not on critical path
-	mov	(vp), %r9
-	lea	(%rbp,%r9,M), %r9
-	setc	R8(%rbx)	C save carry out
-	mov	4(vp), R32(%rbp)
-	mov	%r12, (rp)
-	ADDSUB	%rax, %r11	C apply previous carry out
-	jc	L(c0)		C jump if ripple
-L(rc0):
-L(L01):	mov	8(up), %r10
-	shr	$RSH, R32(%rbp)
-	ADDSUB	%r9, %r8
-	mov	8(vp), %r9
-	lea	(%rbp,%r9,M), %r9
-	setc	R8(%rax)
-	mov	12(vp), R32(%rbp)
-	mov	%r11, 8(rp)
-	ADDSUB	%rbx, %r8
-	jc	L(c1)
-L(rc1):
-L(L00):	mov	16(up), %r12
-	shr	$RSH, R32(%rbp)
-	ADDSUB	%r9, %r10
-	mov	16(vp), %r9
-	lea	(%rbp,%r9,M), %r9
-	setc	R8(%rbx)
-	mov	20(vp), R32(%rbp)
-	mov	%r8, 16(rp)
-	ADDSUB	%rax, %r10
-	jc	L(c2)
-L(rc2):
-L(L11):	mov	24(up), %r11
-	shr	$RSH, R32(%rbp)
-	ADDSUB	%r9, %r12
-	mov	24(vp), %r9
-	lea	(%rbp,%r9,M), %r9
-	lea	32(up), up
-	lea	32(vp), vp
-	setc	R8(%rax)
-	mov	-4(vp), R32(%rbp)
-	mov	%r10, 24(rp)
-	ADDSUB	%rbx, %r12
-	jc	L(c3)
-L(rc3):	lea	32(rp), rp
-L(L10):	sub	$4, n
-	ja	L(top)
-
-L(end):
-	shr	$RSH, R32(%rbp)
-	ADDSUB	%r9, %r11
-	setc	R8(%rbx)
-	mov	%r12, (rp)
-	ADDSUB	%rax, %r11
-	jnc	L(1)
-	mov	$1, R8(%rbx)
-L(1):	mov	%r11, 8(rp)
-	lea	(%rbx,%rbp), R32(%rax)
-	pop	%rbp
-	pop	%r12
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-L(c3):	mov	$1, R8(%rax)
-	jmp	L(rc3)
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86_64/pentium4/gmp-mparam.h b/gmp/mpn/x86_64/pentium4/gmp-mparam.h
index 2171e230a5..ca9239775b 100644
--- a/gmp/mpn/x86_64/pentium4/gmp-mparam.h
+++ b/gmp/mpn/x86_64/pentium4/gmp-mparam.h
@@ -1,231 +1,79 @@
 /* Pentium 4-64 gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright 1991, 1993, 1994, 2000-2010, 2014 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
 
 /* These routines exists for all x86_64 chips, but they are slower on Pentium4
    than separate add/sub and shift.  Make sure they are not really used.  */
-#undef HAVE_NATIVE_mpn_rsblsh1_n
-#undef HAVE_NATIVE_mpn_rsblsh2_n
-#undef HAVE_NATIVE_mpn_addlsh_n
-#undef HAVE_NATIVE_mpn_rsblsh_n
-
-/* 3400 MHz Pentium4 Nocona / 1024 Kibyte cache */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD                 0  /* always */
-#define MOD_1_UNNORM_THRESHOLD               0  /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD          4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD          3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD        16
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD        32
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     11
-#define USE_PREINV_DIVREM_1                  1  /* native */
-#define DIV_QR_1_NORM_THRESHOLD              1
-#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
-#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
-#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD           20
-
-#define MUL_TOOM22_THRESHOLD                12
-#define MUL_TOOM33_THRESHOLD                41
-#define MUL_TOOM44_THRESHOLD               112
-#define MUL_TOOM6H_THRESHOLD               157
-#define MUL_TOOM8H_THRESHOLD               236
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD      91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD      81
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD      78
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD     106
-
-#define SQR_BASECASE_THRESHOLD               5
-#define SQR_TOOM2_THRESHOLD                 30
-#define SQR_TOOM3_THRESHOLD                 53
-#define SQR_TOOM4_THRESHOLD                154
-#define SQR_TOOM6_THRESHOLD                197
-#define SQR_TOOM8_THRESHOLD                296
-
-#define MULMID_TOOM42_THRESHOLD             22
-
-#define MULMOD_BNM1_THRESHOLD                9
-#define SQRMOD_BNM1_THRESHOLD                9
-
-#define MUL_FFT_MODF_THRESHOLD             252  /* k = 5 */
-#define MUL_FFT_TABLE3                                      \
-  { {    252, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
-    {     13, 7}, {      7, 6}, {     15, 7}, {      8, 6}, \
-    {     17, 7}, {      9, 6}, {     19, 7}, {     13, 8}, \
-    {      7, 7}, {     17, 8}, {      9, 7}, {     20, 8}, \
-    {     11, 7}, {     23, 8}, {     13, 9}, {      7, 8}, \
-    {     21, 9}, {     11, 8}, {     25,10}, {      7, 9}, \
-    {     15, 8}, {     33, 9}, {     19, 8}, {     39, 9}, \
-    {     23, 8}, {     47,10}, {     15, 9}, {     39,10}, \
-    {     23, 9}, {     51,11}, {     15,10}, {     31, 9}, \
-    {     67,10}, {     39, 9}, {     79,10}, {     47, 9}, \
-    {     95,10}, {     55,11}, {     31,10}, {     63, 9}, \
-    {    127, 8}, {    255,10}, {     71, 9}, {    143, 8}, \
-    {    287,10}, {     79,11}, {     47,10}, {     95, 9}, \
-    {    191,10}, {    103,12}, {     31,11}, {     63,10}, \
-    {    127, 9}, {    255,10}, {    143, 9}, {    287,11}, \
-    {     79,10}, {    159, 9}, {    319,10}, {    175, 9}, \
-    {    351,11}, {     95,10}, {    191, 9}, {    383,10}, \
-    {    223,12}, {     63,11}, {    127,10}, {    255,11}, \
-    {    143,10}, {    287, 9}, {    575, 8}, {   1151,11}, \
-    {    159,10}, {    319,11}, {    175,10}, {    351,12}, \
-    {     95,11}, {    191,10}, {    383,11}, {    207,10}, \
-    {    415,11}, {    223,13}, {     63,12}, {    127,11}, \
-    {    255,10}, {    511,11}, {    287,10}, {    575, 9}, \
-    {   1151,12}, {    159,11}, {    319,10}, {    639,11}, \
-    {    351,10}, {    703,12}, {    191,11}, {    383,10}, \
-    {    767,11}, {    415,12}, {    223,11}, {    447,13}, \
-    {    127,12}, {    255,11}, {    511,12}, {    287,11}, \
-    {    575,10}, {   1151,12}, {    319,11}, {    639,12}, \
-    {    351,11}, {    703,13}, {    191,12}, {    383,11}, \
-    {    767,12}, {    415,11}, {    831,12}, {    447,11}, \
-    {    895,14}, {    127,13}, {    255,12}, {    511,11}, \
-    {   1023,12}, {    543,11}, {   1087,10}, {   2175,12}, \
-    {    575,11}, {   1151,13}, {    319,12}, {    639,11}, \
-    {   1279,12}, {    703,11}, {   1407,10}, {   2815,13}, \
-    {    383,12}, {    767,11}, {   1535,12}, {    831,11}, \
-    {   1663,13}, {    447,12}, {    895,14}, {    255,13}, \
-    {    511,12}, {   1023,11}, {   2047,12}, {   1087,11}, \
-    {   2175,13}, {    575,12}, {   1151,11}, {   2303,12}, \
-    {   1215,11}, {   2431,10}, {   4863,13}, {    639,12}, \
-    {   1279,11}, {   2559,13}, {    703,12}, {   1407,11}, \
-    {   2815,14}, {    383,13}, {    767,12}, {   1535,13}, \
-    {    831,12}, {   1663,13}, {    895,15}, {    255,14}, \
-    {    511,13}, {   1023,12}, {   2047,13}, {   1087,12}, \
-    {   2175,13}, {   1151,12}, {   2303,13}, {   1215,12}, \
-    {   2431,11}, {   4863,14}, {    639,13}, {   1279,12}, \
-    {   2559,13}, {   1407,12}, {   2815,14}, {    767,13}, \
-    {   1663,14}, {    895,13}, {   1791,12}, {   3583,13}, \
-    {   1919,12}, {   3839,15}, {    511,14}, {   1023,13}, \
-    {   2175,14}, {   1151,13}, {   2303,12}, {   4607,13}, \
-    {   2431,12}, {   4863,14}, {   1279,13}, {   2559,14}, \
-    {   1407,13}, {   2815,15}, {  32768,16}, {  65536,17}, \
-    { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
-    {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 211
-#define MUL_FFT_THRESHOLD                 2240
-
-#define SQR_FFT_MODF_THRESHOLD             212  /* k = 5 */
-#define SQR_FFT_TABLE3                                      \
-  { {    212, 5}, {     11, 6}, {      6, 5}, {     13, 6}, \
-    {     13, 7}, {      7, 6}, {     15, 7}, {      9, 6}, \
-    {     19, 7}, {     13, 8}, {      7, 7}, {     17, 8}, \
-    {      9, 7}, {     20, 8}, {     11, 7}, {     24, 8}, \
-    {     13, 9}, {      7, 8}, {     21, 9}, {     11, 8}, \
-    {     25,10}, {      7, 9}, {     15, 8}, {     33, 9}, \
-    {     19, 8}, {     39, 9}, {     23, 8}, {     47,10}, \
-    {     15, 9}, {     39,10}, {     23, 9}, {     47,11}, \
-    {     15,10}, {     31, 9}, {     63, 8}, {    127, 9}, \
-    {     67,10}, {     39, 9}, {     79,10}, {     55,11}, \
-    {     31,10}, {     63, 9}, {    127, 8}, {    255,10}, \
-    {     71, 9}, {    143, 8}, {    287,10}, {     79, 9}, \
-    {    159,11}, {     47, 9}, {    191,12}, {     31,11}, \
-    {     63,10}, {    127, 9}, {    255,10}, {    143, 9}, \
-    {    287,11}, {     79,10}, {    159, 9}, {    319,10}, \
-    {    175, 9}, {    351,10}, {    191, 9}, {    383,10}, \
-    {    207,11}, {    111,10}, {    223,12}, {     63,11}, \
-    {    127,10}, {    255,11}, {    143,10}, {    287, 9}, \
-    {    575,11}, {    159,10}, {    319,11}, {    175,10}, \
-    {    351,11}, {    191,10}, {    383,11}, {    223,13}, \
-    {     63,12}, {    127,11}, {    255,10}, {    511,11}, \
-    {    287,10}, {    575,12}, {    159,11}, {    351,12}, \
-    {    191,11}, {    383,12}, {    223,11}, {    447,13}, \
-    {    127,12}, {    255,11}, {    511,12}, {    287,11}, \
-    {    575,10}, {   1151,12}, {    319,11}, {    639,12}, \
-    {    351,13}, {    191,12}, {    383,11}, {    767,12}, \
-    {    415,11}, {    831,12}, {    447,14}, {    127,13}, \
-    {    255,12}, {    511,11}, {   1023,10}, {   2047,11}, \
-    {   1087,12}, {    575,11}, {   1151,13}, {    319,12}, \
-    {    639,11}, {   1279,12}, {    703,11}, {   1407,13}, \
-    {    383,12}, {    767,11}, {   1535,12}, {    831,13}, \
-    {    447,14}, {    255,13}, {    511,12}, {   1023,11}, \
-    {   2047,13}, {    575,12}, {   1151,11}, {   2303,12}, \
-    {   1215,13}, {    639,12}, {   1279,11}, {   2559,13}, \
-    {    703,14}, {    383,13}, {    767,12}, {   1535,13}, \
-    {    831,12}, {   1663,13}, {    895,15}, {    255,14}, \
-    {    511,13}, {   1023,12}, {   2047,13}, {   1087,12}, \
-    {   2175,13}, {   1151,12}, {   2303,13}, {   1215,12}, \
-    {   2431,14}, {    639,13}, {   1279,12}, {   2687,13}, \
-    {   1407,12}, {   2815,14}, {    767,13}, {   1663,14}, \
-    {    895,13}, {   1791,12}, {   3583,15}, {    511,14}, \
-    {   1023,13}, {   2175,14}, {   1151,13}, {   2303,12}, \
-    {   4607,13}, {   2431,12}, {   4863,14}, {   1279,13}, \
-    {   2559,14}, {   1407,13}, {   2815,15}, {  32768,16}, \
-    {  65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
-    {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 184
-#define SQR_FFT_THRESHOLD                 1984
-
-#define MULLO_BASECASE_THRESHOLD             0  /* always */
-#define MULLO_DC_THRESHOLD                  33
-#define MULLO_MUL_N_THRESHOLD             4392
-
-#define DC_DIV_QR_THRESHOLD                 35
-#define DC_DIVAPPR_Q_THRESHOLD              68
-#define DC_BDIV_QR_THRESHOLD                32
-#define DC_BDIV_Q_THRESHOLD                 56
-
-#define INV_MULMOD_BNM1_THRESHOLD           22
-#define INV_NEWTON_THRESHOLD               195
-#define INV_APPR_THRESHOLD                 116
-
-#define BINV_NEWTON_THRESHOLD              199
-#define REDC_1_TO_REDC_2_THRESHOLD           4
-#define REDC_2_TO_REDC_N_THRESHOLD          42
-
-#define MU_DIV_QR_THRESHOLD                979
-#define MU_DIVAPPR_Q_THRESHOLD             979
-#define MUPI_DIV_QR_THRESHOLD               91
-#define MU_BDIV_QR_THRESHOLD               855
-#define MU_BDIV_Q_THRESHOLD                942
-
-#define POWM_SEC_TABLE  1,16,175,692,1603
-
-#define MATRIX22_STRASSEN_THRESHOLD         17
-#define HGCD_THRESHOLD                     109
-#define HGCD_APPR_THRESHOLD                119
-#define HGCD_REDUCE_THRESHOLD             1679
-#define GCD_DC_THRESHOLD                   222
-#define GCDEXT_DC_THRESHOLD                238
-#define JACOBI_BASE_METHOD                   4
-
-#define GET_STR_DC_THRESHOLD                12
-#define GET_STR_PRECOMPUTE_THRESHOLD        24
-#define SET_STR_DC_THRESHOLD               537
-#define SET_STR_PRECOMPUTE_THRESHOLD      1430
-
-#define FAC_DSC_THRESHOLD                 1127
-#define FAC_ODD_THRESHOLD                    0  /* always */
+#undef HAVE_NATIVE_mpn_addlsh1_n
+#undef HAVE_NATIVE_mpn_sublsh1_n
+#undef HAVE_NATIVE_mpn_rsh1add_n
+#undef HAVE_NATIVE_mpn_rsh1sub_n
+
+/* 3200 MHz Pentium / 2048 Kibyte cache / socket 775 */
+
+/* Generated by tuneup.c, 2009-01-15, gcc 3.4 */
+
+#define MUL_KARATSUBA_THRESHOLD          12
+#define MUL_TOOM3_THRESHOLD              91
+#define MUL_TOOM44_THRESHOLD            136
+
+#define SQR_BASECASE_THRESHOLD            0  /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD          28
+#define SQR_TOOM3_THRESHOLD              97
+#define SQR_TOOM4_THRESHOLD             218
+
+#define MULLOW_BASECASE_THRESHOLD         0  /* always */
+#define MULLOW_DC_THRESHOLD              28
+#define MULLOW_MUL_N_THRESHOLD          246
+
+#define DIV_SB_PREINV_THRESHOLD           0  /* always */
+#define DIV_DC_THRESHOLD                 35
+#define POWM_THRESHOLD                   59
+
+#define MATRIX22_STRASSEN_THRESHOLD      25
+#define HGCD_THRESHOLD                  112
+#define GCD_DC_THRESHOLD                258
+#define GCDEXT_DC_THRESHOLD             311
+#define JACOBI_BASE_METHOD                1
+
+#define MOD_1_NORM_THRESHOLD              0  /* always */
+#define MOD_1_UNNORM_THRESHOLD            0  /* always */
+#define MOD_1_1_THRESHOLD                 5
+#define MOD_1_2_THRESHOLD                 7
+#define MOD_1_4_THRESHOLD                28
+#define USE_PREINV_DIVREM_1               1  /* native */
+#define USE_PREINV_MOD_1                  1
+#define DIVEXACT_1_THRESHOLD              0  /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD          0  /* always (native) */
+
+#define GET_STR_DC_THRESHOLD             15
+#define GET_STR_PRECOMPUTE_THRESHOLD     24
+#define SET_STR_DC_THRESHOLD            866
+#define SET_STR_PRECOMPUTE_THRESHOLD   1646
+
+#define MUL_FFT_TABLE  { 240, 416, 1216, 2304, 7168, 20480, 49152, 196608, 786432, 0 }
+#define MUL_FFT_MODF_THRESHOLD          256
+#define MUL_FFT_THRESHOLD              2944
+
+#define SQR_FFT_TABLE  { 208, 480, 1600, 2304, 7168, 20480, 49152, 196608, 786432, 0 }
+#define SQR_FFT_MODF_THRESHOLD          224
+#define SQR_FFT_THRESHOLD              2688
diff --git a/gmp/mpn/x86_64/pentium4/lshift.asm b/gmp/mpn/x86_64/pentium4/lshift.asm
index d3b521364f..7596d9c5c0 100644
--- a/gmp/mpn/x86_64/pentium4/lshift.asm
+++ b/gmp/mpn/x86_64/pentium4/lshift.asm
@@ -1,44 +1,31 @@
 dnl  x86-64 mpn_lshift optimized for Pentium 4.
 
-dnl  Copyright 2003, 2005, 2007, 2008, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2003, 2005, 2007, 2008 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	 2.5
-C AMD K10	 ?
-C Intel P4	 3.29
-C Intel core2	 2.1 (fluctuates, presumably cache related)
-C Intel corei	 ?
-C Intel atom	14.3
-C VIA nano	 ?
+C K8,K9:	 2.5
+C K10:		 ?
+C P4:		 3.29
+C P6-15 (Core2): 2.1 (fluctuates, presumably cache related)
+C P6-28 (Atom):	14.3
 
 C INPUT PARAMETERS
 define(`rp',`%rdi')
@@ -46,28 +33,24 @@ define(`up',`%rsi')
 define(`n',`%rdx')
 define(`cnt',`%cl')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
 	TEXT
 	ALIGN(32)
 PROLOGUE(mpn_lshift)
-	FUNC_ENTRY(4)
 	mov	-8(up,n,8), %rax
-	movd	R32(%rcx), %mm4
-	neg	R32(%rcx)		C put rsh count in cl
-	and	$63, R32(%rcx)
-	movd	R32(%rcx), %mm5
+	movd	%ecx, %mm4
+	neg	%ecx			C put rsh count in cl
+	and	$63, %ecx
+	movd	%ecx, %mm5
 
-	lea	1(n), R32(%r8)
+	lea	1(n), %r8d
 
-	shr	R8(%rcx), %rax		C function return value
+	shr	%cl, %rax		C function return value
 
-	and	$3, R32(%r8)
+	and	$3, %r8d
 	je	L(rol)			C jump for n = 3, 7, 11, ...
 
-	dec	R32(%r8)
+	dec	%r8d
 	jne	L(1)
 C	n = 4, 8, 12, ...
 	movq	-8(up,n,8), %mm2
@@ -79,7 +62,7 @@ C	n = 4, 8, 12, ...
 	dec	n
 	jmp	L(rol)
 
-L(1):	dec	R32(%r8)
+L(1):	dec	%r8d
 	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
 C	n = 2, 6, 10, 16, ...
 	movq	-8(up,n,8), %mm2
@@ -148,19 +131,18 @@ L(top):
 
 	jae	L(top)			C				      2
 L(end):
-	movq	8(up), %mm0
+	movq	16(up,n,8), %mm0
 	psrlq	%mm5, %mm0
 	por	%mm0, %mm2
-	movq	(up), %mm1
+	movq	8(up,n,8), %mm1
 	psrlq	%mm5, %mm1
 	por	%mm1, %mm3
-	movq	%mm2, 16(rp)
-	movq	%mm3, 8(rp)
+	movq	%mm2, 24(rp,n,8)
+	movq	%mm3, 16(rp,n,8)
 
 L(ast):	movq	(up), %mm2
 	psllq	%mm4, %mm2
 	movq	%mm2, (rp)
 	emms
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/pentium4/lshiftc.asm b/gmp/mpn/x86_64/pentium4/lshiftc.asm
deleted file mode 100644
index fc64676574..0000000000
--- a/gmp/mpn/x86_64/pentium4/lshiftc.asm
+++ /dev/null
@@ -1,179 +0,0 @@
-dnl  x86-64 mpn_lshiftc optimized for Pentium 4.
-
-dnl  Copyright 2003, 2005, 2007, 2008, 2010, 2012 Free Software Foundation,
-dnl  Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb
-C AMD K8,K9	 ?
-C AMD K10	 ?
-C Intel P4	 4.15
-C Intel core2	 ?
-C Intel corei	 ?
-C Intel atom	 ?
-C VIA nano	 ?
-
-C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
-define(`cnt',`%cl')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_lshiftc)
-	FUNC_ENTRY(4)
-	mov	-8(up,n,8), %rax
-	pcmpeqd	%mm6, %mm6		C 0xffff...fff
-	movd	R32(%rcx), %mm4
-	neg	R32(%rcx)		C put rsh count in cl
-	and	$63, R32(%rcx)
-	movd	R32(%rcx), %mm5
-
-	lea	1(n), R32(%r8)
-
-	shr	R8(%rcx), %rax		C function return value
-
-	and	$3, R32(%r8)
-	je	L(rol)			C jump for n = 3, 7, 11, ...
-
-	dec	R32(%r8)
-	jne	L(1)
-C	n = 4, 8, 12, ...
-	movq	-8(up,n,8), %mm2
-	psllq	%mm4, %mm2
-	movq	-16(up,n,8), %mm0
-	pxor	%mm6, %mm2
-	psrlq	%mm5, %mm0
-	pandn	%mm2, %mm0
-	movq	%mm0, -8(rp,n,8)
-	dec	n
-	jmp	L(rol)
-
-L(1):	dec	R32(%r8)
-	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
-C	n = 2, 6, 10, 16, ...
-	movq	-8(up,n,8), %mm2
-	psllq	%mm4, %mm2
-	movq	-16(up,n,8), %mm0
-	pxor	%mm6, %mm2
-	psrlq	%mm5, %mm0
-	pandn	%mm2, %mm0
-	movq	%mm0, -8(rp,n,8)
-	dec	n
-L(1x):
-	cmp	$1, n
-	je	L(ast)
-	movq	-8(up,n,8), %mm2
-	psllq	%mm4, %mm2
-	movq	-16(up,n,8), %mm3
-	psllq	%mm4, %mm3
-	movq	-16(up,n,8), %mm0
-	movq	-24(up,n,8), %mm1
-	pxor	%mm6, %mm2
-	psrlq	%mm5, %mm0
-	pandn	%mm2, %mm0
-	pxor	%mm6, %mm3
-	psrlq	%mm5, %mm1
-	pandn	%mm3, %mm1
-	movq	%mm0, -8(rp,n,8)
-	movq	%mm1, -16(rp,n,8)
-	sub	$2, n
-
-L(rol):	movq	-8(up,n,8), %mm2
-	psllq	%mm4, %mm2
-	movq	-16(up,n,8), %mm3
-	psllq	%mm4, %mm3
-
-	sub	$4, n
-	jb	L(end)
-	ALIGN(32)
-L(top):
-	C finish stuff from lsh block
-	movq	16(up,n,8), %mm0
-	pxor	%mm6, %mm2
-	movq	8(up,n,8), %mm1
-	psrlq	%mm5, %mm0
-	psrlq	%mm5, %mm1
-	pandn	%mm2, %mm0
-	pxor	%mm6, %mm3
-	movq	%mm0, 24(rp,n,8)
-	movq	(up,n,8), %mm0
-	pandn	%mm3, %mm1
-	movq	%mm1, 16(rp,n,8)
-	movq	-8(up,n,8), %mm1
-	C start two new rsh
-	psrlq	%mm5, %mm0
-	psrlq	%mm5, %mm1
-
-	C finish stuff from rsh block
-	movq	8(up,n,8), %mm2
-	pxor	%mm6, %mm0
-	movq	(up,n,8), %mm3
-	psllq	%mm4, %mm2
-	psllq	%mm4, %mm3
-	pandn	%mm0, %mm2
-	pxor	%mm6, %mm1
-	movq	%mm2, 8(rp,n,8)
-	movq	-8(up,n,8), %mm2
-	pandn	%mm1, %mm3
-	movq	%mm3, (rp,n,8)
-	movq	-16(up,n,8), %mm3
-	C start two new lsh
-	sub	$4, n
-	psllq	%mm4, %mm2
-	psllq	%mm4, %mm3
-
-	jae	L(top)
-
-L(end):	pxor	%mm6, %mm2
-	movq	8(up), %mm0
-	psrlq	%mm5, %mm0
-	pandn	%mm2, %mm0
-	pxor	%mm6, %mm3
-	movq	(up), %mm1
-	psrlq	%mm5, %mm1
-	pandn	%mm3, %mm1
-	movq	%mm0, 16(rp)
-	movq	%mm1, 8(rp)
-
-L(ast):	movq	(up), %mm2
-	psllq	%mm4, %mm2
-	pxor	%mm6, %mm2
-	movq	%mm2, (rp)
-	emms
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/pentium4/mod_34lsub1.asm b/gmp/mpn/x86_64/pentium4/mod_34lsub1.asm
deleted file mode 100644
index f34b3f079a..0000000000
--- a/gmp/mpn/x86_64/pentium4/mod_34lsub1.asm
+++ /dev/null
@@ -1,167 +0,0 @@
-dnl  AMD64 mpn_mod_34lsub1 -- remainder modulo 2^48-1.
-
-dnl  Copyright 2000-2002, 2004, 2005, 2007, 2010-2012 Free Software Foundation,
-dnl  Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb
-C AMD K8,K9	 1.0
-C AMD K10	 1.12
-C Intel P4	 3.25
-C Intel core2	 1.5
-C Intel corei	 1.5
-C Intel atom	 2.5
-C VIA nano	 1.75
-
-
-C INPUT PARAMETERS
-define(`ap',	%rdi)
-define(`n',	%rsi)
-
-C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
-
-C TODO
-C  * Review feed-in and wind-down code.  In particular, try to avoid adc and
-C    sbb to placate Pentium4.
-C  * It seems possible to reach 2.67 c/l by using a cleaner 6-way unrolling,
-C    without the dual loop exits.
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_mod_34lsub1)
-	FUNC_ENTRY(2)
-
-	mov	$0x0000FFFFFFFFFFFF, %r11
-
-	sub	$2, %rsi
-	ja	L(gt2)
-
-	mov	(ap), %rax
-	nop
-	jb	L(1)
-
-	mov	8(ap), %rsi
-	mov	%rax, %rdx
-	shr	$48, %rax		C src[0] low
-
-	and	%r11, %rdx		C src[0] high
-	add	%rdx, %rax
-	mov	R32(%rsi), R32(%rdx)
-
-	shr	$32, %rsi		C src[1] high
-	add	%rsi, %rax
-
-	shl	$16, %rdx		C src[1] low
-	add	%rdx, %rax
-
-L(1):	FUNC_EXIT()
-	ret
-
-
-	ALIGN(16)
-L(gt2):	xor	R32(%rax), R32(%rax)
-	xor	R32(%rcx), R32(%rcx)
-	xor	R32(%rdx), R32(%rdx)
-	xor	%r8, %r8
-	xor	%r9, %r9
-	xor	%r10, %r10
-
-L(top):	add	(ap), %rax
-	adc	$0, %r10
-	add	8(ap), %rcx
-	adc	$0, %r8
-	add	16(ap), %rdx
-	adc	$0, %r9
-
-	sub	$3, %rsi
-	jng	L(end)
-
-	add	24(ap), %rax
-	adc	$0, %r10
-	add	32(ap), %rcx
-	adc	$0, %r8
-	add	40(ap), %rdx
-	lea	48(ap), ap
-	adc	$0, %r9
-
-	sub	$3, %rsi
-	jg	L(top)
-
-
-	add	$-24, ap
-L(end):	add	%r9, %rax
-	adc	%r10, %rcx
-	adc	%r8, %rdx
-
-	inc	%rsi
-	mov	$0x1, R32(%r10)
-	js	L(combine)
-
-	mov	$0x10000, R32(%r10)
-	adc	24(ap), %rax
-	dec	%rsi
-	js	L(combine)
-
-	adc	32(ap), %rcx
-	mov	$0x100000000, %r10
-
-L(combine):
-	sbb	%rsi, %rsi		C carry
-	mov	%rax, %rdi		C 0mod3
-	shr	$48, %rax		C 0mod3 high
-
-	and	%r10, %rsi		C carry masked
-	and	%r11, %rdi		C 0mod3 low
-	mov	R32(%rcx), R32(%r10)	C 1mod3
-
-	add	%rsi, %rax		C apply carry
-	shr	$32, %rcx		C 1mod3 high
-
-	add	%rdi, %rax		C apply 0mod3 low
-	movzwl	%dx, R32(%rdi)		C 2mod3
-	shl	$16, %r10		C 1mod3 low
-
-	add	%rcx, %rax		C apply 1mod3 high
-	shr	$16, %rdx		C 2mod3 high
-
-	add	%r10, %rax		C apply 1mod3 low
-	shl	$32, %rdi		C 2mod3 low
-
-	add	%rdx, %rax		C apply 2mod3 high
-	add	%rdi, %rax		C apply 2mod3 low
-
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/pentium4/popcount.asm b/gmp/mpn/x86_64/pentium4/popcount.asm
index 7014b39de5..b1a866bf5e 100644
--- a/gmp/mpn/x86_64/pentium4/popcount.asm
+++ b/gmp/mpn/x86_64/pentium4/popcount.asm
@@ -3,33 +3,21 @@ dnl  x86-64 mpn_popcount optimized for Pentium 4.
 dnl  Copyright 2007 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
 
-MULFUNC_PROLOGUE(mpn_popcount)
 include_mpn(`x86/pentium4/sse2/popcount.asm')
diff --git a/gmp/mpn/x86_64/pentium4/rsh1aors_n.asm b/gmp/mpn/x86_64/pentium4/rsh1aors_n.asm
deleted file mode 100644
index 5528ce47da..0000000000
--- a/gmp/mpn/x86_64/pentium4/rsh1aors_n.asm
+++ /dev/null
@@ -1,334 +0,0 @@
-dnl  x86-64 mpn_rsh1add_n/mpn_rsh1sub_n optimized for Pentium 4.
-
-dnl  Contributed to the GNU project by Torbjorn Granlund.
-
-dnl  Copyright 2007, 2008, 2010-2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb
-C AMD K8,K9	 4.13
-C AMD K10	 4.13
-C Intel P4	 5.70
-C Intel core2	 4.75
-C Intel corei	 5
-C Intel atom	 8.75
-C VIA nano	 5.25
-
-C TODO
-C  * Try to make this smaller, 746 bytes seem excessive for this 2nd class
-C    function.  Less sw pipelining would help, and since we now probably
-C    pipeline somewhat too deeply, it might not affect performance too much.
-C  * A separate small-n loop might speed things as well as make things smaller.
-C    That loop should be selected before pushing registers.
-
-C INPUT PARAMETERS
-define(`rp',	`%rdi')
-define(`up',	`%rsi')
-define(`vp',	`%rdx')
-define(`n',	`%rcx')
-define(`cy',	`%r8')
-
-ifdef(`OPERATION_rsh1add_n', `
-	define(ADDSUB,	      add)
-	define(func,	      mpn_rsh1add_n)
-	define(func_nc,	      mpn_rsh1add_nc)')
-ifdef(`OPERATION_rsh1sub_n', `
-	define(ADDSUB,	      sub)
-	define(func,	      mpn_rsh1sub_n)
-	define(func_nc,	      mpn_rsh1sub_nc)')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
-
-ASM_START()
-	TEXT
-PROLOGUE(func)
-	FUNC_ENTRY(4)
-	xor	%r8, %r8
-IFDOS(`	jmp	L(ent)		')
-EPILOGUE()
-PROLOGUE(func_nc)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-L(ent):	push	%rbx
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	mov	(vp), %r9
-	mov	(up), %r15
-
-	mov	R32(n), R32(%rax)
-	and	$3, R32(%rax)
-	jne	L(n00)
-
-	mov	R32(%r8), R32(%rbx)	C n = 0, 4, 8, ...
-	mov	8(up), %r10
-	ADDSUB	%r9, %r15
-	mov	8(vp), %r9
-	setc	R8(%rax)
-	ADDSUB	%rbx, %r15		C return bit
-	jnc	1f
-	mov	$1, R8(%rax)
-1:	mov	16(up), %r12
-	ADDSUB	%r9, %r10
-	mov	16(vp), %r9
-	setc	R8(%rbx)
-	mov	%r15, %r13
-	ADDSUB	%rax, %r10
-	jnc	1f
-	mov	$1, R8(%rbx)
-1:	mov	24(up), %r11
-	ADDSUB	%r9, %r12
-	lea	32(up), up
-	mov	24(vp), %r9
-	lea	32(vp), vp
-	setc	R8(%rax)
-	mov	%r10, %r14
-	shl	$63, %r10
-	shr	%r13
-	jmp	L(L00)
-
-L(n00):	cmp	$2, R32(%rax)
-	jnc	L(n01)
-	xor	R32(%rbx), R32(%rbx)	C n = 1, 5, 9, ...
-	lea	-24(rp), rp
-	mov	R32(%r8), R32(%rax)
-	dec	n
-	jnz	L(gt1)
-	ADDSUB	%r9, %r15
-	setc	R8(%rbx)
-	ADDSUB	%rax, %r15
-	jnc	1f
-	mov	$1, R8(%rbx)
-1:	mov	%r15, %r14
-	shl	$63, %rbx
-	shr	%r14
-	jmp	L(cj1)
-L(gt1):	mov	8(up), %r8
-	ADDSUB	%r9, %r15
-	mov	8(vp), %r9
-	setc	R8(%rbx)
-	ADDSUB	%rax, %r15
-	jnc	1f
-	mov	$1, R8(%rbx)
-1:	mov	16(up), %r10
-	ADDSUB	%r9, %r8
-	mov	16(vp), %r9
-	setc	R8(%rax)
-	mov	%r15, %r14
-	ADDSUB	%rbx, %r8
-	jnc	1f
-	mov	$1, R8(%rax)
-1:	mov	24(up), %r12
-	ADDSUB	%r9, %r10
-	mov	24(vp), %r9
-	setc	R8(%rbx)
-	mov	%r8, %r13
-	shl	$63, %r8
-	shr	%r14
-	lea	8(up), up
-	lea	8(vp), vp
-	jmp	L(L01)
-
-L(n01):	jne	L(n10)
-	lea	-16(rp), rp		C n = 2, 6, 10, ...
-	mov	R32(%r8), R32(%rbx)
-	mov	8(up), %r11
-	ADDSUB	%r9, %r15
-	mov	8(vp), %r9
-	setc	R8(%rax)
-	ADDSUB	%rbx, %r15
-	jnc	1f
-	mov	$1, R8(%rax)
-1:	sub	$2, n
-	jnz	L(gt2)
-	ADDSUB	%r9, %r11
-	setc	R8(%rbx)
-	mov	%r15, %r13
-	ADDSUB	%rax, %r11
-	jnc	1f
-	mov	$1, R8(%rbx)
-1:	mov	%r11, %r14
-	shl	$63, %r11
-	shr	%r13
-	jmp	L(cj2)
-L(gt2):	mov	16(up), %r8
-	ADDSUB	%r9, %r11
-	mov	16(vp), %r9
-	setc	R8(%rbx)
-	mov	%r15, %r13
-	ADDSUB	%rax, %r11
-	jnc	1f
-	mov	$1, R8(%rbx)
-1:	mov	24(up), %r10
-	ADDSUB	%r9, %r8
-	mov	24(vp), %r9
-	setc	R8(%rax)
-	mov	%r11, %r14
-	shl	$63, %r11
-	shr	%r13
-	lea	16(up), up
-	lea	16(vp), vp
-	jmp	L(L10)
-
-L(n10):	xor	R32(%rbx), R32(%rbx)	C n = 3, 7, 11, ...
-	lea	-8(rp), rp
-	mov	R32(%r8), R32(%rax)
-	mov	8(up), %r12
-	ADDSUB	%r9, %r15
-	mov	8(vp), %r9
-	setc	R8(%rbx)
-	ADDSUB	%rax, %r15
-	jnc	1f
-	mov	$1, R8(%rbx)
-1:	mov	16(up), %r11
-	ADDSUB	%r9, %r12
-	mov	16(vp), %r9
-	setc	R8(%rax)
-	mov	%r15, %r14
-	ADDSUB	%rbx, %r12
-	jnc	1f
-	mov	$1, R8(%rax)
-1:	sub	$3, n
-	jnz	L(gt3)
-	ADDSUB	%r9, %r11
-	setc	R8(%rbx)
-	mov	%r12, %r13
-	shl	$63, %r12
-	shr	%r14
-	jmp	L(cj3)
-L(gt3):	mov	24(up), %r8
-	ADDSUB	%r9, %r11
-	mov	24(vp), %r9
-	setc	R8(%rbx)
-	mov	%r12, %r13
-	shl	$63, %r12
-	shr	%r14
-	lea	24(up), up
-	lea	24(vp), vp
-	jmp	L(L11)
-
-L(c0):	mov	$1, R8(%rbx)
-	jmp	L(rc0)
-L(c1):	mov	$1, R8(%rax)
-	jmp	L(rc1)
-L(c2):	mov	$1, R8(%rbx)
-	jmp	L(rc2)
-
-	ALIGN(16)
-L(top):	mov	(up), %r8	C not on critical path
-	or	%r13, %r10
-	ADDSUB	%r9, %r11	C not on critical path
-	mov	(vp), %r9	C not on critical path
-	setc	R8(%rbx)	C save carry out
-	mov	%r12, %r13	C new for later
-	shl	$63, %r12	C shift new right
-	shr	%r14		C shift old left
-	mov	%r10, (rp)
-L(L11):	ADDSUB	%rax, %r11	C apply previous carry out
-	jc	L(c0)		C jump if ripple
-L(rc0):	mov	8(up), %r10
-	or	%r14, %r12
-	ADDSUB	%r9, %r8
-	mov	8(vp), %r9
-	setc	R8(%rax)
-	mov	%r11, %r14
-	shl	$63, %r11
-	shr	%r13
-	mov	%r12, 8(rp)
-L(L10):	ADDSUB	%rbx, %r8
-	jc	L(c1)
-L(rc1):	mov	16(up), %r12
-	or	%r13, %r11
-	ADDSUB	%r9, %r10
-	mov	16(vp), %r9
-	setc	R8(%rbx)
-	mov	%r8, %r13
-	shl	$63, %r8
-	shr	%r14
-	mov	%r11, 16(rp)
-L(L01):	ADDSUB	%rax, %r10
-	jc	L(c2)
-L(rc2):	mov	24(up), %r11
-	or	%r14, %r8
-	ADDSUB	%r9, %r12
-	lea	32(up), up
-	mov	24(vp), %r9
-	lea	32(vp), vp
-	setc	R8(%rax)
-	mov	%r10, %r14
-	shl	$63, %r10
-	shr	%r13
-	mov	%r8, 24(rp)
-	lea	32(rp), rp
-L(L00):	ADDSUB	%rbx, %r12
-	jc	L(c3)
-L(rc3):	sub	$4, n
-	ja	L(top)
-
-L(end):	or	%r13, %r10
-	ADDSUB	%r9, %r11
-	setc	R8(%rbx)
-	mov	%r12, %r13
-	shl	$63, %r12
-	shr	%r14
-	mov	%r10, (rp)
-L(cj3):	ADDSUB	%rax, %r11
-	jnc	1f
-	mov	$1, R8(%rbx)
-1:	or	%r14, %r12
-	mov	%r11, %r14
-	shl	$63, %r11
-	shr	%r13
-	mov	%r12, 8(rp)
-L(cj2):	or	%r13, %r11
-	shl	$63, %rbx
-	shr	%r14
-	mov	%r11, 16(rp)
-L(cj1):	or	%r14, %rbx
-	mov	%rbx, 24(rp)
-
-	mov	R32(%r15), R32(%rax)
-	and	$1, R32(%rax)
-	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-L(c3):	mov	$1, R8(%rax)
-	jmp	L(rc3)
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/pentium4/rshift.asm b/gmp/mpn/x86_64/pentium4/rshift.asm
index b7c1ee2cdd..61899c5ecf 100644
--- a/gmp/mpn/x86_64/pentium4/rshift.asm
+++ b/gmp/mpn/x86_64/pentium4/rshift.asm
@@ -1,44 +1,31 @@
 dnl  x86-64 mpn_rshift optimized for Pentium 4.
 
-dnl  Copyright 2003, 2005, 2007, 2008, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2003, 2005, 2007, 2008 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	 2.5
-C AMD K10	 ?
-C Intel P4	 3.29
-C Intel core2	 2.1 (fluctuates, presumably cache related)
-C Intel corei	 ?
-C Intel atom	14.3
-C VIA nano	 ?
+C K8,K9:	 2.5
+C K10:		 ?
+C P4:		 3.29
+C P6-15 (Core2): 2.1 (fluctuates, presumably cache related)
+C P6-28 (Atom):	14.3
 
 C INPUT PARAMETERS
 define(`rp',`%rdi')
@@ -46,31 +33,27 @@ define(`up',`%rsi')
 define(`n',`%rdx')
 define(`cnt',`%cl')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
 	TEXT
 	ALIGN(32)
 PROLOGUE(mpn_rshift)
-	FUNC_ENTRY(4)
 	mov	(up), %rax
-	movd	R32(%rcx), %mm4
-	neg	R32(%rcx)			C put lsh count in cl
-	and	$63, R32(%rcx)
-	movd	R32(%rcx), %mm5
+	movd	%ecx, %mm4
+	neg	%ecx			C put lsh count in cl
+	and	$63, %ecx
+	movd	%ecx, %mm5
 
 	lea	-8(up,n,8), up
 	lea	-8(rp,n,8), rp
-	lea	1(n), R32(%r8)
+	lea	1(n), %r8d
 	neg	n
 
-	shl	R8(%rcx), %rax		C function return value
+	shl	%cl, %rax		C function return value
 
-	and	$3, R32(%r8)
+	and	$3, %r8d
 	je	L(rol)			C jump for n = 3, 7, 11, ...
 
-	dec	R32(%r8)
+	dec	%r8d
 	jne	L(1)
 C	n = 4, 8, 12, ...
 	movq	8(up,n,8), %mm2
@@ -82,7 +65,7 @@ C	n = 4, 8, 12, ...
 	inc	n
 	jmp	L(rol)
 
-L(1):	dec	R32(%r8)
+L(1):	dec	%r8d
 	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
 C	n = 2, 6, 10, 16, ...
 	movq	8(up,n,8), %mm2
@@ -151,19 +134,18 @@ L(top):
 
 	jae	L(top)			C				      2
 L(end):
-	movq	-8(up), %mm0
+	movq	-16(up,n,8), %mm0
 	psllq	%mm5, %mm0
 	por	%mm0, %mm2
-	movq	(up), %mm1
+	movq	-8(up,n,8), %mm1
 	psllq	%mm5, %mm1
 	por	%mm1, %mm3
-	movq	%mm2, -16(rp)
-	movq	%mm3, -8(rp)
+	movq	%mm2, -24(rp,n,8)
+	movq	%mm3, -16(rp,n,8)
 
 L(ast):	movq	(up), %mm2
 	psrlq	%mm4, %mm2
 	movq	%mm2, (rp)
 	emms
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/pentium4/sec_tabselect.asm b/gmp/mpn/x86_64/pentium4/sec_tabselect.asm
deleted file mode 100644
index e4360341d9..0000000000
--- a/gmp/mpn/x86_64/pentium4/sec_tabselect.asm
+++ /dev/null
@@ -1,37 +0,0 @@
-dnl  X86-64 mpn_sec_tabselect.
-
-dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-MULFUNC_PROLOGUE(mpn_sec_tabselect)
-include_mpn(`x86_64/fastsse/sec_tabselect.asm')
diff --git a/gmp/mpn/x86_64/popham.asm b/gmp/mpn/x86_64/popham.asm
index 9005f81776..e2bdb1a0b8 100644
--- a/gmp/mpn/x86_64/popham.asm
+++ b/gmp/mpn/x86_64/popham.asm
@@ -1,32 +1,21 @@
 dnl  AMD64 mpn_popcount, mpn_hamdist -- population count and hamming distance.
 
-dnl  Copyright 2004, 2005, 2007, 2010-2012 Free Software Foundation, Inc.
+dnl  Copyright 2004, 2005, 2007 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
@@ -34,13 +23,10 @@ include(`../config.m4')
 
 C		     popcount	      hamdist
 C		    cycles/limb	    cycles/limb
-C AMD K8,K9		 6		 7
-C AMD K10		 6		 7
-C Intel P4		12		14.3
-C Intel core2		 7		 8
-C Intel corei		 ?		 7.3
-C Intel atom		16.5		17.5
-C VIA nano		 8.75		10.4
+C K8,K9:		 6		 7
+C K10:			 6		 7
+C P4:			12		14.3
+C P6-15:		 7		 8
 
 C TODO
 C  * Tune.  It should be possible to reach 5 c/l for popcount and 6 c/l for
@@ -55,7 +41,6 @@ ifdef(`OPERATION_popcount',`
   define(`h33333333',	`%r11')
   define(`h0f0f0f0f',	`%rcx')
   define(`h01010101',	`%rdx')
-  define(`POP',		`$1')
   define(`HAM',		`dnl')
 ')
 ifdef(`OPERATION_hamdist',`
@@ -67,111 +52,106 @@ ifdef(`OPERATION_hamdist',`
   define(`h33333333',	`%r11')
   define(`h0f0f0f0f',	`%rcx')
   define(`h01010101',	`%r14')
-  define(`POP',		`dnl')
   define(`HAM',		`$1')
 ')
 
 
 MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
 	TEXT
 	ALIGN(32)
 PROLOGUE(func)
- POP(`	FUNC_ENTRY(2)		')
- HAM(`	FUNC_ENTRY(3)		')
-	push	%r12
-	push	%r13
- HAM(`	push	%r14		')
 
-	mov	$0x5555555555555555, h55555555
-	mov	$0x3333333333333333, h33333333
-	mov	$0x0f0f0f0f0f0f0f0f, h0f0f0f0f
-	mov	$0x0101010101010101, h01010101
+	pushq	%r12
+	pushq	%r13
+ HAM(`	pushq	%r14		')
+
+	movq	$0x5555555555555555, h55555555
+	movq	$0x3333333333333333, h33333333
+	movq	$0x0f0f0f0f0f0f0f0f, h0f0f0f0f
+	movq	$0x0101010101010101, h01010101
 
-	lea	(up,n,8), up
- HAM(`	lea	(vp,n,8), vp	')
-	neg	n
+	leaq	(up,n,8), up
+ HAM(`	leaq	(vp,n,8), vp	')
+	negq	n
 
-	xor	R32(%rax), R32(%rax)
+	xorl	%eax, %eax
 
-	bt	$0, R32(n)
-	jnc	L(top)
+	btq	$0, n
+	jnc	L(oop)
 
-	mov	(up,n,8), %r8
- HAM(`	xor	(vp,n,8), %r8	')
+	movq	(up,n,8), %r8
+ HAM(`	xorq	(vp,n,8), %r8	')
 
-	mov	%r8, %r9
-	shr	%r8
-	and	h55555555, %r8
-	sub	%r8, %r9
+	movq	%r8, %r9
+	shrq	%r8
+	andq	h55555555, %r8
+	subq	%r8, %r9
 
-	mov	%r9, %r8
-	shr	$2, %r9
-	and	h33333333, %r8
-	and	h33333333, %r9
-	add	%r8, %r9		C 16 4-bit fields (0..4)
+	movq	%r9, %r8
+	shrq	$2, %r9
+	andq	h33333333, %r8
+	andq	h33333333, %r9
+	addq	%r8, %r9		C 16 4-bit fields (0..4)
 
-	mov	%r9, %r8
-	shr	$4, %r9
-	and	h0f0f0f0f, %r8
-	and	h0f0f0f0f, %r9
-	add	%r8, %r9		C 8 8-bit fields (0..16)
+	movq	%r9, %r8
+	shrq	$4, %r9
+	andq	h0f0f0f0f, %r8
+	andq	h0f0f0f0f, %r9
+	addq	%r8, %r9		C 8 8-bit fields (0..16)
 
-	imul	h01010101, %r9		C sum the 8 fields in high 8 bits
-	shr	$56, %r9
+	imulq	h01010101, %r9		C sum the 8 fields in high 8 bits
+	shrq	$56, %r9
 
-	mov	%r9, %rax		C add to total
-	add	$1, n
-	jz	L(end)
+	addq	%r9, %rax		C add to total
+	addq	$1, n
+	jz	L(done)
 
 	ALIGN(16)
-L(top):	mov	(up,n,8), %r8
-	mov	8(up,n,8), %r12
- HAM(`	xor	(vp,n,8), %r8	')
- HAM(`	xor	8(vp,n,8), %r12	')
-
-	mov	%r8, %r9
-	mov	%r12, %r13
-	shr	%r8
-	shr	%r12
-	and	h55555555, %r8
-	and	h55555555, %r12
-	sub	%r8, %r9
-	sub	%r12, %r13
-
-	mov	%r9, %r8
-	mov	%r13, %r12
-	shr	$2, %r9
-	shr	$2, %r13
-	and	h33333333, %r8
-	and	h33333333, %r9
-	and	h33333333, %r12
-	and	h33333333, %r13
-	add	%r8, %r9		C 16 4-bit fields (0..4)
-	add	%r12, %r13		C 16 4-bit fields (0..4)
-
-	add	%r13, %r9		C 16 4-bit fields (0..8)
-	mov	%r9, %r8
-	shr	$4, %r9
-	and	h0f0f0f0f, %r8
-	and	h0f0f0f0f, %r9
-	add	%r8, %r9		C 8 8-bit fields (0..16)
-
-	imul	h01010101, %r9		C sum the 8 fields in high 8 bits
-	shr	$56, %r9
-
-	add	%r9, %rax		C add to total
-	add	$2, n
-	jnc	L(top)
-
-L(end):
- HAM(`	pop	%r14		')
-	pop	%r13
-	pop	%r12
-	FUNC_EXIT()
+L(oop):	movq	(up,n,8), %r8
+	movq	8(up,n,8), %r12
+ HAM(`	xorq	(vp,n,8), %r8	')
+ HAM(`	xorq	8(vp,n,8), %r12	')
+
+	movq	%r8, %r9
+	movq	%r12, %r13
+	shrq	%r8
+	shrq	%r12
+	andq	h55555555, %r8
+	andq	h55555555, %r12
+	subq	%r8, %r9
+	subq	%r12, %r13
+
+	movq	%r9, %r8
+	movq	%r13, %r12
+	shrq	$2, %r9
+	shrq	$2, %r13
+	andq	h33333333, %r8
+	andq	h33333333, %r9
+	andq	h33333333, %r12
+	andq	h33333333, %r13
+	addq	%r8, %r9		C 16 4-bit fields (0..4)
+	addq	%r12, %r13		C 16 4-bit fields (0..4)
+
+	addq	%r13, %r9		C 16 4-bit fields (0..8)
+	movq	%r9, %r8
+	shrq	$4, %r9
+	andq	h0f0f0f0f, %r8
+	andq	h0f0f0f0f, %r9
+	addq	%r8, %r9		C 8 8-bit fields (0..16)
+
+	imulq	h01010101, %r9		C sum the 8 fields in high 8 bits
+	shrq	$56, %r9
+
+	addq	%r9, %rax		C add to total
+	addq	$2, n
+	jnc	L(oop)
+
+L(done):
+ HAM(`	popq	%r14		')
+	popq	%r13
+	popq	%r12
 	ret
+
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/redc_1.asm b/gmp/mpn/x86_64/redc_1.asm
new file mode 100644
index 0000000000..23ccceed67
--- /dev/null
+++ b/gmp/mpn/x86_64/redc_1.asm
@@ -0,0 +1,335 @@
+dnl  AMD64 mpn_redc_1 -- Montgomery reduction with a one-limb modular inverse.
+
+dnl  Copyright 2004, 2008 Free Software Foundation, Inc.
+dnl
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
+dnl
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C	     cycles/limb
+C	     cycles/limb
+C K8,K9:	 2.5
+C K10:		 2.5
+C P4:		 ?
+C P6-15 (Core2): 5.3
+C P6-28 (Atom):	 ?
+
+C TODO
+C  * Handle certain sizes, e.g., 1, 2, 3, 4, 8, with single-loop code.
+C    The code for 1, 2, 3, 4 should perhaps be completely register based.
+C  * Perhaps align outer loops.
+C  * The sub_n at the end leaks side-channel data.  How do we fix that?
+C  * Write mpn_addsub_n computing R = A + B - C.  It should run at 2 c/l.
+C  * We could software pipeline the IMUL stuff, by putting it before the
+C    outer loops and before the end of the outer loops.  The last outer
+C    loop iteration would then compute an unneeded product, but it is at
+C    least not a stray read from up[], since it is at up[n].
+C  * Can we combine both the add_n and sub_n into the loops, somehow?
+
+C INPUT PARAMETERS
+define(`rp',	  `%rdi')
+define(`up',	  `%rsi')
+define(`param_mp',`%rdx')
+define(`n',	  `%rcx')
+define(`invm',	  `%r8')
+
+define(`mp',	  `%r13')
+define(`i',	  `%r11')
+define(`nneg',	  `%r12')
+
+ASM_START()
+	TEXT
+	ALIGN(32)
+PROLOGUE(mpn_redc_1)
+	push	%rbp			C save the callee-saved registers used below
+	push	%rbx
+	push	%r12
+	push	%r13
+	push	%r14
+	push	n			C keep entry n; reloaded via 8(%rsp) at L(common)
+	sub	$8, %rsp		C maintain ABI required rsp alignment
+
+	lea	(param_mp,n,8), mp	C mp += n
+	lea	(up,n,8), up		C up += n
+
+	mov	n, nneg
+	neg	nneg			C nneg = -n
+
+	mov	R32(n), R32(%rax)
+	and	$3, R32(%rax)		C select a loop variant on n mod 4
+	jz	L(b0)			C n mod 4 = 0
+	cmp	$2, R32(%rax)
+	jz	L(b2)			C n mod 4 = 2
+	jg	L(b3)			C n mod 4 = 3, else fall through for 1
+
+L(b1):	C lea	(mp), mp
+	lea	-16(up), up
+L(o1):	mov	nneg, i			C restart the inner index at -n
+	mov	16(up,nneg,8), %rbp	C up[0]
+	imul	invm, %rbp		C rbp = low limb of up[0] * invm
+
+	mov	(mp,i,8), %rax
+	xor	%ebx, %ebx
+	mul	%rbp			C rdx:rax = rax * rbp
+	add	$1, i
+	jnz	1f			C taken unless nneg = -1 (i reached 0)
+	add	%rax, 8(up,i,8)
+	adc	$0, %rdx
+	mov	%rdx, %r14
+	jmp	L(n1)
+
+1:	mov	%rax, %r9
+	mov	(mp,i,8), %rax
+	mov	%rdx, %r14
+	jmp	L(mi1)
+
+	ALIGN(16)
+L(lo1):	add	%r10, (up,i,8)
+	adc	%rax, %r9
+	mov	(mp,i,8), %rax
+	adc	%rdx, %r14
+L(mi1):	xor	%r10d, %r10d
+	mul	%rbp
+	add	%r9, 8(up,i,8)
+	adc	%rax, %r14
+	adc	%rdx, %rbx
+	mov	8(mp,i,8), %rax
+	mul	%rbp
+	add	%r14, 16(up,i,8)
+	adc	%rax, %rbx
+	adc	%rdx, %r10
+	mov	16(mp,i,8), %rax
+	mul	%rbp
+	xor	%r9d, %r9d
+	xor	%r14d, %r14d
+	add	%rbx, 24(up,i,8)
+	adc	%rax, %r10
+	mov	24(mp,i,8), %rax
+	adc	%rdx, %r9
+	xor	%ebx, %ebx
+	mul	%rbp
+	add	$4, i			C four limbs handled per inner iteration
+	js	L(lo1)			C loop while i is still negative
+L(ed1):	add	%r10, (up)
+	adc	%rax, %r9
+	adc	%rdx, %r14
+	xor	%r10d, %r10d
+	add	%r9, 8(up)
+	adc	$0, %r14
+L(n1):	mov	%r14, 16(up,nneg,8)	C up[0]
+	add	$8, up			C step up by one limb for the next pass
+	dec	n
+	jnz	L(o1)			C n outer passes in total
+C	lea	(mp), mp
+	lea	16(up), up		C undo the -16 bias applied at L(b1)
+	jmp	L(common)
+
+L(b0):	C lea	(mp), mp
+	lea	-16(up), up
+L(o0):	mov	nneg, i			C restart the inner index at -n
+	mov	16(up,nneg,8), %rbp	C up[0]
+	imul	invm, %rbp		C rbp = low limb of up[0] * invm
+
+	mov	(mp,i,8), %rax
+	xor	%r10d, %r10d
+	mul	%rbp			C rdx:rax = rax * rbp
+	mov	%rax, %r14
+	mov	%rdx, %rbx
+	jmp	L(mi0)
+
+	ALIGN(16)
+L(lo0):	add	%r10, (up,i,8)
+	adc	%rax, %r9
+	mov	(mp,i,8), %rax
+	adc	%rdx, %r14
+	xor	%r10d, %r10d
+	mul	%rbp
+	add	%r9, 8(up,i,8)
+	adc	%rax, %r14
+	adc	%rdx, %rbx
+L(mi0):	mov	8(mp,i,8), %rax
+	mul	%rbp
+	add	%r14, 16(up,i,8)
+	adc	%rax, %rbx
+	adc	%rdx, %r10
+	mov	16(mp,i,8), %rax
+	mul	%rbp
+	xor	%r9d, %r9d
+	xor	%r14d, %r14d
+	add	%rbx, 24(up,i,8)
+	adc	%rax, %r10
+	mov	24(mp,i,8), %rax
+	adc	%rdx, %r9
+	xor	%ebx, %ebx
+	mul	%rbp
+	add	$4, i			C four limbs handled per inner iteration
+	js	L(lo0)			C loop while i is still negative
+L(ed0):	add	%r10, (up)
+	adc	%rax, %r9
+	adc	%rdx, %r14
+	xor	%r10d, %r10d
+	add	%r9, 8(up)
+	adc	$0, %r14
+	mov	%r14, 16(up,nneg,8)	C up[0]
+	add	$8, up			C step up by one limb for the next pass
+	dec	n
+	jnz	L(o0)			C n outer passes in total
+C	lea	(mp), mp
+	lea	16(up), up		C undo the -16 bias applied at L(b0)
+	jmp	L(common)
+
+
+L(b3):	lea	-8(mp), mp
+	lea	-24(up), up
+L(o3):	mov	nneg, i			C restart the inner index at -n
+	mov	24(up,nneg,8), %rbp	C up[0]
+	imul	invm, %rbp		C rbp = low limb of up[0] * invm
+
+	mov	8(mp,i,8), %rax
+	mul	%rbp			C rdx:rax = rax * rbp
+	mov	%rax, %rbx
+	mov	%rdx, %r10
+	jmp	L(mi3)
+
+	ALIGN(16)
+L(lo3):	add	%r10, (up,i,8)
+	adc	%rax, %r9
+	mov	(mp,i,8), %rax
+	adc	%rdx, %r14
+	xor	%r10d, %r10d
+	mul	%rbp
+	add	%r9, 8(up,i,8)
+	adc	%rax, %r14
+	adc	%rdx, %rbx
+	mov	8(mp,i,8), %rax
+	mul	%rbp
+	add	%r14, 16(up,i,8)
+	adc	%rax, %rbx
+	adc	%rdx, %r10
+L(mi3):	mov	16(mp,i,8), %rax
+	mul	%rbp
+	xor	%r9d, %r9d
+	xor	%r14d, %r14d
+	add	%rbx, 24(up,i,8)
+	adc	%rax, %r10
+	mov	24(mp,i,8), %rax
+	adc	%rdx, %r9
+	xor	%ebx, %ebx
+	mul	%rbp
+	add	$4, i			C four limbs handled per inner iteration
+	js	L(lo3)			C loop while i is still negative
+L(ed3):	add	%r10, 8(up)
+	adc	%rax, %r9
+	adc	%rdx, %r14
+	xor	%r10d, %r10d
+	add	%r9, 16(up)
+	adc	$0, %r14
+	mov	%r14, 24(up,nneg,8)	C up[0]
+	add	$8, up			C step up by one limb for the next pass
+	dec	n
+	jnz	L(o3)			C n outer passes in total
+	lea	8(mp), mp		C undo the -8 bias applied at L(b3)
+	lea	24(up), up		C undo the -24 bias applied at L(b3)
+	jmp	L(common)
+
+L(b2):	lea	-16(mp), mp
+	lea	-32(up), up
+L(o2):	mov	nneg, i			C restart the inner index at -n
+	mov	32(up,nneg,8), %rbp	C up[0]
+	imul	invm, %rbp		C rbp = low limb of up[0] * invm
+
+	mov	16(mp,i,8), %rax
+	mul	%rbp			C rdx:rax = rax * rbp
+	xor	%r14d, %r14d
+	mov	%rax, %r10
+	mov	24(mp,i,8), %rax
+	mov	%rdx, %r9
+	jmp	L(mi2)
+
+	ALIGN(16)
+L(lo2):	add	%r10, (up,i,8)
+	adc	%rax, %r9
+	mov	(mp,i,8), %rax
+	adc	%rdx, %r14
+	xor	%r10d, %r10d
+	mul	%rbp
+	add	%r9, 8(up,i,8)
+	adc	%rax, %r14
+	adc	%rdx, %rbx
+	mov	8(mp,i,8), %rax
+	mul	%rbp
+	add	%r14, 16(up,i,8)
+	adc	%rax, %rbx
+	adc	%rdx, %r10
+	mov	16(mp,i,8), %rax
+	mul	%rbp
+	xor	%r9d, %r9d
+	xor	%r14d, %r14d
+	add	%rbx, 24(up,i,8)
+	adc	%rax, %r10
+	mov	24(mp,i,8), %rax
+	adc	%rdx, %r9
+L(mi2):	xor	%ebx, %ebx
+	mul	%rbp
+	add	$4, i			C four limbs handled per inner iteration
+	js	L(lo2)			C loop while i is still negative
+L(ed2):	add	%r10, 16(up)
+	adc	%rax, %r9
+	adc	%rdx, %r14
+	xor	%r10d, %r10d
+	add	%r9, 24(up)
+	adc	$0, %r14
+	mov	%r14, 32(up,nneg,8)	C up[0]
+	add	$8, up			C step up by one limb for the next pass
+	dec	n
+	jnz	L(o2)			C n outer passes in total
+	lea	16(mp), mp		C undo the -16 bias applied at L(b2)
+	lea	32(up), up		C undo the -32 bias applied at L(b2)
+
+
+L(common):
+	lea	(mp,nneg,8), mp		C restore entry mp
+
+C   cy = mpn_add_n (rp, up, up - n, n);
+C		    rdi rsi  rdx    rcx
+	lea	(up,nneg,8), up		C up -= n
+	lea	(up,nneg,8), %rdx	C rdx = up - n [up entry value]
+	mov	rp, nneg		C preserve rp over first call
+	mov	8(%rsp), %rcx		C pass entry n
+C	mov	rp, %rdi
+	CALL(	mpn_add_n)
+	test	R32(%rax), R32(%rax)
+	jz	L(ret)			C zero return from mpn_add_n: skip the subtract
+
+C     mpn_sub_n (rp, rp, mp, n);
+C		 rdi rsi rdx rcx
+	mov	nneg, %rdi		C nneg holds the saved rp here
+	mov	nneg, %rsi
+	mov	mp, %rdx
+	mov	8(%rsp), %rcx		C pass entry n
+	CALL(	mpn_sub_n)
+
+L(ret):
+	add	$8, %rsp		C drop the alignment padding
+	pop	n			C just increment rsp
+	pop	%r14
+	pop	%r13
+	pop	%r12
+	pop	%rbx
+	pop	%rbp
+	ret
+EPILOGUE()
diff --git a/gmp/mpn/x86_64/rsh1add_n.asm b/gmp/mpn/x86_64/rsh1add_n.asm
new file mode 100644
index 0000000000..0dd46f2c48
--- /dev/null
+++ b/gmp/mpn/x86_64/rsh1add_n.asm
@@ -0,0 +1,146 @@
+dnl  AMD64 mpn_rsh1add_n -- rp[] = (up[] + vp[]) >> 1
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C	     cycles/limb
+C K8,K9:	 2.14	(mpn_add_n + mpn_rshift need 4.125)
+C K10:		 2.14	(mpn_add_n + mpn_rshift need 4.125)
+C P4:		12.75
+C P6-15:	 3.75
+
+C TODO
+C  * Rewrite to use indexed addressing, like addlsh1.asm and sublsh1.asm.
+C  * Try to approach the cache bandwidth 1.5 c/l.  It should be possible.
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n',`%rcx')
+define(`n32',`%ecx')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+	.byte	0,0,0,0,0,0,0,0
+PROLOGUE(mpn_rsh1add_n)
+	pushq	%rbx			C				1
+
+	xorl	%eax, %eax		C clear eax ahead of the adcl below
+	movq	(up), %rbx
+	addq	(vp), %rbx		C rbx = up[0] + vp[0], carry out in CF
+
+	rcrq	%rbx			C rotate, save acy
+	adcl	%eax, %eax		C return value
+
+	movl	n32, %r11d
+	andl	$3, %r11d		C r11d = n mod 4
+
+	cmpl	$1, %r11d
+	je	L(do)			C jump if n = 1 5 9 ...
+
+L(n1):	cmpl	$2, %r11d
+	jne	L(n2)			C jump unless n = 2 6 10 ...
+	addq	%rbx, %rbx		C rotate carry limb, restore acy
+	movq	8(up), %r10
+	adcq	8(vp), %r10		C r10 = up[1] + vp[1] + acy
+	leaq	8(up), up
+	leaq	8(vp), vp
+	leaq	8(rp), rp
+	rcrq	%r10			C rotate, save acy in the top bit of r10
+	rcrq	%rbx			C low bit of r10 enters the top of rbx
+	movq	%rbx, -8(rp)		C store the finished low limb
+	jmp	L(cj1)
+
+L(n2):	cmpl	$3, %r11d
+	jne	L(n3)			C jump unless n = 3 7 11 ...
+	addq	%rbx, %rbx		C rotate carry limb, restore acy
+	movq	8(up), %r9
+	movq	16(up), %r10
+	adcq	8(vp), %r9
+	adcq	16(vp), %r10
+	leaq	16(up), up
+	leaq	16(vp), vp
+	leaq	16(rp), rp
+	rcrq	%r10			C rotate, save acy in the top bit of r10
+	rcrq	%r9
+	rcrq	%rbx
+	movq	%rbx, -16(rp)		C store the finished low limb
+	jmp	L(cj2)
+
+L(n3):	decq	n			C come here for n = 4 8 12 ...
+	addq	%rbx, %rbx		C rotate carry limb, restore acy
+	movq	8(up), %r8
+	movq	16(up), %r9
+	adcq	8(vp), %r8
+	adcq	16(vp), %r9
+	movq	24(up), %r10
+	adcq	24(vp), %r10
+	leaq	24(up), up
+	leaq	24(vp), vp
+	leaq	24(rp), rp
+	rcrq	%r10			C rotate, save acy in the top bit of r10
+	rcrq	%r9
+	rcrq	%r8
+	rcrq	%rbx
+	movq	%rbx, -24(rp)
+	movq	%r8, -16(rp)
+L(cj2):	movq	%r9, -8(rp)
+L(cj1):	movq	%r10, %rbx		C highest limb so far stays pending in rbx
+
+L(do):
+	shrq	$2, n			C				4
+	je	L(end)			C				2
+	ALIGN(16)
+L(oop):	addq	%rbx, %rbx		C rotate carry limb, restore acy
+
+	movq	8(up), %r8
+	movq	16(up), %r9
+	adcq	8(vp), %r8
+	adcq	16(vp), %r9
+	movq	24(up), %r10
+	movq	32(up), %r11
+	adcq	24(vp), %r10
+	adcq	32(vp), %r11
+
+	leaq	32(up), up
+	leaq	32(vp), vp
+
+	rcrq	%r11			C rotate, save acy
+	rcrq	%r10
+	rcrq	%r9
+	rcrq	%r8
+
+	rcrq	%rbx
+	movq	%rbx, (rp)
+	movq	%r8, 8(rp)
+	movq	%r9, 16(rp)
+	movq	%r10, 24(rp)
+	movq	%r11, %rbx		C highest limb stays pending for the next pass
+
+	leaq	32(rp), rp
+	decq	n			C n counts remaining 4-limb iterations here
+	jne	L(oop)
+
+L(end):	movq	%rbx, (rp)		C store the pending limb; its top bit is acy
+	popq	%rbx
+	ret
+EPILOGUE()
diff --git a/gmp/mpn/x86_64/rsh1aors_n.asm b/gmp/mpn/x86_64/rsh1aors_n.asm
deleted file mode 100644
index a3e9cc5d23..0000000000
--- a/gmp/mpn/x86_64/rsh1aors_n.asm
+++ /dev/null
@@ -1,189 +0,0 @@
-dnl  AMD64 mpn_rsh1add_n -- rp[] = (up[] + vp[]) >> 1
-dnl  AMD64 mpn_rsh1sub_n -- rp[] = (up[] - vp[]) >> 1
-
-dnl  Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 2.14	(mpn_add_n + mpn_rshift need 4.125)
-C AMD K10	 2.14	(mpn_add_n + mpn_rshift need 4.125)
-C Intel P4	12.75
-C Intel core2	 3.75
-C Intel NMH	 4.4
-C Intel SBR	 ?
-C Intel atom	 ?
-C VIA nano	 3.25
-
-C TODO
-C  * Rewrite to use indexed addressing, like addlsh1.asm and sublsh1.asm.
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`vp', `%rdx')
-define(`n',`  %rcx')
-
-ifdef(`OPERATION_rsh1add_n', `
-	define(ADDSUB,	      add)
-	define(ADCSBB,	      adc)
-	define(func_n,	      mpn_rsh1add_n)
-	define(func_nc,	      mpn_rsh1add_nc)')
-ifdef(`OPERATION_rsh1sub_n', `
-	define(ADDSUB,	      sub)
-	define(ADCSBB,	      sbb)
-	define(func_n,	      mpn_rsh1sub_n)
-	define(func_nc,	      mpn_rsh1sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(func_nc)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8	')
-	push	%rbx
-
-	xor	R32(%rax), R32(%rax)
-	neg	%r8			C set C flag from parameter
-	mov	(up), %rbx
-	ADCSBB	(vp), %rbx
-	jmp	L(ent)
-EPILOGUE()
-
-	ALIGN(16)
-PROLOGUE(func_n)
-	FUNC_ENTRY(4)
-	push	%rbx
-
-	xor	R32(%rax), R32(%rax)
-	mov	(up), %rbx
-	ADDSUB	(vp), %rbx
-L(ent):
-	rcr	%rbx			C rotate, save acy
-	adc	R32(%rax), R32(%rax)	C return value
-
-	mov	R32(n), R32(%r11)
-	and	$3, R32(%r11)
-
-	cmp	$1, R32(%r11)
-	je	L(do)			C jump if n = 1 5 9 ...
-
-L(n1):	cmp	$2, R32(%r11)
-	jne	L(n2)			C jump unless n = 2 6 10 ...
-	add	%rbx, %rbx		C rotate carry limb, restore acy
-	mov	8(up), %r10
-	ADCSBB	8(vp), %r10
-	lea	8(up), up
-	lea	8(vp), vp
-	lea	8(rp), rp
-	rcr	%r10
-	rcr	%rbx
-	mov	%rbx, -8(rp)
-	jmp	L(cj1)
-
-L(n2):	cmp	$3, R32(%r11)
-	jne	L(n3)			C jump unless n = 3 7 11 ...
-	add	%rbx, %rbx		C rotate carry limb, restore acy
-	mov	8(up), %r9
-	mov	16(up), %r10
-	ADCSBB	8(vp), %r9
-	ADCSBB	16(vp), %r10
-	lea	16(up), up
-	lea	16(vp), vp
-	lea	16(rp), rp
-	rcr	%r10
-	rcr	%r9
-	rcr	%rbx
-	mov	%rbx, -16(rp)
-	jmp	L(cj2)
-
-L(n3):	dec	n			C come here for n = 4 8 12 ...
-	add	%rbx, %rbx		C rotate carry limb, restore acy
-	mov	8(up), %r8
-	mov	16(up), %r9
-	ADCSBB	8(vp), %r8
-	ADCSBB	16(vp), %r9
-	mov	24(up), %r10
-	ADCSBB	24(vp), %r10
-	lea	24(up), up
-	lea	24(vp), vp
-	lea	24(rp), rp
-	rcr	%r10
-	rcr	%r9
-	rcr	%r8
-	rcr	%rbx
-	mov	%rbx, -24(rp)
-	mov	%r8, -16(rp)
-L(cj2):	mov	%r9, -8(rp)
-L(cj1):	mov	%r10, %rbx
-
-L(do):
-	shr	$2, n			C				4
-	je	L(end)			C				2
-	ALIGN(16)
-L(top):	add	%rbx, %rbx		C rotate carry limb, restore acy
-
-	mov	8(up), %r8
-	mov	16(up), %r9
-	ADCSBB	8(vp), %r8
-	ADCSBB	16(vp), %r9
-	mov	24(up), %r10
-	mov	32(up), %r11
-	ADCSBB	24(vp), %r10
-	ADCSBB	32(vp), %r11
-
-	lea	32(up), up
-	lea	32(vp), vp
-
-	rcr	%r11			C rotate, save acy
-	rcr	%r10
-	rcr	%r9
-	rcr	%r8
-
-	rcr	%rbx
-	mov	%rbx, (rp)
-	mov	%r8, 8(rp)
-	mov	%r9, 16(rp)
-	mov	%r10, 24(rp)
-	mov	%r11, %rbx
-
-	lea	32(rp), rp
-	dec	n
-	jne	L(top)
-
-L(end):	mov	%rbx, (rp)
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/rsh1sub_n.asm b/gmp/mpn/x86_64/rsh1sub_n.asm
new file mode 100644
index 0000000000..b08bba4735
--- /dev/null
+++ b/gmp/mpn/x86_64/rsh1sub_n.asm
@@ -0,0 +1,146 @@
+dnl  AMD64 mpn_rsh1sub_n -- rp[] = (up[] - vp[]) >> 1
+
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C	     cycles/limb
+C K8,K9:	 2.14	(mpn_add_n + mpn_rshift need 4.125)
+C K10:		 2.14	(mpn_add_n + mpn_rshift need 4.125)
+C P4:		12.75
+C P6-15:	 3.75
+
+C TODO
+C  * Rewrite to use indexed addressing, like addlsh1.asm and sublsh1.asm.
+C  * Try to approach the cache bandwidth 1.5 c/l.  It should be possible.
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n',`%rcx')
+define(`n32',`%ecx')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+	.byte	0,0,0,0,0,0,0,0
+PROLOGUE(mpn_rsh1sub_n)
+	pushq	%rbx			C				1
+
+	xorl	%eax, %eax		C eax = 0, receives the return bit below
+	movq	(up), %rbx		C rbx = up[0]
+	subq	(vp), %rbx		C rbx = up[0] - vp[0], CF = borrow
+
+	rcrq	%rbx			C rotate right by 1: borrow -> bit 63, bit 0 -> CF
+	adcl	%eax, %eax		C return value = bit rotated out of limb 0
+
+	movl	n32, %r11d
+	andl	$3, %r11d		C r11d = n mod 4, selects the lead-in case
+
+	cmpl	$1, %r11d
+	je	L(do)			C jump if n = 1 5 9 ...
+
+L(n1):	cmpl	$2, %r11d
+	jne	L(n2)			C jump unless n = 2 6 10 ...
+	addq	%rbx, %rbx		C shift saved borrow out of bit 63, back into CF
+	movq	8(up), %r10
+	sbbq	8(vp), %r10		C continue the borrow chain into limb 1
+	leaq	8(up), up		C lea steps the pointers without touching CF
+	leaq	8(vp), vp
+	leaq	8(rp), rp
+	rcrq	%r10			C borrow -> bit 63 of r10, its bit 0 -> CF
+	rcrq	%rbx			C that bit becomes the top bit of limb 0
+	movq	%rbx, -8(rp)		C store finished limb 0, rp was already stepped
+	jmp	L(cj1)			C r10 becomes the pending limb at cj1
+
+L(n2):	cmpl	$3, %r11d
+	jne	L(n3)			C jump unless n = 3 7 11 ...
+	addq	%rbx, %rbx		C rotate carry limb, restore acy
+	movq	8(up), %r9
+	movq	16(up), %r10
+	sbbq	8(vp), %r9		C limbs 1 and 2 continue the borrow chain
+	sbbq	16(vp), %r10
+	leaq	16(up), up
+	leaq	16(vp), vp
+	leaq	16(rp), rp
+	rcrq	%r10			C rotate the chain right, top limb first
+	rcrq	%r9
+	rcrq	%rbx
+	movq	%rbx, -16(rp)		C store finished limb 0
+	jmp	L(cj2)			C r9 is stored at cj2, r10 stays pending
+
+L(n3):	decq	n			C come here for n = 4 8 12 ...
+	addq	%rbx, %rbx		C rotate carry limb, restore acy
+	movq	8(up), %r8
+	movq	16(up), %r9
+	sbbq	8(vp), %r8		C limbs 1 to 3 continue the borrow chain
+	sbbq	16(vp), %r9
+	movq	24(up), %r10
+	sbbq	24(vp), %r10
+	leaq	24(up), up
+	leaq	24(vp), vp
+	leaq	24(rp), rp
+	rcrq	%r10			C rotate the chain right, top limb first
+	rcrq	%r9
+	rcrq	%r8
+	rcrq	%rbx
+	movq	%rbx, -24(rp)		C store finished limbs 0 and 1
+	movq	%r8, -16(rp)
+L(cj2):	movq	%r9, -8(rp)		C store the limb just below the pending one
+L(cj1):	movq	%r10, %rbx		C top lead-in limb becomes the pending limb
+
+L(do):					C rbx = pending limb, saved borrow in bit 63
+	shrq	$2, n			C				4
+	je	L(end)			C				2
+	ALIGN(16)
+L(oop):	addq	%rbx, %rbx		C rotate carry limb, restore acy
+
+	movq	8(up), %r8		C next four limbs, 8(up) follows the pending one
+	movq	16(up), %r9
+	sbbq	8(vp), %r8
+	sbbq	16(vp), %r9
+	movq	24(up), %r10
+	movq	32(up), %r11
+	sbbq	24(vp), %r10
+	sbbq	32(vp), %r11
+
+	leaq	32(up), up		C lea keeps CF live for the rcr chain below
+	leaq	32(vp), vp
+
+	rcrq	%r11			C rotate, save acy
+	rcrq	%r10
+	rcrq	%r9
+	rcrq	%r8
+
+	rcrq	%rbx			C pending limb receives bit 0 of r8 as top bit
+	movq	%rbx, (rp)		C store four finished limbs
+	movq	%r8, 8(rp)
+	movq	%r9, 16(rp)
+	movq	%r10, 24(rp)
+	movq	%r11, %rbx		C r11 is the new pending limb
+
+	leaq	32(rp), rp
+	decq	n			C one 4-limb pass done
+	jne	L(oop)
+
+L(end):	movq	%rbx, (rp)		C store last limb, its bit 63 is the final borrow
+	popq	%rbx
+	ret
+EPILOGUE()
diff --git a/gmp/mpn/x86_64/rshift.asm b/gmp/mpn/x86_64/rshift.asm
index 3f344f1dfc..8979d29ea8 100644
--- a/gmp/mpn/x86_64/rshift.asm
+++ b/gmp/mpn/x86_64/rshift.asm
@@ -1,44 +1,31 @@
-dnl  AMD64 mpn_rshift -- mpn right shift.
+dnl  AMD64 mpn_rshift -- mpn right shift.
 
-dnl  Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	 2.375
-C AMD K10	 2.375
-C Intel P4	 8
-C Intel core2	 2.11
-C Intel corei	 ?
-C Intel atom	 5.75
-C VIA nano	 3.5
+C K8,K9:	 2.375
+C K10:		 2.375
+C P4:		 8
+C P6-15 (Core2): 2.11
+C P6-28 (Atom):	 5.75
 
 
 C INPUT PARAMETERS
@@ -47,18 +34,14 @@ define(`up',	`%rsi')
 define(`n',	`%rdx')
 define(`cnt',	`%rcx')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
 	TEXT
 	ALIGN(32)
 PROLOGUE(mpn_rshift)
-	FUNC_ENTRY(4)
-	neg	R32(%rcx)		C put rsh count in cl
+	neg	%ecx			C put rsh count in cl
 	mov	(up), %rax
-	shl	R8(%rcx), %rax		C function return value
-	neg	R32(%rcx)		C put lsh count in cl
+	shl	%cl, %rax		C function return value
+	neg	%ecx			C put lsh count in cl
 
 	lea	1(n), R32(%r8)
 
@@ -73,10 +56,10 @@ PROLOGUE(mpn_rshift)
 	jne	L(1)
 C	n = 4, 8, 12, ...
 	mov	8(up,n,8), %r10
-	shr	R8(%rcx), %r10
-	neg	R32(%rcx)		C put rsh count in cl
+	shr	%cl, %r10
+	neg	%ecx			C put rsh count in cl
 	mov	16(up,n,8), %r8
-	shl	R8(%rcx), %r8
+	shl	%cl, %r8
 	or	%r8, %r10
 	mov	%r10, 8(rp,n,8)
 	inc	n
@@ -86,91 +69,90 @@ L(1):	dec	R32(%r8)
 	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
 C	n = 2, 6, 10, 16, ...
 	mov	8(up,n,8), %r10
-	shr	R8(%rcx), %r10
-	neg	R32(%rcx)		C put rsh count in cl
+	shr	%cl, %r10
+	neg	%ecx			C put rsh count in cl
 	mov	16(up,n,8), %r8
-	shl	R8(%rcx), %r8
+	shl	%cl, %r8
 	or	%r8, %r10
 	mov	%r10, 8(rp,n,8)
 	inc	n
-	neg	R32(%rcx)		C put lsh count in cl
+	neg	%ecx			C put lsh count in cl
 L(1x):
 	cmp	$-1, n
 	je	L(ast)
 	mov	8(up,n,8), %r10
-	shr	R8(%rcx), %r10
+	shr	%cl, %r10
 	mov	16(up,n,8), %r11
-	shr	R8(%rcx), %r11
-	neg	R32(%rcx)		C put rsh count in cl
+	shr	%cl, %r11
+	neg	%ecx			C put rsh count in cl
 	mov	16(up,n,8), %r8
 	mov	24(up,n,8), %r9
-	shl	R8(%rcx), %r8
+	shl	%cl, %r8
 	or	%r8, %r10
-	shl	R8(%rcx), %r9
+	shl	%cl, %r9
 	or	%r9, %r11
 	mov	%r10, 8(rp,n,8)
 	mov	%r11, 16(rp,n,8)
 	add	$2, n
 
-L(rll):	neg	R32(%rcx)		C put lsh count in cl
+L(rll):	neg	%ecx			C put lsh count in cl
 L(rlx):	mov	8(up,n,8), %r10
-	shr	R8(%rcx), %r10
+	shr	%cl, %r10
 	mov	16(up,n,8), %r11
-	shr	R8(%rcx), %r11
+	shr	%cl, %r11
 
 	add	$4, n			C				      4
 	jb	L(end)			C				      2
 	ALIGN(16)
 L(top):
 	C finish stuff from lsh block
-	neg	R32(%rcx)		C put rsh count in cl
+	neg	%ecx			C put rsh count in cl
 	mov	-16(up,n,8), %r8
 	mov	-8(up,n,8), %r9
-	shl	R8(%rcx), %r8
+	shl	%cl, %r8
 	or	%r8, %r10
-	shl	R8(%rcx), %r9
+	shl	%cl, %r9
 	or	%r9, %r11
 	mov	%r10, -24(rp,n,8)
 	mov	%r11, -16(rp,n,8)
 	C start two new rsh
 	mov	(up,n,8), %r8
 	mov	8(up,n,8), %r9
-	shl	R8(%rcx), %r8
-	shl	R8(%rcx), %r9
+	shl	%cl, %r8
+	shl	%cl, %r9
 
 	C finish stuff from rsh block
-	neg	R32(%rcx)		C put lsh count in cl
+	neg	%ecx			C put lsh count in cl
 	mov	-8(up,n,8), %r10
 	mov	0(up,n,8), %r11
-	shr	R8(%rcx), %r10
+	shr	%cl, %r10
 	or	%r10, %r8
-	shr	R8(%rcx), %r11
+	shr	%cl, %r11
 	or	%r11, %r9
 	mov	%r8, -8(rp,n,8)
 	mov	%r9, 0(rp,n,8)
 	C start two new lsh
 	mov	8(up,n,8), %r10
 	mov	16(up,n,8), %r11
-	shr	R8(%rcx), %r10
-	shr	R8(%rcx), %r11
+	shr	%cl, %r10
+	shr	%cl, %r11
 
 	add	$4, n
 	jae	L(top)			C				      2
 L(end):
-	neg	R32(%rcx)		C put rsh count in cl
-	mov	-8(up), %r8
-	shl	R8(%rcx), %r8
+	neg	%ecx			C put rsh count in cl
+	mov	-16(up,n,8), %r8
+	shl	%cl, %r8
 	or	%r8, %r10
-	mov	(up), %r9
-	shl	R8(%rcx), %r9
+	mov	-8(up,n,8), %r9
+	shl	%cl, %r9
 	or	%r9, %r11
-	mov	%r10, -16(rp)
-	mov	%r11, -8(rp)
+	mov	%r10, -24(rp,n,8)
+	mov	%r11, -16(rp,n,8)
 
-	neg	R32(%rcx)		C put lsh count in cl
+	neg	%ecx			C put lsh count in cl
 L(ast):	mov	(up), %r10
-	shr	R8(%rcx), %r10
+	shr	%cl, %r10
 	mov	%r10, (rp)
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/sec_tabselect.asm b/gmp/mpn/x86_64/sec_tabselect.asm
deleted file mode 100644
index e8aed261ef..0000000000
--- a/gmp/mpn/x86_64/sec_tabselect.asm
+++ /dev/null
@@ -1,176 +0,0 @@
-dnl  AMD64 mpn_sec_tabselect.
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C	     cycles/limb          good for cpu
-C AMD K8,K9	 1.5			Y
-C AMD K10	 1.4
-C AMD bd1	 2.64
-C AMD bobcat	 2.15			Y
-C Intel P4	 4
-C Intel core2	 1.38
-C Intel NHM	 1.75
-C Intel SBR	 1.25
-C Intel atom	 2.5			Y
-C VIA nano	 1.75			Y
-
-C NOTES
-C  * This has not been tuned for any specific processor.  Its speed should not
-C    be too bad, though.
-C  * Using SSE2/AVX2 could result in many-fold speedup.
-C  * WORKS FOR n mod 4 = 0 ONLY!
-
-C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
-define(`rp',     `%rdi')
-define(`tp',     `%rsi')
-define(`n',      `%rdx')
-define(`nents',  `%rcx')
-define(`which',  `%r8')
-
-define(`i',      `%rbp')
-define(`j',      `%r9')
-
-C rax  rbx  rcx  rdx  rdi  rsi  rbp   r8   r9  r10  r11  r12  r13  r14  r15
-C          nents  n   rp   tab   i   which j    *    *    *    *    *    *
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-PROLOGUE(mpn_sec_tabselect)
-	FUNC_ENTRY(4)
-IFDOS(`	mov	56(%rsp), %r8d	')
-
-	push	%rbx
-	push	%rbp
-	push	%r12
-	push	%r13
-	push	%r14
-	push	%r15
-
-	mov	n, j
-	add	$-4, j
-	js	L(outer_end)
-
-L(outer_top):
-	mov	nents, i
-	push	tp
-	xor	R32(%r12), R32(%r12)
-	xor	R32(%r13), R32(%r13)
-	xor	R32(%r14), R32(%r14)
-	xor	R32(%r15), R32(%r15)
-	mov	which, %rbx
-
-	ALIGN(16)
-L(top):	sub	$1, %rbx
-	sbb	%rax, %rax
-	mov	0(tp), %r10
-	mov	8(tp), %r11
-	and	%rax, %r10
-	and	%rax, %r11
-	or	%r10, %r12
-	or	%r11, %r13
-	mov	16(tp), %r10
-	mov	24(tp), %r11
-	and	%rax, %r10
-	and	%rax, %r11
-	or	%r10, %r14
-	or	%r11, %r15
-	lea	(tp,n,8), tp
-	add	$-1, i
-	jne	L(top)
-
-	mov	%r12, 0(rp)
-	mov	%r13, 8(rp)
-	mov	%r14, 16(rp)
-	mov	%r15, 24(rp)
-	pop	tp
-	lea	32(tp), tp
-	lea	32(rp), rp
-	add	$-4, j
-	jns	L(outer_top)
-L(outer_end):
-
-	test	$2, R8(n)
-	jz	L(b0x)
-L(b1x):	mov	nents, i
-	push	tp
-	xor	R32(%r12), R32(%r12)
-	xor	R32(%r13), R32(%r13)
-	mov	which, %rbx
-	ALIGN(16)
-L(tp2):	sub	$1, %rbx
-	sbb	%rax, %rax
-	mov	0(tp), %r10
-	mov	8(tp), %r11
-	and	%rax, %r10
-	and	%rax, %r11
-	or	%r10, %r12
-	or	%r11, %r13
-	lea	(tp,n,8), tp
-	add	$-1, i
-	jne	L(tp2)
-	mov	%r12, 0(rp)
-	mov	%r13, 8(rp)
-	pop	tp
-	lea	16(tp), tp
-	lea	16(rp), rp
-
-L(b0x):	test	$1, R8(n)
-	jz	L(b00)
-L(b01):	mov	nents, i
-	xor	R32(%r12), R32(%r12)
-	mov	which, %rbx
-	ALIGN(16)
-L(tp1):	sub	$1, %rbx
-	sbb	%rax, %rax
-	mov	0(tp), %r10
-	and	%rax, %r10
-	or	%r10, %r12
-	lea	(tp,n,8), tp
-	add	$-1, i
-	jne	L(tp1)
-	mov	%r12, 0(rp)
-
-L(b00):	pop	%r15
-	pop	%r14
-	pop	%r13
-	pop	%r12
-	pop	%rbp
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/k8/sqr_basecase.asm b/gmp/mpn/x86_64/sqr_basecase.asm
index 60cf945a46..3ed4be1269 100644
--- a/gmp/mpn/x86_64/k8/sqr_basecase.asm
+++ b/gmp/mpn/x86_64/sqr_basecase.asm
@@ -2,33 +2,22 @@ dnl  AMD64 mpn_sqr_basecase.
 
 dnl  Contributed to the GNU project by Torbjorn Granlund.
 
-dnl  Copyright 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2008, 2009 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -36,21 +25,14 @@ C The inner loops of this code are the result of running a code generation and
 C optimization tool suite written by David Harvey and Torbjorn Granlund.
 
 C NOTES
-C   * There is a major stupidity in that we call mpn_mul_1 initially, for a
-C     large trip count.  Instead, we should follow the generic/sqr_basecase.c
-C     code which uses addmul_2s from the start, conditionally leaving a 1x1
-C     multiply to the end.  (In assembly code, one would stop invoking
-C     addmul_2s loops when perhaps 3x2s respectively a 2x2s remains.)
-C   * Another stupidity is in the sqr_diag_addlsh1 code.  It does not need to
-C     save/restore carry, instead it can propagate into the high product word.
-C   * Align more labels, should shave off a few cycles.
-C   * We can safely use 32-bit size operations, since operands with (2^32)
-C     limbs will lead to non-termination in practice.
+C   * This code only handles operands up to SQR_KARATSUBA_THRESHOLD_MAX.  That
+C     means we can safely use 32-bit operations for all sizes, unlike in e.g.,
+C     mpn_addmul_1.
 C   * The jump table could probably be optimized, at least for non-pic.
-C   * The special code for n <= 4 was quickly written.  It is probably too
+C   * The special code for n=1,2,3 was quickly written.  It is probably too
 C     large and unnecessarily slow.
-C   * Consider combining small cases code so that the n=k-1 code jumps into the
-C     middle of the n=k code.
+C   * Consider combining small cases code so that the n=k-1 code jumps into
+C     the middle of the n=k code.
 C   * Avoid saving registers for small cases code.
 C   * Needed variables:
 C    n   r11  input size
@@ -75,6 +57,12 @@ define(`rp',	  `%rdi')
 define(`up',	  `%rsi')
 define(`n_param', `%rdx')
 
+C We should really trim this, for better spatial locality.  Alternatively,
+C we could grab the upper part of the stack area, leaving the lower part
+C instead of the upper part unused.
+define(`SQR_KARATSUBA_THRESHOLD_MAX', 120)
+define(`STACK_ALLOC', eval(8*2*SQR_KARATSUBA_THRESHOLD_MAX))
+
 define(`n',	`%r11')
 define(`tp',	`%r12')
 define(`i',	`%r8')
@@ -86,137 +74,125 @@ define(`w1',	`%rcx')
 define(`w2',	`%rbp')
 define(`w3',	`%r10')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
+define(`SPECIAL_CODE_FOR_4',1)
+
 
 ASM_START()
 	TEXT
 	ALIGN(16)
-PROLOGUE(mpn_sqr_basecase)
-	FUNC_ENTRY(3)
-	mov	R32(n_param), R32(%rcx)
-	mov	R32(n_param), R32(n)		C free original n register (rdx)
 
-	add	$-40, %rsp
+PROLOGUE(mpn_sqr_basecase)
+	add	$-48, %rsp
+	mov	%rbx, 40(%rsp)
+	mov	%rbp, 32(%rsp)
+	mov	%r12, 24(%rsp)
+	mov	%r13, 16(%rsp)
+	mov	%r14, 8(%rsp)
 
+	mov	R32(n_param), R32(n)		C free original n register (rdx)
+	mov	R32(n_param), R32(%rcx)
 	and	$3, R32(%rcx)
+	lea	4(%rcx), %rbx
 	cmp	$4, R32(n_param)
-	lea	4(%rcx), %r8
-
-	mov	%rbx, 32(%rsp)
-	mov	%rbp, 24(%rsp)
-	mov	%r12, 16(%rsp)
-	mov	%r13, 8(%rsp)
-	mov	%r14, (%rsp)
-
-	cmovg	%r8, %rcx
-
-	lea	L(tab)(%rip), %rax
-ifdef(`PIC',
-`	movslq	(%rax,%rcx,4), %r10
-	add	%r10, %rax
-	jmp	*%rax
-',`
+	cmovg	%rbx, %rcx
+	lea	L(jmptab)(%rip), %rax
 	jmp	*(%rax,%rcx,8)
-')
 	JUMPTABSECT
 	ALIGN(8)
-L(tab):	JMPENT(	L(4), L(tab))
-	JMPENT(	L(1), L(tab))
-	JMPENT(	L(2), L(tab))
-	JMPENT(	L(3), L(tab))
-	JMPENT(	L(0m4), L(tab))
-	JMPENT(	L(1m4), L(tab))
-	JMPENT(	L(2m4), L(tab))
-	JMPENT(	L(3m4), L(tab))
+L(jmptab):
+	.quad	L(4)
+	.quad	L(1)
+	.quad	L(2)
+	.quad	L(3)
+	.quad	L(0m4)
+	.quad	L(1m4)
+	.quad	L(2m4)
+	.quad	L(3m4)
 	TEXT
 
 L(1):	mov	(up), %rax
 	mul	%rax
-	add	$40, %rsp
 	mov	%rax, (rp)
 	mov	%rdx, 8(rp)
-	FUNC_EXIT()
+	add	$40, %rsp
+	pop	%rbx
 	ret
 
 L(2):	mov	(up), %rax
-	mov	%rax, %r8
 	mul	%rax
-	mov	8(up), %r11
 	mov	%rax, (rp)
-	mov	%r11, %rax
 	mov	%rdx, %r9
+	mov	8(up), %rax
 	mul	%rax
-	add	$40, %rsp
 	mov	%rax, %r10
-	mov	%r11, %rax
 	mov	%rdx, %r11
-	mul	%r8
-	xor	%r8, %r8
+	mov	8(up), %rax
+	mov	(up), %rbx
+	mul	%rbx
 	add	%rax, %r9
 	adc	%rdx, %r10
-	adc	%r8, %r11
+	adc	$0, %r11
 	add	%rax, %r9
 	mov	%r9, 8(rp)
 	adc	%rdx, %r10
 	mov	%r10, 16(rp)
-	adc	%r8, %r11
+	adc	$0, %r11
 	mov	%r11, 24(rp)
-	FUNC_EXIT()
+	add	$40, %rsp
+	pop	%rbx
 	ret
 
 L(3):	mov	(up), %rax
-	mov	%rax, %r10
 	mul	%rax
-	mov	8(up), %r11
 	mov	%rax, (rp)
-	mov	%r11, %rax
 	mov	%rdx, 8(rp)
+	mov	8(up), %rax
 	mul	%rax
-	mov	16(up), %rcx
 	mov	%rax, 16(rp)
-	mov	%rcx, %rax
 	mov	%rdx, 24(rp)
+	mov	16(up), %rax
 	mul	%rax
 	mov	%rax, 32(rp)
 	mov	%rdx, 40(rp)
 
-	mov	%r11, %rax
-	mul	%r10
+	mov	(up), %rbx
+	mov	8(up), %rax
+	mul	%rbx
 	mov	%rax, %r8
-	mov	%rcx, %rax
 	mov	%rdx, %r9
-	mul	%r10
-	xor	%r10, %r10
+	mov	16(up), %rax
+	mul	%rbx
+	xor	R32(%r10), R32(%r10)
 	add	%rax, %r9
-	mov	%r11, %rax
-	mov	%r10, %r11
 	adc	%rdx, %r10
 
-	mul	%rcx
-	add	$40, %rsp
+	mov	8(up), %rbx
+	mov	16(up), %rax
+	mul	%rbx
+	xor	R32(%r11), R32(%r11)
 	add	%rax, %r10
-	adc	%r11, %rdx
+	adc	%rdx, %r11
 	add	%r8, %r8
 	adc	%r9, %r9
 	adc	%r10, %r10
-	adc	%rdx, %rdx
 	adc	%r11, %r11
+	mov	$0, R32(%rbx)
+	adc	%rbx, %rbx
 	add	%r8, 8(rp)
 	adc	%r9, 16(rp)
 	adc	%r10, 24(rp)
-	adc	%rdx, 32(rp)
-	adc	%r11, 40(rp)
-	FUNC_EXIT()
+	adc	%r11, 32(rp)
+	adc	%rbx, 40(rp)
+	add	$40, %rsp
+	pop	%rbx
 	ret
 
+ifdef(`SPECIAL_CODE_FOR_4',`
 L(4):	mov	(up), %rax
-	mov	%rax, %r11
 	mul	%rax
-	mov	8(up), %rbx
 	mov	%rax, (rp)
-	mov	%rbx, %rax
 	mov	%rdx, 8(rp)
+	mov	8(up), %rax
 	mul	%rax
 	mov	%rax, 16(rp)
 	mov	%rdx, 24(rp)
@@ -227,71 +203,77 @@ L(4):	mov	(up), %rax
 	mov	24(up), %rax
 	mul	%rax
 	mov	%rax, 48(rp)
-	mov	%rbx, %rax
 	mov	%rdx, 56(rp)
 
-	mul	%r11
-	add	$32, %rsp
+	mov	(up), %rbx
+	mov	8(up), %rax
+	mul	%rbx
 	mov	%rax, %r8
 	mov	%rdx, %r9
 	mov	16(up), %rax
-	mul	%r11
-	xor	%r10, %r10
+	mul	%rbx
+	xor	R32(%r10), R32(%r10)
 	add	%rax, %r9
 	adc	%rdx, %r10
 	mov	24(up), %rax
-	mul	%r11
-	xor	%r11, %r11
+	mul	%rbx
+	xor	R32(%r11), R32(%r11)
 	add	%rax, %r10
 	adc	%rdx, %r11
+	mov	8(up), %rbx
 	mov	16(up), %rax
 	mul	%rbx
-	xor	%rcx, %rcx
+	xor	R32(%r12), R32(%r12)
 	add	%rax, %r10
 	adc	%rdx, %r11
-	adc	$0, %rcx
+	adc	$0, %r12
 	mov	24(up), %rax
 	mul	%rbx
-	pop	%rbx
 	add	%rax, %r11
-	adc	%rdx, %rcx
-	mov	16(up), %rdx
+	adc	%rdx, %r12
+	mov	16(up), %rbx
 	mov	24(up), %rax
-	mul	%rdx
-	add	%rax, %rcx
-	adc	$0, %rdx
+	mul	%rbx
+	xor	R32(%rbp), R32(%rbp)
+	add	%rax, %r12
+	adc	%rdx, %rbp
 
 	add	%r8, %r8
 	adc	%r9, %r9
 	adc	%r10, %r10
 	adc	%r11, %r11
-	adc	%rcx, %rcx
-	mov	$0, R32(%rax)
-	adc	%rdx, %rdx
+	adc	%r12, %r12
+	mov	$0, R32(%rbx)
+	adc	%rbp, %rbp
 
-	adc	%rax, %rax
+	adc	%rbx, %rbx
 	add	%r8, 8(rp)
 	adc	%r9, 16(rp)
 	adc	%r10, 24(rp)
 	adc	%r11, 32(rp)
-	adc	%rcx, 40(rp)
-	adc	%rdx, 48(rp)
-	adc	%rax, 56(rp)
-	FUNC_EXIT()
+	adc	%r12, 40(rp)
+	adc	%rbp, 48(rp)
+	adc	%rbx, 56(rp)
+	add	$24, %rsp
+	pop	%r12
+	pop	%rbp
+	pop	%rbx
 	ret
+')
 
-
-L(0m4):
-	lea	-16(rp,n,8), tp		C point tp in middle of result operand
-	mov	(up), v0
-	mov	8(up), %rax
+L(0m4):	add	$-STACK_ALLOC, %rsp
+	lea	(%rsp,n,8), tp		C point tp in middle of result operand
 	lea	(up,n,8), up		C point up at end of input operand
 
-	lea	-4(n), i
+	lea	-1(n), i
 C Function mpn_mul_1_m3(tp, up - i, i, up[-i - 1])
-	xor	R32(j), R32(j)
-	sub	n, j
+	mov	$-1, j
+	sub	i, j
+
+	lea	-24(tp), tp		C offset FIXME
 
+	mov	(up,j,8), v0
+	mov	8(up,j,8), %rax
 	mul	v0
 	xor	R32(w2), R32(w2)
 	mov	%rax, w0
@@ -333,28 +315,31 @@ L(L3):	xor	R32(w1), R32(w1)
 	adc	%rdx, w1
 	mov	w2, 8(tp)
 	mov	w1, 16(tp)
-
-	lea	eval(2*8)(tp), tp	C tp += 2
-	lea	-8(up), up
+	lea	eval(24+2*8)(tp), tp	C tp += 2, undo offset FIXME
+ifdef(`SPECIAL_CODE_FOR_4',`',`
+	cmp	$3, R32(i)
+	je	L(last)
+')
 	jmp	L(dowhile)
 
+L(1m4):	add	$-STACK_ALLOC, %rsp
+	lea	(%rsp,n,8), tp		C point tp in middle of result operand
+	lea	(up,n,8), up		C point up at end of input operand
 
-L(1m4):
-	lea	8(rp,n,8), tp		C point tp in middle of result operand
-	mov	(up), v0		C u0
-	mov	8(up), %rax		C u1
-	lea	8(up,n,8), up		C point up at end of input operand
-
-	lea	-3(n), i
+	lea	(n), i
 C Function mpn_mul_2s_m0(tp, up - i, i, up - i - 1)
-	lea	-3(n), j
-	neg	j
+	mov	$3, R32(j)
+	sub	i, j
+
+	lea	8(up), up		C offset FIXME
 
-	mov	%rax, v1		C u1
+	mov	-32(up,j,8), v0		C u0
+	mov	-24(up,j,8), v1		C u1
+	mov	-24(up,j,8), %rax	C u1
 	mul	v0			C u0 * u1
 	mov	%rdx, w1
 	xor	R32(w2), R32(w2)
-	mov	%rax, 8(rp)
+	mov	%rax, -24(tp,j,8)
 	jmp	L(m0)
 
 	ALIGN(16)
@@ -396,7 +381,7 @@ L(m0):	mov	-16(up,j,8), %rax	C u2, u6 ...
 	add	%rax, w3
 	mov	w2, -8(tp,j,8)
 	adc	%rdx, w0
-L(m2x):	mov	(up,j,8), %rax
+	mov	(up,j,8), %rax
 	mul	v0
 	add	%rax, w3
 	adc	%rdx, w0
@@ -412,22 +397,28 @@ L(m2x):	mov	(up,j,8), %rax
 	mov	w0, -8(tp)
 	mov	w1, (tp)
 
-	lea	-16(up), up
-	lea	eval(3*8-24)(tp), tp	C tp += 3
-	jmp	L(dowhile_end)
+	lea	-8(up), up		C undo offset FIXME
+	lea	eval(3*8)(tp), tp	C tp += 3
+	add	$-2, R32(i)		C i -= 2
+	cmp	$3, R32(i)
+	je	L(last)
+	jmp	L(dowhile)
 
 
-L(2m4):
-	lea	-16(rp,n,8), tp		C point tp in middle of result operand
-	mov	(up), v0
-	mov	8(up), %rax
+
+L(2m4):	add	$-STACK_ALLOC, %rsp
+	lea	(%rsp,n,8), tp		C point tp in middle of result operand
 	lea	(up,n,8), up		C point up at end of input operand
 
-	lea	-4(n), i
+	lea	-1(n), i
 C Function mpn_mul_1_m1(tp, up - (i - 1), i - 1, up[-i])
-	lea	-2(n), j
-	neg	j
+	mov	$1, R32(j)
+	sub	i, j
+
+	lea	-24(tp), tp		C offset FIXME
 
+	mov	-16(up,j,8), v0
+	mov	-8(up,j,8), %rax
 	mul	v0
 	mov	%rax, w2
 	mov	(up,j,8), %rax
@@ -469,28 +460,30 @@ L(L1):	xor	R32(w0), R32(w0)
 	mov	w2, 8(tp)
 	mov	w1, 16(tp)
 
-	lea	eval(2*8)(tp), tp	C tp += 2
-	lea	-8(up), up
+	lea	eval(24+2*8)(tp), tp	C tp += 2, undo offset FIXME
 	jmp	L(dowhile_mid)
 
 
-L(3m4):
-	lea	8(rp,n,8), tp		C point tp in middle of result operand
-	mov	(up), v0		C u0
-	mov	8(up), %rax		C u1
-	lea	8(up,n,8), up		C point up at end of input operand
 
-	lea	-5(n), i
+L(3m4):	add	$-STACK_ALLOC, %rsp
+	lea	(%rsp,n,8), tp		C point tp in middle of result operand
+	lea	(up,n,8), up		C point up at end of input operand
+
+	lea	(n), i
 C Function mpn_mul_2s_m2(tp, up - i + 1, i - 1, up - i)
-	lea	-1(n), j
-	neg	j
+	mov	$1, R32(j)
+	sub	i, j
 
-	mov	%rax, v1		C u1
-	mul	v0			C u0 * u1
+	lea	8(up), up		C offset FIXME
+
+	mov	-16(up,j,8), v0
+	mov	-8(up,j,8), v1
+	mov	-8(up,j,8), %rax
+	mul	v0			C v0 * u0
 	mov	%rdx, w3
 	xor	R32(w0), R32(w0)
 	xor	R32(w1), R32(w1)
-	mov	%rax, 8(rp)
+	mov	%rax, -8(tp,j,8)
 	jmp	L(m2)
 
 	ALIGN(16)
@@ -548,13 +541,18 @@ L(m2):	mov	(up,j,8), %rax
 	mov	w0, -8(tp)
 	mov	w1, (tp)
 
-	lea	-16(up), up
+	lea	-8(up), up		C undo offset FIXME
+	lea	eval(3*8)(tp), tp	C tp += 3
+	add	$-2, R32(i)		C i -= 2
 	jmp	L(dowhile_mid)
 
 L(dowhile):
 C Function mpn_addmul_2s_m2(tp, up - (i - 1), i - 1, up - i)
-	lea	4(i), j
-	neg	j
+	mov	$-1, j
+	sub	i, j
+
+	lea	-24(tp), tp		C offset FIXME
+	lea	-8(up), up		C offset FIXME
 
 	mov	16(up,j,8), v0
 	mov	24(up,j,8), v1
@@ -623,13 +621,18 @@ L(am2):	mov	32(up,j,8), %rax
 	mov	w1, 16(tp)
 
 	lea	eval(2*8)(tp), tp	C tp += 2
-
 	add	$-2, R32(i)		C i -= 2
 
+	lea	24(tp), tp		C undo offset FIXME
+	lea	8(up), up		C undo offset FIXME
+
 L(dowhile_mid):
 C Function mpn_addmul_2s_m0(tp, up - (i - 1), i - 1, up - i)
-	lea	2(i), j
-	neg	j
+	mov	$1, R32(j)
+	sub	i, j
+
+	lea	-24(tp), tp		C offset FIXME
+	lea	-8(up), up		C offset FIXME
 
 	mov	(up,j,8), v0
 	mov	8(up,j,8), v1
@@ -696,57 +699,74 @@ L(20):	mov	16(up,j,8), %rax
 	mov	w0, 8(tp)
 	mov	w1, 16(tp)
 
-	lea	eval(2*8)(tp), tp	C tp += 2
-L(dowhile_end):
+	lea	24(tp), tp		C undo offset FIXME
+	lea	8(up), up		C undo offset FIXME
 
+	lea	eval(2*8)(tp), tp	C tp += 2
 	add	$-2, R32(i)		C i -= 2
+
+	cmp	$3, R32(i)
 	jne	L(dowhile)
 
+L(last):
+
 C Function mpn_addmul_2s_2
-	mov	-16(up), v0
-	mov	-8(up), v1
-	mov	-8(up), %rax
+	mov	-24(up), v0
+	mov	-16(up), v1
+	mov	-16(up), %rax
 	mul	v0
 	xor	R32(w3), R32(w3)
-	add	%rax, -8(tp)
+	add	%rax, -32(tp)
 	adc	%rdx, w3
 	xor	R32(w0), R32(w0)
 	xor	R32(w1), R32(w1)
-	mov	(up), %rax
+	mov	-8(up), %rax
 	mul	v0
 	add	%rax, w3
-	mov	(up), %rax
+	mov	-8(up), %rax
 	adc	%rdx, w0
 	mul	v1
-	add	w3, (tp)
+	add	w3, -24(tp)
 	adc	%rax, w0
 	adc	%rdx, w1
-	mov	w0, 8(tp)
-	mov	w1, 16(tp)
+	mov	w0, -16(tp)
+	mov	w1, -8(tp)
 
 C Function mpn_sqr_diag_addlsh1
-	lea	-4(n,n), j
+	mov	R32(n), R32(j)
+	shl	$3, n
+	sub	n, up
+
+	mov	(%rsp), %r11
 
-	mov	8(rp), %r11
-	lea	-8(up), up
-	lea	(rp,j,8), rp
+	bt	$0, j
+	lea	-4(j,j),j
+	jc	L(odd)
+
+L(evn):	lea	(rp,j,8), rp
+	lea	(up,j,4), up
+	lea	8(%rsp,j,8), tp
 	neg	j
-	mov	(up,j,4), %rax
-	mul	%rax
-	test	$2, R8(j)
-	jnz	L(odd)
 
-L(evn):	add	%r11, %r11
+	add	%r11, %r11
 	sbb	R32(%rbx), R32(%rbx)		C save CF
+	mov	(up,j,4), %rax
+	mul	%rax
 	add	%rdx, %r11
 	mov	%rax, (rp,j,8)
 	jmp	L(d0)
 
-L(odd):	add	%r11, %r11
+L(odd):	lea	-16(rp,j,8), rp
+	lea	-8(up,j,4), up
+	lea	-8(%rsp,j,8), tp
+	neg	j
+
+	add	%r11, %r11
 	sbb	R32(%rbp), R32(%rbp)		C save CF
+	mov	8(up,j,4), %rax
+	mul	%rax
 	add	%rdx, %r11
-	mov	%rax, (rp,j,8)
-	lea	-2(j), j
+	mov	%rax, 16(rp,j,8)
 	jmp	L(d1)
 
 	ALIGN(16)
@@ -757,9 +777,9 @@ L(top):	mov	(up,j,4), %rax
 	adc	%rdx, %r11
 	mov	%r10, (rp,j,8)
 L(d0):	mov	%r11, 8(rp,j,8)
-	mov	16(rp,j,8), %r10
+	mov	(tp,j,8), %r10
 	adc	%r10, %r10
-	mov	24(rp,j,8), %r11
+	mov	8(tp,j,8), %r11
 	adc	%r11, %r11
 	nop
 	sbb	R32(%rbp), R32(%rbp)		C save CF
@@ -770,38 +790,38 @@ L(d0):	mov	%r11, 8(rp,j,8)
 	adc	%rdx, %r11
 	mov	%r10, 16(rp,j,8)
 L(d1):	mov	%r11, 24(rp,j,8)
-	mov	32(rp,j,8), %r10
+	mov	16(tp,j,8), %r10
 	adc	%r10, %r10
-	mov	40(rp,j,8), %r11
+	mov	24(tp,j,8), %r11
 	adc	%r11, %r11
 	sbb	R32(%rbx), R32(%rbx)		C save CF
 	add	$4, j
 	js	L(top)
 
-	mov	(up), %rax
+L(end):	mov	(up,j,4), %rax
 	mul	%rax
 	add	R32(%rbp), R32(%rbp)		C restore carry
 	adc	%rax, %r10
 	adc	%rdx, %r11
-	mov	%r10, (rp)
-	mov	%r11, 8(rp)
-	mov	16(rp), %r10
+	mov	%r10, (rp,j,8)
+	mov	%r11, 8(rp,j,8)
+	mov	(tp,j,8), %r10
 	adc	%r10, %r10
 	sbb	R32(%rbp), R32(%rbp)		C save CF
 	neg	R32(%rbp)
-	mov	8(up), %rax
+	mov	8(up,j,4), %rax
 	mul	%rax
 	add	R32(%rbx), R32(%rbx)		C restore carry
 	adc	%rax, %r10
 	adc	%rbp, %rdx
-	mov	%r10, 16(rp)
-	mov	%rdx, 24(rp)
+	mov	%r10, 16(rp,j,8)
+	mov	%rdx, 24(rp,j,8)
 
+	add	$eval(8+STACK_ALLOC), %rsp
 	pop	%r14
 	pop	%r13
 	pop	%r12
 	pop	%rbp
 	pop	%rbx
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/sqr_diag_addlsh1.asm b/gmp/mpn/x86_64/sqr_diag_addlsh1.asm
deleted file mode 100644
index 4ad034c855..0000000000
--- a/gmp/mpn/x86_64/sqr_diag_addlsh1.asm
+++ /dev/null
@@ -1,116 +0,0 @@
-dnl  AMD64 mpn_sqr_diag_addlsh1
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C AMD K8,K9	 2.5
-C AMD K10	 2.5
-C AMD bull	 3.6
-C AMD pile	 3.6
-C AMD steam	 ?
-C AMD bobcat	 4
-C AMD jaguar	 ?
-C Intel P4	 ?
-C Intel core	 4
-C Intel NHM	 3.6
-C Intel SBR	 3.15
-C Intel IBR	 3.2
-C Intel HWL	 2.6
-C Intel BWL	 ?
-C Intel atom	14
-C VIA nano	 3.5
-
-C When playing with pointers, set this to $2 to fall back to conservative
-C indexing in wind-down code.
-define(`I',`$1')
-
-define(`rp',     `%rdi')
-define(`tp',     `%rsi')
-define(`up_arg', `%rdx')
-define(`n',      `%rcx')
-
-define(`up',     `%r11')
-
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
-ASM_START()
-	TEXT
-	ALIGN(32)
-PROLOGUE(mpn_sqr_diag_addlsh1)
-	FUNC_ENTRY(4)
-	push	%rbx
-
-	dec	n
-	shl	n
-
-	mov	(up_arg), %rax
-
-	lea	(rp,n,8), rp
-	lea	(tp,n,8), tp
-	lea	(up_arg,n,4), up
-	neg	n
-
-	mul	%rax
-	mov	%rax, (rp,n,8)
-
-	xor	R32(%rbx), R32(%rbx)
-	jmp	L(mid)
-
-	ALIGN(16)
-L(top):	add	%r10, %r8
-	adc	%rax, %r9
-	mov	%r8, -8(rp,n,8)
-	mov	%r9, (rp,n,8)
-L(mid):	mov	8(up,n,4), %rax
-	mov	(tp,n,8), %r8
-	mov	8(tp,n,8), %r9
-	adc	%r8, %r8
-	adc	%r9, %r9
-	lea	(%rdx,%rbx), %r10
-	setc	R8(%rbx)
-	mul	%rax
-	add	$2, n
-	js	L(top)
-
-L(end):	add	%r10, %r8
-	adc	%rax, %r9
-	mov	%r8, I(-8(rp),-8(rp,n,8))
-	mov	%r9, I((rp),(rp,n,8))
-	adc	%rbx, %rdx
-	mov	%rdx, I(8(rp),8(rp,n,8))
-
-	pop	%rbx
-	FUNC_EXIT()
-	ret
-EPILOGUE()
diff --git a/gmp/mpn/x86_64/sublsh1_n.asm b/gmp/mpn/x86_64/sublsh1_n.asm
index c6d829fcb2..a943ed1579 100644
--- a/gmp/mpn/x86_64/sublsh1_n.asm
+++ b/gmp/mpn/x86_64/sublsh1_n.asm
@@ -1,44 +1,31 @@
 dnl  AMD64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
 
-dnl  Copyright 2003, 2005-2007, 2011, 2012 Free Software Foundation, Inc.
+dnl  Copyright 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
 
 dnl  This file is part of the GNU MP Library.
-dnl
+
 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
+dnl  it under the terms of the GNU Lesser General Public License as published
+dnl  by the Free Software Foundation; either version 3 of the License, or (at
+dnl  your option) any later version.
+
 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+dnl  License for more details.
+
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
 C	     cycles/limb
-C AMD K8,K9	 2.2
-C AMD K10	 2.2
-C Intel P4	12.75
-C Intel core2	 3.45
-C Intel corei	 ?
-C Intel atom	 ?
-C VIA nano	 3.25
+C K8,K9:	 2.2
+C K10:		 2.2
+C P4:		12.75
+C P6-15:	 3.45
+
 
 C Sometimes speed degenerates, supposedly related to that some operand
 C alignments cause cache conflicts.
@@ -52,14 +39,10 @@ define(`up',`%rsi')
 define(`vp',`%rdx')
 define(`n', `%rcx')
 
-ABI_SUPPORT(DOS64)
-ABI_SUPPORT(STD64)
-
 ASM_START()
 	TEXT
 	ALIGN(16)
 PROLOGUE(mpn_sublsh1_n)
-	FUNC_ENTRY(4)
 	push	%rbx
 	push	%rbp
 
@@ -119,7 +102,7 @@ L(b01):	add	%r8, %r8
 L(ent):	jns	L(end)
 
 	ALIGN(16)
-L(top):	add	R32(%rax), R32(%rax)	C restore scy
+L(oop):	add	R32(%rax), R32(%rax)	C restore scy
 
 	mov	(vp,n,8), %r8
 L(b00):	adc	%r8, %r8
@@ -148,13 +131,12 @@ L(b00):	adc	%r8, %r8
 
 	sbb	R32(%rbp), R32(%rbp)	C save acy
 	add	$4, n
-	js	L(top)
+	js	L(oop)
 
 L(end):	add	R32(%rbp), R32(%rax)
 	neg	R32(%rax)
 
 	pop	%rbp
 	pop	%rbx
-	FUNC_EXIT()
 	ret
 EPILOGUE()
diff --git a/gmp/mpn/x86_64/x86_64-defs.m4 b/gmp/mpn/x86_64/x86_64-defs.m4
index 366598b41d..fc296c2a1e 100644
--- a/gmp/mpn/x86_64/x86_64-defs.m4
+++ b/gmp/mpn/x86_64/x86_64-defs.m4
@@ -2,78 +2,30 @@ divert(-1)
 
 dnl  m4 macros for amd64 assembler.
 
-dnl  Copyright 1999-2005, 2008, 2009, 2011-2013 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
+dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software
+dnl  Foundation, Inc.
 dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
-dnl  Usage: CPUVEC_FUNCS_LIST
+dnl  Notes:
 dnl
-dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
-dnl  order they appear in that structure.
-
-define(CPUVEC_FUNCS_LIST,
-``add_n',
-`addlsh1_n',
-`addlsh2_n',
-`addmul_1',
-`addmul_2',
-`bdiv_dbm1c',
-`cnd_add_n',
-`cnd_sub_n',
-`com',
-`copyd',
-`copyi',
-`divexact_1',
-`divrem_1',
-`gcd_1',
-`lshift',
-`lshiftc',
-`mod_1',
-`mod_1_1p',
-`mod_1_1p_cps',
-`mod_1s_2p',
-`mod_1s_2p_cps',
-`mod_1s_4p',
-`mod_1s_4p_cps',
-`mod_34lsub1',
-`modexact_1c_odd',
-`mul_1',
-`mul_basecase',
-`mullo_basecase',
-`preinv_divrem_1',
-`preinv_mod_1',
-`redc_1',
-`redc_2',
-`rshift',
-`sqr_basecase',
-`sub_n',
-`sublsh1_n',
-`submul_1'')
+dnl  The 32-bit mode x86/x86-defs.m4 has various 32bit-isms, like the
+dnl  profiling calls, so it seems cleanest to start a fresh set of defines
+dnl  for 64-bit mode.
 
 
 dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
@@ -135,11 +87,8 @@ define(`ASSERT_counter',incr(ASSERT_counter))')')')
 
 define(ASSERT_counter,1)
 
-define(`LEA',`dnl
-ifdef(`PIC',
-	`mov	$1@GOTPCREL(%rip), $2'
-,
-	`movabs	`$'$1, $2')
+define(`LEA',`
+	mov	$1@GOTPCREL(%rip), $2
 ')
 
 
@@ -192,163 +141,11 @@ define(`R8',
 dnl  Usage: CALL(funcname)
 dnl
 
-define(`CALL',`dnl
 ifdef(`PIC',
-	`call	GSYM_PREFIX`'$1@PLT'
-,
-	`call	GSYM_PREFIX`'$1'
-)')
+  `define(`CALL',`call	GSYM_PREFIX`'$1@PLT')',
+  `define(`CALL',`call	GSYM_PREFIX`'$1')')
 
 
 define(`JUMPTABSECT', `.section	.data.rel.ro.local,"aw",@progbits')
 
-
-dnl  Usage: JMPENT(targlabel,tablabel)
-
-define(`JMPENT',`dnl
-ifdef(`PIC',
-	`.long	$1-$2'
-,
-	`.quad	$1'
-)')
-
-
-dnl  These macros are defined just for DOS64, where they provide calling
-dnl  sequence glue code.
-
-define(`FUNC_ENTRY',`')
-define(`FUNC_EXIT',`')
-
-
-dnl  Target ABI macros.
-
-define(`IFDOS',   `')
-define(`IFSTD',   `$1')
-define(`IFELF',   `$1')
-
-
-dnl  Usage: PROTECT(symbol)
-dnl
-dnl  Used for private GMP symbols that should never be overridden by users.
-dnl  This can save reloc entries and improve shlib sharing as well as
-dnl  application startup times
-
-define(`PROTECT',  `.hidden $1')
-
-
-dnl  Usage: x86_lookup(target, key,value, key,value, ...)
-dnl
-dnl  Look for `target' among the `key' parameters.
-dnl
-dnl  x86_lookup expands to the corresponding `value', or generates an error
-dnl  if `target' isn't found.
-
-define(x86_lookup,
-m4_assert_numargs_range(1,999)
-`ifelse(eval($#<3),1,
-`m4_error(`unrecognised part of x86 instruction: $1
-')',
-`ifelse(`$1',`$2', `$3',
-`x86_lookup(`$1',shift(shift(shift($@))))')')')
-
-
-dnl  Usage: x86_opcode_regxmm(reg)
-dnl
-dnl  Validate the given xmm register, and return its number, 0 to 7.
-
-define(x86_opcode_regxmm,
-m4_assert_numargs(1)
-`x86_lookup(`$1',x86_opcode_regxmm_list)')
-
-define(x86_opcode_regxmm_list,
-``%xmm0',0,
-`%xmm1',1,
-`%xmm2',2,
-`%xmm3',3,
-`%xmm4',4,
-`%xmm5',5,
-`%xmm6',6,
-`%xmm7',7,
-`%xmm8',8,
-`%xmm9',9,
-`%xmm10',10,
-`%xmm11',11,
-`%xmm12',12,
-`%xmm13',13,
-`%xmm14',14,
-`%xmm15',15')
-
-dnl  Usage: palignr($imm,%srcreg,%dstreg)
-dnl
-dnl  Emit a palignr instruction, using a .byte sequence, since obsolete but
-dnl  still distributed versions of gas don't know SSSE3 instructions.
-
-define(`palignr',
-m4_assert_numargs(3)
-`.byte	0x66,dnl
-ifelse(eval(x86_opcode_regxmm($3) >= 8 || x86_opcode_regxmm($2) >= 8),1,
-       `eval(0x40+x86_opcode_regxmm($3)/8*4+x86_opcode_regxmm($2)/8),')dnl
-0x0f,0x3a,0x0f,dnl
-eval(0xc0+x86_opcode_regxmm($3)%8*8+x86_opcode_regxmm($2)%8),dnl
-substr($1,1)')
-
-
-dnl  Usage
-dnl
-dnl    regnum(op)   raw operand index (so slightly misnamed)
-dnl    regnumh(op)  high bit of register operand nimber
-dnl    ix(op)       0 for reg operand, 1 for plain pointer operand.
-dnl
-
-define(`regnum',`x86_lookup(`$1',oplist)')
-define(`regnumh',`eval(regnum($1)/8 & 1)')
-define(`ix',`eval(regnum($1)/16)')
-define(`oplist',
-``%rax',   0, `%rcx',   1, `%rdx',   2,  `%rbx',   3,
- `%rsp',   4, `%rbp',   5, `%rsi',   6,  `%rdi',   7,
- `%r8',    8, `%r9',    9, `%r10',  10,  `%r11',  11,
- `%r12',  12, `%r13',  13, `%r14',  14,  `%r15',  15,
- `(%rax)',16, `(%rcx)',17, `(%rdx)',18,  `(%rbx)',19,
- `(%rsp)',20, `(%rbp)',21, `(%rsi)',22,  `(%rdi)',23,
- `(%r8)', 24, `(%r9)', 25, `(%r10)',26,  `(%r11)',27,
- `(%r12)',28, `(%r13)',29, `(%r14)',30,  `(%r15)' 31')
-
-
-dnl  Usage
-dnl
-dnl     mulx(reg1,reg2,reg3)
-dnl
-dnl  or
-dnl
-dnl     mulx((reg1),reg2,reg3)
-dnl
-dnl  where reg1 is any register but rsp,rbp,r12,r13, or
-dnl
-dnl     mulx(off,(reg1),reg2,reg3)
-dnl
-dnl  where reg1 is any register but rsp,r12.
-dnl
-dnl  The exceptions are due to special coding needed for some registers; rsp
-dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
-dnl  offset-less form.
-dnl
-dnl  Other addressing forms are not handled.  Invalid forms are not properly
-dnl  detected.  Offsets that don't fit one byte are not handled correctly.
-
-define(`mulx',`dnl
-ifelse($#,3,
-`.byte	0xc4`'dnl
-,0x`'eval(0xe2^32*regnumh($1)^128*regnumh($3),16)`'dnl
-,0x`'eval(0xfb-8*regnum($2),16)`'dnl
-,0xf6`'dnl
-,0x`'eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1),16)`'dnl
-',$#,4,
-`.byte	0xc4`'dnl
-,0x`'eval(0xe2^32*regnumh($2)^128*regnumh($4),16)`'dnl
-,0x`'eval(0xfb-8*regnum($3),16)`'dnl
-,0xf6`'dnl
-,0x`'eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)),16)`'dnl
-,0x`'eval(($1 + 256) % 256,16)`'dnl
-')')
-
 divert`'dnl
diff --git a/gmp/mpn/z8000/README b/gmp/mpn/z8000/README
new file mode 100644
index 0000000000..e1cf22df42
--- /dev/null
+++ b/gmp/mpn/z8000/README
@@ -0,0 +1,45 @@
+Copyright 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+
+
+
+                      Z8000 MPN SUBROUTINES
+
+
+This directory contains mpn functions for the Zilog Z8000.
+
+
+STATUS
+
+This code is old and has not been used for a long time.
+
+mpn/z8000 uses a 16-bit limb.  It's possible this doesn't really work, on
+account of various bits of C code assuming limb>=long, and of course long is
+invariably at least 32 bits.
+
+mpn/z8000x uses a 32-bit limb; this could perhaps be an ABI choice.
+Currently it's reached only by an MPN_PATH override.
+
+
+
+----------------
+Local variables:
+mode: text
+fill-column: 76
+End:
diff --git a/gmp/mpn/z8000/add_n.s b/gmp/mpn/z8000/add_n.s
new file mode 100644
index 0000000000..89fbb1a280
--- /dev/null
+++ b/gmp/mpn/z8000/add_n.s
@@ -0,0 +1,51 @@
+! Z8000 __gmpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+! Copyright 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr	r7
+! s1_ptr	r6
+! s2_ptr	r5
+! size		r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp.  We'd
+! then add 2x the number of words written to r7...
+
+	unseg
+	.text
+	even
+	global ___gmpn_add_n
+___gmpn_add_n:
+	pop	r0,@r6		! r0 = first s1 limb (pop loads, then advances r6)
+	pop	r1,@r5		! r1 = first s2 limb (likewise advances r5)
+	add	r0,r1		! lowest limb: plain add, there is no carry-in yet
+	ld	@r7,r0		! store the sum through res_ptr
+	dec	r4		! one limb done
+	jr	eq,Lend		! size was 1: nothing left for the loop
+Loop:	pop	r0,@r6		! next s1 limb
+	pop	r1,@r5		! next s2 limb
+	adc	r0,r1		! consumes the carry left by the previous add/adc
+	inc	r7,#2		! res_ptr lags one limb behind, so step it before storing
+	ld	@r7,r0
+	dec	r4		! the carry from adc has to survive inc/ld/dec/jr
+	jr	ne,Loop		! repeat until size reaches 0
+Lend:	ld	r2,r4		! use 0 already in r4
+	adc	r2,r2		! r2 = 0 + 0 + carry out of the last limb
+	ret	t
diff --git a/gmp/mpn/z8000/gmp-mparam.h b/gmp/mpn/z8000/gmp-mparam.h
new file mode 100644
index 0000000000..f42e380a70
--- /dev/null
+++ b/gmp/mpn/z8000/gmp-mparam.h
@@ -0,0 +1,21 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#define BITS_PER_MP_LIMB 16
+#define BYTES_PER_MP_LIMB 2
diff --git a/gmp/mpn/z8000/mul_1.s b/gmp/mpn/z8000/mul_1.s
new file mode 100644
index 0000000000..fa92bc32bf
--- /dev/null
+++ b/gmp/mpn/z8000/mul_1.s
@@ -0,0 +1,66 @@
+! Z8000 __gmpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr	r7
+! s1_ptr	r6
+! size		r5
+! s2_limb	r4
+
+	unseg
+	.text
+	even
+	global ___gmpn_mul_1
+___gmpn_mul_1:
+	sub	r2,r2		! zero carry limb
+	and	r4,r4		! set flags from s2_limb
+	jr	mi,Lneg		! msb of s2_limb set: use the Lneg loop instead
+
+Lpos:	pop	r1,@r6		! r1 = next s1 limb (pop loads, then advances r6)
+	ld	r9,r1		! copy it into the low half of rr8 for mult
+	mult	rr8,r4		! r8 = hi_limb, r9 = lo_limb, before the fixups below
+	and	r1,r1		! set flags from the loaded limb for the jr mi below
+	jr	mi,Lp		! branch if loaded limb's msb is set
+	add	r8,r4		! hi_limb += sign_comp2 -- NOTE(review): runs only when the msb is clear; confirm the branch sense
+Lp:	add	r9,r2		! lo_limb += cy_limb
+	xor	r2,r2		! clear r2; the adc below still consumes the carry from the add above
+	adc	r2,r8		! cy_limb = hi_limb + carry
+	ld	@r7,r9		! store lo_limb through res_ptr
+	inc	r7,#2		! advance res_ptr by one 16-bit limb
+	dec	r5		! one limb done
+	jr	ne,Lpos		! repeat until size reaches 0
+	ret t			! the final cy_limb is left in r2
+
+Lneg:	pop	r1,@r6		! same loop as Lpos, plus the fixup for s2_limb's msb
+	ld	r9,r1		! copy the limb into the low half of rr8 for mult
+	mult	rr8,r4		! r8 = hi_limb, r9 = lo_limb, before the fixups below
+	add	r8,r1		! hi_limb += sign_comp1
+	and	r1,r1		! set flags from the loaded limb for the jr mi below
+	jr	mi,Ln		! skip the second fixup when the limb's msb is set
+	add	r8,r4		! hi_limb += sign_comp2 -- NOTE(review): same branch-sense question as in Lpos
+Ln:	add	r9,r2		! lo_limb += cy_limb
+	xor	r2,r2		! clear r2; the adc below still consumes the carry from the add above
+	adc	r2,r8		! cy_limb = hi_limb + carry
+	ld	@r7,r9		! store lo_limb through res_ptr
+	inc	r7,#2		! advance res_ptr by one 16-bit limb
+	dec	r5		! one limb done
+	jr	ne,Lneg		! repeat until size reaches 0
+	ret t			! the final cy_limb is left in r2
diff --git a/gmp/mpn/z8000/sub_n.s b/gmp/mpn/z8000/sub_n.s
new file mode 100644
index 0000000000..1dbd83760e
--- /dev/null
+++ b/gmp/mpn/z8000/sub_n.s
@@ -0,0 +1,52 @@
+! Z8000 __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr	r7
+! s1_ptr	r6
+! s2_ptr	r5
+! size		r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp.  We'd
+! then add 2x the number of words written to r7...
+
+	unseg
+	.text
+	even
+	global ___gmpn_sub_n
+___gmpn_sub_n:
+	pop	r0,@r6		! r0 = first s1 limb (pop loads, then advances r6)
+	pop	r1,@r5		! r1 = first s2 limb (likewise advances r5)
+	sub	r0,r1		! lowest limb: plain subtract, no borrow-in yet
+	ld	@r7,r0		! store the difference through res_ptr
+	dec	r4		! one limb done
+	jr	eq,Lend		! size was 1: nothing left for the loop
+Loop:	pop	r0,@r6		! next s1 limb
+	pop	r1,@r5		! next s2 limb
+	sbc	r0,r1		! consumes the carry flag left by the previous sub/sbc
+	inc	r7,#2		! res_ptr lags one limb behind, so step it before storing
+	ld	@r7,r0
+	dec	r4		! the flag from sbc has to survive inc/ld/dec/jr
+	jr	ne,Loop		! repeat until size reaches 0
+Lend:	ld	r2,r4		! use 0 already in r4
+	adc	r2,r2		! r2 = 0 + 0 + carry flag left by the last sub/sbc
+	ret	t
diff --git a/gmp/mpn/z8000x/add_n.s b/gmp/mpn/z8000x/add_n.s
new file mode 100644
index 0000000000..26b47e278b
--- /dev/null
+++ b/gmp/mpn/z8000x/add_n.s
@@ -0,0 +1,54 @@
+! Z8000 (32 bit limb version) __gmpn_add_n -- Add two limb vectors of equal,
+! non-zero length.
+
+! Copyright 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr	r7
+! s1_ptr	r6
+! s2_ptr	r5
+! size		r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp.  We'd
+! then add 2x the number of words written to r7...
+
+	segm
+	.text
+	even
+	global ___gmpn_add_n
+___gmpn_add_n:
+	popl	rr0,@r6		! rr0 = first s1 limb (popl loads, then advances r6)
+	popl	rr8,@r5		! rr8 = first s2 limb (likewise advances r5)
+	addl	rr0,rr8		! lowest limb: 32-bit add, there is no carry-in yet
+	ldl	@r7,rr0		! store the sum through res_ptr
+	dec	r4		! one limb done
+	jr	eq,Lend		! size was 1: nothing left for the loop
+Loop:	popl	rr0,@r6		! next s1 limb
+	popl	rr8,@r5		! next s2 limb
+	adc	r1,r9		! add in two 16-bit steps: r1/r9 first, taking the carry-in
+	adc	r0,r8		! then r0/r8, taking the carry from the step above
+	inc	r7,#4		! res_ptr lags one limb behind, so step it before storing
+	ldl	@r7,rr0
+	dec	r4		! the carry from adc has to survive inc/ldl/dec/jr
+	jr	ne,Loop		! repeat until size reaches 0
+Lend:	ld	r2,r4		! use 0 already in r4
+	ld	r3,r4		! r3 = 0 as well, so rr2 starts out as zero
+	adc	r2,r2		! r2 = carry, r3 stays 0 -- NOTE(review): confirm r2 is the half the caller reads
+	ret	t
diff --git a/gmp/mpn/z8000x/sub_n.s b/gmp/mpn/z8000x/sub_n.s
new file mode 100644
index 0000000000..837ecef0cf
--- /dev/null
+++ b/gmp/mpn/z8000x/sub_n.s
@@ -0,0 +1,54 @@
+! Z8000 (32 bit limb version) __gmpn_sub_n -- Subtract two limb vectors of the
+! same length > 0 and store difference in a third limb vector.
+
+! Copyright 1993, 1994, 2000 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Lesser General Public License as published by
+! the Free Software Foundation; either version 3 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+! License for more details.
+
+! You should have received a copy of the GNU Lesser General Public License
+! along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
+
+
+! INPUT PARAMETERS
+! res_ptr	r7
+! s1_ptr	r6
+! s2_ptr	r5
+! size		r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp.  We'd
+! then add 2x the number of words written to r7...
+
+	segm
+	.text
+	even
+	global ___gmpn_sub_n
+___gmpn_sub_n:
+	popl	rr0,@r6		! rr0 = first s1 limb (popl loads, then advances r6)
+	popl	rr8,@r5		! rr8 = first s2 limb (likewise advances r5)
+	subl	rr0,rr8		! lowest limb: 32-bit subtract, no borrow-in yet
+	ldl	@r7,rr0		! store the difference through res_ptr
+	dec	r4		! one limb done
+	jr	eq,Lend		! size was 1: nothing left for the loop
+Loop:	popl	rr0,@r6		! next s1 limb
+	popl	rr8,@r5		! next s2 limb
+	sbc	r1,r9		! subtract in two 16-bit steps: r1/r9 first, taking the borrow-in
+	sbc	r0,r8		! then r0/r8, taking the borrow from the step above
+	inc	r7,#4		! res_ptr lags one limb behind, so step it before storing
+	ldl	@r7,rr0
+	dec	r4		! the flag from sbc has to survive inc/ldl/dec/jr
+	jr	ne,Loop		! repeat until size reaches 0
+Lend:	ld	r2,r4		! use 0 already in r4
+	ld	r3,r4		! r3 = 0 as well, so rr2 starts out as zero
+	adc	r2,r2		! r2 = carry flag, r3 stays 0 -- NOTE(review): confirm r2 is the half the caller reads
+	ret	t
author	Pedro Alvarez <pedro.alvarez@codethink.co.uk>	2016-05-27 17:39:31 +0100
committer	Pedro Alvarez <pedro.alvarez@codethink.co.uk>	2016-05-27 17:53:32 +0100
commit	26c75cf8267919f81a1759c9c965a52c660233f9 (patch)
tree	cf2a39cf56c2c8ac45760854413ab233e6263974 /gmp/mpn
parent	56892c1d217baea02092b51a09bbc924130ca84c (diff)
download	gcc-tarball-baserock/pedroalvarez/gcc-5.3.0-gmp432.tar.gz