55 files changed, 1939 insertions, 5926 deletions
diff --git a/gmp/tune/Makefile.am b/gmp/tune/Makefile.am
index bbe503a201..3c7f62c820 100644
--- a/gmp/tune/Makefile.am
+++ b/gmp/tune/Makefile.am
@@ -1,32 +1,21 @@
 ## Process this file with automake to generate Makefile.in
 
-# Copyright 2000-2003, 2005-2011 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
 #
-#  This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or
-#
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
@@ -52,14 +41,13 @@ EXTRA_LTLIBRARIES = libspeed.la
 
 libspeed_la_SOURCES =							\
   common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c		\
-  div_qr_1n_pi1_1.c div_qr_1n_pi1_2.c div_qr_1_tune.c			\
-  freq.c								\
+  freq.c					\
   gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c			\
-  hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c	\
-  jacbase1.c jacbase2.c jacbase3.c jacbase4.c				\
-  mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c		\
+  jacbase1.c jacbase2.c jacbase3.c					\
+  mod_1_div.c mod_1_inv.c modlinv.c					\
   noop.c powm_mod.c powm_redc.c pre_divrem_1.c				\
-  set_strb.c set_strs.c set_strp.c time.c
+  set_strb.c set_strs.c set_strp.c time.c						\
+  sb_div.c sb_inv.c
 
 libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \
   $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
@@ -81,10 +69,10 @@ $(top_builddir)/tests/libtests.la:
 # program.  This can always be forced with "make speed_LDFLAGS=-all-static
 # ..." if desired, see tune/README.
 
-EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup tune-gcd-p
+EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup
 
 DEPENDENCIES = libspeed.la
-LDADD = $(DEPENDENCIES) $(TUNE_LIBS)
+LDADD = $(DEPENDENCIES)
 
 speed_SOURCES = speed.c
 speed_LDFLAGS = $(STATIC)
@@ -95,15 +83,11 @@ speed_ext_SOURCES = speed-ext.c
 speed_ext_LDFLAGS = $(STATIC)
 
 tuneup_SOURCES = tuneup.c
-nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS)
+nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS)
 tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
-tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS)
+tuneup_LDADD = $(tuneup_DEPENDENCIES)
 tuneup_LDFLAGS = $(STATIC)
 
-tune_gcd_p_SOURCES = tune-gcd-p.c
-tune_gcd_p_DEPENDENCIES = ../mpn/gcd.c
-tune_gcd_p_LDFLAGS = $(STATIC)
-
 
 tune:
 	$(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT)
@@ -113,7 +97,7 @@ allprogs: $(EXTRA_PROGRAMS)
 
 # $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
 CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
-	$(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \
+	$(TUNE_MPN_SRCS) sqr_asm.asm \
 	stg.gnuplot stg.data \
 	mtg.gnuplot mtg.data \
 	fibg.gnuplot fibg.data \
@@ -139,16 +123,9 @@ DISTCLEANFILES = sqr_basecase.c  $(MANY_DISTCLEAN)
 # recompiled object will be rebuilt if that file changes.
 
 TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
-TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c			\
-  dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c	\
-  invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c		\
-  get_str.c set_str.c matrix22_mul.c					\
-  hgcd.c hgcd_appr.c hgcd_reduce.c					\
-  mul_n.c sqr.c sec_powm.c						\
-  mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c	\
-  mulmid.c mulmid_n.c toom42_mulmid.c					\
-  nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c	\
-  toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
+TUNE_MPN_SRCS_BASIC = dc_divrem_n.c divrem_2.c gcd.c gcdext.c get_str.c \
+  set_str.c matrix22_mul.c hgcd.c mul_n.c toom44_mul.c toom4_sqr.c	\
+  mullow_n.c mul_fft.c mul.c sb_divrem_mn.c tdiv_qr.c
 
 $(TUNE_MPN_SRCS_BASIC):
 	for i in $(TUNE_MPN_SRCS_BASIC); do \
@@ -167,15 +144,60 @@ mod_1.c:
 	echo "#include \"mpn/generic/mod_1.c\""     >>mod_1.c
 
 sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm
-	echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm
+	echo 'define(SQR_KARATSUBA_THRESHOLD_OVERRIDE,SQR_KARATSUBA_THRESHOLD_MAX)' >sqr_asm.asm
 	echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm
 
-# FIXME: Should it depend on $(top_builddir)/fac_ui.h too?
-fac_ui.c: $(top_builddir)/mpz/fac_ui.c
-	echo "#define TUNE_PROGRAM_BUILD 1"          >fac_ui.c
-	echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c
-	echo "#define __gmpz_oddfac_1 mpz_oddfac_1_tune" >>fac_ui.c
-	echo "#include \"mpz/oddfac_1.c\""           >>fac_ui.c
-	echo "#include \"mpz/fac_ui.c\""             >>fac_ui.c
 
 include ../mpn/Makeasm.am
+
+
+# "mk" is multiplication in the karatsuba range
+# "st" is squaring in the toom-cook range, etc
+# "g" forms produce graphs
+
+mk:
+	./speed -s 5-40 -c mpn_mul_basecase mpn_kara_mul_n
+
+MTS = -s 50-150 -c
+mt:
+	./speed $(MTS) mpn_kara_mul_n mpn_toom3_mul_n
+mtg:
+	./speed $(MTS) -P mtg mpn_kara_mul_n mpn_toom3_mul_n
+
+sk:
+	./speed -s 5-40 -c mpn_sqr_basecase mpn_kara_sqr_n
+
+STS = -s 50-150 -c
+st:
+	./speed $(STS) mpn_kara_sqr_n mpn_toom3_sqr_n
+stg:
+	./speed $(STS) -P stg mpn_kara_sqr_n mpn_toom3_sqr_n
+
+dc:
+	./speed -s 5-40 -c mpn_dc_divrem_sb mpn_dc_divrem_n mpn_dc_tdiv_qr
+
+fib:
+	./speed -s 40-60 -c mpz_fib_ui
+fibg:
+	./speed -s 10-300 -P fibg mpz_fib_ui
+
+
+gcd:
+	./speed -s 1-20 -c mpn_gcd
+
+udiv:
+	./speed -s 1 -c udiv_qrnnd udiv_qrnnd_preinv udiv_qrnnd_preinv2norm invert_limb udiv_qrnnd_c
+
+divn:
+	./speed -s 1-30 -c mpn_divrem_1_div.-1 mpn_divrem_1_inv.-1
+divun:
+	./speed -s 1-30 -c mpn_divrem_1_div.12345 mpn_divrem_1_inv.12345
+modn:
+	./speed -s 1-30 -c mpn_mod_1_div.-1 mpn_mod_1_inv.-1
+modun:
+	./speed -s 1-30 -c mpn_mod_1_div.12345 mpn_mod_1_inv.12345
+
+
+graph:
+	./speed -s 1-5000 -f 1.02 -P graph mpn_mul_n mpn_sqr
+	gnuplot graph.gnuplot
diff --git a/gmp/tune/Makefile.in b/gmp/tune/Makefile.in
index 07abb022a3..c3698319d9 100644
--- a/gmp/tune/Makefile.in
+++ b/gmp/tune/Makefile.in
@@ -1,9 +1,8 @@
-# Makefile.in generated by automake 1.11.6 from Makefile.am.
+# Makefile.in generated by automake 1.8.4 from Makefile.am.
 # @configure_input@
 
 # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
+# 2003, 2004  Free Software Foundation, Inc.
 # This Makefile.in is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
@@ -15,85 +14,52 @@
 
 @SET_MAKE@
 
-# Copyright 2000-2003, 2005-2011 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
 #
-#  This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or
-#
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
-# Copyright 1996, 1998-2002 Free Software Foundation, Inc.
-#
-#  This file is part of the GNU MP Library.
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
+# Inc.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# This file is part of the GNU MP Library.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
 #
-#  or
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
+SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) $(speed_dynamic_SOURCES) $(speed_ext_SOURCES) $(tuneup_SOURCES) $(nodist_tuneup_SOURCES)
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
 VPATH = @srcdir@
-am__make_dryrun = \
-  { \
-    am__dry=no; \
-    case $$MAKEFLAGS in \
-      *\\[\ \	]*) \
-        echo 'am--echo: ; @echo "AM"  OK' | $(MAKE) -f - 2>/dev/null \
-          | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
-      *) \
-        for am__flg in $$MAKEFLAGS; do \
-          case $$am__flg in \
-            *=*|--*) ;; \
-            *n*) am__dry=yes; break;; \
-          esac; \
-        done;; \
-    esac; \
-    test $$am__dry = yes; \
-  }
 pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
 pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
 am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
 install_sh_DATA = $(install_sh) -c -m 644
 install_sh_PROGRAM = $(install_sh) -c
 install_sh_SCRIPT = $(install_sh) -c
@@ -105,110 +71,74 @@ POST_INSTALL = :
 NORMAL_UNINSTALL = :
 PRE_UNINSTALL = :
 POST_UNINSTALL = :
-build_triplet = @build@
 host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
 EXTRA_PROGRAMS = speed$(EXEEXT) speed-dynamic$(EXEEXT) \
-	speed-ext$(EXEEXT) tuneup$(EXEEXT) tune-gcd-p$(EXEEXT)
+	speed-ext$(EXEEXT) tuneup$(EXEEXT)
 DIST_COMMON = README $(noinst_HEADERS) $(srcdir)/../mpn/Makeasm.am \
 	$(srcdir)/Makefile.am $(srcdir)/Makefile.in
 subdir = tune
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
-	$(top_srcdir)/configure.ac
+	$(top_srcdir)/configure.in
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
 	$(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
+mkinstalldirs = $(mkdir_p)
 CONFIG_HEADER = $(top_builddir)/config.h
 CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
 am__DEPENDENCIES_1 =
 am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) \
 	$(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
-am_libspeed_la_OBJECTS = common.lo divrem1div.lo divrem1inv.lo \
-	divrem2div.lo divrem2inv.lo div_qr_1n_pi1_1.lo \
-	div_qr_1n_pi1_2.lo div_qr_1_tune.lo freq.lo gcdext_single.lo \
-	gcdext_double.lo gcdextod.lo gcdextos.lo hgcd_lehmer.lo \
-	hgcd_appr_lehmer.lo hgcd_reduce_1.lo hgcd_reduce_2.lo \
-	jacbase1.lo jacbase2.lo jacbase3.lo jacbase4.lo mod_1_div.lo \
-	mod_1_inv.lo mod_1_1-1.lo mod_1_1-2.lo modlinv.lo noop.lo \
-	powm_mod.lo powm_redc.lo pre_divrem_1.lo set_strb.lo \
-	set_strs.lo set_strp.lo time.lo
+am_libspeed_la_OBJECTS = common$U.lo divrem1div$U.lo divrem1inv$U.lo \
+	divrem2div$U.lo divrem2inv$U.lo freq$U.lo gcdext_single$U.lo \
+	gcdext_double$U.lo gcdextod$U.lo gcdextos$U.lo jacbase1$U.lo \
+	jacbase2$U.lo jacbase3$U.lo mod_1_div$U.lo mod_1_inv$U.lo \
+	modlinv$U.lo noop$U.lo powm_mod$U.lo powm_redc$U.lo \
+	pre_divrem_1$U.lo set_strb$U.lo set_strs$U.lo set_strp$U.lo \
+	time$U.lo sb_div$U.lo sb_inv$U.lo
 libspeed_la_OBJECTS = $(am_libspeed_la_OBJECTS)
-libspeed_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
-	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
-	$(libspeed_la_LDFLAGS) $(LDFLAGS) -o $@
-am_speed_OBJECTS = speed.$(OBJEXT)
+am_speed_OBJECTS = speed$U.$(OBJEXT)
 speed_OBJECTS = $(am_speed_OBJECTS)
 speed_LDADD = $(LDADD)
-speed_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
-speed_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(speed_LDFLAGS) \
-	$(LDFLAGS) -o $@
-am_speed_dynamic_OBJECTS = speed.$(OBJEXT)
+am__DEPENDENCIES_3 = libspeed.la
+speed_DEPENDENCIES = $(am__DEPENDENCIES_3)
+am_speed_dynamic_OBJECTS = speed$U.$(OBJEXT)
 speed_dynamic_OBJECTS = $(am_speed_dynamic_OBJECTS)
 speed_dynamic_LDADD = $(LDADD)
-speed_dynamic_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
-am_speed_ext_OBJECTS = speed-ext.$(OBJEXT)
+speed_dynamic_DEPENDENCIES = $(am__DEPENDENCIES_3)
+am_speed_ext_OBJECTS = speed-ext$U.$(OBJEXT)
 speed_ext_OBJECTS = $(am_speed_ext_OBJECTS)
 speed_ext_LDADD = $(LDADD)
-speed_ext_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
-speed_ext_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
-	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
-	$(speed_ext_LDFLAGS) $(LDFLAGS) -o $@
-am_tune_gcd_p_OBJECTS = tune-gcd-p.$(OBJEXT)
-tune_gcd_p_OBJECTS = $(am_tune_gcd_p_OBJECTS)
-tune_gcd_p_LDADD = $(LDADD)
-tune_gcd_p_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
-	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
-	$(tune_gcd_p_LDFLAGS) $(LDFLAGS) -o $@
-am_tuneup_OBJECTS = tuneup.$(OBJEXT)
-am__objects_1 = div_qr_2.$(OBJEXT) bdiv_q.$(OBJEXT) bdiv_qr.$(OBJEXT) \
-	dcpi1_div_qr.$(OBJEXT) dcpi1_divappr_q.$(OBJEXT) \
-	dcpi1_bdiv_qr.$(OBJEXT) dcpi1_bdiv_q.$(OBJEXT) \
-	invertappr.$(OBJEXT) invert.$(OBJEXT) binvert.$(OBJEXT) \
-	divrem_2.$(OBJEXT) gcd.$(OBJEXT) gcdext.$(OBJEXT) \
-	get_str.$(OBJEXT) set_str.$(OBJEXT) matrix22_mul.$(OBJEXT) \
-	hgcd.$(OBJEXT) hgcd_appr.$(OBJEXT) hgcd_reduce.$(OBJEXT) \
-	mul_n.$(OBJEXT) sqr.$(OBJEXT) sec_powm.$(OBJEXT) \
-	mullo_n.$(OBJEXT) mul_fft.$(OBJEXT) mul.$(OBJEXT) \
-	tdiv_qr.$(OBJEXT) mulmod_bnm1.$(OBJEXT) sqrmod_bnm1.$(OBJEXT) \
-	mulmid.$(OBJEXT) mulmid_n.$(OBJEXT) toom42_mulmid.$(OBJEXT) \
-	nussbaumer_mul.$(OBJEXT) toom6h_mul.$(OBJEXT) \
-	toom8h_mul.$(OBJEXT) toom6_sqr.$(OBJEXT) toom8_sqr.$(OBJEXT) \
-	toom22_mul.$(OBJEXT) toom2_sqr.$(OBJEXT) toom33_mul.$(OBJEXT) \
-	toom3_sqr.$(OBJEXT) toom44_mul.$(OBJEXT) toom4_sqr.$(OBJEXT)
-am__objects_2 = $(am__objects_1) divrem_1.$(OBJEXT) mod_1.$(OBJEXT)
-nodist_tuneup_OBJECTS = sqr_basecase.$(OBJEXT) fac_ui.$(OBJEXT) \
-	$(am__objects_2)
+speed_ext_DEPENDENCIES = $(am__DEPENDENCIES_3)
+am_tuneup_OBJECTS = tuneup$U.$(OBJEXT)
+am__objects_1 = dc_divrem_n$U.$(OBJEXT) divrem_2$U.$(OBJEXT) \
+	gcd$U.$(OBJEXT) gcdext$U.$(OBJEXT) get_str$U.$(OBJEXT) \
+	set_str$U.$(OBJEXT) matrix22_mul$U.$(OBJEXT) hgcd$U.$(OBJEXT) \
+	mul_n$U.$(OBJEXT) toom44_mul$U.$(OBJEXT) toom4_sqr$U.$(OBJEXT) \
+	mullow_n$U.$(OBJEXT) mul_fft$U.$(OBJEXT) mul$U.$(OBJEXT) \
+	sb_divrem_mn$U.$(OBJEXT) tdiv_qr$U.$(OBJEXT)
+am__objects_2 = $(am__objects_1) divrem_1$U.$(OBJEXT) \
+	mod_1$U.$(OBJEXT)
+nodist_tuneup_OBJECTS = sqr_basecase$U.$(OBJEXT) $(am__objects_2)
 tuneup_OBJECTS = $(am_tuneup_OBJECTS) $(nodist_tuneup_OBJECTS)
-am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) libspeed.la
-tuneup_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(tuneup_LDFLAGS) \
-	$(LDFLAGS) -o $@
-DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+am__DEPENDENCIES_4 = $(am__DEPENDENCIES_1) libspeed.la
+DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
 depcomp =
 am__depfiles_maybe =
 COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
 	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
-	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
 CCLD = $(CC)
-LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
-	--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
-	$(LDFLAGS) -o $@
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
 SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
 	$(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
-	$(tune_gcd_p_SOURCES) $(tuneup_SOURCES) \
-	$(nodist_tuneup_SOURCES)
+	$(tuneup_SOURCES) $(nodist_tuneup_SOURCES)
 DIST_SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
 	$(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
-	$(tune_gcd_p_SOURCES) $(tuneup_SOURCES)
-am__can_run_installinfo = \
-  case $$AM_UPDATE_INFO_DIR in \
-    n|no|NO) false;; \
-    *) (install-info --version) >/dev/null 2>&1;; \
-  esac
+	$(tuneup_SOURCES)
 HEADERS = $(noinst_HEADERS)
 ETAGS = etags
 CTAGS = ctags
@@ -223,6 +153,7 @@ AUTOCONF = @AUTOCONF@
 AUTOHEADER = @AUTOHEADER@
 AUTOMAKE = @AUTOMAKE@
 AWK = @AWK@
+BITS_PER_MP_LIMB = @BITS_PER_MP_LIMB@
 CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
 CC = @CC@
 CCAS = @CCAS@
@@ -238,17 +169,16 @@ CYGPATH_W = @CYGPATH_W@
 DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
 DEFS = @DEFS@
 DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
+ECHO = @ECHO@
 ECHO_C = @ECHO_C@
 ECHO_N = @ECHO_N@
 ECHO_T = @ECHO_T@
 EGREP = @EGREP@
+ENABLE_STATIC_FALSE = @ENABLE_STATIC_FALSE@
+ENABLE_STATIC_TRUE = @ENABLE_STATIC_TRUE@
 EXEEXT = @EXEEXT@
 EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
-FGREP = @FGREP@
 GMP_LDFLAGS = @GMP_LDFLAGS@
-GMP_LIMB_BITS = @GMP_LIMB_BITS@
 GMP_NAIL_BITS = @GMP_NAIL_BITS@
 GREP = @GREP@
 HAVE_CLOCK_01 = @HAVE_CLOCK_01@
@@ -262,12 +192,10 @@ HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
 HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
 HAVE_STACK_T_01 = @HAVE_STACK_T_01@
 HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
-INSTALL = @INSTALL@
 INSTALL_DATA = @INSTALL_DATA@
 INSTALL_PROGRAM = @INSTALL_PROGRAM@
 INSTALL_SCRIPT = @INSTALL_SCRIPT@
 INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LD = @LD@
 LDFLAGS = @LDFLAGS@
 LEX = @LEX@
 LEXLIB = @LEXLIB@
@@ -282,26 +210,20 @@ LIBOBJS = @LIBOBJS@
 LIBREADLINE = @LIBREADLINE@
 LIBS = @LIBS@
 LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
 LN_S = @LN_S@
 LTLIBOBJS = @LTLIBOBJS@
 M4 = @M4@
 MAINT = @MAINT@
+MAINTAINER_MODE_FALSE = @MAINTAINER_MODE_FALSE@
+MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_TRUE@
 MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
 OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
 PACKAGE = @PACKAGE@
 PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
 PACKAGE_NAME = @PACKAGE_NAME@
 PACKAGE_STRING = @PACKAGE_STRING@
 PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
 PACKAGE_VERSION = @PACKAGE_VERSION@
 PATH_SEPARATOR = @PATH_SEPARATOR@
 RANLIB = @RANLIB@
@@ -311,31 +233,26 @@ SHELL = @SHELL@
 SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
 STRIP = @STRIP@
 TAL_OBJECT = @TAL_OBJECT@
-TUNE_LIBS = @TUNE_LIBS@
 TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
 U_FOR_BUILD = @U_FOR_BUILD@
 VERSION = @VERSION@
+WANT_CXX_FALSE = @WANT_CXX_FALSE@
+WANT_CXX_TRUE = @WANT_CXX_TRUE@
+WANT_MPBSD_FALSE = @WANT_MPBSD_FALSE@
+WANT_MPBSD_TRUE = @WANT_MPBSD_TRUE@
 WITH_READLINE_01 = @WITH_READLINE_01@
 YACC = @YACC@
 YFLAGS = @YFLAGS@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
 ac_ct_CC = @ac_ct_CC@
 ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
 am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
 bindir = @bindir@
 build = @build@
 build_alias = @build_alias@
 build_cpu = @build_cpu@
 build_os = @build_os@
 build_vendor = @build_vendor@
-builddir = @builddir@
 datadir = @datadir@
 datarootdir = @datarootdir@
 docdir = @docdir@
@@ -359,6 +276,7 @@ mandir = @mandir@
 mkdir_p = @mkdir_p@
 mpn_objects = @mpn_objects@
 mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
 oldincludedir = @oldincludedir@
 pdfdir = @pdfdir@
 prefix = @prefix@
@@ -366,12 +284,8 @@ program_transform_name = @program_transform_name@
 psdir = @psdir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
 sysconfdir = @sysconfdir@
 target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
 INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
 EXTRA_DIST = alpha.asm pentium.asm sparcv9.asm hppa.asm hppa2.asm hppa2w.asm \
   ia64.asm powerpc.asm powerpc64.asm x86_64.asm many.pl
@@ -389,14 +303,13 @@ noinst_HEADERS = speed.h
 EXTRA_LTLIBRARIES = libspeed.la
 libspeed_la_SOURCES = \
   common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c		\
-  div_qr_1n_pi1_1.c div_qr_1n_pi1_2.c div_qr_1_tune.c			\
-  freq.c								\
+  freq.c					\
   gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c			\
-  hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c	\
-  jacbase1.c jacbase2.c jacbase3.c jacbase4.c				\
-  mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c		\
+  jacbase1.c jacbase2.c jacbase3.c					\
+  mod_1_div.c mod_1_inv.c modlinv.c					\
   noop.c powm_mod.c powm_redc.c pre_divrem_1.c				\
-  set_strb.c set_strs.c set_strp.c time.c
+  set_strb.c set_strs.c set_strp.c time.c						\
+  sb_div.c sb_inv.c
 
 libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \
   $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
@@ -404,24 +317,21 @@ libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \
 libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(LIBM)
 libspeed_la_LDFLAGS = $(STATIC)
 DEPENDENCIES = libspeed.la
-LDADD = $(DEPENDENCIES) $(TUNE_LIBS)
+LDADD = $(DEPENDENCIES)
 speed_SOURCES = speed.c
 speed_LDFLAGS = $(STATIC)
 speed_dynamic_SOURCES = speed.c
 speed_ext_SOURCES = speed-ext.c
 speed_ext_LDFLAGS = $(STATIC)
 tuneup_SOURCES = tuneup.c
-nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS)
+nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS)
 tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
-tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS)
+tuneup_LDADD = $(tuneup_DEPENDENCIES)
 tuneup_LDFLAGS = $(STATIC)
-tune_gcd_p_SOURCES = tune-gcd-p.c
-tune_gcd_p_DEPENDENCIES = ../mpn/gcd.c
-tune_gcd_p_LDFLAGS = $(STATIC)
 
 # $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
 CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
-	$(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \
+	$(TUNE_MPN_SRCS) sqr_asm.asm \
 	stg.gnuplot stg.data \
 	mtg.gnuplot mtg.data \
 	fibg.gnuplot fibg.data \
@@ -446,16 +356,9 @@ DISTCLEANFILES = sqr_basecase.c  $(MANY_DISTCLEAN)
 # FIXME: Would like say mul_n.c to depend on $(top_builddir)/mul_n.c so the
 # recompiled object will be rebuilt if that file changes.
 TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
-TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c			\
-  dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c	\
-  invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c		\
-  get_str.c set_str.c matrix22_mul.c					\
-  hgcd.c hgcd_appr.c hgcd_reduce.c					\
-  mul_n.c sqr.c sec_powm.c						\
-  mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c	\
-  mulmid.c mulmid_n.c toom42_mulmid.c					\
-  nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c	\
-  toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
+TUNE_MPN_SRCS_BASIC = dc_divrem_n.c divrem_2.c gcd.c gcdext.c get_str.c \
+  set_str.c matrix22_mul.c hgcd.c mul_n.c toom44_mul.c toom4_sqr.c	\
+  mullow_n.c mul_fft.c mul.c sb_divrem_mn.c tdiv_qr.c
 
 
 # COMPILE minus CC.
@@ -486,6 +389,8 @@ SUFFIXES = .s .S .asm
 
 # can be overridden during development, eg. "make RM_TMP=: mul_1.lo"
 RM_TMP = rm -f
+MTS = -s 50-150 -c
+STS = -s 50-150 -c
 all: all-am
 
 .SUFFIXES:
@@ -494,14 +399,14 @@ $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/..
 	@for dep in $?; do \
 	  case '$(am__configure_deps)' in \
 	    *$$dep*) \
-	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
-	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+		&& exit 0; \
 	      exit 1;; \
 	  esac; \
 	done; \
-	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tune/Makefile'; \
-	$(am__cd) $(top_srcdir) && \
-	  $(AUTOMAKE) --gnu --ignore-deps tune/Makefile
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu  --ignore-deps tune/Makefile'; \
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu  --ignore-deps tune/Makefile
 .PRECIOUS: Makefile
 Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
 	@case '$?' in \
@@ -511,7 +416,6 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
 	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
 	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
 	esac;
-$(srcdir)/../mpn/Makeasm.am:
 
 $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
 	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
@@ -520,30 +424,31 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
 	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
 $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
 	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-libspeed.la: $(libspeed_la_OBJECTS) $(libspeed_la_DEPENDENCIES) $(EXTRA_libspeed_la_DEPENDENCIES) 
-	$(libspeed_la_LINK)  $(libspeed_la_OBJECTS) $(libspeed_la_LIBADD) $(LIBS)
-speed$(EXEEXT): $(speed_OBJECTS) $(speed_DEPENDENCIES) $(EXTRA_speed_DEPENDENCIES) 
+libspeed.la: $(libspeed_la_OBJECTS) $(libspeed_la_DEPENDENCIES) 
+	$(LINK)  $(libspeed_la_LDFLAGS) $(libspeed_la_OBJECTS) $(libspeed_la_LIBADD) $(LIBS)
+speed$(EXEEXT): $(speed_OBJECTS) $(speed_DEPENDENCIES) 
 	@rm -f speed$(EXEEXT)
-	$(speed_LINK) $(speed_OBJECTS) $(speed_LDADD) $(LIBS)
-speed-dynamic$(EXEEXT): $(speed_dynamic_OBJECTS) $(speed_dynamic_DEPENDENCIES) $(EXTRA_speed_dynamic_DEPENDENCIES) 
+	$(LINK) $(speed_LDFLAGS) $(speed_OBJECTS) $(speed_LDADD) $(LIBS)
+speed-dynamic$(EXEEXT): $(speed_dynamic_OBJECTS) $(speed_dynamic_DEPENDENCIES) 
 	@rm -f speed-dynamic$(EXEEXT)
-	$(LINK) $(speed_dynamic_OBJECTS) $(speed_dynamic_LDADD) $(LIBS)
-speed-ext$(EXEEXT): $(speed_ext_OBJECTS) $(speed_ext_DEPENDENCIES) $(EXTRA_speed_ext_DEPENDENCIES) 
+	$(LINK) $(speed_dynamic_LDFLAGS) $(speed_dynamic_OBJECTS) $(speed_dynamic_LDADD) $(LIBS)
+speed-ext$(EXEEXT): $(speed_ext_OBJECTS) $(speed_ext_DEPENDENCIES) 
 	@rm -f speed-ext$(EXEEXT)
-	$(speed_ext_LINK) $(speed_ext_OBJECTS) $(speed_ext_LDADD) $(LIBS)
-tune-gcd-p$(EXEEXT): $(tune_gcd_p_OBJECTS) $(tune_gcd_p_DEPENDENCIES) $(EXTRA_tune_gcd_p_DEPENDENCIES) 
-	@rm -f tune-gcd-p$(EXEEXT)
-	$(tune_gcd_p_LINK) $(tune_gcd_p_OBJECTS) $(tune_gcd_p_LDADD) $(LIBS)
-tuneup$(EXEEXT): $(tuneup_OBJECTS) $(tuneup_DEPENDENCIES) $(EXTRA_tuneup_DEPENDENCIES) 
+	$(LINK) $(speed_ext_LDFLAGS) $(speed_ext_OBJECTS) $(speed_ext_LDADD) $(LIBS)
+tuneup$(EXEEXT): $(tuneup_OBJECTS) $(tuneup_DEPENDENCIES) 
 	@rm -f tuneup$(EXEEXT)
-	$(tuneup_LINK) $(tuneup_OBJECTS) $(tuneup_LDADD) $(LIBS)
+	$(LINK) $(tuneup_LDFLAGS) $(tuneup_OBJECTS) $(tuneup_LDADD) $(LIBS)
 
 mostlyclean-compile:
 	-rm -f *.$(OBJEXT)
 
 distclean-compile:
 	-rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ansi2knr
+
+mostlyclean-kr:
+	-test "$U" = "" || rm -f *_.c
 
 .c.o:
 	$(COMPILE) -c $<
@@ -553,6 +458,127 @@ distclean-compile:
 
 .c.lo:
 	$(LTCOMPILE) -c -o $@ $<
+common_.c: common.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/common.c; then echo $(srcdir)/common.c; else echo common.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dc_divrem_n_.c: dc_divrem_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dc_divrem_n.c; then echo $(srcdir)/dc_divrem_n.c; else echo dc_divrem_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem1div_.c: divrem1div.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem1div.c; then echo $(srcdir)/divrem1div.c; else echo divrem1div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem1inv_.c: divrem1inv.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem1inv.c; then echo $(srcdir)/divrem1inv.c; else echo divrem1inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem2div_.c: divrem2div.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem2div.c; then echo $(srcdir)/divrem2div.c; else echo divrem2div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem2inv_.c: divrem2inv.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem2inv.c; then echo $(srcdir)/divrem2inv.c; else echo divrem2inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem_1_.c: divrem_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_1.c; then echo $(srcdir)/divrem_1.c; else echo divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem_2_.c: divrem_2.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_2.c; then echo $(srcdir)/divrem_2.c; else echo divrem_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+freq_.c: freq.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/freq.c; then echo $(srcdir)/freq.c; else echo freq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcd_.c: gcd.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd.c; then echo $(srcdir)/gcd.c; else echo gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdext_.c: gcdext.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext.c; then echo $(srcdir)/gcdext.c; else echo gcdext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdext_double_.c: gcdext_double.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext_double.c; then echo $(srcdir)/gcdext_double.c; else echo gcdext_double.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdext_single_.c: gcdext_single.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext_single.c; then echo $(srcdir)/gcdext_single.c; else echo gcdext_single.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdextod_.c: gcdextod.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdextod.c; then echo $(srcdir)/gcdextod.c; else echo gcdextod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdextos_.c: gcdextos.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdextos.c; then echo $(srcdir)/gcdextos.c; else echo gcdextos.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_str_.c: get_str.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+hgcd_.c: hgcd.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd.c; then echo $(srcdir)/hgcd.c; else echo hgcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+jacbase1_.c: jacbase1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase1.c; then echo $(srcdir)/jacbase1.c; else echo jacbase1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+jacbase2_.c: jacbase2.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase2.c; then echo $(srcdir)/jacbase2.c; else echo jacbase2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+jacbase3_.c: jacbase3.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase3.c; then echo $(srcdir)/jacbase3.c; else echo jacbase3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+matrix22_mul_.c: matrix22_mul.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/matrix22_mul.c; then echo $(srcdir)/matrix22_mul.c; else echo matrix22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_.c: mod_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1.c; then echo $(srcdir)/mod_1.c; else echo mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_div_.c: mod_1_div.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_div.c; then echo $(srcdir)/mod_1_div.c; else echo mod_1_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_inv_.c: mod_1_inv.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_inv.c; then echo $(srcdir)/mod_1_inv.c; else echo mod_1_inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+modlinv_.c: modlinv.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/modlinv.c; then echo $(srcdir)/modlinv.c; else echo modlinv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_.c: mul.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_fft_.c: mul_fft.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_fft.c; then echo $(srcdir)/mul_fft.c; else echo mul_fft.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_n_.c: mul_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_n.c; then echo $(srcdir)/mul_n.c; else echo mul_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mullow_n_.c: mullow_n.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullow_n.c; then echo $(srcdir)/mullow_n.c; else echo mullow_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+noop_.c: noop.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/noop.c; then echo $(srcdir)/noop.c; else echo noop.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+powm_mod_.c: powm_mod.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_mod.c; then echo $(srcdir)/powm_mod.c; else echo powm_mod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+powm_redc_.c: powm_redc.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_redc.c; then echo $(srcdir)/powm_redc.c; else echo powm_redc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+pre_divrem_1_.c: pre_divrem_1.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_divrem_1.c; then echo $(srcdir)/pre_divrem_1.c; else echo pre_divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sb_div_.c: sb_div.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sb_div.c; then echo $(srcdir)/sb_div.c; else echo sb_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sb_divrem_mn_.c: sb_divrem_mn.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sb_divrem_mn.c; then echo $(srcdir)/sb_divrem_mn.c; else echo sb_divrem_mn.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sb_inv_.c: sb_inv.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sb_inv.c; then echo $(srcdir)/sb_inv.c; else echo sb_inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_str_.c: set_str.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_strb_.c: set_strb.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strb.c; then echo $(srcdir)/set_strb.c; else echo set_strb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_strp_.c: set_strp.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strp.c; then echo $(srcdir)/set_strp.c; else echo set_strp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_strs_.c: set_strs.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strs.c; then echo $(srcdir)/set_strs.c; else echo set_strs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+speed_.c: speed.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/speed.c; then echo $(srcdir)/speed.c; else echo speed.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+speed-ext_.c: speed-ext.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/speed-ext.c; then echo $(srcdir)/speed-ext.c; else echo speed-ext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqr_basecase_.c: sqr_basecase.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_basecase.c; then echo $(srcdir)/sqr_basecase.c; else echo sqr_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_qr_.c: tdiv_qr.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr.c; then echo $(srcdir)/tdiv_qr.c; else echo tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+time_.c: time.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/time.c; then echo $(srcdir)/time.c; else echo time.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom44_mul_.c: toom44_mul.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom44_mul.c; then echo $(srcdir)/toom44_mul.c; else echo toom44_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom4_sqr_.c: toom4_sqr.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom4_sqr.c; then echo $(srcdir)/toom4_sqr.c; else echo toom4_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tuneup_.c: tuneup.c $(ANSI2KNR)
+	$(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tuneup.c; then echo $(srcdir)/tuneup.c; else echo tuneup.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+common_.$(OBJEXT) common_.lo dc_divrem_n_.$(OBJEXT) dc_divrem_n_.lo \
+divrem1div_.$(OBJEXT) divrem1div_.lo divrem1inv_.$(OBJEXT) \
+divrem1inv_.lo divrem2div_.$(OBJEXT) divrem2div_.lo \
+divrem2inv_.$(OBJEXT) divrem2inv_.lo divrem_1_.$(OBJEXT) divrem_1_.lo \
+divrem_2_.$(OBJEXT) divrem_2_.lo freq_.$(OBJEXT) freq_.lo \
+gcd_.$(OBJEXT) gcd_.lo gcdext_.$(OBJEXT) gcdext_.lo \
+gcdext_double_.$(OBJEXT) gcdext_double_.lo gcdext_single_.$(OBJEXT) \
+gcdext_single_.lo gcdextod_.$(OBJEXT) gcdextod_.lo gcdextos_.$(OBJEXT) \
+gcdextos_.lo get_str_.$(OBJEXT) get_str_.lo hgcd_.$(OBJEXT) hgcd_.lo \
+jacbase1_.$(OBJEXT) jacbase1_.lo jacbase2_.$(OBJEXT) jacbase2_.lo \
+jacbase3_.$(OBJEXT) jacbase3_.lo matrix22_mul_.$(OBJEXT) \
+matrix22_mul_.lo mod_1_.$(OBJEXT) mod_1_.lo mod_1_div_.$(OBJEXT) \
+mod_1_div_.lo mod_1_inv_.$(OBJEXT) mod_1_inv_.lo modlinv_.$(OBJEXT) \
+modlinv_.lo mul_.$(OBJEXT) mul_.lo mul_fft_.$(OBJEXT) mul_fft_.lo \
+mul_n_.$(OBJEXT) mul_n_.lo mullow_n_.$(OBJEXT) mullow_n_.lo \
+noop_.$(OBJEXT) noop_.lo powm_mod_.$(OBJEXT) powm_mod_.lo \
+powm_redc_.$(OBJEXT) powm_redc_.lo pre_divrem_1_.$(OBJEXT) \
+pre_divrem_1_.lo sb_div_.$(OBJEXT) sb_div_.lo sb_divrem_mn_.$(OBJEXT) \
+sb_divrem_mn_.lo sb_inv_.$(OBJEXT) sb_inv_.lo set_str_.$(OBJEXT) \
+set_str_.lo set_strb_.$(OBJEXT) set_strb_.lo set_strp_.$(OBJEXT) \
+set_strp_.lo set_strs_.$(OBJEXT) set_strs_.lo speed_.$(OBJEXT) \
+speed_.lo speed-ext_.$(OBJEXT) speed-ext_.lo sqr_basecase_.$(OBJEXT) \
+sqr_basecase_.lo tdiv_qr_.$(OBJEXT) tdiv_qr_.lo time_.$(OBJEXT) \
+time_.lo toom44_mul_.$(OBJEXT) toom44_mul_.lo toom4_sqr_.$(OBJEXT) \
+toom4_sqr_.lo tuneup_.$(OBJEXT) tuneup_.lo : $(ANSI2KNR)
 
 mostlyclean-libtool:
 	-rm -f *.lo
@@ -560,85 +586,83 @@ mostlyclean-libtool:
 clean-libtool:
 	-rm -rf .libs _libs
 
+distclean-libtool:
+	-rm -f libtool
+uninstall-info-am:
+
 ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
 	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
 	unique=`for i in $$list; do \
 	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
 	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
 	mkid -fID $$unique
 tags: TAGS
 
 TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
 		$(TAGS_FILES) $(LISP)
-	set x; \
+	tags=; \
 	here=`pwd`; \
 	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
 	unique=`for i in $$list; do \
 	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
 	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
-	shift; \
-	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
-	  test -n "$$unique" || unique=$$empty_fix; \
-	  if test $$# -gt 0; then \
-	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-	      "$$@" $$unique; \
-	  else \
-	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
-	      $$unique; \
-	  fi; \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+	  test -z "$$unique" && unique=$$empty_fix; \
+	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	    $$tags $$unique; \
 	fi
 ctags: CTAGS
 CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
 		$(TAGS_FILES) $(LISP)
+	tags=; \
+	here=`pwd`; \
 	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
 	unique=`for i in $$list; do \
 	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
 	  done | \
-	  $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
-	      END { if (nonempty) { for (i in files) print i; }; }'`; \
-	test -z "$(CTAGS_ARGS)$$unique" \
+	  $(AWK) '    { files[$$0] = 1; } \
+	       END { for (i in files) print i; }'`; \
+	test -z "$(CTAGS_ARGS)$$tags$$unique" \
 	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
-	     $$unique
+	     $$tags $$unique
 
 GTAGS:
 	here=`$(am__cd) $(top_builddir) && pwd` \
-	  && $(am__cd) $(top_srcdir) \
-	  && gtags -i $(GTAGS_ARGS) "$$here"
+	  && cd $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) $$here
 
 distclean-tags:
 	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
 
 distdir: $(DISTFILES)
-	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
-	list='$(DISTFILES)'; \
-	  dist_files=`for file in $$list; do echo $$file; done | \
-	  sed -e "s|^$$srcdirstrip/||;t" \
-	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
-	case $$dist_files in \
-	  */*) $(MKDIR_P) `echo "$$dist_files" | \
-			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
-			   sort -u` ;; \
-	esac; \
-	for file in $$dist_files; do \
+	$(mkdir_p) $(distdir)/../mpn
+	@srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
+	list='$(DISTFILES)'; for file in $$list; do \
+	  case $$file in \
+	    $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
+	    $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
+	  esac; \
 	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+	  if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+	    dir="/$$dir"; \
+	    $(mkdir_p) "$(distdir)$$dir"; \
+	  else \
+	    dir=''; \
+	  fi; \
 	  if test -d $$d/$$file; then \
-	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
-	    if test -d "$(distdir)/$$file"; then \
-	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
-	    fi; \
 	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
-	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
-	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
 	    fi; \
-	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
 	  else \
-	    test -f "$(distdir)/$$file" \
-	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
 	    || exit 1; \
 	  fi; \
 	done
@@ -656,23 +680,17 @@ install-am: all-am
 
 installcheck: installcheck-am
 install-strip:
-	if test -z '$(STRIP)'; then \
-	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-	      install; \
-	else \
-	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
-	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
-	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
-	fi
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
 mostlyclean-generic:
 
 clean-generic:
 	-test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
 
 distclean-generic:
-	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+	-rm -f $(CONFIG_CLEAN_FILES)
 	-test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES)
 
 maintainer-clean-generic:
@@ -685,7 +703,7 @@ clean-am: clean-generic clean-libtool mostlyclean-am
 distclean: distclean-am
 	-rm -f Makefile
 distclean-am: clean-am distclean-compile distclean-generic \
-	distclean-tags
+	distclean-libtool distclean-tags
 
 dvi: dvi-am
 
@@ -693,38 +711,18 @@ dvi-am:
 
 html: html-am
 
-html-am:
-
 info: info-am
 
 info-am:
 
 install-data-am:
 
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
 install-exec-am:
 
-install-html: install-html-am
-
-install-html-am:
-
 install-info: install-info-am
 
-install-info-am:
-
 install-man:
 
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
 installcheck-am:
 
 maintainer-clean: maintainer-clean-am
@@ -733,7 +731,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
 
 mostlyclean: mostlyclean-am
 
-mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
 	mostlyclean-libtool
 
 pdf: pdf-am
@@ -744,22 +742,18 @@ ps: ps-am
 
 ps-am:
 
-uninstall-am:
-
-.MAKE: install-am install-strip
+uninstall-am: uninstall-info-am
 
 .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
 	clean-libtool ctags distclean distclean-compile \
 	distclean-generic distclean-libtool distclean-tags distdir dvi \
 	dvi-am html html-am info info-am install install-am \
-	install-data install-data-am install-dvi install-dvi-am \
-	install-exec install-exec-am install-html install-html-am \
-	install-info install-info-am install-man install-pdf \
-	install-pdf-am install-ps install-ps-am install-strip \
+	install-data install-data-am install-exec install-exec-am \
+	install-info install-info-am install-man install-strip \
 	installcheck installcheck-am installdirs maintainer-clean \
 	maintainer-clean-generic mostlyclean mostlyclean-compile \
-	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
-	tags uninstall uninstall-am
+	mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+	pdf-am ps ps-am tags uninstall uninstall-am uninstall-info-am
 
 
 $(top_builddir)/tests/libtests.la:
@@ -788,17 +782,9 @@ mod_1.c:
 	echo "#include \"mpn/generic/mod_1.c\""     >>mod_1.c
 
 sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm
-	echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm
+	echo 'define(SQR_KARATSUBA_THRESHOLD_OVERRIDE,SQR_KARATSUBA_THRESHOLD_MAX)' >sqr_asm.asm
 	echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm
 
-# FIXME: Should it depend on $(top_builddir)/fac_ui.h too?
-fac_ui.c: $(top_builddir)/mpz/fac_ui.c
-	echo "#define TUNE_PROGRAM_BUILD 1"          >fac_ui.c
-	echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c
-	echo "#define __gmpz_oddfac_1 mpz_oddfac_1_tune" >>fac_ui.c
-	echo "#include \"mpz/oddfac_1.c\""           >>fac_ui.c
-	echo "#include \"mpz/fac_ui.c\""             >>fac_ui.c
-
 # .s assembler, no preprocessing.
 #
 .s.o:
@@ -855,6 +841,50 @@ fac_ui.c: $(top_builddir)/mpz/fac_ui.c
 .asm.lo:
 	$(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/m4-ccas --m4="$(M4)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
 
+# "mk" is multiplication in the karatsuba range
+# "st" is squaring in the toom-cook range, etc
+# "g" forms produce graphs
+
+mk:
+	./speed -s 5-40 -c mpn_mul_basecase mpn_kara_mul_n
+mt:
+	./speed $(MTS) mpn_kara_mul_n mpn_toom3_mul_n
+mtg:
+	./speed $(MTS) -P mtg mpn_kara_mul_n mpn_toom3_mul_n
+
+sk:
+	./speed -s 5-40 -c mpn_sqr_basecase mpn_kara_sqr_n
+st:
+	./speed $(STS) mpn_kara_sqr_n mpn_toom3_sqr_n
+stg:
+	./speed $(STS) -P stg mpn_kara_sqr_n mpn_toom3_sqr_n
+
+dc:
+	./speed -s 5-40 -c mpn_dc_divrem_sb mpn_dc_divrem_n mpn_dc_tdiv_qr
+
+fib:
+	./speed -s 40-60 -c mpz_fib_ui
+fibg:
+	./speed -s 10-300 -P fibg mpz_fib_ui
+
+gcd:
+	./speed -s 1-20 -c mpn_gcd
+
+udiv:
+	./speed -s 1 -c udiv_qrnnd udiv_qrnnd_preinv udiv_qrnnd_preinv2norm invert_limb udiv_qrnnd_c
+
+divn:
+	./speed -s 1-30 -c mpn_divrem_1_div.-1 mpn_divrem_1_inv.-1
+divun:
+	./speed -s 1-30 -c mpn_divrem_1_div.12345 mpn_divrem_1_inv.12345
+modn:
+	./speed -s 1-30 -c mpn_mod_1_div.-1 mpn_mod_1_inv.-1
+modun:
+	./speed -s 1-30 -c mpn_mod_1_div.12345 mpn_mod_1_inv.12345
+
+graph:
+	./speed -s 1-5000 -f 1.02 -P graph mpn_mul_n mpn_sqr
+	gnuplot graph.gnuplot
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
 .NOEXPORT:
diff --git a/gmp/tune/README b/gmp/tune/README
index f76407f7ca..994231d3be 100644
--- a/gmp/tune/README
+++ b/gmp/tune/README
@@ -1,30 +1,19 @@
-Copyright 2000-2002, 2004 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 
@@ -84,27 +73,6 @@ SCO OpenUNIX 8 /etc/hw
     running the speed program repeatedly then set a GMP_CPU_FREQUENCY
     environment variable (see TIME BASE section below).
 
-Timing on GNU/Linux
-
-    On Linux, timing currently uses the cycle counter. This is unreliable,
-    since the counter is not saved and restored at context switches (unlike
-    FreeBSD and Solaris where the cycle counter is "virtualized").
-
-    Using the clock_gettime method with CLOCK_PROCESS_CPUTIME_ID (posix) or
-    CLOCK_VIRTUAL (BSD) should be more reliable. To get clock_gettime
-    with glibc, one has to link with -lrt (which also drags in the pthreads
-    threading library). configure.in must be hacked to detect this and
-    arrange proper linking. Something like
-
-      old_LIBS="$LIBS"
-      AC_SEARCH_LIBS(clock_gettime, rt, [AC_DEFINE(HAVE_CLOCK_GETTIME)])
-      TUNE_LIBS="$LIBS"
-      LIBS="$old_LIBS"
-
-      AC_SUBST(TUNE_LIBS)
-
-    might work.
-
 Low resolution timebase
 
     Parameter tuning can be very time consuming if the only timebase
@@ -117,7 +85,7 @@ Low resolution timebase
 PARAMETER TUNING
 
 The "tuneup" program runs some tests designed to find the best settings for
-various thresholds, like MUL_TOOM22_THRESHOLD.  Its output can be put
+various thresholds, like MUL_KARATSUBA_THRESHOLD.  Its output can be put
 into gmp-mparam.h.  The program is built and run with
 
         make tune
@@ -298,16 +266,10 @@ mpn_divrem_1, using division by 32 as an example.
 
 EXAMPLE COMPARISONS - MULTIPLICATION
 
-mul_basecase takes a ".<r>" parameter. If positive, it gives the second
-(smaller) operand size.  For example to show speeds for 3x3 up to 20x3 in
-cycles,
-
-        ./speed -s 3-20 -c mpn_mul_basecase.3
-
-A negative ".<-r>" parameter fixes the size of the product to the absolute
-value r.  For example to show speeds for 10x10 up to 19x1 in cycles,
+mul_basecase takes a ".<r>" parameter which is the first (larger) size
+parameter.  For example to show speeds for 20x1 up to 20x15 in cycles,
 
-        ./speed -s 10-19 -c mpn_mul_basecase.-20
+        ./speed -s 1-15 -c mpn_mul_basecase.20
 
 mul_basecase with no parameter does an NxN multiply, so for example to show
 speeds in cycles for 1x1, 2x2, 3x3, etc, up to 20x20, in cycles,
@@ -319,7 +281,7 @@ up to twice as fast as mul_basecase.  In practice loop overheads and the
 products on the diagonal mean it falls short of this.  Here's an example
 running the two and showing by what factor an NxN mul_basecase is slower
 than an NxN sqr_basecase.  (Some versions of sqr_basecase only allow sizes
-below SQR_TOOM2_THRESHOLD, so if it crashes at that point don't worry.)
+below SQR_KARATSUBA_THRESHOLD, so if it crashes at that point don't worry.)
 
         ./speed -s 1-20 -r mpn_sqr_basecase mpn_mul_basecase
 
@@ -465,12 +427,12 @@ normal libgmp.la.
 Note further that the various routines may recurse into themselves on sizes
 far enough above applicable thresholds.  For example, mpn_kara_mul_n will
 recurse into itself on sizes greater than twice the compiled-in
-MUL_TOOM22_THRESHOLD.
+MUL_KARATSUBA_THRESHOLD.
 
 When doing the above comparison between mul_basecase and kara_mul_n what's
 probably of interest is mul_basecase versus a kara_mul_n that does one level
 of Karatsuba then calls to mul_basecase, but this only happens on sizes less
-than twice the compiled MUL_TOOM22_THRESHOLD.  A larger value for that
+than twice the compiled MUL_KARATSUBA_THRESHOLD.  A larger value for that
 setting can be compiled-in to avoid the problem if necessary.  The same
 applies to toom3 and DC, though in a trickier fashion.
 
diff --git a/gmp/tune/alpha.asm b/gmp/tune/alpha.asm
index 888c77fe9d..b447462083 100644
--- a/gmp/tune/alpha.asm
+++ b/gmp/tune/alpha.asm
@@ -1,32 +1,21 @@
 dnl  Alpha time stamp counter access routine.
 
 dnl  Copyright 2000, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/tune/common.c b/gmp/tune/common.c
index 8d06b4dc02..85a1ddef3f 100644
--- a/gmp/tune/common.c
+++ b/gmp/tune/common.c
@@ -1,32 +1,22 @@
 /* Shared speed subroutines.
 
-Copyright 1999-2006, 2008-2012 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
+Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define __GMP_NO_ATTRIBUTE_CONST_PURE
 
@@ -51,7 +41,6 @@ see https://www.gnu.org/licenses/.  */
 
 int   speed_option_addrs = 0;
 int   speed_option_verbose = 0;
-int   speed_option_cycles_broken = 0;
 
 
 /* Provide __clz_tab even if it's not required, for the benefit of new code
@@ -72,9 +61,9 @@ pentium_wbinvd(void)
 
     if (fd == -2)
       {
-	fd = open ("/dev/wbinvd", O_RDWR);
-	if (fd == -1)
-	  perror ("open /dev/wbinvd");
+        fd = open ("/dev/wbinvd", O_RDWR);
+        if (fd == -1)
+          perror ("open /dev/wbinvd");
       }
 
     if (fd != -1)
@@ -132,9 +121,10 @@ double_cmp_ptr (const double *p, const double *q)
    s->r, -1.0 should be returned.  See the various base routines below.  */
 
 double
-speed_measure (double (*fun) (struct speed_params *s), struct speed_params *s)
+speed_measure (double (*fun) __GMP_PROTO ((struct speed_params *s)),
+               struct speed_params *s)
 {
-#define TOLERANCE    1.01  /* 1% */
+#define TOLERANCE    1.005  /* 0.5% */
   const int max_zeros = 10;
 
   struct speed_params  s_dummy;
@@ -157,77 +147,74 @@ speed_measure (double (*fun) (struct speed_params *s), struct speed_params *s)
   for (i = 0; i < numberof (t); i++)
     {
       for (;;)
-	{
-	  s->src_num = 0;
-	  s->dst_num = 0;
-
-	  t[i] = (*fun) (s);
-
-	  if (speed_option_verbose >= 3)
-	    gmp_printf("size=%ld reps=%u r=%Md attempt=%d  %.9f\n",
-		       (long) s->size, s->reps, s->r, i, t[i]);
-
-	  if (t[i] == 0.0)
-	    {
-	      zeros++;
-	      if (zeros > max_zeros)
-		{
-		  fprintf (stderr, "Fatal error: too many (%d) failed measurements (0.0)\n", zeros);
-		  abort ();
-		}
-	     if (s->reps < 10000)
-	       s->reps *= 2;
-
-	      continue;
-	    }
-
-	  if (t[i] == -1.0)
-	    return -1.0;
-
-	  if (t[i] >= speed_unittime * speed_precision)
-	    break;
-
-	  /* go to a value of reps to make t[i] >= precision */
-	  reps_d = ceil (1.1 * s->reps
-			 * speed_unittime * speed_precision
-			 / MAX (t[i], speed_unittime));
-	  if (reps_d > 2e9 || reps_d < 1.0)
-	    {
-	      fprintf (stderr, "Fatal error: new reps bad: %.2f\n", reps_d);
-	      fprintf (stderr, "  (old reps %u, unittime %.4g, precision %d, t[i] %.4g)\n",
-		       s->reps, speed_unittime, speed_precision, t[i]);
-	      abort ();
-	    }
-	  s->reps = (unsigned) reps_d;
-	}
+        {
+          s->src_num = 0;
+          s->dst_num = 0;
+
+          t[i] = (*fun) (s);
+
+          if (speed_option_verbose >= 3)
+            gmp_printf("size=%ld reps=%u r=%Md attempt=%d  %.9f\n",
+                       (long) s->size, s->reps, s->r, i, t[i]);
+
+          if (t[i] == 0.0)
+            {
+              zeros++;
+              if (zeros > max_zeros)
+                {
+                  fprintf (stderr, "Fatal error: too many (%d) failed measurements (0.0)\n", zeros);
+                  abort ();
+                }
+              continue;
+            }
+
+          if (t[i] == -1.0)
+            return -1.0;
+
+          if (t[i] >= speed_unittime * speed_precision)
+            break;
+
+          /* go to a value of reps to make t[i] >= precision */
+          reps_d = ceil (1.1 * s->reps
+                         * speed_unittime * speed_precision
+                         / MAX (t[i], speed_unittime));
+          if (reps_d > 2e9 || reps_d < 1.0)
+            {
+              fprintf (stderr, "Fatal error: new reps bad: %.2f\n", reps_d);
+              fprintf (stderr, "  (old reps %u, unittime %.4g, precision %d, t[i] %.4g)\n",
+                       s->reps, speed_unittime, speed_precision, t[i]);
+              abort ();
+            }
+          s->reps = (unsigned) reps_d;
+        }
       t[i] /= s->reps;
       t_unsorted[i] = t[i];
 
       if (speed_precision == 0)
-	return t[i];
+        return t[i];
 
       /* require 3 values within TOLERANCE when >= 2 secs, 4 when below */
       if (t[0] >= 2.0)
-	e = 3;
+        e = 3;
       else
-	e = 4;
+        e = 4;
 
       /* Look for e many t[]'s within TOLERANCE of each other to consider a
-	 valid measurement.  Return smallest among them.  */
+         valid measurement.  Return smallest among them.  */
       if (i >= e)
-	{
-	  qsort (t, i+1, sizeof(t[0]), (qsort_function_t) double_cmp_ptr);
-	  for (j = e-1; j < i; j++)
-	    if (t[j] <= t[j-e+1] * TOLERANCE)
-	      return t[j-e+1] / s->time_divisor;
-	}
+        {
+          qsort (t, i+1, sizeof(t[0]), (qsort_function_t) double_cmp_ptr);
+          for (j = e-1; j < i; j++)
+            if (t[j] <= t[j-e+1] * TOLERANCE)
+              return t[j-e+1] / s->time_divisor;
+        }
     }
 
   fprintf (stderr, "speed_measure() could not get %d results within %.1f%%\n",
-	   e, (TOLERANCE-1.0)*100.0);
+           e, (TOLERANCE-1.0)*100.0);
   fprintf (stderr, "    unsorted         sorted\n");
   fprintf (stderr, "  %.12f    %.12f    is about 0.5%%\n",
-	   t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0));
+           t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0));
   for (i = 0; i < numberof (t); i++)
     fprintf (stderr, "  %.09f       %.09f\n", t_unsorted[i], t[i]);
 
@@ -318,30 +305,30 @@ speed_cache_fill (struct speed_params *s)
 
       different = (s->dst_num != prev.dst_num || s->src_num != prev.src_num);
       for (i = 0; i < s->dst_num; i++)
-	different |= (s->dst[i].ptr != prev.dst[i].ptr);
+        different |= (s->dst[i].ptr != prev.dst[i].ptr);
       for (i = 0; i < s->src_num; i++)
-	different |= (s->src[i].ptr != prev.src[i].ptr);
+        different |= (s->src[i].ptr != prev.src[i].ptr);
 
       if (different)
-	{
-	  if (s->dst_num != 0)
-	    {
-	      printf ("dst");
-	      for (i = 0; i < s->dst_num; i++)
-		printf (" %08lX", (unsigned long) s->dst[i].ptr);
-	      printf (" ");
-	    }
-
-	  if (s->src_num != 0)
-	    {
-	      printf ("src");
-	      for (i = 0; i < s->src_num; i++)
-		printf (" %08lX", (unsigned long) s->src[i].ptr);
-	      printf (" ");
-	    }
-	  printf ("  (cf sp approx %08lX)\n", (unsigned long) &different);
-
-	}
+        {
+          if (s->dst_num != 0)
+            {
+              printf ("dst");
+              for (i = 0; i < s->dst_num; i++)
+                printf (" %08lX", (unsigned long) s->dst[i].ptr);
+              printf (" ");
+            }
+
+          if (s->src_num != 0)
+            {
+              printf ("src");
+              for (i = 0; i < s->src_num; i++)
+                printf (" %08lX", (unsigned long) s->src[i].ptr);
+              printf (" ");
+            }
+          printf ("  (cf sp approx %08lX)\n", (unsigned long) &different);
+
+        }
 
       memcpy (&prev, s, sizeof(prev));
     }
@@ -360,7 +347,7 @@ speed_cache_fill (struct speed_params *s)
 }
 
 
-/* Miscellaneous options accepted by tune and speed programs under -o. */
+/* Miscellaneous options accepted by tune and speed programs under -o. */
 
 void
 speed_option_set (const char *s)
@@ -379,10 +366,6 @@ speed_option_set (const char *s)
     {
       speed_option_verbose = n;
     }
-  else if (strcmp (s, "cycles-broken") == 0)
-    {
-      speed_option_cycles_broken = 1;
-    }
   else
     {
       printf ("Unrecognised -o option: %s\n", s);
@@ -419,10 +402,10 @@ speed_option_set (const char *s)
    code on most CPUs, thereby minimizing overhead in the measurement.  It
    can always be assumed s->reps >= 1.
 
-	  i = s->reps
-	  do
-	    foo();
-	  while (--i != 0);
+          i = s->reps
+          do
+            foo();
+          while (--i != 0);
 
    Additional parameters might be added to "struct speed_params" in the
    future.  Routines should ignore anything they don't use.
@@ -469,14 +452,9 @@ speed_memcpy (struct speed_params *s)
   SPEED_ROUTINE_MPN_COPY_BYTES (memcpy);
 }
 double
-speed_mpn_com (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_COPY (mpn_com);
-}
-double
-speed_mpn_sec_tabselect (struct speed_params *s)
+speed_mpn_com_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_TABSELECT (mpn_sec_tabselect);
+  SPEED_ROUTINE_MPN_COPY (mpn_com_n);
 }
 
 
@@ -573,20 +551,6 @@ speed_mpn_mul_4 (struct speed_params *s)
   SPEED_ROUTINE_MPN_UNARY_4 (mpn_mul_4);
 }
 #endif
-#if HAVE_NATIVE_mpn_mul_5
-double
-speed_mpn_mul_5 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_UNARY_5 (mpn_mul_5);
-}
-#endif
-#if HAVE_NATIVE_mpn_mul_6
-double
-speed_mpn_mul_6 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_UNARY_6 (mpn_mul_6);
-}
-#endif
 
 
 double
@@ -595,11 +559,6 @@ speed_mpn_lshift (struct speed_params *s)
   SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshift);
 }
 double
-speed_mpn_lshiftc (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshiftc);
-}
-double
 speed_mpn_rshift (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_UNARY_1 (mpn_rshift);
@@ -699,39 +658,6 @@ speed_mpn_divrem_2_inv (struct speed_params *s)
 }
 
 double
-speed_mpn_div_qr_1n_pi1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1);
-}
-double
-speed_mpn_div_qr_1n_pi1_1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_1);
-}
-double
-speed_mpn_div_qr_1n_pi1_2 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_2);
-}
-
-double
-speed_mpn_div_qr_1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_DIV_QR_1 (mpn_div_qr_1);
-}
-
-double
-speed_mpn_div_qr_2n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 1);
-}
-double
-speed_mpn_div_qr_2u (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 0);
-}
-
-double
 speed_mpn_mod_1 (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1);
@@ -748,36 +674,6 @@ speed_mpn_preinv_mod_1 (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_PREINV_MOD_1 (mpn_preinv_mod_1);
 }
-double
-speed_mpn_mod_1_1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p,mpn_mod_1_1p_cps);
-}
-double
-speed_mpn_mod_1_1_1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_1,mpn_mod_1_1p_cps_1);
-}
-double
-speed_mpn_mod_1_1_2 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_2,mpn_mod_1_1p_cps_2);
-}
-double
-speed_mpn_mod_1_2 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_2p,mpn_mod_1s_2p_cps,2);
-}
-double
-speed_mpn_mod_1_3 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_3p,mpn_mod_1s_3p_cps,3);
-}
-double
-speed_mpn_mod_1_4 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_4p,mpn_mod_1s_4p_cps,4);
-}
 
 double
 speed_mpn_divexact_1 (struct speed_params *s)
@@ -797,18 +693,6 @@ speed_mpn_bdiv_dbm1c (struct speed_params *s)
   SPEED_ROUTINE_MPN_BDIV_DBM1C (mpn_bdiv_dbm1c);
 }
 
-double
-speed_mpn_bdiv_q_1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BDIV_Q_1 (mpn_bdiv_q_1);
-}
-
-double
-speed_mpn_pi1_bdiv_q_1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_PI1_BDIV_Q_1 (mpn_pi1_bdiv_q_1);
-}
-
 #if HAVE_NATIVE_mpn_modexact_1_odd
 double
 speed_mpn_modexact_1_odd (struct speed_params *s)
@@ -823,145 +707,59 @@ speed_mpn_modexact_1c_odd (struct speed_params *s)
   SPEED_ROUTINE_MPN_MODEXACT_1C_ODD (mpn_modexact_1c_odd);
 }
 
-double
-speed_mpz_mod (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPZ_MOD (mpz_mod);
-}
 
 double
-speed_mpn_sbpi1_div_qr (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_div_qr, inv.inv32, 2,0);
-}
-double
-speed_mpn_dcpi1_div_qr (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_div_qr, &inv, 6,3);
-}
-double
-speed_mpn_sbpi1_divappr_q (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_divappr_q, inv.inv32, 2,0);
-}
-double
-speed_mpn_dcpi1_divappr_q (struct speed_params *s)
+speed_mpn_dc_tdiv_qr (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_divappr_q, &inv, 6,3);
+  SPEED_ROUTINE_MPN_DC_TDIV_QR (mpn_tdiv_qr);
 }
 double
-speed_mpn_mu_div_qr (struct speed_params *s)
+speed_mpn_dc_divrem_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_MU_DIV_QR (mpn_mu_div_qr, mpn_mu_div_qr_itch);
+  SPEED_ROUTINE_MPN_DC_DIVREM_N (mpn_dc_divrem_n);
 }
 double
-speed_mpn_mu_divappr_q (struct speed_params *s)
+speed_mpn_dc_divrem_sb (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_divappr_q, mpn_mu_divappr_q_itch);
+  SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn);
 }
 double
-speed_mpn_mu_div_q (struct speed_params *s)
+speed_mpn_dc_divrem_sb_div (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_div_q, mpn_mu_div_q_itch);
+  SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn_div);
 }
 double
-speed_mpn_mupi_div_qr (struct speed_params *s)
+speed_mpn_dc_divrem_sb_inv (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_MUPI_DIV_QR (mpn_preinv_mu_div_qr, mpn_preinv_mu_div_qr_itch);
+  SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn_inv);
 }
 
 double
-speed_mpn_sbpi1_bdiv_qr (struct speed_params *s)
+speed_mpn_sb_divrem_m3 (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_sbpi1_bdiv_qr);
+  SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn);
 }
 double
-speed_mpn_dcpi1_bdiv_qr (struct speed_params *s)
+speed_mpn_sb_divrem_m3_div (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_dcpi1_bdiv_qr);
+  SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn_div);
 }
 double
-speed_mpn_sbpi1_bdiv_q (struct speed_params *s)
+speed_mpn_sb_divrem_m3_inv (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_sbpi1_bdiv_q);
-}
-double
-speed_mpn_dcpi1_bdiv_q (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_dcpi1_bdiv_q);
-}
-double
-speed_mpn_mu_bdiv_q (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MU_BDIV_Q (mpn_mu_bdiv_q, mpn_mu_bdiv_q_itch);
-}
-double
-speed_mpn_mu_bdiv_qr (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MU_BDIV_QR (mpn_mu_bdiv_qr, mpn_mu_bdiv_qr_itch);
-}
-
-double
-speed_mpn_broot (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BROOT (mpn_broot);
-}
-double
-speed_mpn_broot_invm1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BROOT (mpn_broot_invm1);
-}
-double
-speed_mpn_brootinv (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BROOTINV (mpn_brootinv, 5*s->size);
+  SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn_inv);
 }
 
 double
-speed_mpn_binvert (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINVERT (mpn_binvert, mpn_binvert_itch);
-}
-
-double
-speed_mpn_invert (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_INVERT (mpn_invert, mpn_invert_itch);
-}
-
-double
-speed_mpn_invertappr (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_INVERTAPPR (mpn_invertappr, mpn_invertappr_itch);
-}
-
-double
-speed_mpn_ni_invertappr (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_INVERTAPPR (mpn_ni_invertappr, mpn_invertappr_itch);
-}
-
-double
-speed_mpn_sec_invert (struct speed_params *s)
+speed_mpz_mod (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_SEC_INVERT (mpn_sec_invert, mpn_sec_invert_itch);
+  SPEED_ROUTINE_MPZ_MOD (mpz_mod);
 }
-
 double
 speed_mpn_redc_1 (struct speed_params *s)
 {
   SPEED_ROUTINE_REDC_1 (mpn_redc_1);
 }
-double
-speed_mpn_redc_2 (struct speed_params *s)
-{
-  SPEED_ROUTINE_REDC_2 (mpn_redc_2);
-}
-double
-speed_mpn_redc_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_REDC_N (mpn_redc_n);
-}
 
 
 double
@@ -987,172 +785,28 @@ speed_mpn_sub_n (struct speed_params *s)
 SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n);
 }
 
+#if HAVE_NATIVE_mpn_addsub_n
 double
-speed_mpn_add_err1_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_add_err1_n);
-}
-double
-speed_mpn_sub_err1_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_sub_err1_n);
-}
-double
-speed_mpn_add_err2_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_add_err2_n);
-}
-double
-speed_mpn_sub_err2_n (struct speed_params *s)
+speed_mpn_addsub_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_sub_err2_n);
-}
-double
-speed_mpn_add_err3_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_add_err3_n);
-}
-double
-speed_mpn_sub_err3_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_sub_err3_n);
-}
-
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-double
-speed_mpn_add_n_sub_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_ADDSUB_N_CALL (mpn_add_n_sub_n (ap, sp, s->xp, s->yp, s->size));
+  SPEED_ROUTINE_MPN_ADDSUB_N_CALL (mpn_addsub_n (ap, sp, s->xp, s->yp, s->size));
 }
 #endif
 
-#if HAVE_NATIVE_mpn_addlsh1_n == 1
+#if HAVE_NATIVE_mpn_addlsh1_n
 double
 speed_mpn_addlsh1_n (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh1_n);
 }
 #endif
-#if HAVE_NATIVE_mpn_sublsh1_n == 1
+#if HAVE_NATIVE_mpn_sublsh1_n
 double
 speed_mpn_sublsh1_n (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh1_n);
 }
 #endif
-#if HAVE_NATIVE_mpn_addlsh1_n_ip1
-double
-speed_mpn_addlsh1_n_ip1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip1);
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh1_n_ip2
-double
-speed_mpn_addlsh1_n_ip2 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip2);
-}
-#endif
-#if HAVE_NATIVE_mpn_sublsh1_n_ip1
-double
-speed_mpn_sublsh1_n_ip1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_COPY (mpn_sublsh1_n_ip1);
-}
-#endif
-#if HAVE_NATIVE_mpn_rsblsh1_n == 1
-double
-speed_mpn_rsblsh1_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh1_n);
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh2_n == 1
-double
-speed_mpn_addlsh2_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh2_n);
-}
-#endif
-#if HAVE_NATIVE_mpn_sublsh2_n == 1
-double
-speed_mpn_sublsh2_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh2_n);
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh2_n_ip1
-double
-speed_mpn_addlsh2_n_ip1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip1);
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh2_n_ip2
-double
-speed_mpn_addlsh2_n_ip2 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip2);
-}
-#endif
-#if HAVE_NATIVE_mpn_sublsh2_n_ip1
-double
-speed_mpn_sublsh2_n_ip1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_COPY (mpn_sublsh2_n_ip1);
-}
-#endif
-#if HAVE_NATIVE_mpn_rsblsh2_n == 1
-double
-speed_mpn_rsblsh2_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh2_n);
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh_n
-double
-speed_mpn_addlsh_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addlsh_n (wp, xp, yp, s->size, 7));
-}
-#endif
-#if HAVE_NATIVE_mpn_sublsh_n
-double
-speed_mpn_sublsh_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_sublsh_n (wp, xp, yp, s->size, 7));
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh_n_ip1
-double
-speed_mpn_addlsh_n_ip1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip1 (wp, s->xp, s->size, 7));
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh_n_ip2
-double
-speed_mpn_addlsh_n_ip2 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip2 (wp, s->xp, s->size, 7));
-}
-#endif
-#if HAVE_NATIVE_mpn_sublsh_n_ip1
-double
-speed_mpn_sublsh_n_ip1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_sublsh_n_ip1 (wp, s->xp, s->size, 7));
-}
-#endif
-#if HAVE_NATIVE_mpn_rsblsh_n
-double
-speed_mpn_rsblsh_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_rsblsh_n (wp, xp, yp, s->size, 7));
-}
-#endif
 #if HAVE_NATIVE_mpn_rsh1add_n
 double
 speed_mpn_rsh1add_n (struct speed_params *s)
@@ -1168,58 +822,47 @@ speed_mpn_rsh1sub_n (struct speed_params *s)
 }
 #endif
 
-double
-speed_mpn_cnd_add_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_cnd_add_n (1, wp, xp, yp, s->size));
-}
-double
-speed_mpn_cnd_sub_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_cnd_sub_n (1, wp, xp, yp, s->size));
-}
-
 /* mpn_and_n etc can be macros and so have to be handled with
    SPEED_ROUTINE_MPN_BINARY_N_CALL forms */
 double
 speed_mpn_and_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, xp, yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, s->xp, s->yp, s->size));
 }
 double
 speed_mpn_andn_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, xp, yp, s->size));
+SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, s->xp, s->yp, s->size));
 }
 double
 speed_mpn_nand_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, xp, yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, s->xp, s->yp, s->size));
 }
 double
 speed_mpn_ior_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, xp, yp, s->size));
+SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, s->xp, s->yp, s->size));
 }
 double
 speed_mpn_iorn_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, xp, yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, s->xp, s->yp, s->size));
 }
 double
 speed_mpn_nior_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, xp, yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, s->xp, s->yp, s->size));
 }
 double
 speed_mpn_xor_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, xp, yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, s->xp, s->yp, s->size));
 }
 double
 speed_mpn_xnor_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, xp, yp, s->size));
+  SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, s->xp, s->yp, s->size));
 }
 
 
@@ -1229,9 +872,9 @@ speed_mpn_mul_n (struct speed_params *s)
   SPEED_ROUTINE_MPN_MUL_N (mpn_mul_n);
 }
 double
-speed_mpn_sqr (struct speed_params *s)
+speed_mpn_sqr_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_SQR (mpn_sqr);
+  SPEED_ROUTINE_MPN_SQR (mpn_sqr_n);
 }
 double
 speed_mpn_mul_n_sqr (struct speed_params *s)
@@ -1264,141 +907,29 @@ speed_mpn_sqr_diagonal (struct speed_params *s)
 }
 #endif
 
-#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
-double
-speed_mpn_sqr_diag_addlsh1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL (mpn_sqr_diag_addlsh1 (wp, tp, s->xp, s->size));
-}
-#endif
-
-double
-speed_mpn_toom2_sqr (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM2_SQR (mpn_toom2_sqr);
-}
-double
-speed_mpn_toom3_sqr (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM3_SQR (mpn_toom3_sqr);
-}
-double
-speed_mpn_toom4_sqr (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM4_SQR (mpn_toom4_sqr);
-}
-double
-speed_mpn_toom6_sqr (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM6_SQR (mpn_toom6_sqr);
-}
-double
-speed_mpn_toom8_sqr (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM8_SQR (mpn_toom8_sqr);
-}
-double
-speed_mpn_toom22_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul);
-}
-double
-speed_mpn_toom33_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM33_MUL_N (mpn_toom33_mul);
-}
 double
-speed_mpn_toom44_mul (struct speed_params *s)
+speed_mpn_kara_mul_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul);
+  SPEED_ROUTINE_MPN_KARA_MUL_N (mpn_kara_mul_n);
 }
 double
-speed_mpn_toom6h_mul (struct speed_params *s)
+speed_mpn_kara_sqr_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul);
-}
-double
-speed_mpn_toom8h_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM8H_MUL_N (mpn_toom8h_mul);
+  SPEED_ROUTINE_MPN_KARA_SQR_N (mpn_kara_sqr_n);
 }
 
 double
-speed_mpn_toom32_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM32_MUL (mpn_toom32_mul);
-}
-double
-speed_mpn_toom42_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM42_MUL (mpn_toom42_mul);
-}
-double
-speed_mpn_toom43_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM43_MUL (mpn_toom43_mul);
-}
-double
-speed_mpn_toom63_mul (struct speed_params *s)
+speed_mpn_toom3_mul_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_TOOM63_MUL (mpn_toom63_mul);
+  SPEED_ROUTINE_MPN_TOOM3_MUL_N (mpn_toom3_mul_n);
 }
 double
-speed_mpn_toom32_for_toom43_mul (struct speed_params *s)
+speed_mpn_toom3_sqr_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL (mpn_toom32_mul);
-}
-double
-speed_mpn_toom43_for_toom32_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL (mpn_toom43_mul);
-}
-double
-speed_mpn_toom32_for_toom53_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL (mpn_toom32_mul);
-}
-double
-speed_mpn_toom53_for_toom32_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL (mpn_toom53_mul);
-}
-double
-speed_mpn_toom42_for_toom53_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL (mpn_toom42_mul);
-}
-double
-speed_mpn_toom53_for_toom42_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL (mpn_toom53_mul);
-}
-double
-speed_mpn_toom43_for_toom54_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL (mpn_toom43_mul);
-}
-double
-speed_mpn_toom54_for_toom43_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL (mpn_toom54_mul);
+  SPEED_ROUTINE_MPN_TOOM3_SQR_N (mpn_toom3_sqr_n);
 }
 
 double
-speed_mpn_nussbaumer_mul (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MUL_N_CALL
-    (mpn_nussbaumer_mul (wp, s->xp, s->size, s->yp, s->size));
-}
-double
-speed_mpn_nussbaumer_mul_sqr (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_SQR_CALL
-    (mpn_nussbaumer_mul (wp, s->xp, s->size, s->xp, s->size));
-}
-
-#if WANT_OLD_FFT_FULL
-double
 speed_mpn_mul_fft_full (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_MUL_N_CALL
@@ -1410,7 +941,7 @@ speed_mpn_mul_fft_full_sqr (struct speed_params *s)
   SPEED_ROUTINE_MPN_SQR_CALL
     (mpn_mul_fft_full (wp, s->xp, s->size, s->xp, s->size));
 }
-#endif
+
 
 /* These are mod 2^N+1 multiplies and squares.  If s->r is supplied it's
    used as k, otherwise the best k for the size is used.  If s->size isn't a
@@ -1424,31 +955,31 @@ speed_mpn_mul_fft_full_sqr (struct speed_params *s)
     unsigned   i;                                       \
     double     t;                                       \
     TMP_DECL;                                           \
-							\
+                                                        \
     SPEED_RESTRICT_COND (s->size >= 1);                 \
-							\
+                                                        \
     if (s->r != 0)                                      \
       k = s->r;                                         \
     else                                                \
       k = mpn_fft_best_k (s->size, sqr);                \
-							\
+                                                        \
     TMP_MARK;                                           \
     pl = mpn_fft_next_size (s->size, k);                \
     SPEED_TMP_ALLOC_LIMBS (wp, pl+1, s->align_wp);      \
-							\
+                                                        \
     speed_operand_src (s, s->xp, s->size);              \
     if (!sqr)                                           \
       speed_operand_src (s, s->yp, s->size);            \
     speed_operand_dst (s, wp, pl+1);                    \
     speed_cache_fill (s);                               \
-							\
+                                                        \
     speed_starttime ();                                 \
     i = s->reps;                                        \
     do                                                  \
       call;                                             \
     while (--i != 0);                                   \
     t = speed_endtime ();                               \
-							\
+                                                        \
     TMP_FREE;                                           \
     return t;                                           \
   }
@@ -1468,174 +999,179 @@ speed_mpn_mul_fft_sqr (struct speed_params *s)
 }
 
 double
-speed_mpn_fft_mul (struct speed_params *s)
+speed_mpn_mullow_n (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_MUL_N_CALL (mpn_fft_mul (wp, s->xp, s->size, s->yp, s->size));
+  SPEED_ROUTINE_MPN_MULLOW_N (mpn_mullow_n);
 }
-
 double
-speed_mpn_fft_sqr (struct speed_params *s)
+speed_mpn_mullow_basecase (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_SQR_CALL (mpn_fft_mul (wp, s->xp, s->size, s->xp, s->size));
+  SPEED_ROUTINE_MPN_MULLOW_BASECASE (mpn_mullow_basecase);
 }
 
 double
-speed_mpn_mullo_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MULLO_N (mpn_mullo_n);
-}
-double
-speed_mpn_mullo_basecase (struct speed_params *s)
+speed_mpn_matrix22_mul (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_MULLO_BASECASE (mpn_mullo_basecase);
-}
+  /* Speed params only includes 2 inputs, so we have to invent the
+     other 6. */
 
-double
-speed_mpn_mulmid_basecase (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MULMID (mpn_mulmid_basecase);
-}
+  mp_ptr a1, a2, a3;
+  mp_ptr r0, r1, r2, r3;
+  mp_ptr b1, b2, b3;
+  mp_ptr tp;
+  mp_size_t scratch;
+  unsigned i;
+  double t;
+  TMP_DECL;
 
-double
-speed_mpn_mulmid (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MULMID (mpn_mulmid);
-}
+  TMP_MARK;
+  SPEED_TMP_ALLOC_LIMBS (a1, s->size, s->align_xp);
+  SPEED_TMP_ALLOC_LIMBS (a2, s->size, s->align_xp);
+  SPEED_TMP_ALLOC_LIMBS (a3, s->size, s->align_xp);
 
-double
-speed_mpn_mulmid_n (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MULMID_N (mpn_mulmid_n);
-}
+  SPEED_TMP_ALLOC_LIMBS (b1, s->size, s->align_yp);
+  SPEED_TMP_ALLOC_LIMBS (b2, s->size, s->align_yp);
+  SPEED_TMP_ALLOC_LIMBS (b3, s->size, s->align_yp);
 
-double
-speed_mpn_toom42_mulmid (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_TOOM42_MULMID (mpn_toom42_mulmid);
-}
+  SPEED_TMP_ALLOC_LIMBS (r0, 2 * s->size +1, s->align_xp);
+  SPEED_TMP_ALLOC_LIMBS (r1, 2 * s->size +1, s->align_xp);
+  SPEED_TMP_ALLOC_LIMBS (r2, 2 * s->size +1, s->align_xp);
+  SPEED_TMP_ALLOC_LIMBS (r3, 2 * s->size +1, s->align_xp);
 
-double
-speed_mpn_mulmod_bnm1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_mulmod_bnm1 (wp, s->size, s->xp, s->size, s->yp, s->size, tp));
-}
+  mpn_random (a1, s->size);
+  mpn_random (a2, s->size);
+  mpn_random (a3, s->size);
+  mpn_random (b1, s->size);
+  mpn_random (b2, s->size);
+  mpn_random (b3, s->size);
 
-double
-speed_mpn_bc_mulmod_bnm1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_bc_mulmod_bnm1 (wp, s->xp, s->yp, s->size, tp));
-}
+  scratch = mpn_matrix22_mul_itch (s->size, s->size);
+  SPEED_TMP_ALLOC_LIMBS (tp, scratch, s->align_wp);
 
-double
-speed_mpn_mulmod_bnm1_rounded (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED (mpn_mulmod_bnm1);
-}
-
-double
-speed_mpn_sqrmod_bnm1 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_sqrmod_bnm1 (wp, s->size, s->xp, s->size, tp));
+  speed_starttime ();
+  i = s->reps;
+  do
+    {
+      MPN_COPY (r0, s->xp, s->size);
+      MPN_COPY (r1, a1, s->size);
+      MPN_COPY (r2, a2, s->size);
+      MPN_COPY (r3, a3, s->size);
+      mpn_matrix22_mul (r0, r1, r2, r3, s->size, s->yp, b1, b2, b3, s->size, tp);
+    }
+  while (--i != 0);
+  t = speed_endtime();
+  TMP_FREE;
+  return t;
 }
 
 double
-speed_mpn_matrix22_mul (struct speed_params *s)
+speed_mpn_hgcd (struct speed_params *s)
 {
-  /* Speed params only includes 2 inputs, so we have to invent the
-     other 6. */
+  mp_ptr wp;
+  mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
+  mp_size_t hgcd_scratch = mpn_hgcd_itch (s->size);
+  mp_ptr ap;
+  mp_ptr bp;
+  mp_ptr tmp1, tmp2;
 
-  mp_ptr a;
-  mp_ptr r;
-  mp_ptr b;
-  mp_ptr tp;
-  mp_size_t itch;
+  struct hgcd_matrix hgcd;
+  int res;
   unsigned i;
   double t;
   TMP_DECL;
 
+  if (s->size < 2)
+    return -1;
+
   TMP_MARK;
-  SPEED_TMP_ALLOC_LIMBS (a, 4 * s->size, s->align_xp);
-  SPEED_TMP_ALLOC_LIMBS (b, 4 * s->size, s->align_yp);
-  SPEED_TMP_ALLOC_LIMBS (r, 8 * s->size + 4, s->align_wp);
 
-  MPN_COPY (a, s->xp, s->size);
-  mpn_random (a + s->size, 3 * s->size);
-  MPN_COPY (b, s->yp, s->size);
-  mpn_random (b + s->size, 3 * s->size);
+  SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
+  SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
 
-  itch = mpn_matrix22_mul_itch (s->size, s->size);
-  SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2);
+  s->xp[s->size - 1] |= 1;
+  s->yp[s->size - 1] |= 1;
 
-  speed_operand_src (s, a, 4 * s->size);
-  speed_operand_src (s, b, 4 * s->size);
-  speed_operand_dst (s, r, 8 * s->size + 4);
-  speed_operand_dst (s, tp, itch);
-  speed_cache_fill (s);
+  SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
+  SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
 
   speed_starttime ();
   i = s->reps;
   do
     {
-      mp_size_t sz = s->size;
-      MPN_COPY (r + 0 * sz + 0, a + 0 * sz, sz);
-      MPN_COPY (r + 2 * sz + 1, a + 1 * sz, sz);
-      MPN_COPY (r + 4 * sz + 2, a + 2 * sz, sz);
-      MPN_COPY (r + 6 * sz + 3, a + 3 * sz, sz);
-      mpn_matrix22_mul (r, r + 2 * sz + 1, r + 4 * sz + 2, r + 6 * sz + 3, sz,
-			b, b + 1 * sz,     b + 2 * sz,     b + 3 * sz,     sz,
-			tp);
+      MPN_COPY (ap, s->xp, s->size);
+      MPN_COPY (bp, s->yp, s->size);
+      mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
+      res = mpn_hgcd (ap, bp, s->size, &hgcd, wp);
     }
   while (--i != 0);
-  t = speed_endtime();
+  t = speed_endtime ();
   TMP_FREE;
   return t;
 }
 
 double
-speed_mpn_hgcd (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd, mpn_hgcd_itch);
-}
-
-double
 speed_mpn_hgcd_lehmer (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, mpn_hgcd_lehmer_itch);
-}
+  mp_ptr wp;
+  mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
+  mp_size_t hgcd_scratch = MPN_HGCD_LEHMER_ITCH (s->size);
+  mp_ptr ap;
+  mp_ptr bp;
+  mp_ptr tmp1, tmp2;
 
-double
-speed_mpn_hgcd_appr (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr, mpn_hgcd_appr_itch);
-}
+  struct hgcd_matrix hgcd;
+  int res;
+  unsigned i;
+  double t;
+  TMP_DECL;
 
-double
-speed_mpn_hgcd_appr_lehmer (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr_lehmer, mpn_hgcd_appr_lehmer_itch);
+  if (s->size < 2)
+    return -1;
+
+  TMP_MARK;
+
+  SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
+  SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
+
+  s->xp[s->size - 1] |= 1;
+  s->yp[s->size - 1] |= 1;
+
+  SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
+  SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
+
+  speed_starttime ();
+  i = s->reps;
+  do
+    {
+      MPN_COPY (ap, s->xp, s->size);
+      MPN_COPY (bp, s->yp, s->size);
+      mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
+      res = mpn_hgcd_lehmer (ap, bp, s->size, &hgcd, wp);
+    }
+  while (--i != 0);
+  t = speed_endtime ();
+  TMP_FREE;
+  return t;
 }
 
 double
-speed_mpn_hgcd_reduce (struct speed_params *s)
+speed_mpn_gcd (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch);
+  SPEED_ROUTINE_MPN_GCD (mpn_gcd);
 }
+#if 0
 double
-speed_mpn_hgcd_reduce_1 (struct speed_params *s)
+speed_mpn_gcd_binary (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch);
+  SPEED_ROUTINE_MPN_GCD (mpn_gcd_binary);
 }
 double
-speed_mpn_hgcd_reduce_2 (struct speed_params *s)
+speed_mpn_gcd_accel (struct speed_params *s)
 {
-  SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch);
+  SPEED_ROUTINE_MPN_GCD (mpn_gcd_accel);
 }
+#endif
 
-double
-speed_mpn_gcd (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_GCD (mpn_gcd);
-}
 
 double
 speed_mpn_gcdext (struct speed_params *s)
@@ -1706,11 +1242,6 @@ speed_mpn_jacobi_base_3 (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_3);
 }
-double
-speed_mpn_jacobi_base_4 (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_4);
-}
 
 
 double
@@ -1776,11 +1307,6 @@ speed_mpz_powm_redc (struct speed_params *s)
   SPEED_ROUTINE_MPZ_POWM (mpz_powm_redc);
 }
 double
-speed_mpz_powm_sec (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPZ_POWM (mpz_powm_sec);
-}
-double
 speed_mpz_powm_ui (struct speed_params *s)
 {
   SPEED_ROUTINE_MPZ_POWM_UI (mpz_powm_ui);
@@ -1856,12 +1382,12 @@ speed_noop_wxys (struct speed_params *s)
   {                                                     \
     unsigned  i;                                        \
     variables;                                          \
-							\
+                                                        \
     speed_starttime ();                                 \
     i = s->reps;                                        \
     do                                                  \
       {                                                 \
-	calls;                                          \
+        calls;                                          \
       }                                                 \
     while (--i != 0);                                   \
     return speed_endtime ();                            \
@@ -1876,39 +1402,39 @@ speed_noop_wxys (struct speed_params *s)
 double
 speed_malloc_free (struct speed_params *s)
 {
-  size_t  bytes = s->size * GMP_LIMB_BYTES;
+  size_t  bytes = s->size * BYTES_PER_MP_LIMB;
   SPEED_ROUTINE_ALLOC_FREE (void *p,
-			    p = malloc (bytes);
-			    free (p));
+                            p = malloc (bytes);
+                            free (p));
 }
 
 double
 speed_malloc_realloc_free (struct speed_params *s)
 {
-  size_t  bytes = s->size * GMP_LIMB_BYTES;
+  size_t  bytes = s->size * BYTES_PER_MP_LIMB;
   SPEED_ROUTINE_ALLOC_FREE (void *p,
-			    p = malloc (GMP_LIMB_BYTES);
-			    p = realloc (p, bytes);
-			    free (p));
+                            p = malloc (BYTES_PER_MP_LIMB);
+                            p = realloc (p, bytes);
+                            free (p));
 }
 
 double
 speed_gmp_allocate_free (struct speed_params *s)
 {
-  size_t  bytes = s->size * GMP_LIMB_BYTES;
+  size_t  bytes = s->size * BYTES_PER_MP_LIMB;
   SPEED_ROUTINE_ALLOC_FREE (void *p,
-			    p = (*__gmp_allocate_func) (bytes);
-			    (*__gmp_free_func) (p, bytes));
+                            p = (*__gmp_allocate_func) (bytes);
+                            (*__gmp_free_func) (p, bytes));
 }
 
 double
 speed_gmp_allocate_reallocate_free (struct speed_params *s)
 {
-  size_t  bytes = s->size * GMP_LIMB_BYTES;
+  size_t  bytes = s->size * BYTES_PER_MP_LIMB;
   SPEED_ROUTINE_ALLOC_FREE
     (void *p,
-     p = (*__gmp_allocate_func) (GMP_LIMB_BYTES);
-     p = (*__gmp_reallocate_func) (p, bytes, GMP_LIMB_BYTES);
+     p = (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+     p = (*__gmp_reallocate_func) (p, bytes, BYTES_PER_MP_LIMB);
      (*__gmp_free_func) (p, bytes));
 }
 
@@ -1916,38 +1442,38 @@ double
 speed_mpz_init_clear (struct speed_params *s)
 {
   SPEED_ROUTINE_ALLOC_FREE (mpz_t z,
-			    mpz_init (z);
-			    mpz_clear (z));
+                            mpz_init (z);
+                            mpz_clear (z));
 }
 
 double
 speed_mpz_init_realloc_clear (struct speed_params *s)
 {
   SPEED_ROUTINE_ALLOC_FREE (mpz_t z,
-			    mpz_init (z);
-			    _mpz_realloc (z, s->size);
-			    mpz_clear (z));
+                            mpz_init (z);
+                            _mpz_realloc (z, s->size);
+                            mpz_clear (z));
 }
 
 double
 speed_mpq_init_clear (struct speed_params *s)
 {
   SPEED_ROUTINE_ALLOC_FREE (mpq_t q,
-			    mpq_init (q);
-			    mpq_clear (q));
+                            mpq_init (q);
+                            mpq_clear (q));
 }
 
 double
 speed_mpf_init_clear (struct speed_params *s)
 {
   SPEED_ROUTINE_ALLOC_FREE (mpf_t f,
-			    mpf_init (f);
-			    mpf_clear (f));
+                            mpf_init (f);
+                            mpf_clear (f));
 }
 
 
 /* Compare this to mpn_add_n to see how much overhead mpz_add adds.  Note
-   that repeatedly calling mpz_add with the same data gives branch prediction
+   that repeatedly calling mpz_add with the same data gives branch prediction
    in it an advantage.  */
 
 double
@@ -2011,40 +1537,6 @@ speed_mpz_bin_uiui (struct speed_params *s)
   return t;
 }
 
-/* If r==0, calculate binomial(2^size,size),
-   otherwise calculate binomial(2^size,r). */
-
-double
-speed_mpz_bin_ui (struct speed_params *s)
-{
-  mpz_t          w, x;
-  unsigned long  k;
-  unsigned  i;
-  double    t;
-
-  mpz_init (w);
-  mpz_init_set_ui (x, 0);
-
-  mpz_setbit (x, s->size);
-
-  if (s->r != 0)
-    k = s->r;
-  else
-    k = s->size;
-
-  speed_starttime ();
-  i = s->reps;
-  do
-    {
-      mpz_bin_ui (w, x, k);
-    }
-  while (--i != 0);
-  t = speed_endtime ();
-
-  mpz_clear (w);
-  mpz_clear (x);
-  return t;
-}
 
 /* The multiplies are successively dependent so the latency is measured, not
    the issue rate.  There's only 10 per loop so the code doesn't get too big
@@ -2072,41 +1564,41 @@ speed_mpz_bin_ui (struct speed_params *s)
     mp_limb_t  h, l;            \
     unsigned   i;               \
     double     t;               \
-				\
+                                \
     s->time_divisor = 10;       \
-				\
+                                \
     h = s->xp[0];               \
     l = s->yp[0];               \
-				\
+                                \
     if (s->r == 1)              \
       {                         \
-	speed_starttime ();     \
-	i = s->reps;            \
-	do                      \
-	  {
+        speed_starttime ();     \
+        i = s->reps;            \
+        do                      \
+          {
 
 #define SPEED_MACRO_UMUL_PPMM_B \
-	  }                     \
-	while (--i != 0);       \
-	t = speed_endtime ();   \
+          }                     \
+        while (--i != 0);       \
+        t = speed_endtime ();   \
       }                         \
     else                        \
       {                         \
-	speed_starttime ();     \
-	i = s->reps;            \
-	do                      \
-	  {
+        speed_starttime ();     \
+        i = s->reps;            \
+        do                      \
+          {
 
 #define SPEED_MACRO_UMUL_PPMM_C                                         \
-	  }                                                             \
-	while (--i != 0);                                               \
-	t = speed_endtime ();                                           \
+          }                                                             \
+        while (--i != 0);                                               \
+        t = speed_endtime ();                                           \
       }                                                                 \
-									\
+                                                                        \
     /* stop the compiler optimizing away the whole calculation! */      \
     noop_1 (h);                                                         \
     noop_1 (l);                                                         \
-									\
+                                                                        \
     return t;                                                           \
   }
 
@@ -2242,25 +1734,25 @@ speed_mpn_umul_ppmm_r (struct speed_params *s)
     unsigned   i;                                       \
     mp_limb_t  q, r, d;                                 \
     mp_limb_t  dinv;                                    \
-							\
+                                                        \
     s->time_divisor = 10;                               \
-							\
+                                                        \
     /* divisor from "r" parameter, or a default */      \
     d = s->r;                                           \
     if (d == 0)                                         \
-      d = mp_bases[10].big_base;                        \
-							\
+      d = __mp_bases[10].big_base;                      \
+                                                        \
     if (normalize)                                      \
       {                                                 \
-	unsigned  norm;                                 \
-	count_leading_zeros (norm, d);                  \
-	d <<= norm;                                     \
-	invert_limb (dinv, d);                          \
+        unsigned  norm;                                 \
+        count_leading_zeros (norm, d);                  \
+        d <<= norm;                                     \
+        invert_limb (dinv, d);                          \
       }                                                 \
-							\
+                                                        \
     q = s->xp[0];                                       \
     r = s->yp[0] % d;                                   \
-							\
+                                                        \
     speed_starttime ();                                 \
     i = s->reps;                                        \
     do                                                  \
@@ -2270,11 +1762,11 @@ speed_mpn_umul_ppmm_r (struct speed_params *s)
       }                                                                 \
     while (--i != 0);                                                   \
     t = speed_endtime ();                                               \
-									\
+                                                                        \
     /* stop the compiler optimizing away the whole calculation! */      \
     noop_1 (q);                                                         \
     noop_1 (r);                                                         \
-									\
+                                                                        \
     return t;                                                           \
   }
 
@@ -2298,6 +1790,44 @@ speed_udiv_qrnnd (struct speed_params *s)
 }
 
 double
+speed_udiv_qrnnd_preinv1 (struct speed_params *s)
+{
+  SPEED_ROUTINE_UDIV_QRNND_A (1);
+  {
+    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+    udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+  }
+  SPEED_ROUTINE_UDIV_QRNND_B;
+}
+
+double
+speed_udiv_qrnnd_preinv2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_UDIV_QRNND_A (1);
+  {
+    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+     udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+    udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+  }
+  SPEED_ROUTINE_UDIV_QRNND_B;
+}
+
+double
 speed_udiv_qrnnd_c (struct speed_params *s)
 {
   SPEED_ROUTINE_UDIV_QRNND_A (1);
@@ -2380,7 +1910,7 @@ speed_operator_div (struct speed_params *s)
   /* divisor from "r" parameter, or a default */
   d = s->r;
   if (d == 0)
-    d = mp_bases[10].big_base;
+    d = __mp_bases[10].big_base;
 
   x = s->xp[0];
   q = 0;
@@ -2421,7 +1951,7 @@ speed_operator_mod (struct speed_params *s)
   /* divisor from "r" parameter, or a default */
   d = s->r;
   if (d == 0)
-    d = mp_bases[10].big_base;
+    d = __mp_bases[10].big_base;
 
   x = s->xp[0];
   r = 0;
@@ -2455,12 +1985,12 @@ speed_operator_mod (struct speed_params *s)
    be typical for count_trailing_zeros in a GCD etc.
 
    r==1 measures on data with the resultant count uniformly distributed
-   between 0 and GMP_LIMB_BITS-1.  This is probably sensible for
+   between 0 and BITS_PER_MP_LIMB-1.  This is probably sensible for
    count_leading_zeros on the high limbs of divisors.  */
 
 int
 speed_routine_count_zeros_setup (struct speed_params *s,
-				 mp_ptr xp, int leading, int zero)
+                                 mp_ptr xp, int leading, int zero)
 {
   int        i, c;
   mp_limb_t  n;
@@ -2468,26 +1998,26 @@ speed_routine_count_zeros_setup (struct speed_params *s,
   if (s->r == 0)
     {
       /* Make uniformly distributed data.  If zero isn't allowed then change
-	 it to 1 for leading, or 0x800..00 for trailing.  */
+         it to 1 for leading, or 0x800..00 for trailing.  */
       MPN_COPY (xp, s->xp_block, SPEED_BLOCK_SIZE);
       if (! zero)
-	for (i = 0; i < SPEED_BLOCK_SIZE; i++)
-	  if (xp[i] == 0)
-	    xp[i] = leading ? 1 : GMP_LIMB_HIGHBIT;
+        for (i = 0; i < SPEED_BLOCK_SIZE; i++)
+          if (xp[i] == 0)
+            xp[i] = leading ? 1 : GMP_LIMB_HIGHBIT;
     }
   else if (s->r == 1)
     {
       /* Make counts uniformly distributed.  A randomly chosen bit is set, and
-	 for leading the rest above it are cleared, or for trailing then the
-	 rest below.  */
+         for leading the rest above it are cleared, or for trailing then the
+         rest below.  */
       for (i = 0; i < SPEED_BLOCK_SIZE; i++)
-	{
-	  mp_limb_t  set = CNST_LIMB(1) << (s->yp_block[i] % GMP_LIMB_BITS);
-	  mp_limb_t  keep_below = set-1;
-	  mp_limb_t  keep_above = MP_LIMB_T_MAX ^ keep_below;
-	  mp_limb_t  keep = (leading ? keep_below : keep_above);
-	  xp[i] = (s->xp_block[i] & keep) | set;
-	}
+        {
+          mp_limb_t  set = CNST_LIMB(1) << (s->yp_block[i] % BITS_PER_MP_LIMB);
+          mp_limb_t  keep_below = set-1;
+          mp_limb_t  keep_above = MP_LIMB_T_MAX ^ keep_below;
+          mp_limb_t  keep = (leading ? keep_below : keep_above);
+          xp[i] = (s->xp_block[i] & keep) | set;
+        }
     }
   else
     {
@@ -2502,9 +2032,9 @@ speed_routine_count_zeros_setup (struct speed_params *s,
       xp[i] ^= c;
 
       if (leading)
-	count_leading_zeros (c, n);
+        count_leading_zeros (c, n);
       else
-	count_trailing_zeros (c, n);
+        count_trailing_zeros (c, n);
     }
 
   return 1;
@@ -2620,7 +2150,7 @@ speed_gmp_randseed_ui (struct speed_params *s)
       gmp_randseed_ui (rstate, (unsigned long) s->xp_block[j]);
       j++;
       if (j >= SPEED_BLOCK_SIZE)
-	j = 0;
+        j = 0;
     }
   while (--i != 0);
   t = speed_endtime ();
diff --git a/gmp/tune/div_qr_1_tune.c b/gmp/tune/div_qr_1_tune.c
deleted file mode 100644
index 7e928dcce9..0000000000
--- a/gmp/tune/div_qr_1_tune.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/* mpn/generic/div_qr_1, using tuned threshold and method.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define TUNE_PROGRAM_BUILD 1
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-mp_limb_t mpn_div_qr_1n_pi1_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
-mp_limb_t mpn_div_qr_1n_pi1_2 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
-
-#if !HAVE_NATIVE_mpn_div_qr_1n_pi1
-#define __gmpn_div_qr_1n_pi1 \
-  (div_qr_1n_pi1_method == 1 ? mpn_div_qr_1n_pi1_1 : mpn_div_qr_1n_pi1_2)
-#endif
-
-#undef mpn_div_qr_1
-#define mpn_div_qr_1 mpn_div_qr_1_tune
-
-#include "mpn/generic/div_qr_1.c"
diff --git a/gmp/tune/div_qr_1n_pi1_1.c b/gmp/tune/div_qr_1n_pi1_1.c
deleted file mode 100644
index 6dd8ceb438..0000000000
--- a/gmp/tune/div_qr_1n_pi1_1.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/* mpn/generic/div_qr_1n_pi1.c method 1.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef DIV_QR_1N_METHOD
-#define DIV_QR_1N_METHOD 1
-#undef mpn_div_qr_1n_pi1
-#define mpn_div_qr_1n_pi1 mpn_div_qr_1n_pi1_1
-
-#include "mpn/generic/div_qr_1n_pi1.c"
diff --git a/gmp/tune/div_qr_1n_pi1_2.c b/gmp/tune/div_qr_1n_pi1_2.c
deleted file mode 100644
index acc80d4695..0000000000
--- a/gmp/tune/div_qr_1n_pi1_2.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/* mpn/generic/div_qr_1n_pi1.c method 2.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef DIV_QR_1N_METHOD
-#define DIV_QR_1N_METHOD 2
-#undef mpn_div_qr_1n_pi1
-#define mpn_div_qr_1n_pi1 mpn_div_qr_1n_pi1_2
-
-#include "mpn/generic/div_qr_1n_pi1.c"
diff --git a/gmp/tune/divrem1div.c b/gmp/tune/divrem1div.c
index b680f9d222..5580f80578 100644
--- a/gmp/tune/divrem1div.c
+++ b/gmp/tune/divrem1div.c
@@ -5,28 +5,17 @@ Copyright 2000, 2003 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define OPERATION_divrem_1
 
diff --git a/gmp/tune/divrem1inv.c b/gmp/tune/divrem1inv.c
index 598c03c739..73ed57f411 100644
--- a/gmp/tune/divrem1inv.c
+++ b/gmp/tune/divrem1inv.c
@@ -5,28 +5,17 @@ Copyright 2000, 2003 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define OPERATION_divrem_1
 
diff --git a/gmp/tune/divrem2div.c b/gmp/tune/divrem2div.c
index cd7f3f5a88..10b50e2f83 100644
--- a/gmp/tune/divrem2div.c
+++ b/gmp/tune/divrem2div.c
@@ -6,28 +6,17 @@ Copyright 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/tune/divrem2inv.c b/gmp/tune/divrem2inv.c
index bd7c4268f7..05644b2560 100644
--- a/gmp/tune/divrem2inv.c
+++ b/gmp/tune/divrem2inv.c
@@ -6,28 +6,17 @@ Copyright 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/tune/freq.c b/gmp/tune/freq.c
index 210f42564e..f1092e2640 100644
--- a/gmp/tune/freq.c
+++ b/gmp/tune/freq.c
@@ -1,32 +1,21 @@
 /* CPU frequency determination.
 
-Copyright 1999-2004 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 /* Currently we don't get a CPU frequency on the following systems,
@@ -470,7 +459,7 @@ freq_sunos_sysinfo (int help)
 
 
 /* "/etc/hw -r cpu" for SCO OpenUnix 8, printing a line like
-	The speed of the CPU is approximately 450MHz
+	The speed of the CPU is approximately 450Mhz
  */
 static int
 freq_sco_etchw (int help)
@@ -491,7 +480,7 @@ freq_sco_etchw (int help)
       while (fgets (buf, sizeof (buf), fp) != NULL)
         {
           end = 0;
-          if (sscanf (buf, " The speed of the CPU is approximately %lfMHz%n",
+          if (sscanf (buf, " The speed of the CPU is approximately %lfMhz%n",
                       &val, &end) == 1 && end != 0)
             {
               speed_cycletime = 1e-6 / val;
diff --git a/gmp/tune/gcdext_double.c b/gmp/tune/gcdext_double.c
index c72f07ea9f..5470f1aff5 100644
--- a/gmp/tune/gcdext_double.c
+++ b/gmp/tune/gcdext_double.c
@@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/tune/gcdext_single.c b/gmp/tune/gcdext_single.c
index 292e9e87e0..1bc47e786e 100644
--- a/gmp/tune/gcdext_single.c
+++ b/gmp/tune/gcdext_single.c
@@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/tune/gcdextod.c b/gmp/tune/gcdextod.c
index c08087d480..957864c5e9 100644
--- a/gmp/tune/gcdextod.c
+++ b/gmp/tune/gcdextod.c
@@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/tune/gcdextos.c b/gmp/tune/gcdextos.c
index fb8af29279..afde776f7d 100644
--- a/gmp/tune/gcdextos.c
+++ b/gmp/tune/gcdextos.c
@@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/tune/hgcd_appr_lehmer.c b/gmp/tune/hgcd_appr_lehmer.c
deleted file mode 100644
index 790e61e3cb..0000000000
--- a/gmp/tune/hgcd_appr_lehmer.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/* mpn/generic/hgcd_appr.c forced to use Lehmer's quadratic algorithm. */
-
-/*
-Copyright 2010, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef  HGCD_APPR_THRESHOLD
-#define HGCD_APPR_THRESHOLD MP_SIZE_T_MAX
-#define __gmpn_hgcd_appr  mpn_hgcd_appr_lehmer
-#define __gmpn_hgcd_appr_itch mpn_hgcd_appr_lehmer_itch
-
-#include "../mpn/generic/hgcd_appr.c"
diff --git a/gmp/tune/hgcd_lehmer.c b/gmp/tune/hgcd_lehmer.c
deleted file mode 100644
index 11d0ef8821..0000000000
--- a/gmp/tune/hgcd_lehmer.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/* mpn/generic/hgcd.c forced to use Lehmer's quadratic algorithm. */
-
-/*
-Copyright 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef  HGCD_THRESHOLD
-#define HGCD_THRESHOLD MP_SIZE_T_MAX
-#define __gmpn_hgcd  mpn_hgcd_lehmer
-#define __gmpn_hgcd_itch mpn_hgcd_lehmer_itch
-
-#include "../mpn/generic/hgcd.c"
diff --git a/gmp/tune/hgcd_reduce_1.c b/gmp/tune/hgcd_reduce_1.c
deleted file mode 100644
index 383c2d7009..0000000000
--- a/gmp/tune/hgcd_reduce_1.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* mpn/generic/hgcd_reduce.c forced to use hgcd. */
-
-/*
-Copyright 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef  HGCD_REDUCE_THRESHOLD
-#define HGCD_REDUCE_THRESHOLD MP_SIZE_T_MAX
-#define __gmpn_hgcd_reduce  mpn_hgcd_reduce_1
-#define __gmpn_hgcd_reduce_itch  mpn_hgcd_reduce_1_itch
-
-
-#include "../mpn/generic/hgcd_reduce.c"
diff --git a/gmp/tune/hgcd_reduce_2.c b/gmp/tune/hgcd_reduce_2.c
deleted file mode 100644
index ac18b6033a..0000000000
--- a/gmp/tune/hgcd_reduce_2.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/* mpn/generic/hgcd_reduce.c forced to use hgcd_appr. */
-
-/*
-Copyright 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef  HGCD_REDUCE_THRESHOLD
-#define HGCD_REDUCE_THRESHOLD 0
-#define __gmpn_hgcd_reduce mpn_hgcd_reduce_2
-#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_2_itch
-
-#include "../mpn/generic/hgcd_reduce.c"
diff --git a/gmp/tune/hppa.asm b/gmp/tune/hppa.asm
index fc9d62e3b2..e99a399e4a 100644
--- a/gmp/tune/hppa.asm
+++ b/gmp/tune/hppa.asm
@@ -1,32 +1,21 @@
 dnl  HPPA 32-bit time stamp counter access routine.
 
 dnl  Copyright 2000, 2002, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/tune/hppa2.asm b/gmp/tune/hppa2.asm
index 57ef4c4683..9755c907d2 100644
--- a/gmp/tune/hppa2.asm
+++ b/gmp/tune/hppa2.asm
@@ -1,32 +1,21 @@
 dnl  HPPA 64-bit time stamp counter access routine.
 
 dnl  Copyright 2000, 2002, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/tune/hppa2w.asm b/gmp/tune/hppa2w.asm
index 215a0cc5c2..ddf0ea9b0a 100644
--- a/gmp/tune/hppa2w.asm
+++ b/gmp/tune/hppa2w.asm
@@ -1,32 +1,21 @@
 dnl  HPPA 64-bit time stamp counter access routine.
 
 dnl  Copyright 2000, 2002, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/tune/ia64.asm b/gmp/tune/ia64.asm
index 0651111031..ef487db8f6 100644
--- a/gmp/tune/ia64.asm
+++ b/gmp/tune/ia64.asm
@@ -1,32 +1,21 @@
 dnl  IA-64 time stamp counter access routine.
 
 dnl  Copyright 2000, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/tune/jacbase1.c b/gmp/tune/jacbase1.c
index a73df8b723..2a0b859c2c 100644
--- a/gmp/tune/jacbase1.c
+++ b/gmp/tune/jacbase1.c
@@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/tune/jacbase2.c b/gmp/tune/jacbase2.c
index b99ebe9061..6bbe7e9e93 100644
--- a/gmp/tune/jacbase2.c
+++ b/gmp/tune/jacbase2.c
@@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/tune/jacbase3.c b/gmp/tune/jacbase3.c
index 408b0fed6b..f8f89d49be 100644
--- a/gmp/tune/jacbase3.c
+++ b/gmp/tune/jacbase3.c
@@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/tune/jacbase4.c b/gmp/tune/jacbase4.c
deleted file mode 100644
index 70d535240b..0000000000
--- a/gmp/tune/jacbase4.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/* mpn/generic/jacbase.c method 4.
-
-Copyright 2002, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef JACOBI_BASE_METHOD
-#define JACOBI_BASE_METHOD 4
-#define __gmpn_jacobi_base mpn_jacobi_base_4
-
-#include "mpn/generic/jacbase.c"
diff --git a/gmp/tune/many.pl b/gmp/tune/many.pl
index 524a67dd1e..11b5cf4521 100644..100755
--- a/gmp/tune/many.pl
+++ b/gmp/tune/many.pl
@@ -1,32 +1,21 @@
 #! /usr/bin/perl -w
 
-# Copyright 2000-2002 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 #
-#  This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
 #
-#  The GNU MP Library is free software; you can redistribute it and/or modify
-#  it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
 #
-#    * the GNU Lesser General Public License as published by the Free
-#      Software Foundation; either version 3 of the License, or (at your
-#      option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+# License for more details.
 #
-#  or
-#
-#    * the GNU General Public License as published by the Free Software
-#      Foundation; either version 2 of the License, or (at your option) any
-#      later version.
-#
-#  or both in parallel, as here.
-#
-#  The GNU MP Library is distributed in the hope that it will be useful, but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-#  for more details.
-#
-#  You should have received copies of the GNU General Public License and the
-#  GNU Lesser General Public License along with the GNU MP Library.  If not,
-#  see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 # Usage:  cd $builddir/tune
@@ -380,14 +369,23 @@ my @table =
      },
 
      {
-       'regexp'=> 'add_n_sub_n',
+       'regexp'=> 'addsub_n',
        'ret'   => 'mp_limb_t',
        'args'  => 'mp_ptr sum, mp_ptr diff, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
        'speed_flags'=> 'FLAG_R_OPTIONAL',
      },
 
      {
-       'regexp'=> 'com|copyi|copyd',
+       'regexp'=> 'bdivmod',
+       'ret'   => 'mp_limb_t',
+       'args'  => 'mp_ptr qp, mp_ptr up, mp_size_t usize, mp_srcptr vp, mp_size_t vsize, unsigned long int d',
+       'carrys'=> [''],
+       'try'   => 'none',
+       'speed' => 'none',
+     },
+
+     {
+       'regexp'=> 'com_n|copyi|copyd',
        'ret'   => 'void',
        'args'  => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',
        'speed' => 'SPEED_ROUTINE_MPN_COPY',
diff --git a/gmp/tune/mod_1_1-1.c b/gmp/tune/mod_1_1-1.c
deleted file mode 100644
index 7eb7fcdf79..0000000000
--- a/gmp/tune/mod_1_1-1.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* mpn/generic/mod_1_1.c method 1.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef MOD_1_1P_METHOD
-#define MOD_1_1P_METHOD 1
-#undef mpn_mod_1_1p
-#undef mpn_mod_1_1p_cps
-#define mpn_mod_1_1p mpn_mod_1_1p_1
-#define mpn_mod_1_1p_cps mpn_mod_1_1p_cps_1
-
-#include "mpn/generic/mod_1_1.c"
diff --git a/gmp/tune/mod_1_1-2.c b/gmp/tune/mod_1_1-2.c
deleted file mode 100644
index 52ca57749b..0000000000
--- a/gmp/tune/mod_1_1-2.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* mpn/generic/mod_1_1.c method 2.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef MOD_1_1P_METHOD
-#define MOD_1_1P_METHOD 2
-#undef mpn_mod_1_1p
-#undef mpn_mod_1_1p_cps
-#define mpn_mod_1_1p mpn_mod_1_1p_2
-#define mpn_mod_1_1p_cps mpn_mod_1_1p_cps_2
-
-#include "mpn/generic/mod_1_1.c"
diff --git a/gmp/tune/mod_1_div.c b/gmp/tune/mod_1_div.c
index a0663be055..1c01e8c692 100644
--- a/gmp/tune/mod_1_div.c
+++ b/gmp/tune/mod_1_div.c
@@ -5,28 +5,17 @@ Copyright 2000, 2003 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define OPERATION_mod_1
 
@@ -35,12 +24,8 @@ see https://www.gnu.org/licenses/.  */
 
 #undef MOD_1_NORM_THRESHOLD
 #undef MOD_1_UNNORM_THRESHOLD
-#undef MOD_1N_TO_MOD_1_1_THRESHOLD
-#undef MOD_1U_TO_MOD_1_1_THRESHOLD
 #define MOD_1_NORM_THRESHOLD    MP_SIZE_T_MAX
 #define MOD_1_UNNORM_THRESHOLD  MP_SIZE_T_MAX
-#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
 #define __gmpn_mod_1  mpn_mod_1_div
 
 #include "mpn/generic/mod_1.c"
diff --git a/gmp/tune/mod_1_inv.c b/gmp/tune/mod_1_inv.c
index 92c936ddcf..448d093f31 100644
--- a/gmp/tune/mod_1_inv.c
+++ b/gmp/tune/mod_1_inv.c
@@ -5,28 +5,17 @@ Copyright 2000 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define OPERATION_mod_1
 
@@ -35,12 +24,8 @@ see https://www.gnu.org/licenses/.  */
 
 #undef MOD_1_NORM_THRESHOLD
 #undef MOD_1_UNNORM_THRESHOLD
-#undef MOD_1N_TO_MOD_1_1_THRESHOLD
-#undef MOD_1U_TO_MOD_1_1_THRESHOLD
 #define MOD_1_NORM_THRESHOLD    0
 #define MOD_1_UNNORM_THRESHOLD  0
-#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
 #define __gmpn_mod_1  mpn_mod_1_inv
 
 #include "mpn/generic/mod_1.c"
diff --git a/gmp/tune/modlinv.c b/gmp/tune/modlinv.c
index e3f2063e07..3ed0c8a03f 100644
--- a/gmp/tune/modlinv.c
+++ b/gmp/tune/modlinv.c
@@ -6,28 +6,17 @@ Copyright 2000, 2002 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include <stdio.h>
 #include "gmp.h"
@@ -41,7 +30,7 @@ see https://www.gnu.org/licenses/.  */
    dependent chain, whereas the "2*" in the standard version isn't.
    Depending on the CPU this should be the same or a touch slower.  */
 
-#if GMP_LIMB_BITS <= 32
+#if BITS_PER_MP_LIMB <= 32
 #define binvert_limb_mul1(inv,n)                                \
   do {                                                          \
     mp_limb_t  __n = (n);                                       \
@@ -55,7 +44,7 @@ see https://www.gnu.org/licenses/.  */
   } while (0)
 #endif
 
-#if GMP_LIMB_BITS > 32 && GMP_LIMB_BITS <= 64
+#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
 #define binvert_limb_mul1(inv,n)                                \
   do {                                                          \
     mp_limb_t  __n = (n);                                       \
@@ -111,7 +100,7 @@ see https://www.gnu.org/licenses/.  */
                                                 \
     ASSERT ((__n & 1) == 1);                    \
                                                 \
-    __count = GMP_LIMB_BITS-1;               \
+    __count = BITS_PER_MP_LIMB-1;               \
     do                                          \
       {                                         \
         __inv >>= 1;                            \
@@ -142,11 +131,11 @@ see https://www.gnu.org/licenses/.  */
                                                                         \
     ASSERT ((__n & 1) == 1);                                            \
                                                                         \
-    __count = GMP_LIMB_BITS-1;                                       \
+    __count = BITS_PER_MP_LIMB-1;                                       \
     do                                                                  \
       {                                                                 \
         __lowbit = __rem & 1;                                           \
-        __inv = (__inv >> 1) | (__lowbit << (GMP_LIMB_BITS-1));      \
+        __inv = (__inv >> 1) | (__lowbit << (BITS_PER_MP_LIMB-1));      \
         __rem = (__rem - (__n & -__lowbit)) >> 1;                       \
       }                                                                 \
     while (-- __count);                                                 \
diff --git a/gmp/tune/noop.c b/gmp/tune/noop.c
index 5c13c96ee3..7c7f1b5fe6 100644
--- a/gmp/tune/noop.c
+++ b/gmp/tune/noop.c
@@ -9,28 +9,17 @@ Copyright 1999, 2000 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/tune/pentium.asm b/gmp/tune/pentium.asm
index fb1e8332c8..369a8ea76f 100644
--- a/gmp/tune/pentium.asm
+++ b/gmp/tune/pentium.asm
@@ -1,32 +1,21 @@
 dnl  x86 pentium time stamp counter access routine.
 
 dnl  Copyright 1999, 2000, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')
diff --git a/gmp/tune/powerpc.asm b/gmp/tune/powerpc.asm
index 2f4ac27bea..19ab901386 100644
--- a/gmp/tune/powerpc.asm
+++ b/gmp/tune/powerpc.asm
@@ -1,32 +1,21 @@
 dnl  PowerPC mftb_function -- read time base registers.
 
 dnl  Copyright 2002 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
@@ -43,7 +32,7 @@ L(again):
 	mftbu	r4
 	mftb	r5
 	mftbu	r6
-	cmpw	cr0, r4, r6
+	cmp	cr0, r4, r6
 	bne	L(again)
 
 	stw	r5, 0(r3)
diff --git a/gmp/tune/powerpc64.asm b/gmp/tune/powerpc64.asm
index 1ade99638a..eb705466da 100644
--- a/gmp/tune/powerpc64.asm
+++ b/gmp/tune/powerpc64.asm
@@ -1,32 +1,21 @@
 dnl  PowerPC mftb_function -- read time base registers, 64-bit integer.
 
-dnl  Copyright 2002-2004 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/tune/powm_mod.c b/gmp/tune/powm_mod.c
index 7c20f53e70..e65f512e85 100644
--- a/gmp/tune/powm_mod.c
+++ b/gmp/tune/powm_mod.c
@@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/tune/powm_redc.c b/gmp/tune/powm_redc.c
index c34bb2e0a5..a9e4bb502a 100644
--- a/gmp/tune/powm_redc.c
+++ b/gmp/tune/powm_redc.c
@@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/tune/pre_divrem_1.c b/gmp/tune/pre_divrem_1.c
index 388ca4150a..2b3fb79e22 100644
--- a/gmp/tune/pre_divrem_1.c
+++ b/gmp/tune/pre_divrem_1.c
@@ -5,28 +5,17 @@ Copyright 2001 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #include "gmp.h"
 #include "gmp-impl.h"
diff --git a/gmp/tune/sb_div.c b/gmp/tune/sb_div.c
new file mode 100644
index 0000000000..4d52a24ce3
--- /dev/null
+++ b/gmp/tune/sb_div.c
@@ -0,0 +1,30 @@
+/* mpn/generic/sb_divrem_mn.c forced to use plain udiv_qrnnd. */
+
+/*
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifdef DIV_SB_PREINV_THRESHOLD
+#undef DIV_SB_PREINV_THRESHOLD
+#endif
+#define DIV_SB_PREINV_THRESHOLD  MP_SIZE_T_MAX
+#define __gmpn_sb_divrem_mn  mpn_sb_divrem_mn_div
+
+#include "mpn/generic/sb_divrem_mn.c"
diff --git a/gmp/tune/sb_inv.c b/gmp/tune/sb_inv.c
new file mode 100644
index 0000000000..f8d798b43b
--- /dev/null
+++ b/gmp/tune/sb_inv.c
@@ -0,0 +1,30 @@
+/* mpn/generic/sb_divrem_mn.c forced to use udiv_qrnnd_preinv. */
+
+/*
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifdef DIV_SB_PREINV_THRESHOLD
+#undef DIV_SB_PREINV_THRESHOLD
+#endif
+#define DIV_SB_PREINV_THRESHOLD  0
+#define __gmpn_sb_divrem_mn  mpn_sb_divrem_mn_inv
+
+#include "mpn/generic/sb_divrem_mn.c"
diff --git a/gmp/tune/set_strb.c b/gmp/tune/set_strb.c
index 842ec4cd44..c67b09ccaa 100644
--- a/gmp/tune/set_strb.c
+++ b/gmp/tune/set_strb.c
@@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define __gmpn_set_str mpn_set_str_basecase
 #define __gmpn_bc_set_str mpn_bc_set_str_basecase
diff --git a/gmp/tune/set_strp.c b/gmp/tune/set_strp.c
index 5520f28696..701ab2bf2b 100644
--- a/gmp/tune/set_strp.c
+++ b/gmp/tune/set_strp.c
@@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define TUNE_PROGRAM_BUILD  1   /* for gmp-impl.h */
 
diff --git a/gmp/tune/set_strs.c b/gmp/tune/set_strs.c
index 75b6f39b4d..d8edc7dfde 100644
--- a/gmp/tune/set_strs.c
+++ b/gmp/tune/set_strs.c
@@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #define __gmpn_set_str mpn_set_str_subquad
 #define __gmpn_bc_set_str mpn_bc_set_str_subquad
diff --git a/gmp/tune/sparcv9.asm b/gmp/tune/sparcv9.asm
index f0981c70fe..b951ff3de2 100644
--- a/gmp/tune/sparcv9.asm
+++ b/gmp/tune/sparcv9.asm
@@ -1,32 +1,21 @@
 dnl  Sparc v9 32-bit time stamp counter access routine.
 
 dnl  Copyright 2000, 2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
diff --git a/gmp/tune/speed-ext.c b/gmp/tune/speed-ext.c
index e7fb8b9f60..04760a5eab 100644
--- a/gmp/tune/speed-ext.c
+++ b/gmp/tune/speed-ext.c
@@ -5,28 +5,17 @@ Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 /* The extension here is three versions of an mpn arithmetic mean.  These
@@ -68,9 +57,9 @@ see https://www.gnu.org/licenses/.  */
 
 
 #define SPEED_EXTRA_PROTOS                                              \
-  double speed_mean_calls (struct speed_params *s);			\
-  double speed_mean_open  (struct speed_params *s);			\
-  double speed_mean_open2 (struct speed_params *s);
+  double speed_mean_calls __GMP_PROTO ((struct speed_params *s));       \
+  double speed_mean_open  __GMP_PROTO ((struct speed_params *s));       \
+  double speed_mean_open2 __GMP_PROTO ((struct speed_params *s));
 
 #define SPEED_EXTRA_ROUTINES            \
   { "mean_calls",  speed_mean_calls  }, \
@@ -93,8 +82,8 @@ mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
   ASSERT (size >= 1);
 
   c = mpn_add_n (wp, xp, yp, size);
-  ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1);
-  wp[size-1] |= (c << (GMP_LIMB_BITS-1));
+  ret = mpn_rshift (wp, wp, size, 1) >> (BITS_PER_MP_LIMB-1);
+  wp[size-1] |= (c << (BITS_PER_MP_LIMB-1));
   return ret;
 }
 
@@ -118,7 +107,7 @@ mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
   c = (wprev < x);
   ret = (wprev & 1);
 
-#define RSHIFT(hi,lo)   (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1)))
+#define RSHIFT(hi,lo)   (((lo) >> 1) | ((hi) << (BITS_PER_MP_LIMB-1)))
 
   for (i = 1; i < size; i++)
     {
diff --git a/gmp/tune/speed.c b/gmp/tune/speed.c
index 12d53bcaa3..2bb7f7d933 100644
--- a/gmp/tune/speed.c
+++ b/gmp/tune/speed.c
@@ -1,32 +1,21 @@
 /* Speed measuring program.
 
-Copyright 1999-2003, 2005, 2006, 2008-2012 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 /* Usage message is in the code below, run with no arguments to print it.
    See README for interesting applications.
@@ -100,10 +89,10 @@ SPEED_EXTRA_PROTOS2
   } while (0)
 
 
-#if GMP_LIMB_BITS == 32
+#if BITS_PER_MP_LIMB == 32
 #define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
 #endif
-#if GMP_LIMB_BITS == 64
+#if BITS_PER_MP_LIMB == 64
 #define GMP_NUMB_0xAA  (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
 #endif
 
@@ -163,15 +152,8 @@ const struct routine_t {
   { "mpn_add_n",         speed_mpn_add_n,     FLAG_R_OPTIONAL },
   { "mpn_sub_n",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },
 
-  { "mpn_add_err1_n",    speed_mpn_add_err1_n    },
-  { "mpn_add_err2_n",    speed_mpn_add_err2_n    },
-  { "mpn_add_err3_n",    speed_mpn_add_err3_n    },
-  { "mpn_sub_err1_n",    speed_mpn_sub_err1_n    },
-  { "mpn_sub_err2_n",    speed_mpn_sub_err2_n    },
-  { "mpn_sub_err3_n",    speed_mpn_sub_err3_n    },
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-  { "mpn_add_n_sub_n",      speed_mpn_add_n_sub_n,     FLAG_R_OPTIONAL },
+#if HAVE_NATIVE_mpn_addsub_n
+  { "mpn_addsub_n",      speed_mpn_addsub_n,     FLAG_R_OPTIONAL },
 #endif
 
   { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
@@ -208,12 +190,6 @@ const struct routine_t {
 #if HAVE_NATIVE_mpn_mul_4
   { "mpn_mul_4",         speed_mpn_mul_4,     FLAG_R_OPTIONAL },
 #endif
-#if HAVE_NATIVE_mpn_mul_5
-  { "mpn_mul_5",         speed_mpn_mul_5,     FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_mul_6
-  { "mpn_mul_6",         speed_mpn_mul_6,     FLAG_R_OPTIONAL },
-#endif
 
   { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
   { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
@@ -229,13 +205,6 @@ const struct routine_t {
   { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
   { "mpn_preinv_mod_1",  speed_mpn_preinv_mod_1, FLAG_R },
 
-  { "mpn_mod_1_1",       speed_mpn_mod_1_1,       FLAG_R },
-  { "mpn_mod_1_1_1",     speed_mpn_mod_1_1_1,     FLAG_R },
-  { "mpn_mod_1_1_2",     speed_mpn_mod_1_1_2,     FLAG_R },
-  { "mpn_mod_1s_2",      speed_mpn_mod_1_2,       FLAG_R },
-  { "mpn_mod_1s_3",      speed_mpn_mod_1_3,       FLAG_R },
-  { "mpn_mod_1s_4",      speed_mpn_mod_1_4,       FLAG_R },
-
   { "mpn_divrem_1_div",  speed_mpn_divrem_1_div,  FLAG_R },
   { "mpn_divrem_1_inv",  speed_mpn_divrem_1_inv,  FLAG_R },
   { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
@@ -247,19 +216,9 @@ const struct routine_t {
   { "mpn_divrem_2_div",  speed_mpn_divrem_2_div,    },
   { "mpn_divrem_2_inv",  speed_mpn_divrem_2_inv,    },
 
-  { "mpn_div_qr_1n_pi1", speed_mpn_div_qr_1n_pi1, FLAG_R  },
-  { "mpn_div_qr_1n_pi1_1",speed_mpn_div_qr_1n_pi1_1, FLAG_R  },
-  { "mpn_div_qr_1n_pi1_2",speed_mpn_div_qr_1n_pi1_2, FLAG_R  },
-  { "mpn_div_qr_1",      speed_mpn_div_qr_1,      FLAG_R },
-
-  { "mpn_div_qr_2n",     speed_mpn_div_qr_2n,       },
-  { "mpn_div_qr_2u",     speed_mpn_div_qr_2u,       },
-
   { "mpn_divexact_1",    speed_mpn_divexact_1,    FLAG_R },
   { "mpn_divexact_by3",  speed_mpn_divexact_by3          },
 
-  { "mpn_bdiv_q_1",      speed_mpn_bdiv_q_1,      FLAG_R },
-  { "mpn_pi1_bdiv_q_1",  speed_mpn_pi1_bdiv_q_1,  FLAG_R_OPTIONAL },
   { "mpn_bdiv_dbm1c",    speed_mpn_bdiv_dbm1c,    FLAG_R_OPTIONAL },
 
 #if HAVE_NATIVE_mpn_modexact_1_odd
@@ -271,8 +230,17 @@ const struct routine_t {
   { "mpn_mod_34lsub1",   speed_mpn_mod_34lsub1 },
 #endif
 
+  { "mpn_dc_tdiv_qr",       speed_mpn_dc_tdiv_qr       },
+  { "mpn_dc_divrem_n",      speed_mpn_dc_divrem_n      },
+  { "mpn_dc_divrem_sb",     speed_mpn_dc_divrem_sb     },
+  { "mpn_dc_divrem_sb_div", speed_mpn_dc_divrem_sb_div },
+  { "mpn_dc_divrem_sb_inv", speed_mpn_dc_divrem_sb_inv },
+
+  { "mpn_sb_divrem_m3",     speed_mpn_sb_divrem_m3     },
+  { "mpn_sb_divrem_m3_div", speed_mpn_sb_divrem_m3_div },
+  { "mpn_sb_divrem_m3_inv", speed_mpn_sb_divrem_m3_inv },
+
   { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
-  { "mpn_lshiftc",       speed_mpn_lshiftc, FLAG_R   },
   { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },
 
   { "mpn_and_n",         speed_mpn_and_n,  FLAG_R_OPTIONAL },
@@ -283,7 +251,7 @@ const struct routine_t {
   { "mpn_nior_n",        speed_mpn_nior_n, FLAG_R_OPTIONAL },
   { "mpn_xor_n",         speed_mpn_xor_n,  FLAG_R_OPTIONAL },
   { "mpn_xnor_n",        speed_mpn_xnor_n, FLAG_R_OPTIONAL },
-  { "mpn_com",           speed_mpn_com              },
+  { "mpn_com_n",         speed_mpn_com_n            },
 
   { "mpn_popcount",      speed_mpn_popcount         },
   { "mpn_hamdist",       speed_mpn_hamdist          },
@@ -292,17 +260,16 @@ const struct routine_t {
 
   { "mpn_hgcd",          speed_mpn_hgcd             },
   { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
-  { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
-  { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },
-
-  { "mpn_hgcd_reduce",   speed_mpn_hgcd_reduce      },
-  { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1    },
-  { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2    },
 
   { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
   { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
 
   { "mpn_gcd",           speed_mpn_gcd                    },
+#if 0
+  { "mpn_gcd_binary",    speed_mpn_gcd_binary             },
+  { "mpn_gcd_accel",     speed_mpn_gcd_accel              },
+  { "find_a",            speed_find_a,        FLAG_NODATA },
+#endif
 
   { "mpn_gcdext",            speed_mpn_gcdext            },
   { "mpn_gcdext_single",     speed_mpn_gcdext_single     },
@@ -317,7 +284,6 @@ const struct routine_t {
   { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1    },
   { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2    },
   { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },
-  { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4    },
 
   { "mpn_mul",           speed_mpn_mul,         FLAG_R_OPTIONAL },
   { "mpn_mul_basecase",  speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
@@ -325,74 +291,27 @@ const struct routine_t {
 #if HAVE_NATIVE_mpn_sqr_diagonal
   { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
 #endif
-#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
-  { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 },
-#endif
 
   { "mpn_mul_n",         speed_mpn_mul_n            },
-  { "mpn_sqr",           speed_mpn_sqr              },
-
-  { "mpn_toom2_sqr",     speed_mpn_toom2_sqr        },
-  { "mpn_toom3_sqr",     speed_mpn_toom3_sqr        },
-  { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
-  { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
-  { "mpn_toom8_sqr",     speed_mpn_toom8_sqr        },
-  { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
-  { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
-  { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
-  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
-  { "mpn_toom8h_mul",    speed_mpn_toom8h_mul       },
-  { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
-  { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
-  { "mpn_toom43_mul",    speed_mpn_toom43_mul       },
-  { "mpn_toom63_mul",    speed_mpn_toom63_mul       },
-  { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
-  { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
-#if WANT_OLD_FFT_FULL
+  { "mpn_sqr",           speed_mpn_sqr_n            },
+
+  { "mpn_kara_mul_n",    speed_mpn_kara_mul_n       },
+  { "mpn_kara_sqr_n",    speed_mpn_kara_sqr_n       },
+  { "mpn_toom3_mul_n",   speed_mpn_toom3_mul_n      },
+  { "mpn_toom3_sqr_n",   speed_mpn_toom3_sqr_n      },
   { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
   { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
-#endif
+
   { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
   { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
 
-  { "mpn_mullo_n",        speed_mpn_mullo_n         },
-  { "mpn_mullo_basecase", speed_mpn_mullo_basecase  },
-
-  { "mpn_mulmid_basecase",  speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },
-  { "mpn_toom42_mulmid",    speed_mpn_toom42_mulmid },
-  { "mpn_mulmid_n",         speed_mpn_mulmid_n },
-  { "mpn_mulmid",           speed_mpn_mulmid, FLAG_R_OPTIONAL },
-
-  { "mpn_bc_mulmod_bnm1",      speed_mpn_bc_mulmod_bnm1      },
-  { "mpn_mulmod_bnm1",         speed_mpn_mulmod_bnm1         },
-  { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
-  { "mpn_sqrmod_bnm1",         speed_mpn_sqrmod_bnm1         },
-
-  { "mpn_invert",              speed_mpn_invert              },
-  { "mpn_invertappr",          speed_mpn_invertappr          },
-  { "mpn_ni_invertappr",       speed_mpn_ni_invertappr       },
-  { "mpn_binvert",             speed_mpn_binvert             },
-  { "mpn_sec_invert",          speed_mpn_sec_invert          },
-
-  { "mpn_sbpi1_div_qr",        speed_mpn_sbpi1_div_qr,    FLAG_R_OPTIONAL},
-  { "mpn_dcpi1_div_qr",        speed_mpn_dcpi1_div_qr,    FLAG_R_OPTIONAL},
-  { "mpn_mu_div_qr",           speed_mpn_mu_div_qr,       FLAG_R_OPTIONAL},
-  { "mpn_mupi_div_qr",         speed_mpn_mupi_div_qr,     FLAG_R_OPTIONAL},
-  { "mpn_sbpi1_divappr_q",     speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL},
-  { "mpn_dcpi1_divappr_q",     speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL},
-
-  { "mpn_sbpi1_bdiv_qr",       speed_mpn_sbpi1_bdiv_qr       },
-  { "mpn_dcpi1_bdiv_qr",       speed_mpn_dcpi1_bdiv_qr       },
-  { "mpn_sbpi1_bdiv_q",        speed_mpn_sbpi1_bdiv_q        },
-  { "mpn_dcpi1_bdiv_q",        speed_mpn_dcpi1_bdiv_q        },
-
-  { "mpn_broot",               speed_mpn_broot,    FLAG_R },
-  { "mpn_broot_invm1",         speed_mpn_broot_invm1, FLAG_R },
-  { "mpn_brootinv",            speed_mpn_brootinv, FLAG_R },
-
-  { "mpn_get_str",          speed_mpn_get_str,     FLAG_R_OPTIONAL },
-  { "mpn_set_str",          speed_mpn_set_str,     FLAG_R_OPTIONAL },
-  { "mpn_set_str_basecase", speed_mpn_bc_set_str,  FLAG_R_OPTIONAL },
+  { "mpn_mullow_n",      speed_mpn_mullow_n         },
+  { "mpn_mullow_basecase", speed_mpn_mullow_basecase},
+
+  { "mpn_get_str",       speed_mpn_get_str,  FLAG_R_OPTIONAL },
+
+  { "mpn_set_str",         speed_mpn_set_str,     FLAG_R_OPTIONAL },
+  { "mpn_set_str_basecase",speed_mpn_bc_set_str,  FLAG_R_OPTIONAL },
 
   { "mpn_sqrtrem",       speed_mpn_sqrtrem          },
   { "mpn_rootrem",       speed_mpn_rootrem, FLAG_R  },
@@ -405,18 +324,14 @@ const struct routine_t {
 
   { "mpz_add",           speed_mpz_add              },
   { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
-  { "mpz_bin_ui",        speed_mpz_bin_ui,   FLAG_NODATA | FLAG_R_OPTIONAL },
   { "mpz_fac_ui",        speed_mpz_fac_ui,   FLAG_NODATA   },
   { "mpz_powm",          speed_mpz_powm             },
   { "mpz_powm_mod",      speed_mpz_powm_mod         },
   { "mpz_powm_redc",     speed_mpz_powm_redc        },
-  { "mpz_powm_sec",      speed_mpz_powm_sec        },
   { "mpz_powm_ui",       speed_mpz_powm_ui,  FLAG_R_OPTIONAL },
 
   { "mpz_mod",           speed_mpz_mod              },
   { "mpn_redc_1",        speed_mpn_redc_1           },
-  { "mpn_redc_2",        speed_mpn_redc_2           },
-  { "mpn_redc_n",        speed_mpn_redc_n           },
 
   { "MPN_COPY",          speed_MPN_COPY             },
   { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
@@ -428,71 +343,19 @@ const struct routine_t {
 #if HAVE_NATIVE_mpn_copyd
   { "mpn_copyd",         speed_mpn_copyd            },
 #endif
-  { "mpn_sec_tabselect", speed_mpn_sec_tabselect, FLAG_R_OPTIONAL },
-#if HAVE_NATIVE_mpn_addlsh1_n == 1
-  { "mpn_addlsh1_n",     speed_mpn_addlsh1_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_sublsh1_n == 1
-  { "mpn_sublsh1_n",     speed_mpn_sublsh1_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_addlsh1_n_ip1
-  { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1    },
-#endif
-#if HAVE_NATIVE_mpn_addlsh1_n_ip2
-  { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2    },
-#endif
-#if HAVE_NATIVE_mpn_sublsh1_n_ip1
-  { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1    },
-#endif
-#if HAVE_NATIVE_mpn_rsblsh1_n == 1
-  { "mpn_rsblsh1_n",     speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_addlsh2_n == 1
-  { "mpn_addlsh2_n",     speed_mpn_addlsh2_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_sublsh2_n == 1
-  { "mpn_sublsh2_n",     speed_mpn_sublsh2_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_addlsh2_n_ip1
-  { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1    },
+#if HAVE_NATIVE_mpn_addlsh1_n
+  { "mpn_addlsh1_n",     speed_mpn_addlsh1_n        },
 #endif
-#if HAVE_NATIVE_mpn_addlsh2_n_ip2
-  { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2    },
-#endif
-#if HAVE_NATIVE_mpn_sublsh2_n_ip1
-  { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1    },
-#endif
-#if HAVE_NATIVE_mpn_rsblsh2_n == 1
-  { "mpn_rsblsh2_n",     speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_addlsh_n
-  { "mpn_addlsh_n",     speed_mpn_addlsh_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_sublsh_n
-  { "mpn_sublsh_n",     speed_mpn_sublsh_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_addlsh_n_ip1
-  { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1    },
-#endif
-#if HAVE_NATIVE_mpn_addlsh_n_ip2
-  { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2    },
-#endif
-#if HAVE_NATIVE_mpn_sublsh_n_ip1
-  { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1    },
-#endif
-#if HAVE_NATIVE_mpn_rsblsh_n
-  { "mpn_rsblsh_n",     speed_mpn_rsblsh_n, FLAG_R_OPTIONAL },
+#if HAVE_NATIVE_mpn_sublsh1_n
+  { "mpn_sublsh1_n",     speed_mpn_sublsh1_n        },
 #endif
 #if HAVE_NATIVE_mpn_rsh1add_n
-  { "mpn_rsh1add_n",     speed_mpn_rsh1add_n, FLAG_R_OPTIONAL },
+  { "mpn_rsh1add_n",     speed_mpn_rsh1add_n        },
 #endif
 #if HAVE_NATIVE_mpn_rsh1sub_n
-  { "mpn_rsh1sub_n",     speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL },
+  { "mpn_rsh1sub_n",     speed_mpn_rsh1sub_n        },
 #endif
 
-  { "mpn_cnd_add_n",     speed_mpn_cnd_add_n, FLAG_R_OPTIONAL },
-  { "mpn_cnd_sub_n",     speed_mpn_cnd_sub_n, FLAG_R_OPTIONAL },
-
   { "MPN_ZERO",          speed_MPN_ZERO             },
 
   { "binvert_limb",       speed_binvert_limb,       FLAG_NODATA },
@@ -522,6 +385,8 @@ const struct routine_t {
   { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
 
   { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
+  { "udiv_qrnnd_preinv1",     speed_udiv_qrnnd_preinv1,     FLAG_R_OPTIONAL },
+  { "udiv_qrnnd_preinv2",     speed_udiv_qrnnd_preinv2,     FLAG_R_OPTIONAL },
   { "udiv_qrnnd_c",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },
 #if HAVE_NATIVE_mpn_udiv_qrnnd
   { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
@@ -884,7 +749,7 @@ run_gnuplot (int argc, char *argv[])
   fprintf (fp, "set key left\n");
 
-  /* designed to make it possible to see crossovers easily */
+  /* lines show crossovers clearly; newer gnuplot rejects "set data style" */
   fprintf (fp, "set style data lines\n");
 
   fprintf (fp, "plot ");
   for (i = 0; i < num_choices; i++)
@@ -914,7 +779,7 @@ run_gnuplot (int argc, char *argv[])
 /* Return a limb with n many one bits (starting from the least significant) */
 
 #define LIMB_ONES(n) \
-  ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX      \
+  ((n) == BITS_PER_MP_LIMB ? MP_LIMB_T_MAX      \
     : (n) == 0 ? CNST_LIMB(0)                   \
     : (CNST_LIMB(1) << (n)) - 1)
 
@@ -941,7 +806,7 @@ r_string (const char *s)
       {
         if (siz > 1 || siz < -1)
           printf ("Warning, r parameter %s truncated to %d bits\n",
-                  s_orig, GMP_LIMB_BITS);
+                  s_orig, BITS_PER_MP_LIMB);
         return l;
       }
   }
@@ -954,10 +819,10 @@ r_string (const char *s)
   if (strcmp (s, "bits") == 0)
     {
       mp_limb_t  l;
-      if (n > GMP_LIMB_BITS)
+      if (n > BITS_PER_MP_LIMB)
         {
           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
-                   n, GMP_LIMB_BITS);
+                   n, BITS_PER_MP_LIMB);
           exit (1);
         }
       mpn_random (&l, 1);
@@ -965,10 +830,10 @@ r_string (const char *s)
     }
   else  if (strcmp (s, "ones") == 0)
     {
-      if (n > GMP_LIMB_BITS)
+      if (n > BITS_PER_MP_LIMB)
         {
           fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
-                   n, GMP_LIMB_BITS);
+                   n, BITS_PER_MP_LIMB);
           exit (1);
         }
       return LIMB_ONES (n);
diff --git a/gmp/tune/speed.h b/gmp/tune/speed.h
index d9474adb35..ca021409ef 100644
--- a/gmp/tune/speed.h
+++ b/gmp/tune/speed.h
@@ -1,32 +1,22 @@
 /* Header for speed and threshold things.
 
-Copyright 1999-2003, 2005, 2006, 2008-2013 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006 Free Software Foundation,
+Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 #ifndef __SPEED_H__
 #define __SPEED_H__
@@ -41,9 +31,9 @@ see https://www.gnu.org/licenses/.  */
   } while (0)
 
 /* A mask of the least significant n bits.  Note 1<<32 doesn't give zero on
-   x86 family CPUs, hence the separate case for GMP_LIMB_BITS. */
+   x86 family CPUs, hence the separate case for BITS_PER_MP_LIMB. */
 #define MP_LIMB_T_LOWBITMASK(n)	\
-  ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX : ((mp_limb_t) 1 << (n)) - 1)
+  ((n) == BITS_PER_MP_LIMB ? MP_LIMB_T_MAX : ((mp_limb_t) 1 << (n)) - 1)
 
 
 /* align must be a power of 2 here, usually CACHE_LINE_SIZE is a good choice */
@@ -66,7 +56,7 @@ see https://www.gnu.org/licenses/.  */
 */
 #define CACHE_LINE_SIZE   64 /* bytes */
 
-#define SPEED_TMP_ALLOC_ADJUST_MASK  (CACHE_LINE_SIZE/GMP_LIMB_BYTES - 1)
+#define SPEED_TMP_ALLOC_ADJUST_MASK  (CACHE_LINE_SIZE/BYTES_PER_MP_LIMB - 1)
 
 /* Set ptr to a TMP_ALLOC block of the given limbs, with the given limb
    alignment.  */
@@ -75,7 +65,7 @@ see https://www.gnu.org/licenses/.  */
     mp_ptr     __ptr;							\
     mp_size_t  __ptr_align, __ptr_add;					\
 									\
-    ASSERT ((CACHE_LINE_SIZE % GMP_LIMB_BYTES) == 0);		\
+    ASSERT ((CACHE_LINE_SIZE % BYTES_PER_MP_LIMB) == 0);		\
     __ptr = TMP_ALLOC_LIMBS ((limbs) + SPEED_TMP_ALLOC_ADJUST_MASK);	\
     __ptr_align = (__ptr - (mp_ptr) NULL);				\
     __ptr_add = ((align) - __ptr_align) & SPEED_TMP_ALLOC_ADJUST_MASK;	\
@@ -97,13 +87,13 @@ extern double  speed_unittime;
 extern double  speed_cycletime;
 extern int     speed_precision;
 extern char    speed_time_string[];
-void speed_time_init (void);
-void speed_cycletime_fail (const char *str);
-void speed_cycletime_init (void);
-void speed_cycletime_need_cycles (void);
-void speed_cycletime_need_seconds (void);
-void speed_starttime (void);
-double speed_endtime (void);
+void speed_time_init __GMP_PROTO ((void));
+void speed_cycletime_fail __GMP_PROTO ((const char *str));
+void speed_cycletime_init __GMP_PROTO ((void));
+void speed_cycletime_need_cycles __GMP_PROTO ((void));
+void speed_cycletime_need_seconds __GMP_PROTO ((void));
+void speed_starttime __GMP_PROTO ((void));
+double speed_endtime __GMP_PROTO ((void));
 
 
 struct speed_params {
@@ -127,304 +117,203 @@ struct speed_params {
   struct {
     mp_ptr    ptr;
     mp_size_t size;
-  } src[5], dst[4];
+  } src[2], dst[3];
 };
 
-typedef double (*speed_function_t) (struct speed_params *);
+typedef double (*speed_function_t) __GMP_PROTO ((struct speed_params *s));
 
-double speed_measure (speed_function_t fun, struct speed_params *);
+double speed_measure __GMP_PROTO ((speed_function_t fun, struct speed_params *s));
 
 /* Prototypes for speed measuring routines */
 
-double speed_back_to_back (struct speed_params *);
-double speed_count_leading_zeros (struct speed_params *);
-double speed_count_trailing_zeros (struct speed_params *);
-double speed_find_a (struct speed_params *);
-double speed_gmp_allocate_free (struct speed_params *);
-double speed_gmp_allocate_reallocate_free (struct speed_params *);
-double speed_invert_limb (struct speed_params *);
-double speed_malloc_free (struct speed_params *);
-double speed_malloc_realloc_free (struct speed_params *);
-double speed_memcpy (struct speed_params *);
-double speed_binvert_limb (struct speed_params *);
-double speed_binvert_limb_mul1 (struct speed_params *);
-double speed_binvert_limb_loop (struct speed_params *);
-double speed_binvert_limb_cond (struct speed_params *);
-double speed_binvert_limb_arith (struct speed_params *);
-
-double speed_mpf_init_clear (struct speed_params *);
-
-double speed_mpn_add_n (struct speed_params *);
-double speed_mpn_add_err1_n (struct speed_params *);
-double speed_mpn_add_err2_n (struct speed_params *);
-double speed_mpn_add_err3_n (struct speed_params *);
-double speed_mpn_addlsh_n (struct speed_params *);
-double speed_mpn_addlsh1_n (struct speed_params *);
-double speed_mpn_addlsh2_n (struct speed_params *);
-double speed_mpn_addlsh_n_ip1 (struct speed_params *);
-double speed_mpn_addlsh1_n_ip1 (struct speed_params *);
-double speed_mpn_addlsh2_n_ip1 (struct speed_params *);
-double speed_mpn_addlsh_n_ip2 (struct speed_params *);
-double speed_mpn_addlsh1_n_ip2 (struct speed_params *);
-double speed_mpn_addlsh2_n_ip2 (struct speed_params *);
-double speed_mpn_add_n_sub_n (struct speed_params *);
-double speed_mpn_and_n (struct speed_params *);
-double speed_mpn_andn_n (struct speed_params *);
-double speed_mpn_addmul_1 (struct speed_params *);
-double speed_mpn_addmul_2 (struct speed_params *);
-double speed_mpn_addmul_3 (struct speed_params *);
-double speed_mpn_addmul_4 (struct speed_params *);
-double speed_mpn_addmul_5 (struct speed_params *);
-double speed_mpn_addmul_6 (struct speed_params *);
-double speed_mpn_addmul_7 (struct speed_params *);
-double speed_mpn_addmul_8 (struct speed_params *);
-double speed_mpn_cnd_add_n (struct speed_params *);
-double speed_mpn_cnd_sub_n (struct speed_params *);
-double speed_mpn_com (struct speed_params *);
-double speed_mpn_copyd (struct speed_params *);
-double speed_mpn_copyi (struct speed_params *);
-double speed_MPN_COPY (struct speed_params *);
-double speed_MPN_COPY_DECR (struct speed_params *);
-double speed_MPN_COPY_INCR (struct speed_params *);
-double speed_mpn_sec_tabselect (struct speed_params *);
-double speed_mpn_divexact_1 (struct speed_params *);
-double speed_mpn_divexact_by3 (struct speed_params *);
-double speed_mpn_bdiv_q_1 (struct speed_params *);
-double speed_mpn_pi1_bdiv_q_1 (struct speed_params *);
-double speed_mpn_bdiv_dbm1c (struct speed_params *);
-double speed_mpn_divrem_1 (struct speed_params *);
-double speed_mpn_divrem_1f (struct speed_params *);
-double speed_mpn_divrem_1c (struct speed_params *);
-double speed_mpn_divrem_1cf (struct speed_params *);
-double speed_mpn_divrem_1_div (struct speed_params *);
-double speed_mpn_divrem_1f_div (struct speed_params *);
-double speed_mpn_divrem_1_inv (struct speed_params *);
-double speed_mpn_divrem_1f_inv (struct speed_params *);
-double speed_mpn_divrem_2 (struct speed_params *);
-double speed_mpn_divrem_2_div (struct speed_params *);
-double speed_mpn_divrem_2_inv (struct speed_params *);
-double speed_mpn_div_qr_1n_pi1 (struct speed_params *);
-double speed_mpn_div_qr_1n_pi1_1 (struct speed_params *);
-double speed_mpn_div_qr_1n_pi1_2 (struct speed_params *);
-double speed_mpn_div_qr_1 (struct speed_params *);
-double speed_mpn_div_qr_2n (struct speed_params *);
-double speed_mpn_div_qr_2u (struct speed_params *);
-double speed_mpn_fib2_ui (struct speed_params *);
-double speed_mpn_matrix22_mul (struct speed_params *);
-double speed_mpn_hgcd (struct speed_params *);
-double speed_mpn_hgcd_lehmer (struct speed_params *);
-double speed_mpn_hgcd_appr (struct speed_params *);
-double speed_mpn_hgcd_appr_lehmer (struct speed_params *);
-double speed_mpn_hgcd_reduce (struct speed_params *);
-double speed_mpn_hgcd_reduce_1 (struct speed_params *);
-double speed_mpn_hgcd_reduce_2 (struct speed_params *);
-double speed_mpn_gcd (struct speed_params *);
-double speed_mpn_gcd_1 (struct speed_params *);
-double speed_mpn_gcd_1N (struct speed_params *);
-double speed_mpn_gcdext (struct speed_params *);
-double speed_mpn_gcdext_double (struct speed_params *);
-double speed_mpn_gcdext_one_double (struct speed_params *);
-double speed_mpn_gcdext_one_single (struct speed_params *);
-double speed_mpn_gcdext_single (struct speed_params *);
-double speed_mpn_get_str (struct speed_params *);
-double speed_mpn_hamdist (struct speed_params *);
-double speed_mpn_ior_n (struct speed_params *);
-double speed_mpn_iorn_n (struct speed_params *);
-double speed_mpn_jacobi_base (struct speed_params *);
-double speed_mpn_jacobi_base_1 (struct speed_params *);
-double speed_mpn_jacobi_base_2 (struct speed_params *);
-double speed_mpn_jacobi_base_3 (struct speed_params *);
-double speed_mpn_jacobi_base_4 (struct speed_params *);
-double speed_mpn_lshift (struct speed_params *);
-double speed_mpn_lshiftc (struct speed_params *);
-double speed_mpn_mod_1 (struct speed_params *);
-double speed_mpn_mod_1c (struct speed_params *);
-double speed_mpn_mod_1_div (struct speed_params *);
-double speed_mpn_mod_1_inv (struct speed_params *);
-double speed_mpn_mod_1_1 (struct speed_params *);
-double speed_mpn_mod_1_1_1 (struct speed_params *);
-double speed_mpn_mod_1_1_2 (struct speed_params *);
-double speed_mpn_mod_1_2 (struct speed_params *);
-double speed_mpn_mod_1_3 (struct speed_params *);
-double speed_mpn_mod_1_4 (struct speed_params *);
-double speed_mpn_mod_34lsub1 (struct speed_params *);
-double speed_mpn_modexact_1_odd (struct speed_params *);
-double speed_mpn_modexact_1c_odd (struct speed_params *);
-double speed_mpn_mul_1 (struct speed_params *);
-double speed_mpn_mul_1_inplace (struct speed_params *);
-double speed_mpn_mul_2 (struct speed_params *);
-double speed_mpn_mul_3 (struct speed_params *);
-double speed_mpn_mul_4 (struct speed_params *);
-double speed_mpn_mul_5 (struct speed_params *);
-double speed_mpn_mul_6 (struct speed_params *);
-double speed_mpn_mul (struct speed_params *);
-double speed_mpn_mul_basecase (struct speed_params *);
-double speed_mpn_mulmid (struct speed_params *);
-double speed_mpn_mulmid_basecase (struct speed_params *);
-double speed_mpn_mul_fft (struct speed_params *);
-double speed_mpn_mul_fft_sqr (struct speed_params *);
-double speed_mpn_fft_mul (struct speed_params *);
-double speed_mpn_fft_sqr (struct speed_params *);
-#if WANT_OLD_FFT_FULL
-double speed_mpn_mul_fft_full (struct speed_params *);
-double speed_mpn_mul_fft_full_sqr (struct speed_params *);
-#endif
-double speed_mpn_nussbaumer_mul (struct speed_params *);
-double speed_mpn_nussbaumer_mul_sqr (struct speed_params *);
-double speed_mpn_mul_n (struct speed_params *);
-double speed_mpn_mul_n_sqr (struct speed_params *);
-double speed_mpn_mulmid_n (struct speed_params *);
-double speed_mpn_mullo_n (struct speed_params *);
-double speed_mpn_mullo_basecase (struct speed_params *);
-double speed_mpn_nand_n (struct speed_params *);
-double speed_mpn_nior_n (struct speed_params *);
-double speed_mpn_popcount (struct speed_params *);
-double speed_mpn_preinv_divrem_1 (struct speed_params *);
-double speed_mpn_preinv_divrem_1f (struct speed_params *);
-double speed_mpn_preinv_mod_1 (struct speed_params *);
-double speed_mpn_sbpi1_div_qr (struct speed_params *);
-double speed_mpn_dcpi1_div_qr (struct speed_params *);
-double speed_mpn_sbpi1_divappr_q (struct speed_params *);
-double speed_mpn_dcpi1_divappr_q (struct speed_params *);
-double speed_mpn_mu_div_qr (struct speed_params *);
-double speed_mpn_mu_divappr_q (struct speed_params *);
-double speed_mpn_mupi_div_qr (struct speed_params *);
-double speed_mpn_mu_div_q (struct speed_params *);
-double speed_mpn_sbpi1_bdiv_qr (struct speed_params *);
-double speed_mpn_dcpi1_bdiv_qr (struct speed_params *);
-double speed_mpn_sbpi1_bdiv_q (struct speed_params *);
-double speed_mpn_dcpi1_bdiv_q (struct speed_params *);
-double speed_mpn_mu_bdiv_q (struct speed_params *);
-double speed_mpn_mu_bdiv_qr (struct speed_params *);
-double speed_mpn_broot (struct speed_params *);
-double speed_mpn_broot_invm1 (struct speed_params *);
-double speed_mpn_brootinv (struct speed_params *);
-double speed_mpn_invert (struct speed_params *);
-double speed_mpn_invertappr (struct speed_params *);
-double speed_mpn_ni_invertappr (struct speed_params *);
-double speed_mpn_sec_invert (struct speed_params *s);
-double speed_mpn_binvert (struct speed_params *);
-double speed_mpn_redc_1 (struct speed_params *);
-double speed_mpn_redc_2 (struct speed_params *);
-double speed_mpn_redc_n (struct speed_params *);
-double speed_mpn_rsblsh_n (struct speed_params *);
-double speed_mpn_rsblsh1_n (struct speed_params *);
-double speed_mpn_rsblsh2_n (struct speed_params *);
-double speed_mpn_rsh1add_n (struct speed_params *);
-double speed_mpn_rsh1sub_n (struct speed_params *);
-double speed_mpn_rshift (struct speed_params *);
-double speed_mpn_sb_divrem_m3 (struct speed_params *);
-double speed_mpn_sb_divrem_m3_div (struct speed_params *);
-double speed_mpn_sb_divrem_m3_inv (struct speed_params *);
-double speed_mpn_set_str (struct speed_params *);
-double speed_mpn_bc_set_str (struct speed_params *);
-double speed_mpn_dc_set_str (struct speed_params *);
-double speed_mpn_set_str_pre (struct speed_params *);
-double speed_mpn_sqr_basecase (struct speed_params *);
-double speed_mpn_sqr_diag_addlsh1 (struct speed_params *);
-double speed_mpn_sqr_diagonal (struct speed_params *);
-double speed_mpn_sqr (struct speed_params *);
-double speed_mpn_sqrtrem (struct speed_params *);
-double speed_mpn_rootrem (struct speed_params *);
-double speed_mpn_sub_n (struct speed_params *);
-double speed_mpn_sub_err1_n (struct speed_params *);
-double speed_mpn_sub_err2_n (struct speed_params *);
-double speed_mpn_sub_err3_n (struct speed_params *);
-double speed_mpn_sublsh_n (struct speed_params *);
-double speed_mpn_sublsh1_n (struct speed_params *);
-double speed_mpn_sublsh2_n (struct speed_params *);
-double speed_mpn_sublsh_n_ip1 (struct speed_params *);
-double speed_mpn_sublsh1_n_ip1 (struct speed_params *);
-double speed_mpn_sublsh2_n_ip1 (struct speed_params *);
-double speed_mpn_submul_1 (struct speed_params *);
-double speed_mpn_toom2_sqr (struct speed_params *);
-double speed_mpn_toom3_sqr (struct speed_params *);
-double speed_mpn_toom4_sqr (struct speed_params *);
-double speed_mpn_toom6_sqr (struct speed_params *);
-double speed_mpn_toom8_sqr (struct speed_params *);
-double speed_mpn_toom22_mul (struct speed_params *);
-double speed_mpn_toom33_mul (struct speed_params *);
-double speed_mpn_toom44_mul (struct speed_params *);
-double speed_mpn_toom6h_mul (struct speed_params *);
-double speed_mpn_toom8h_mul (struct speed_params *);
-double speed_mpn_toom32_mul (struct speed_params *);
-double speed_mpn_toom42_mul (struct speed_params *);
-double speed_mpn_toom43_mul (struct speed_params *);
-double speed_mpn_toom63_mul (struct speed_params *);
-double speed_mpn_toom32_for_toom43_mul (struct speed_params *);
-double speed_mpn_toom43_for_toom32_mul (struct speed_params *);
-double speed_mpn_toom32_for_toom53_mul (struct speed_params *);
-double speed_mpn_toom53_for_toom32_mul (struct speed_params *);
-double speed_mpn_toom42_for_toom53_mul (struct speed_params *);
-double speed_mpn_toom53_for_toom42_mul (struct speed_params *);
-double speed_mpn_toom43_for_toom54_mul (struct speed_params *);
-double speed_mpn_toom54_for_toom43_mul (struct speed_params *);
-double speed_mpn_toom42_mulmid (struct speed_params *);
-double speed_mpn_mulmod_bnm1 (struct speed_params *);
-double speed_mpn_bc_mulmod_bnm1 (struct speed_params *);
-double speed_mpn_mulmod_bnm1_rounded (struct speed_params *);
-double speed_mpn_sqrmod_bnm1 (struct speed_params *);
-double speed_mpn_udiv_qrnnd (struct speed_params *);
-double speed_mpn_udiv_qrnnd_r (struct speed_params *);
-double speed_mpn_umul_ppmm (struct speed_params *);
-double speed_mpn_umul_ppmm_r (struct speed_params *);
-double speed_mpn_xnor_n (struct speed_params *);
-double speed_mpn_xor_n (struct speed_params *);
-double speed_MPN_ZERO (struct speed_params *);
-
-double speed_mpq_init_clear (struct speed_params *);
-
-double speed_mpz_add (struct speed_params *);
-double speed_mpz_bin_uiui (struct speed_params *);
-double speed_mpz_bin_ui (struct speed_params *);
-double speed_mpz_fac_ui (struct speed_params *);
-double speed_mpz_fib_ui (struct speed_params *);
-double speed_mpz_fib2_ui (struct speed_params *);
-double speed_mpz_init_clear (struct speed_params *);
-double speed_mpz_init_realloc_clear (struct speed_params *);
-double speed_mpz_jacobi (struct speed_params *);
-double speed_mpz_lucnum_ui (struct speed_params *);
-double speed_mpz_lucnum2_ui (struct speed_params *);
-double speed_mpz_mod (struct speed_params *);
-double speed_mpz_powm (struct speed_params *);
-double speed_mpz_powm_mod (struct speed_params *);
-double speed_mpz_powm_redc (struct speed_params *);
-double speed_mpz_powm_sec (struct speed_params *);
-double speed_mpz_powm_ui (struct speed_params *);
-double speed_mpz_urandomb (struct speed_params *);
-
-double speed_gmp_randseed (struct speed_params *);
-double speed_gmp_randseed_ui (struct speed_params *);
-
-double speed_noop (struct speed_params *);
-double speed_noop_wxs (struct speed_params *);
-double speed_noop_wxys (struct speed_params *);
-
-double speed_operator_div (struct speed_params *);
-double speed_operator_mod (struct speed_params *);
-
-double speed_udiv_qrnnd (struct speed_params *);
-double speed_udiv_qrnnd_preinv1 (struct speed_params *);
-double speed_udiv_qrnnd_preinv2 (struct speed_params *);
-double speed_udiv_qrnnd_preinv3 (struct speed_params *);
-double speed_udiv_qrnnd_c (struct speed_params *);
-double speed_umul_ppmm (struct speed_params *);
+double speed_back_to_back __GMP_PROTO ((struct speed_params *s));
+double speed_count_leading_zeros __GMP_PROTO ((struct speed_params *s));
+double speed_count_trailing_zeros __GMP_PROTO ((struct speed_params *s));
+double speed_find_a __GMP_PROTO ((struct speed_params *s));
+double speed_gmp_allocate_free __GMP_PROTO ((struct speed_params *s));
+double speed_gmp_allocate_reallocate_free __GMP_PROTO ((struct speed_params *s));
+double speed_invert_limb __GMP_PROTO ((struct speed_params *s));
+double speed_malloc_free __GMP_PROTO ((struct speed_params *s));
+double speed_malloc_realloc_free __GMP_PROTO ((struct speed_params *s));
+double speed_memcpy __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb_mul1 __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb_loop __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb_cond __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb_arith __GMP_PROTO ((struct speed_params *s));
+
+double speed_mpf_init_clear __GMP_PROTO ((struct speed_params *s));
+
+double speed_mpn_add_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addlsh1_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addsub_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_and_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_andn_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_4 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_5 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_6 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_7 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_8 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_com_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_copyd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_copyi __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dc_divrem_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dc_divrem_sb __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dc_divrem_sb_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dc_divrem_sb_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dc_tdiv_qr __GMP_PROTO ((struct speed_params *s));
+double speed_MPN_COPY __GMP_PROTO ((struct speed_params *s));
+double speed_MPN_COPY_DECR __GMP_PROTO ((struct speed_params *s));
+double speed_MPN_COPY_INCR __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divexact_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divexact_by3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_bdiv_dbm1c __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1f __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1c __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1cf __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1f_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1f_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_2_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_2_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_fib2_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_matrix22_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcd_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcd_1N __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcd_binary __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcd_accel __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext_double __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext_one_double __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext_one_single __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext_single __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_get_str __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hamdist __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_ior_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_iorn_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_jacobi_base __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_jacobi_base_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_jacobi_base_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_jacobi_base_3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_kara_mul_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_kara_sqr_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_lshift __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1c __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_34lsub1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_modexact_1_odd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_modexact_1c_odd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_1_inplace __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_4 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_basecase __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_fft __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_fft_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_fft_full __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_fft_full_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_n_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mullow_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mullow_basecase __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_nand_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_nior_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_popcount __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_preinv_divrem_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_preinv_divrem_1f __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_preinv_mod_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_redc_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rsh1add_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rsh1sub_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rshift __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sb_divrem_m3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sb_divrem_m3_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sb_divrem_m3_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_set_str __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_bc_set_str __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dc_set_str __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_set_str_pre __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqr_basecase __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqr_diagonal __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqr_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqrtrem __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rootrem __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sub_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sublsh1_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_submul_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom3_mul_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom3_sqr_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_udiv_qrnnd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_udiv_qrnnd_r __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_umul_ppmm __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_umul_ppmm_r __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_xnor_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_xor_n __GMP_PROTO ((struct speed_params *s));
+double speed_MPN_ZERO __GMP_PROTO ((struct speed_params *s));
+
+double speed_mpq_init_clear __GMP_PROTO ((struct speed_params *s));
+
+double speed_mpz_add __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_bin_uiui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_fac_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_fib_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_fib2_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_init_clear __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_init_realloc_clear __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_jacobi __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_lucnum_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_lucnum2_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_mod __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_powm __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_powm_mod __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_powm_redc __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_powm_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_urandomb __GMP_PROTO ((struct speed_params *s));
+
+double speed_gmp_randseed __GMP_PROTO ((struct speed_params *s));
+double speed_gmp_randseed_ui __GMP_PROTO ((struct speed_params *s));
+
+double speed_noop __GMP_PROTO ((struct speed_params *s));
+double speed_noop_wxs __GMP_PROTO ((struct speed_params *s));
+double speed_noop_wxys __GMP_PROTO ((struct speed_params *s));
+
+double speed_operator_div __GMP_PROTO ((struct speed_params *s));
+double speed_operator_mod __GMP_PROTO ((struct speed_params *s));
+
+double speed_udiv_qrnnd __GMP_PROTO ((struct speed_params *s));
+double speed_udiv_qrnnd_preinv1 __GMP_PROTO ((struct speed_params *s));
+double speed_udiv_qrnnd_preinv2 __GMP_PROTO ((struct speed_params *s));
+double speed_udiv_qrnnd_c __GMP_PROTO ((struct speed_params *s));
+double speed_umul_ppmm __GMP_PROTO ((struct speed_params *s));
 
 /* Prototypes for other routines */
 
 /* low 32-bits in p[0], high 32-bits in p[1] */
-void speed_cyclecounter (unsigned p[2]);
+void speed_cyclecounter __GMP_PROTO ((unsigned p[2]));
 
-void mftb_function (unsigned p[2]);
+void mftb_function __GMP_PROTO ((unsigned p[2]));
 
 /* In i386 gcc -fPIC, ebx is a fixed register and can't be declared a dummy
    output or a clobber for the cpuid, hence an explicit save and restore.  A
    clobber as such doesn't provoke an error unfortunately (gcc 3.0), so use
    the dummy output style in non-PIC, so there's an error if somehow -fPIC
-   is used without a -DPIC to tell us about it.  */
+   is used without a -DPIC to tell us about it.  */
 #if defined(__GNUC__) && ! defined (NO_ASM)	\
   && (defined (__i386__) || defined (__i486__))
-#if defined (PIC) || defined (__APPLE_CC__)
+#ifdef PIC
 #define speed_cyclecounter(p)						\
   do {									\
     int	 __speed_cyclecounter__save_ebx;				\
@@ -453,80 +342,71 @@ void mftb_function (unsigned p[2]);
 #endif
 #endif
 
-double speed_cyclecounter_diff (const unsigned [2], const unsigned [2]);
-int gettimeofday_microseconds_p (void);
-int getrusage_microseconds_p (void);
-int cycles_works_p (void);
-long clk_tck (void);
-double freq_measure (const char *, double (*)(void));
-
-int double_cmp_ptr (const double *, const double *);
-void pentium_wbinvd (void);
-typedef int (*qsort_function_t) (const void *, const void *);
-
-void noop (void);
-void noop_1 (mp_limb_t);
-void noop_wxs (mp_ptr, mp_srcptr, mp_size_t);
-void noop_wxys (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
-void mpn_cache_fill (mp_srcptr, mp_size_t);
-void mpn_cache_fill_dummy (mp_limb_t);
-void speed_cache_fill (struct speed_params *);
-void speed_operand_src (struct speed_params *, mp_ptr, mp_size_t);
-void speed_operand_dst (struct speed_params *, mp_ptr, mp_size_t);
+double speed_cyclecounter_diff __GMP_PROTO ((const unsigned [2], const unsigned [2]));
+int gettimeofday_microseconds_p __GMP_PROTO ((void));
+int getrusage_microseconds_p __GMP_PROTO ((void));
+int cycles_works_p __GMP_PROTO ((void));
+long clk_tck __GMP_PROTO ((void));
+double freq_measure __GMP_PROTO ((const char *, double (*)(void)));
+
+int double_cmp_ptr __GMP_PROTO ((const double *, const double *));
+void pentium_wbinvd __GMP_PROTO ((void));
+typedef int (*qsort_function_t) __GMP_PROTO ((const void *, const void *));
+
+void noop __GMP_PROTO ((void));
+void noop_1 __GMP_PROTO ((mp_limb_t));
+void noop_wxs __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+void noop_wxys __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+void mpn_cache_fill __GMP_PROTO ((mp_srcptr, mp_size_t));
+void mpn_cache_fill_dummy __GMP_PROTO ((mp_limb_t));
+void speed_cache_fill __GMP_PROTO ((struct speed_params *));
+void speed_operand_src __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t));
+void speed_operand_dst __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t));
 
 extern int  speed_option_addrs;
 extern int  speed_option_verbose;
-extern int  speed_option_cycles_broken;
-void speed_option_set (const char *);
-
-mp_limb_t mpn_div_qr_1n_pi1_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
-mp_limb_t mpn_div_qr_1n_pi1_2 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
-
-mp_limb_t mpn_divrem_1_div (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_divrem_1_inv (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_divrem_2_div (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
-mp_limb_t mpn_divrem_2_inv (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
+void speed_option_set __GMP_PROTO((const char *));
 
-int mpn_jacobi_base_1 (mp_limb_t, mp_limb_t, int);
-int mpn_jacobi_base_2 (mp_limb_t, mp_limb_t, int);
-int mpn_jacobi_base_3 (mp_limb_t, mp_limb_t, int);
-int mpn_jacobi_base_4 (mp_limb_t, mp_limb_t, int);
+mp_limb_t mpn_divrem_1_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_divrem_1_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_divrem_2_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
+mp_limb_t mpn_divrem_2_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
 
-mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t);
+int mpn_jacobi_base_1 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
+int mpn_jacobi_base_2 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
+int mpn_jacobi_base_3 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
 
-mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
-mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+mp_limb_t mpn_mod_1_div __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_mod_1_inv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
 
-void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
-void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
+mp_size_t mpn_gcd_binary
+  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcd_accel
+  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcdext_one_double
+  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcdext_one_single
+  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcdext_single
+  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcdext_double
+  __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
 
-mp_size_t mpn_gcdext_one_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
-mp_size_t mpn_gcdext_one_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
-mp_size_t mpn_gcdext_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
-mp_size_t mpn_gcdext_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
-mp_size_t mpn_hgcd_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
-mp_size_t mpn_hgcd_lehmer_itch (mp_size_t);
+mp_limb_t mpn_sb_divrem_mn_div __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+mp_limb_t mpn_sb_divrem_mn_inv __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
 
-mp_size_t mpn_hgcd_appr_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
-mp_size_t mpn_hgcd_appr_lehmer_itch (mp_size_t);
+mp_size_t mpn_set_str_basecase __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
 
-mp_size_t mpn_hgcd_reduce_1 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
-mp_size_t mpn_hgcd_reduce_1_itch (mp_size_t, mp_size_t);
+void mpn_toom3_mul_n_open __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+void mpn_toom3_sqr_n_open __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+void mpn_toom3_mul_n_mpn __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+void mpn_toom3_sqr_n_mpn __GMP_PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
 
-mp_size_t mpn_hgcd_reduce_2 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
-mp_size_t mpn_hgcd_reduce_2_itch (mp_size_t, mp_size_t);
+void mpz_powm_mod __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+void mpz_powm_redc __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
 
-mp_limb_t mpn_sb_divrem_mn_div (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
-mp_limb_t mpn_sb_divrem_mn_inv (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
-
-mp_size_t mpn_set_str_basecase (mp_ptr, const unsigned char *, size_t, int);
-void mpn_pre_set_str (mp_ptr, unsigned char *, size_t, powers_t *, mp_ptr);
-
-void mpz_powm_mod (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
-void mpz_powm_redc (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
-
-int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
+int speed_routine_count_zeros_setup
+  __GMP_PROTO ((struct speed_params *, mp_ptr, int, int));
 
 
 /* "get" is called repeatedly until it ticks over, just in case on a fast
@@ -623,7 +503,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
 #define SPEED_RESTRICT_COND(cond)   if (!(cond)) return -1.0;
 
 /* For mpn_copy or similar. */
-#define SPEED_ROUTINE_MPN_COPY_CALL(call)				\
+#define SPEED_ROUTINE_MPN_COPY(function)				\
   {									\
     mp_ptr    wp;							\
     unsigned  i;							\
@@ -642,47 +522,13 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     speed_starttime ();							\
     i = s->reps;							\
     do									\
-      call;								\
+      function (wp, s->xp, s->size);					\
     while (--i != 0);							\
     t = speed_endtime ();						\
 									\
     TMP_FREE;								\
     return t;								\
   }
-#define SPEED_ROUTINE_MPN_COPY(function)				\
-  SPEED_ROUTINE_MPN_COPY_CALL (function (wp, s->xp, s->size))
-
-#define SPEED_ROUTINE_MPN_TABSELECT(function)				\
-  {									\
-    mp_ptr    xp, wp;							\
-    unsigned  i;							\
-    double    t;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 0);					\
-									\
-    if (s->r == 0)							\
-      s->r = s->size;	/* default to a quadratic shape */		\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (xp, s->size * s->r, s->align_xp);		\
-    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
-									\
-    speed_operand_src (s, xp, s->size * s->r);				\
-    speed_operand_dst (s, wp, s->size);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      function (wp, xp, s->size, s->r, (s->r) / 2);			\
-    while (--i != 0);							\
-    t = speed_endtime () / s->r;					\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-
 
 #define SPEED_ROUTINE_MPN_COPYC(function)				\
   {									\
@@ -712,7 +558,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
   }
 
 /* s->size is still in limbs, and it's limbs which are copied, but
-   "function" takes a size in bytes not limbs.  */
+   "function" takes a size in bytes not limbs.	*/
 #define SPEED_ROUTINE_MPN_COPY_BYTES(function)				\
   {									\
     mp_ptr    wp;							\
@@ -732,7 +578,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     speed_starttime ();							\
     i = s->reps;							\
     do									\
-      function (wp, s->xp, s->size * GMP_LIMB_BYTES);		\
+      function (wp, s->xp, s->size * BYTES_PER_MP_LIMB);		\
     while (--i != 0);							\
     t = speed_endtime ();						\
 									\
@@ -788,72 +634,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     return t;								\
   }
 
-
-/* For mpn_aors_errK_n, where 1 <= K <= 3. */
-#define SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL(call, K)			\
-  {									\
-    mp_ptr     wp;							\
-    mp_ptr     xp, yp;							\
-    mp_ptr     zp[K];							\
-    mp_limb_t  ep[2*K];							\
-    unsigned   i;							\
-    double     t;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
-									\
-    /* (don't have a mechanism to specify zp alignments) */		\
-    for (i = 0; i < K; i++)						\
-      SPEED_TMP_ALLOC_LIMBS (zp[i], s->size, 0);			\
-									\
-    xp = s->xp;								\
-    yp = s->yp;								\
-									\
-    if (s->r == 0)	;						\
-    else if (s->r == 1) { xp = wp;	    }				\
-    else if (s->r == 2) {	   yp = wp; }				\
-    else if (s->r == 3) { xp = wp; yp = wp; }				\
-    else if (s->r == 4) {     yp = xp;	    }				\
-    else		{						\
-      TMP_FREE;								\
-      return -1.0;							\
-    }									\
-									\
-    /* initialize wp if operand overlap */				\
-    if (xp == wp || yp == wp)						\
-      MPN_COPY (wp, s->xp, s->size);					\
-									\
-    speed_operand_src (s, xp, s->size);					\
-    speed_operand_src (s, yp, s->size);					\
-    for (i = 0; i < K; i++)						\
-      speed_operand_src (s, zp[i], s->size);				\
-    speed_operand_dst (s, wp, s->size);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      call;								\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-
-#define SPEED_ROUTINE_MPN_BINARY_ERR1_N(function)			\
-  SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], s->size, 0), 1)
-
-#define SPEED_ROUTINE_MPN_BINARY_ERR2_N(function)			\
-  SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], s->size, 0), 2)
-
-#define SPEED_ROUTINE_MPN_BINARY_ERR3_N(function)			\
-  SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], zp[2], s->size, 0), 3)
-
-
 /* For mpn_add_n, mpn_sub_n, or similar. */
 #define SPEED_ROUTINE_MPN_ADDSUB_N_CALL(call)				\
   {									\
@@ -954,26 +734,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
 #define SPEED_ROUTINE_MPN_DIVEXACT_1(function)				\
   SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))
 
-#define SPEED_ROUTINE_MPN_BDIV_Q_1(function)				\
-    SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))
-
-#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL(call)			\
-  {									\
-    unsigned   shift;							\
-    mp_limb_t  dinv;							\
-									\
-    SPEED_RESTRICT_COND (s->size > 0);					\
-    SPEED_RESTRICT_COND (s->r != 0);					\
-									\
-    count_trailing_zeros (shift, s->r);					\
-    binvert_limb (dinv, s->r >> shift);					\
-									\
-    SPEED_ROUTINE_MPN_UNARY_1_CALL (call);				\
-  }
-#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1(function)			\
-  SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL					\
-  ((*function) (wp, s->xp, s->size, s->r, dinv, shift))
-
 #define SPEED_ROUTINE_MPN_BDIV_DBM1C(function)				\
   SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r, 0))
 
@@ -1069,30 +829,30 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
 /* For mpn_mul, mpn_mul_basecase, xsize=r, ysize=s->size. */
 #define SPEED_ROUTINE_MPN_MUL(function)					\
   {									\
-    mp_ptr    wp;							\
+    mp_ptr    wp, xp;							\
     mp_size_t size1;							\
     unsigned  i;							\
     double    t;							\
     TMP_DECL;								\
 									\
     size1 = (s->r == 0 ? s->size : s->r);				\
-    if (size1 < 0) size1 = -size1 - s->size;				\
 									\
-    SPEED_RESTRICT_COND (size1 >= 1);					\
-    SPEED_RESTRICT_COND (s->size >= size1);				\
+    SPEED_RESTRICT_COND (s->size >= 1);					\
+    SPEED_RESTRICT_COND (size1 >= s->size);				\
 									\
     TMP_MARK;								\
     SPEED_TMP_ALLOC_LIMBS (wp, size1 + s->size, s->align_wp);		\
+    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);			\
 									\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_operand_src (s, s->yp, size1);				\
+    speed_operand_src (s, xp, size1);					\
+    speed_operand_src (s, s->yp, s->size);				\
     speed_operand_dst (s, wp, size1 + s->size);				\
     speed_cache_fill (s);						\
 									\
     speed_starttime ();							\
     i = s->reps;							\
     do									\
-      function (wp, s->xp, s->size, s->yp, size1);			\
+      function (wp, xp, size1, s->yp, s->size);				\
     while (--i != 0);							\
     t = speed_endtime ();						\
 									\
@@ -1132,7 +892,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
 #define SPEED_ROUTINE_MPN_MUL_N(function)				\
   SPEED_ROUTINE_MPN_MUL_N_CALL (function (wp, s->xp, s->yp, s->size));
 
-#define SPEED_ROUTINE_MPN_MULLO_N_CALL(call)				\
+#define SPEED_ROUTINE_MPN_MULLOW_N_CALL(call)				\
   {									\
     mp_ptr    wp;							\
     unsigned  i;							\
@@ -1160,11 +920,11 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     return t;								\
   }
 
-#define SPEED_ROUTINE_MPN_MULLO_N(function)				\
-  SPEED_ROUTINE_MPN_MULLO_N_CALL (function (wp, s->xp, s->yp, s->size));
+#define SPEED_ROUTINE_MPN_MULLOW_N(function)				\
+  SPEED_ROUTINE_MPN_MULLOW_N_CALL (function (wp, s->xp, s->yp, s->size));
 
 /* For mpn_mul_basecase, xsize=r, ysize=s->size. */
-#define SPEED_ROUTINE_MPN_MULLO_BASECASE(function)			\
+#define SPEED_ROUTINE_MPN_MULLOW_BASECASE(function)			\
   {									\
     mp_ptr    wp;							\
     unsigned  i;							\
@@ -1192,172 +952,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     return t;								\
   }
 
-/* For mpn_mulmid, mpn_mulmid_basecase, xsize=r, ysize=s->size. */
-#define SPEED_ROUTINE_MPN_MULMID(function)				\
-  {									\
-    mp_ptr    wp, xp;							\
-    mp_size_t size1;							\
-    unsigned  i;							\
-    double    t;							\
-    TMP_DECL;								\
-									\
-    size1 = (s->r == 0 ? (2 * s->size - 1) : s->r);			\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-    SPEED_RESTRICT_COND (size1 >= s->size);				\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp);	\
-    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);			\
-									\
-    speed_operand_src (s, xp, size1);					\
-    speed_operand_src (s, s->yp, s->size);				\
-    speed_operand_dst (s, wp, size1 - s->size + 3);			\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      function (wp, xp, size1, s->yp, s->size);				\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-
-#define SPEED_ROUTINE_MPN_MULMID_N(function)				\
-  {									\
-    mp_ptr    wp, xp;							\
-    mp_size_t size1;							\
-    unsigned  i;							\
-    double    t;							\
-    TMP_DECL;								\
-									\
-    size1 = 2 * s->size - 1;						\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp);	\
-    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);			\
-									\
-    speed_operand_src (s, xp, size1);					\
-    speed_operand_src (s, s->yp, s->size);				\
-    speed_operand_dst (s, wp, size1 - s->size + 3);			\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      function (wp, xp, s->yp, s->size);				\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-
-#define SPEED_ROUTINE_MPN_TOOM42_MULMID(function)			\
-  {									\
-    mp_ptr    wp, xp, scratch;						\
-    mp_size_t size1, scratch_size;					\
-    unsigned  i;							\
-    double    t;							\
-    TMP_DECL;								\
-									\
-    size1 = 2 * s->size - 1;						\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp);	\
-    SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp);			\
-    scratch_size = mpn_toom42_mulmid_itch (s->size);			\
-    SPEED_TMP_ALLOC_LIMBS (scratch, scratch_size, 0);			\
-									\
-    speed_operand_src (s, xp, size1);					\
-    speed_operand_src (s, s->yp, s->size);				\
-    speed_operand_dst (s, wp, size1 - s->size + 3);			\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      function (wp, xp, s->yp, s->size, scratch);			\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-
-#define SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL(call)			\
-  {									\
-    mp_ptr    wp, tp;							\
-    unsigned  i;							\
-    double    t;							\
-    mp_size_t itch;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    itch = mpn_mulmod_bnm1_itch (s->size, s->size, s->size);		\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp);		\
-    SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2);			\
-									\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_operand_src (s, s->yp, s->size);				\
-    speed_operand_dst (s, wp, 2 * s->size);				\
-    speed_operand_dst (s, tp, itch);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      call;								\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-#define SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED(function)			\
-  {									\
-    mp_ptr    wp, tp;							\
-    unsigned  i;							\
-    double    t;							\
-    mp_size_t size, itch;						\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    size = mpn_mulmod_bnm1_next_size (s->size);				\
-    itch = mpn_mulmod_bnm1_itch (size, size, size);			\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (wp, size, s->align_wp);			\
-    SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2);			\
-									\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_operand_src (s, s->yp, s->size);				\
-    speed_operand_dst (s, wp, size);					\
-    speed_operand_dst (s, tp, itch);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      function (wp, size, s->xp, s->size, s->yp, s->size, tp);		\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-
 #define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize)		\
   {									\
     mp_ptr    wp, tspace;						\
@@ -1388,104 +982,17 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     return t;								\
   }
 
-#define SPEED_ROUTINE_MPN_TOOM22_MUL_N(function)			\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, s->size, tspace),		\
-     mpn_toom22_mul_itch (s->size, s->size),				\
-     MPN_TOOM22_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM33_MUL_N(function)			\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, s->size, tspace),		\
-     mpn_toom33_mul_itch (s->size, s->size),				\
-     MPN_TOOM33_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM44_MUL_N(function)			\
+#define SPEED_ROUTINE_MPN_KARA_MUL_N(function)				\
   SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, s->size, tspace),		\
-     mpn_toom44_mul_itch (s->size, s->size),				\
-     MPN_TOOM44_MUL_MINSIZE)
+    (function (wp, s->xp, s->yp, s->size, tspace),			\
+     MPN_KARA_MUL_N_TSIZE (s->size),					\
+     MPN_KARA_MUL_N_MINSIZE)
 
-#define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function)			\
+#define SPEED_ROUTINE_MPN_TOOM3_MUL_N(function)				\
   SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, s->size, tspace),		\
-     mpn_toom6h_mul_itch (s->size, s->size),				\
-     MPN_TOOM6H_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM8H_MUL_N(function)			\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, s->size, tspace),		\
-     mpn_toom8h_mul_itch (s->size, s->size),				\
-     MPN_TOOM8H_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM32_MUL(function)				\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace),		\
-     mpn_toom32_mul_itch (s->size, 2*s->size/3),			\
-     MPN_TOOM32_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM42_MUL(function)				\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, s->size/2, tspace),		\
-     mpn_toom42_mul_itch (s->size, s->size/2),				\
-     MPN_TOOM42_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM43_MUL(function)				\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, s->size*3/4, tspace),		\
-     mpn_toom43_mul_itch (s->size, s->size*3/4),			\
-     MPN_TOOM43_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM63_MUL(function)				\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, s->size/2, tspace),		\
-     mpn_toom63_mul_itch (s->size, s->size/2),				\
-     MPN_TOOM63_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function)		\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace),	\
-     mpn_toom32_mul_itch (s->size, 17*s->size/24),			\
-     MPN_TOOM32_MUL_MINSIZE)
-#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL(function)		\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace),	\
-     mpn_toom43_mul_itch (s->size, 17*s->size/24),			\
-     MPN_TOOM43_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL(function)		\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace),	\
-     mpn_toom32_mul_itch (s->size, 19*s->size/30),			\
-     MPN_TOOM32_MUL_MINSIZE)
-#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL(function)		\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace),	\
-     mpn_toom53_mul_itch (s->size, 19*s->size/30),			\
-     MPN_TOOM53_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL(function)		\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace),	\
-     mpn_toom42_mul_itch (s->size, 11*s->size/20),			\
-     MPN_TOOM42_MUL_MINSIZE)
-#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL(function)		\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace),	\
-     mpn_toom53_mul_itch (s->size, 11*s->size/20),			\
-     MPN_TOOM53_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL(function)		\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace),	\
-     mpn_toom42_mul_itch (s->size, 5*s->size/6),			\
-     MPN_TOOM54_MUL_MINSIZE)
-#define SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL(function)		\
-  SPEED_ROUTINE_MPN_MUL_N_TSPACE					\
-    (function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace),	\
-     mpn_toom54_mul_itch (s->size, 5*s->size/6),			\
-     MPN_TOOM54_MUL_MINSIZE)
-
+    (function (wp, s->xp, s->yp, s->size, tspace),			\
+     MPN_TOOM3_MUL_N_TSIZE (s->size),					\
+     MPN_TOOM3_MUL_N_MINSIZE)
 
 
 #define SPEED_ROUTINE_MPN_SQR_CALL(call)				\
@@ -1518,34 +1025,9 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
 #define SPEED_ROUTINE_MPN_SQR(function)					\
   SPEED_ROUTINE_MPN_SQR_CALL (function (wp, s->xp, s->size))
 
-#define SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL(call)			\
-  {									\
-    mp_ptr    wp, tp;							\
-    unsigned  i;							\
-    double    t;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 2);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_wp);		\
-    SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp);		\
-									\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_operand_src (s, tp, 2 * s->size);				\
-    speed_operand_dst (s, wp, 2 * s->size);				\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      call;								\
-    while (--i != 0);							\
-    t = speed_endtime () / 2;						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
+#define SPEED_ROUTINE_MPN_SQR_DIAGONAL(function)			\
+  SPEED_ROUTINE_MPN_SQR (function)
+
 
 #define SPEED_ROUTINE_MPN_SQR_TSPACE(call, tsize, minsize)		\
   {									\
@@ -1576,32 +1058,17 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     return t;								\
   }
 
-#define SPEED_ROUTINE_MPN_TOOM2_SQR(function)				\
+#define SPEED_ROUTINE_MPN_KARA_SQR_N(function)				\
   SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),	\
-				mpn_toom2_sqr_itch (s->size),		\
-				MPN_TOOM2_SQR_MINSIZE)
+				MPN_KARA_SQR_N_TSIZE (s->size),		\
+				MPN_KARA_SQR_N_MINSIZE)
 
-#define SPEED_ROUTINE_MPN_TOOM3_SQR(function)				\
+#define SPEED_ROUTINE_MPN_TOOM3_SQR_N(function)				\
   SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),	\
-				mpn_toom3_sqr_itch (s->size),		\
-				MPN_TOOM3_SQR_MINSIZE)
+				MPN_TOOM3_SQR_N_TSIZE (s->size),	\
+				MPN_TOOM3_SQR_N_MINSIZE)
 
 
-#define SPEED_ROUTINE_MPN_TOOM4_SQR(function)				\
-  SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),	\
-				mpn_toom4_sqr_itch (s->size),		\
-				MPN_TOOM4_SQR_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM6_SQR(function)				\
-  SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),	\
-				mpn_toom6_sqr_itch (s->size),		\
-				MPN_TOOM6_SQR_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM8_SQR(function)				\
-  SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace),	\
-				mpn_toom8_sqr_itch (s->size),		\
-				MPN_TOOM8_SQR_MINSIZE)
-
 #define SPEED_ROUTINE_MPN_MOD_CALL(call)				\
   {									\
     unsigned   i;							\
@@ -1656,47 +1123,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     return speed_endtime ();						\
   }
 
-#define SPEED_ROUTINE_MPN_MOD_1_1(function,pfunc)			\
-  {									\
-    unsigned   i;							\
-    mp_limb_t  inv[4];							\
-									\
-    SPEED_RESTRICT_COND (s->size >= 2);					\
-									\
-    mpn_mod_1_1p_cps (inv, s->r);					\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do {								\
-      pfunc (inv, s->r);						\
-      function (s->xp, s->size, s->r << inv[1], inv);				\
-    } while (--i != 0);							\
-									\
-    return speed_endtime ();						\
-  }
-#define SPEED_ROUTINE_MPN_MOD_1_N(function,pfunc,N)			\
-  {									\
-    unsigned   i;							\
-    mp_limb_t  inv[N+3];						\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-    SPEED_RESTRICT_COND (s->r <= ~(mp_limb_t)0 / N);			\
-									\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do {								\
-      pfunc (inv, s->r);						\
-      function (s->xp, s->size, s->r, inv);				\
-    } while (--i != 0);							\
-									\
-    return speed_endtime ();						\
-  }
-
 
 /* A division of 2*s->size by s->size limbs */
 
@@ -1705,7 +1131,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     unsigned  i;							\
     mp_ptr    a, d, q, r;						\
     double    t;							\
-    gmp_pi1_t dinv;							\
     TMP_DECL;								\
 									\
     SPEED_RESTRICT_COND (s->size >= 1);					\
@@ -1725,8 +1150,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     d[s->size-1] |= GMP_NUMB_HIGHBIT;					\
     a[2*s->size-1] = d[s->size-1] - 1;					\
 									\
-    invert_pi1 (dinv, d[s->size-1], d[s->size-2]);			\
-									\
     speed_operand_src (s, a, 2*s->size);				\
     speed_operand_src (s, d, s->size);					\
     speed_operand_dst (s, q, s->size+1);				\
@@ -1744,522 +1167,52 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     return t;								\
   }
 
+#define SPEED_ROUTINE_MPN_DC_DIVREM_N(function)				\
+  SPEED_ROUTINE_MPN_DC_DIVREM_CALL((*function) (q, a, d, s->size))
 
-/* A remainder 2*s->size by s->size limbs */
+#define SPEED_ROUTINE_MPN_DC_DIVREM_SB(function)			\
+  SPEED_ROUTINE_MPN_DC_DIVREM_CALL					\
+    ((*function) (q, a, 2*s->size, d, s->size))
 
-#define SPEED_ROUTINE_MPZ_MOD(function)					\
-  {									\
-    unsigned   i;							\
-    mpz_t      a, d, r;							\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    mpz_init_set_n (d, s->yp, s->size);					\
-									\
-    /* high part less than d, low part a duplicate copied in */		\
-    mpz_init_set_n (a, s->xp, s->size);					\
-    mpz_mod (a, a, d);							\
-    mpz_mul_2exp (a, a, GMP_LIMB_BITS * s->size);			\
-    MPN_COPY (PTR(a), s->xp, s->size);					\
-									\
-    mpz_init (r);							\
-									\
-    speed_operand_src (s, PTR(a), SIZ(a));				\
-    speed_operand_src (s, PTR(d), SIZ(d));				\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      function (r, a, d);						\
-    while (--i != 0);							\
-    return speed_endtime ();						\
-  }
+#define SPEED_ROUTINE_MPN_DC_TDIV_QR(function)				\
+  SPEED_ROUTINE_MPN_DC_DIVREM_CALL					\
+    ((*function) (q, r, 0, a, 2*s->size, d, s->size))
 
-#define SPEED_ROUTINE_MPN_PI1_DIV(function, INV, DMIN, QMIN)		\
-  {									\
-    unsigned   i;							\
-    mp_ptr     dp, tp, ap, qp;						\
-    gmp_pi1_t  inv;							\
-    double     t;							\
-    mp_size_t size1;							\
-    TMP_DECL;								\
-									\
-    size1 = (s->r == 0 ? 2 * s->size : s->r);				\
-									\
-    SPEED_RESTRICT_COND (s->size >= DMIN);				\
-    SPEED_RESTRICT_COND (size1 - s->size >= QMIN);			\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (ap, size1, s->align_xp);			\
-    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp);		\
-    SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_wp2);			\
-									\
-    /* we don't fill in dividend completely when size1 > s->size */	\
-    MPN_COPY (ap,         s->xp, s->size);				\
-    MPN_COPY (ap + size1 - s->size, s->xp, s->size);			\
-									\
-    MPN_COPY (dp,         s->yp, s->size);				\
-									\
-    /* normalize the data */						\
-    dp[s->size-1] |= GMP_NUMB_HIGHBIT;					\
-    ap[size1 - 1] = dp[s->size - 1] - 1;				\
-									\
-    invert_pi1 (inv, dp[s->size-1], dp[s->size-2]);			\
-									\
-    speed_operand_src (s, ap, size1);					\
-    speed_operand_dst (s, tp, size1);					\
-    speed_operand_src (s, dp, s->size);					\
-    speed_operand_dst (s, qp, size1 - s->size);				\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do {								\
-      MPN_COPY (tp, ap, size1);						\
-      function (qp, tp, size1, dp, s->size, INV);			\
-    } while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-#define SPEED_ROUTINE_MPN_MU_DIV_Q(function,itchfn)			\
-  {									\
-    unsigned   i;							\
-    mp_ptr     dp, tp, qp, scratch;					\
-    double     t;							\
-    mp_size_t itch;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 2);					\
-									\
-    itch = itchfn (2 * s->size, s->size, 0);				\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);			\
-    SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp);		\
-    SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);		\
-									\
-    MPN_COPY (tp,         s->xp, s->size);				\
-    MPN_COPY (tp+s->size, s->xp, s->size);				\
-									\
-    /* normalize the data */						\
-    dp[s->size-1] |= GMP_NUMB_HIGHBIT;					\
-    tp[2*s->size-1] = dp[s->size-1] - 1;				\
-									\
-    speed_operand_dst (s, qp, s->size);					\
-    speed_operand_src (s, tp, 2 * s->size);				\
-    speed_operand_src (s, dp, s->size);					\
-    speed_operand_dst (s, scratch, itch);				\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do {								\
-      function (qp, tp, 2 * s->size, dp, s->size, scratch);		\
-    } while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-#define SPEED_ROUTINE_MPN_MU_DIV_QR(function,itchfn)			\
-  {									\
-    unsigned   i;							\
-    mp_ptr     dp, tp, qp, rp, scratch;					\
-    double     t;							\
-    mp_size_t size1, itch;						\
-    TMP_DECL;								\
-									\
-    size1 = (s->r == 0 ? 2 * s->size : s->r);				\
-									\
-    SPEED_RESTRICT_COND (s->size >= 2);					\
-    SPEED_RESTRICT_COND (size1 >= s->size);				\
-									\
-    itch = itchfn (size1, s->size, 0);					\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp);		\
-    SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp);			\
-    SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);		\
-    SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */	\
-									\
-    /* we don't fill in dividend completely when size1 > s->size */	\
-    MPN_COPY (tp,         s->xp, s->size);				\
-    MPN_COPY (tp + size1 - s->size, s->xp, s->size);			\
-									\
-    MPN_COPY (dp,         s->yp, s->size);				\
-									\
-    /* normalize the data */						\
-    dp[s->size-1] |= GMP_NUMB_HIGHBIT;					\
-    tp[size1 - 1] = dp[s->size - 1] - 1;				\
-									\
-    speed_operand_dst (s, qp, size1 - s->size);				\
-    speed_operand_dst (s, rp, s->size);					\
-    speed_operand_src (s, tp, size1);					\
-    speed_operand_src (s, dp, s->size);					\
-    speed_operand_dst (s, scratch, itch);				\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do {								\
-      function (qp, rp, tp, size1, dp, s->size, scratch);		\
-    } while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-#define SPEED_ROUTINE_MPN_MUPI_DIV_QR(function,itchfn)			\
-  {									\
-    unsigned   i;							\
-    mp_ptr     dp, tp, qp, rp, ip, scratch, tmp;			\
-    double     t;							\
-    mp_size_t  size1, itch;						\
-    TMP_DECL;								\
-									\
-    size1 = (s->r == 0 ? 2 * s->size : s->r);				\
-									\
-    SPEED_RESTRICT_COND (s->size >= 2);					\
-    SPEED_RESTRICT_COND (size1 >= s->size);				\
-									\
-    itch = itchfn (size1, s->size, s->size);				\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp);		\
-    SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp);			\
-    SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);		\
-    SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */	\
-    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_wp2); /* alignment? */	\
-									\
-    /* we don't fill in dividend completely when size1 > s->size */	\
-    MPN_COPY (tp,         s->xp, s->size);				\
-    MPN_COPY (tp + size1 - s->size, s->xp, s->size);			\
-									\
-    MPN_COPY (dp,         s->yp, s->size);				\
-									\
-    /* normalize the data */						\
-    dp[s->size-1] |= GMP_NUMB_HIGHBIT;					\
-    tp[size1 - 1] = dp[s->size-1] - 1;					\
-									\
-    tmp = TMP_ALLOC_LIMBS (mpn_invert_itch (s->size));			\
-    mpn_invert (ip, dp, s->size, tmp);					\
-									\
-    speed_operand_dst (s, qp, size1 - s->size);				\
-    speed_operand_dst (s, rp, s->size);					\
-    speed_operand_src (s, tp, size1);					\
-    speed_operand_src (s, dp, s->size);					\
-    speed_operand_src (s, ip, s->size);					\
-    speed_operand_dst (s, scratch, itch);				\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do {								\
-      function (qp, rp, tp, size1, dp, s->size, ip, s->size, scratch);	\
-    } while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
 
-#define SPEED_ROUTINE_MPN_PI1_BDIV_QR(function)				\
-  {									\
-    unsigned   i;							\
-    mp_ptr     dp, tp, ap, qp;						\
-    mp_limb_t  inv;							\
-    double     t;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp);			\
-    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);			\
-    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2);		\
-									\
-    MPN_COPY (ap,         s->xp, s->size);				\
-    MPN_COPY (ap+s->size, s->xp, s->size);				\
-									\
-    /* divisor must be odd */						\
-    MPN_COPY (dp, s->yp, s->size);					\
-    dp[0] |= 1;								\
-    binvert_limb (inv, dp[0]);						\
-    inv = -inv;								\
-									\
-    speed_operand_src (s, ap, 2*s->size);				\
-    speed_operand_dst (s, tp, 2*s->size);				\
-    speed_operand_src (s, dp, s->size);					\
-    speed_operand_dst (s, qp, s->size);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do {								\
-      MPN_COPY (tp, ap, 2*s->size);					\
-      function (qp, tp, 2*s->size, dp, s->size, inv);			\
-    } while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-#define SPEED_ROUTINE_MPN_PI1_BDIV_Q(function)				\
-  {									\
-    unsigned   i;							\
-    mp_ptr     dp, tp, qp;						\
-    mp_limb_t  inv;							\
-    double     t;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);			\
-    SPEED_TMP_ALLOC_LIMBS (tp, s->size, s->align_wp2);			\
-									\
-    /* divisor must be odd */						\
-    MPN_COPY (dp, s->yp, s->size);					\
-    dp[0] |= 1;								\
-    binvert_limb (inv, dp[0]);						\
-    inv = -inv;								\
-									\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_operand_dst (s, tp, s->size);					\
-    speed_operand_src (s, dp, s->size);					\
-    speed_operand_dst (s, qp, s->size);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do {								\
-      MPN_COPY (tp, s->xp, s->size);					\
-      function (qp, tp, s->size, dp, s->size, inv);			\
-    } while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-#define SPEED_ROUTINE_MPN_MU_BDIV_Q(function,itchfn)			\
-  {									\
-    unsigned   i;							\
-    mp_ptr     dp, qp, scratch;						\
-    double     t;							\
-    mp_size_t itch;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 2);					\
-									\
-    itch = itchfn (s->size, s->size);					\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);			\
-    SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);		\
-									\
-    /* divisor must be odd */						\
-    MPN_COPY (dp, s->yp, s->size);					\
-    dp[0] |= 1;								\
-									\
-    speed_operand_dst (s, qp, s->size);					\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_operand_src (s, dp, s->size);					\
-    speed_operand_dst (s, scratch, itch);				\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do {								\
-      function (qp, s->xp, s->size, dp, s->size, scratch);		\
-    } while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-#define SPEED_ROUTINE_MPN_MU_BDIV_QR(function,itchfn)			\
+/* A division of s->size by 3 limbs */
+
+#define SPEED_ROUTINE_MPN_SB_DIVREM_M3(function)			\
   {									\
     unsigned   i;							\
-    mp_ptr     dp, tp, qp, rp, scratch;					\
+    mp_ptr     a, d, q;							\
+    mp_size_t  qsize;							\
     double     t;							\
-    mp_size_t itch;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 2);					\
-									\
-    itch = itchfn (2 * s->size, s->size);				\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp);			\
-    SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp);		\
-    SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2);		\
-    SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */	\
-									\
-    MPN_COPY (tp,         s->xp, s->size);				\
-    MPN_COPY (tp+s->size, s->xp, s->size);				\
-									\
-    /* divisor must be odd */						\
-    MPN_COPY (dp, s->yp, s->size);					\
-    dp[0] |= 1;								\
-									\
-    speed_operand_dst (s, qp, s->size);					\
-    speed_operand_dst (s, rp, s->size);					\
-    speed_operand_src (s, tp, 2 * s->size);				\
-    speed_operand_src (s, dp, s->size);					\
-    speed_operand_dst (s, scratch, itch);				\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do {								\
-      function (qp, rp, tp, 2 * s->size, dp, s->size, scratch);		\
-    } while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-
-#define SPEED_ROUTINE_MPN_BROOT(function)	\
-  {						\
-    SPEED_RESTRICT_COND (s->r & 1);		\
-    s->xp[0] |= 1;				\
-    SPEED_ROUTINE_MPN_UNARY_1_CALL		\
-      ((*function) (wp, s->xp, s->size, s->r));	\
-  }
-
-#define SPEED_ROUTINE_MPN_BROOTINV(function, itch)	\
-  {							\
-    mp_ptr    wp, tp;					\
-    unsigned  i;					\
-    double    t;					\
-    TMP_DECL;						\
-    TMP_MARK;						\
-    SPEED_RESTRICT_COND (s->size >= 1);			\
-    SPEED_RESTRICT_COND (s->r & 1);			\
-    wp = TMP_ALLOC_LIMBS (s->size);			\
-    tp = TMP_ALLOC_LIMBS ( (itch));			\
-    s->xp[0] |= 1;					\
-							\
-    speed_operand_src (s, s->xp, s->size);		\
-    speed_operand_dst (s, wp, s->size);			\
-    speed_cache_fill (s);				\
-							\
-    speed_starttime ();					\
-    i = s->reps;					\
-    do							\
-      (*function) (wp, s->xp, s->size, s->r, tp);	\
-    while (--i != 0);					\
-    t = speed_endtime ();				\
-							\
-    TMP_FREE;						\
-    return t;						\
-  }
-
-#define SPEED_ROUTINE_MPN_INVERT(function,itchfn)			\
-  {									\
-    long  i;								\
-    mp_ptr    up, tp, ip;						\
-    double    t;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);			\
-    SPEED_TMP_ALLOC_LIMBS (up, s->size,   s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);		\
-									\
-    MPN_COPY (up, s->xp, s->size);					\
-									\
-    /* normalize the data */						\
-    up[s->size-1] |= GMP_NUMB_HIGHBIT;					\
-									\
-    speed_operand_src (s, up, s->size);					\
-    speed_operand_dst (s, tp, s->size);					\
-    speed_operand_dst (s, ip, s->size);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      function (ip, up, s->size, tp);					\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-
-#define SPEED_ROUTINE_MPN_INVERTAPPR(function,itchfn)			\
-  {									\
-    long  i;								\
-    mp_ptr    up, tp, ip;						\
-    double    t;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);			\
-    SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);		\
-									\
-    MPN_COPY (up, s->xp, s->size);					\
-									\
-    /* normalize the data */						\
-    up[s->size-1] |= GMP_NUMB_HIGHBIT;					\
-									\
-    speed_operand_src (s, up, s->size);					\
-    speed_operand_dst (s, tp, s->size);					\
-    speed_operand_dst (s, ip, s->size);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      function (ip, up, s->size, tp);					\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-
-#define SPEED_ROUTINE_MPN_NI_INVERTAPPR(function,itchfn)		\
-  {									\
-    long  i;								\
-    mp_ptr    up, tp, ip;						\
-    double    t;							\
     TMP_DECL;								\
 									\
     SPEED_RESTRICT_COND (s->size >= 3);					\
 									\
     TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);			\
-    SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);		\
+    SPEED_TMP_ALLOC_LIMBS (a, s->size, s->align_xp);			\
 									\
-    MPN_COPY (up, s->xp, s->size);					\
+    SPEED_TMP_ALLOC_LIMBS (d, 3, s->align_yp);				\
+    MPN_COPY (d, s->yp, 3);						\
+    d[2] |= GMP_NUMB_HIGHBIT;						\
 									\
-    /* normalize the data */						\
-    up[s->size-1] |= GMP_NUMB_HIGHBIT;					\
+    qsize = s->size - 3;						\
+    SPEED_TMP_ALLOC_LIMBS (q, qsize, s->align_wp);			\
 									\
-    speed_operand_src (s, up, s->size);					\
-    speed_operand_dst (s, tp, s->size);					\
-    speed_operand_dst (s, ip, s->size);					\
+    speed_operand_dst (s, a, s->size);					\
+    speed_operand_src (s, d, 3);					\
+    speed_operand_dst (s, q, qsize);					\
     speed_cache_fill (s);						\
 									\
     speed_starttime ();							\
     i = s->reps;							\
     do									\
-      function (ip, up, s->size, tp);					\
+      {									\
+	MPN_COPY (a, s->xp, s->size);					\
+	function (q, a, s->size, d, 3);					\
+      }									\
     while (--i != 0);							\
     t = speed_endtime ();						\
 									\
@@ -2267,83 +1220,43 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     return t;								\
   }
 
-#define SPEED_ROUTINE_MPN_BINVERT(function,itchfn)			\
+/* A remainder 2*s->size by s->size limbs */
+
+#define SPEED_ROUTINE_MPZ_MOD(function)					\
   {									\
-    long  i;								\
-    mp_ptr    up, tp, ip;						\
-    double    t;							\
-    TMP_DECL;								\
+    unsigned   i;							\
+    mpz_t      a, d, r;							\
 									\
     SPEED_RESTRICT_COND (s->size >= 1);					\
 									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);			\
-    SPEED_TMP_ALLOC_LIMBS (up, s->size,   s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);		\
+    mpz_init_set_n (d, s->yp, s->size);					\
 									\
-    MPN_COPY (up, s->xp, s->size);					\
+    /* high part less than d, low part a duplicate copied in */		\
+    mpz_init_set_n (a, s->xp, s->size);					\
+    mpz_mod (a, a, d);							\
+    mpz_mul_2exp (a, a, BITS_PER_MP_LIMB * s->size);			\
+    MPN_COPY (PTR(a), s->xp, s->size);					\
 									\
-    /* normalize the data */						\
-    up[0] |= 1;								\
+    mpz_init (r);							\
 									\
-    speed_operand_src (s, up, s->size);					\
-    speed_operand_dst (s, tp, s->size);					\
-    speed_operand_dst (s, ip, s->size);					\
+    speed_operand_src (s, PTR(a), SIZ(a));				\
+    speed_operand_src (s, PTR(d), SIZ(d));				\
     speed_cache_fill (s);						\
 									\
     speed_starttime ();							\
     i = s->reps;							\
     do									\
-      function (ip, up, s->size, tp);					\
+      function (r, a, d);						\
     while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
+    return speed_endtime ();						\
   }
 
-#define SPEED_ROUTINE_MPN_SEC_INVERT(function,itchfn)			\
-  {									\
-    long  i;								\
-    mp_ptr    up, mp, tp, ip;						\
-    double    t;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp);			\
-    SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp);			\
-    SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp);		\
-									\
-    speed_operand_src (s, up, s->size);					\
-    speed_operand_dst (s, tp, s->size);					\
-    speed_operand_dst (s, ip, s->size);					\
-    speed_cache_fill (s);						\
-									\
-    MPN_COPY (mp, s->yp, s->size);					\
-    /* Must be odd */							\
-    mp[0] |= 1;								\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      {									\
-	MPN_COPY (up, s->xp, s->size);					\
-	function (ip, up, mp, s->size, 2*s->size*GMP_NUMB_BITS, tp);	\
-      }									\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
 
 #define SPEED_ROUTINE_REDC_1(function)					\
   {									\
     unsigned   i;							\
     mp_ptr     cp, mp, tp, ap;						\
-    mp_limb_t  inv;							\
+    mp_limb_t  Nprim;							\
     double     t;							\
     TMP_DECL;								\
 									\
@@ -2361,8 +1274,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     /* modulus must be odd */						\
     MPN_COPY (mp, s->yp, s->size);					\
     mp[0] |= 1;								\
-    binvert_limb (inv, mp[0]);						\
-    inv = -inv;								\
+    binvert_limb (Nprim, mp[0]);					\
 									\
     speed_operand_src (s, ap, 2*s->size+1);				\
     speed_operand_dst (s, tp, 2*s->size+1);				\
@@ -2374,90 +1286,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     i = s->reps;							\
     do {								\
       MPN_COPY (tp, ap, 2*s->size);					\
-      function (cp, tp, mp, s->size, inv);				\
-    } while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-#define SPEED_ROUTINE_REDC_2(function)					\
-  {									\
-    unsigned   i;							\
-    mp_ptr     cp, mp, tp, ap;						\
-    mp_limb_t  invp[2];							\
-    double     t;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp);		\
-    SPEED_TMP_ALLOC_LIMBS (mp, s->size,     s->align_yp);		\
-    SPEED_TMP_ALLOC_LIMBS (cp, s->size,     s->align_wp);		\
-    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2);		\
-									\
-    MPN_COPY (ap,         s->xp, s->size);				\
-    MPN_COPY (ap+s->size, s->xp, s->size);				\
-									\
-    /* modulus must be odd */						\
-    MPN_COPY (mp, s->yp, s->size);					\
-    mp[0] |= 1;								\
-    mpn_binvert (invp, mp, 2, tp);					\
-    invp[0] = -invp[0]; invp[1] = ~invp[1];				\
-									\
-    speed_operand_src (s, ap, 2*s->size+1);				\
-    speed_operand_dst (s, tp, 2*s->size+1);				\
-    speed_operand_src (s, mp, s->size);					\
-    speed_operand_dst (s, cp, s->size);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do {								\
-      MPN_COPY (tp, ap, 2*s->size);					\
-      function (cp, tp, mp, s->size, invp);				\
-    } while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-#define SPEED_ROUTINE_REDC_N(function)					\
-  {									\
-    unsigned   i;							\
-    mp_ptr     cp, mp, tp, ap, invp;					\
-    double     t;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size > 8);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp);		\
-    SPEED_TMP_ALLOC_LIMBS (mp, s->size,     s->align_yp);		\
-    SPEED_TMP_ALLOC_LIMBS (cp, s->size,     s->align_wp);		\
-    SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2);		\
-    SPEED_TMP_ALLOC_LIMBS (invp, s->size,   s->align_wp2); /* align? */	\
-									\
-    MPN_COPY (ap,         s->xp, s->size);				\
-    MPN_COPY (ap+s->size, s->xp, s->size);				\
-									\
-    /* modulus must be odd */						\
-    MPN_COPY (mp, s->yp, s->size);					\
-    mp[0] |= 1;								\
-    mpn_binvert (invp, mp, s->size, tp);				\
-									\
-    speed_operand_src (s, ap, 2*s->size+1);				\
-    speed_operand_dst (s, tp, 2*s->size+1);				\
-    speed_operand_src (s, mp, s->size);					\
-    speed_operand_dst (s, cp, s->size);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do {								\
-      MPN_COPY (tp, ap, 2*s->size);					\
-      function (cp, tp, mp, s->size, invp);				\
+      function (cp, tp, mp, s->size, Nprim);				\
     } while (--i != 0);							\
     t = speed_endtime ();						\
 									\
@@ -2805,107 +1634,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
      function (px[j-1], py[j-1], 0))
 
 
-#define SPEED_ROUTINE_MPN_HGCD_CALL(func, itchfunc)			\
-  {									\
-    mp_size_t hgcd_init_itch, hgcd_itch;				\
-    mp_ptr ap, bp, wp, tmp1;						\
-    struct hgcd_matrix hgcd;						\
-    int res;								\
-    unsigned i;								\
-    double t;								\
-    TMP_DECL;								\
-									\
-    if (s->size < 2)							\
-      return -1;							\
-									\
-    TMP_MARK;								\
-									\
-    SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);		\
-    SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);		\
-									\
-    s->xp[s->size - 1] |= 1;						\
-    s->yp[s->size - 1] |= 1;						\
-									\
-    hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);		\
-    hgcd_itch = itchfunc (s->size);					\
-									\
-    SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp);		\
-    SPEED_TMP_ALLOC_LIMBS (wp, hgcd_itch, s->align_wp);			\
-									\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_operand_src (s, s->yp, s->size);				\
-    speed_operand_dst (s, ap, s->size + 1);				\
-    speed_operand_dst (s, bp, s->size + 1);				\
-    speed_operand_dst (s, wp, hgcd_itch);				\
-    speed_operand_dst (s, tmp1, hgcd_init_itch);			\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      {									\
-	MPN_COPY (ap, s->xp, s->size);					\
-	MPN_COPY (bp, s->yp, s->size);					\
-	mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);			\
-	res = func (ap, bp, s->size, &hgcd, wp);			\
-      }									\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-    TMP_FREE;								\
-    return t;								\
-  }
-
-#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc)		\
-  {									\
-    mp_size_t hgcd_init_itch, hgcd_step_itch;				\
-    mp_ptr ap, bp, wp, tmp1;						\
-    struct hgcd_matrix hgcd;						\
-    mp_size_t p = s->size/2;						\
-    int res;								\
-    unsigned i;								\
-    double t;								\
-    TMP_DECL;								\
-									\
-    if (s->size < 2)							\
-      return -1;							\
-									\
-    TMP_MARK;								\
-									\
-    SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);		\
-    SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);		\
-									\
-    s->xp[s->size - 1] |= 1;						\
-    s->yp[s->size - 1] |= 1;						\
-									\
-    hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);		\
-    hgcd_step_itch = itchfunc (s->size, p);				\
-									\
-    SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp);		\
-    SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp);			\
-									\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_operand_src (s, s->yp, s->size);				\
-    speed_operand_dst (s, ap, s->size + 1);				\
-    speed_operand_dst (s, bp, s->size + 1);				\
-    speed_operand_dst (s, wp, hgcd_step_itch);				\
-    speed_operand_dst (s, tmp1, hgcd_init_itch);			\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      {									\
-	MPN_COPY (ap, s->xp, s->size);					\
-	MPN_COPY (bp, s->yp, s->size);					\
-	mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);			\
-	res = func (&hgcd, ap, bp, s->size, p, wp);			\
-      }									\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-    TMP_FREE;								\
-    return t;								\
-  }
-
 /* Run some GCDs of s->size limbs each.  The number of different data values
    is decreased as s->size**2, since GCD is a quadratic algorithm.
    SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
@@ -3169,111 +1897,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     return t;								\
   }
 
-#define SPEED_ROUTINE_MPN_DIV_QR_1(function)				\
-  {									\
-    mp_ptr    wp, xp;							\
-    mp_limb_t d;							\
-    mp_limb_t r;							\
-    unsigned  i;							\
-    double    t;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
-									\
-    d = s->r;								\
-    if (d == 0)								\
-      d = 1;								\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_operand_dst (s, wp, s->size);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      r = function (wp, wp+s->size-1, s->xp, s->size, d);		\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-
-#define SPEED_ROUTINE_MPN_DIV_QR_1N_PI1(function)			\
-  {									\
-    mp_ptr    wp, xp;							\
-    mp_limb_t d, dinv;							\
-    mp_limb_t r;							\
-    unsigned  i;							\
-    double    t;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 1);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
-									\
-    d = s->r;								\
-    /* divisor must be normalized */					\
-    SPEED_RESTRICT_COND (d & GMP_NUMB_HIGHBIT);				\
-    invert_limb (dinv, d);						\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_operand_dst (s, wp, s->size);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      r = function (wp, s->xp, s->size, 0, d, dinv);			\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
-
-#define SPEED_ROUTINE_MPN_DIV_QR_2(function, norm)			\
-  {									\
-    mp_ptr    wp, xp;							\
-    mp_limb_t yp[2];							\
-    mp_limb_t rp[2];							\
-    unsigned  i;							\
-    double    t;							\
-    TMP_DECL;								\
-									\
-    SPEED_RESTRICT_COND (s->size >= 2);					\
-									\
-    TMP_MARK;								\
-    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);			\
-									\
-    /* divisor must be normalized */					\
-    MPN_COPY (yp, s->yp_block, 2);					\
-    if (norm)								\
-      yp[1] |= GMP_NUMB_HIGHBIT;					\
-    else								\
-      {									\
-	yp[1] &= ~GMP_NUMB_HIGHBIT;					\
-	if (yp[1] == 0)							\
-	  yp[1] = 1;							\
-      }									\
-    speed_operand_src (s, s->xp, s->size);				\
-    speed_operand_src (s, yp, 2);					\
-    speed_operand_dst (s, wp, s->size);					\
-    speed_operand_dst (s, rp, 2);					\
-    speed_cache_fill (s);						\
-									\
-    speed_starttime ();							\
-    i = s->reps;							\
-    do									\
-      function (wp, rp, s->xp, s->size, yp);				\
-    while (--i != 0);							\
-    t = speed_endtime ();						\
-									\
-    TMP_FREE;								\
-    return t;								\
-  }
 
 #define SPEED_ROUTINE_MODLIMB_INVERT(function)				\
   {									\
@@ -3402,7 +2025,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
 									\
     speed_operand_src (s, s->xp, s->size);				\
     speed_operand_dst (s, xp, s->size);					\
-    speed_operand_dst (s, (mp_ptr) wp, wn/GMP_LIMB_BYTES);		\
+    speed_operand_dst (s, (mp_ptr) wp, wn/BYTES_PER_MP_LIMB);		\
     speed_cache_fill (s);						\
 									\
     speed_starttime ();							\
@@ -3442,7 +2065,8 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     for (i = 0; i < s->size; i++)					\
       xp[i] = s->xp[i] % base;						\
 									\
-    LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base);			\
+    wn = ((mp_size_t) (s->size / __mp_bases[base].chars_per_bit_exactly)) \
+      / BITS_PER_MP_LIMB + 2;						\
     SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);			\
 									\
     /* use this during development to check wn is big enough */		\
@@ -3450,7 +2074,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
     ASSERT_ALWAYS (mpn_set_str (wp, xp, s->size, base) <= wn);		\
     */									\
 									\
-    speed_operand_src (s, (mp_ptr) xp, s->size/GMP_LIMB_BYTES);	\
+    speed_operand_src (s, (mp_ptr) xp, s->size/BYTES_PER_MP_LIMB);	\
     speed_operand_dst (s, wp, wn);					\
     speed_cache_fill (s);						\
 									\
@@ -3466,7 +2090,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
   }
 
 
-/* Run an accel gcd find_a() function over various data values.  A set of
+/* Run an accel gcd find_a() function over various data values.  A set of
    values is used in case some run particularly fast or slow.  The size
    parameter is ignored, the amount of data tested is fixed.  */
 
@@ -3602,6 +2226,9 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
   }
 
 
+#endif
+
+
 #define SPEED_ROUTINE_MPN_BACK_TO_BACK(function)			\
   {									\
     unsigned  i;							\
@@ -3641,6 +2268,3 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
 
 #define SPEED_ROUTINE_MPN_ZERO(function)				\
   SPEED_ROUTINE_MPN_ZERO_CALL (function (wp, s->size))
-
-
-#endif
diff --git a/gmp/tune/time.c b/gmp/tune/time.c
index 0178b345af..865a92c758 100644
--- a/gmp/tune/time.c
+++ b/gmp/tune/time.c
@@ -1,32 +1,21 @@
-/* Time routines for speed measurements.
+/* Time routines for speed measurements.
 
-Copyright 1999-2004, 2010-2012 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 /* Usage:
@@ -266,7 +255,7 @@ static const int  use_stck = 1;  /* always use when available */
 typedef uint64_t  stck_t; /* gcc for s390 is quite new, always has uint64_t */
 #define STCK(timestamp)                 \
   do {                                  \
-    asm ("stck %0" : "=Q" (timestamp)); \
+    asm ("stck %0" : "=m" (timestamp)); \
   } while (0)
 #else
 static const int  have_stck = 0;
@@ -467,22 +456,9 @@ cycles_works_p (void)
   if (result != -1)
     goto done;
 
-  /* FIXME: On linux, the cycle counter is not saved and restored over
-   * context switches, making it almost useless for precise cputime
-   * measurements. When available, it's better to use clock_gettime,
-   * which seems to have reasonable accuracy (tested on x86_32,
-   * linux-2.6.26, glibc-2.7). However, there are also some linux
-   * systems where clock_gettime is broken in one way or the other,
-   * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or
-   * kind-of implemented but broken (needs code to detect that), and
-   * on those systems a wall-clock cycle counter is the least bad
-   * fallback.
-   *
-   * So we need some code to disable the cycle counter on some but not
-   * all linux systems. */
 #ifdef SIGILL
   {
-    RETSIGTYPE (*old_handler) (int);
+    RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
     unsigned  cycles[2];
 
     old_handler = signal (SIGILL, cycles_works_handler);
@@ -695,8 +671,8 @@ getrusage_backwards_p (void)
 	  if (speed_option_verbose)
 	    printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
 		    i,
-		    (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec,
-		    (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec);
+		    prev.ru_utime.tv_sec, prev.ru_utime.tv_usec,
+		    next.ru_utime.tv_sec, next.ru_utime.tv_usec);
 	  result = 1;
 	  break;
 	}
@@ -733,8 +709,6 @@ const int  have_cgt_id = 0;
 # define CGT_ID       (ASSERT_FAIL (CGT_ID not determined), -1)
 #endif
 
-#define CGT_DELAY_COUNT 1000
-
 int
 cgt_works_p (void)
 {
@@ -776,44 +750,6 @@ cgt_works_p (void)
   cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
   printf ("clock_gettime is %s accurate\n",
 	  unittime_string (cgt_unittime));
-
-  if (cgt_unittime < 10e-9)
-    {
-      /* Do we believe this? */
-      struct timespec start, end;
-      static volatile int counter;
-      double duration;
-      if (clock_gettime (CGT_ID, &start))
-	{
-	  if (speed_option_verbose)
-	    printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
-	  result = 0;
-	  return result;
-	}
-      /* Loop of at least 1000 memory accesses, ought to take at
-	 least 100 ns*/
-      for (counter = 0; counter < CGT_DELAY_COUNT; counter++)
-	;
-      if (clock_gettime (CGT_ID, &end))
-	{
-	  if (speed_option_verbose)
-	    printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
-	  result = 0;
-	  return result;
-	}
-      duration = (end.tv_sec + end.tv_nsec * 1e-9
-		  - start.tv_sec - start.tv_nsec * 1e-9);
-      if (speed_option_verbose)
-	printf ("delay loop of %d rounds took %s (according to clock_gettime)\n",
-		CGT_DELAY_COUNT, unittime_string (duration));
-      if (duration < 100e-9)
-	{
-	  if (speed_option_verbose)
-	    printf ("clock_gettime id=%d not believable\n", CGT_ID);
-	  result = 0;
-	  return result;
-	}
-    }
   result = 1;
   return result;
 }
@@ -843,7 +779,7 @@ int
 mftb_works_p (void)
 {
   unsigned   a[2];
-  RETSIGTYPE (*old_handler) (int);
+  RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
   double     cycletime;
 
   /* suppress a warning about a[] unused */
@@ -1005,7 +941,7 @@ speed_time_init (void)
 
   speed_cycletime_init ();
 
-  if (!speed_option_cycles_broken && have_cycles && cycles_works_p ())
+  if (have_cycles && cycles_works_p ())
     {
       use_cycles = 1;
       DEFAULT (speed_cycletime, 1.0);
@@ -1136,7 +1072,7 @@ speed_time_init (void)
       use_cgt = 1;
       speed_unittime = cgt_unittime;
       DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
-      strcpy (speed_time_string, "microsecond accurate clock_gettime()");
+      strcpy (speed_time_string, "microsecond accurate getrusage()");
     }
   else if (have_times && clk_tck() > 1000000)
     {
@@ -1353,7 +1289,7 @@ speed_mftb_diff (const unsigned end[2], const unsigned start[2])
    psecs might overflow.  2^32 microseconds is only a bit over an hour, or
    2^32 nanoseconds only about 4 seconds.
 
-   The casts to "long" are for the benefit of timebasestruct_t, where the
+   The casts to "long" are for the benefit of timebasestruct_t, where the
    fields are only "unsigned int", but we want a signed difference.  */
 
 #define DIFF_SECS_ROUTINE(sec, psec, punit)                     \
diff --git a/gmp/tune/tune-gcd-p.c b/gmp/tune/tune-gcd-p.c
deleted file mode 100644
index 4d52f5610c..0000000000
--- a/gmp/tune/tune-gcd-p.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/* tune-gcd-p
-
-   Tune the choice for splitting p in divide-and-conquer gcd.
-
-Copyright 2008, 2010, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
-
-#define TUNE_GCD_P 1
-
-#include "../mpn/gcd.c"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include "speed.h"
-
-/* Search for minimum over a range. FIXME: Implement golden-section /
-   fibonacci search*/
-static int
-search (double *minp, double (*f)(void *, int), void *ctx, int start, int end)
-{
-  int x[4];
-  double y[4];
-
-  int best_i;
-
-  x[0] = start;
-  x[3] = end;
-
-  y[0] = f(ctx, x[0]);
-  y[3] = f(ctx, x[3]);
-
-  for (;;)
-    {
-      int i;
-      int length = x[3] - x[0];
-
-      x[1] = x[0] + length/3;
-      x[2] = x[0] + 2*length/3;
-
-      y[1] = f(ctx, x[1]);
-      y[2] = f(ctx, x[2]);
-
-#if 0
-      printf("%d: %f, %d: %f, %d:, %f %d: %f\n",
-	     x[0], y[0], x[1], y[1], x[2], y[2], x[3], y[3]);
-#endif
-      for (best_i = 0, i = 1; i < 4; i++)
-	if (y[i] < y[best_i])
-	  best_i = i;
-
-      if (length <= 4)
-	break;
-
-      if (best_i >= 2)
-	{
-	  x[0] = x[1];
-	  y[0] = y[1];
-	}
-      else
-	{
-	  x[3] = x[2];
-	  y[3] = y[2];
-	}
-    }
-  *minp = y[best_i];
-  return x[best_i];
-}
-
-static int
-compare_double(const void *ap, const void *bp)
-{
-  double a = * (const double *) ap;
-  double b = * (const double *) bp;
-
-  if (a < b)
-    return -1;
-  else if (a > b)
-    return 1;
-  else
-    return 0;
-}
-
-static double
-median (double *v, size_t n)
-{
-  qsort(v, n, sizeof(*v), compare_double);
-
-  return v[n/2];
-}
-
-#define TIME(res, code) do {				\
-  double time_measurement[5];				\
-  unsigned time_i;					\
-							\
-  for (time_i = 0; time_i < 5; time_i++)		\
-    {							\
-      speed_starttime();				\
-      code;						\
-      time_measurement[time_i] = speed_endtime();	\
-    }							\
-  res = median(time_measurement, 5);			\
-} while (0)
-
-struct bench_data
-{
-  mp_size_t n;
-  mp_ptr ap;
-  mp_ptr bp;
-  mp_ptr up;
-  mp_ptr vp;
-  mp_ptr gp;
-};
-
-static double
-bench_gcd (void *ctx, int p)
-{
-  struct bench_data *data = ctx;
-  double t;
-
-  p_table[data->n] = p;
-  TIME(t, {
-      MPN_COPY (data->up, data->ap, data->n);
-      MPN_COPY (data->vp, data->bp, data->n);
-      mpn_gcd (data->gp, data->up, data->n, data->vp, data->n);
-    });
-
-  return t;
-}
-
-int
-main(int argc, char **argv)
-{
-  gmp_randstate_t rands;  struct bench_data data;
-  mp_size_t n;
-
-  TMP_DECL;
-
-  /* Unbuffered so if output is redirected to a file it isn't lost if the
-     program is killed part way through.  */
-  setbuf (stdout, NULL);
-  setbuf (stderr, NULL);
-
-  gmp_randinit_default (rands);
-
-  TMP_MARK;
-
-  data.ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
-  data.bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
-  data.up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
-  data.vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
-  data.gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
-
-  mpn_random (data.ap, P_TABLE_SIZE);
-  mpn_random (data.bp, P_TABLE_SIZE);
-
-  memset (p_table, 0, sizeof(p_table));
-
-  for (n = 100; n < P_TABLE_SIZE; n++)
-    {
-      mp_size_t p;
-      mp_size_t best_p;
-      double best_time;
-      double lehmer_time;
-
-      if (data.ap[n-1] == 0)
-	data.ap[n-1] = 1;
-
-      if (data.bp[n-1] == 0)
-	data.bp[n-1] = 1;
-
-      data.n = n;
-
-      lehmer_time = bench_gcd (&data, 0);
-
-      best_p = search (&best_time, bench_gcd, &data, n/5, 4*n/5);
-      if (best_time > lehmer_time)
-	best_p = 0;
-
-      printf("%6d %6d %5.3g", n, best_p, (double) best_p / n);
-      if (best_p > 0)
-	{
-	  double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
-	  printf(" %5.3g%%", speedup);
-	  if (speedup < 1.0)
-	    {
-	      printf(" (ignored)");
-	      best_p = 0;
-	    }
-	}
-      printf("\n");
-
-      p_table[n] = best_p;
-    }
-  TMP_FREE;
-  gmp_randclear(rands);
-  return 0;
-}
diff --git a/gmp/tune/tuneup.c b/gmp/tune/tuneup.c
index 2fba6b2955..06cb03c583 100644
--- a/gmp/tune/tuneup.c
+++ b/gmp/tune/tuneup.c
@@ -1,32 +1,21 @@
 /* Create tuned thresholds for various algorithms.
 
-Copyright 1999-2003, 2005, 2006, 2008-2012 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
-  * the GNU Lesser General Public License as published by the Free
-    Software Foundation; either version 3 of the License, or (at your
-    option) any later version.
-
-or
-
-  * the GNU General Public License as published by the Free Software
-    Foundation; either version 2 of the License, or (at your option) any
-    later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+License for more details.
 
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library.  If not,
-see https://www.gnu.org/licenses/.  */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
 
 
 /* Usage: tuneup [-t] [-t] [-p precision]
@@ -75,11 +64,11 @@ see https://www.gnu.org/licenses/.  */
    instead.  #define TUNE_PROGRAM_BUILD does this, with help from code at
    the end of gmp-impl.h, and rules in tune/Makefile.am.
 
-   MUL_TOOM22_THRESHOLD for example uses a recompiled mpn_mul_n.  The
+   MUL_KARATSUBA_THRESHOLD for example uses a recompiled mpn_mul_n.  The
    threshold is set to "size+1" to avoid karatsuba, or to "size" to use one
    level, but recurse into the basecase.
 
-   MUL_TOOM33_THRESHOLD makes use of the tuned MUL_TOOM22_THRESHOLD value.
+   MUL_TOOM3_THRESHOLD makes use of the tuned MUL_KARATSUBA_THRESHOLD value.
    Other routines in turn will make use of both of those.  Naturally the
    dependants must be tuned first.
 
@@ -98,7 +87,7 @@ see https://www.gnu.org/licenses/.  */
    DIVREM_1_NORM_THRESHOLD.  An assembler mpn_divrem_1 is expected to be
    written and tuned all by hand.  Assembler routines that might have hard
    limits are recompiled though, to make them accept a bigger range of sizes
-   than normal, eg. mpn_sqr_basecase to compare against mpn_toom2_sqr.
+   than normal, eg. mpn_sqr_basecase to compare against mpn_kara_sqr_n.
 
    Limitations:
 
@@ -153,81 +142,46 @@ int  allocdat = 0;
 
 /* This is not defined if mpn_sqr_basecase doesn't declare a limit.  In that
    case use zero here, which for params.max_size means no limit.  */
-#ifndef TUNE_SQR_TOOM2_MAX
-#define TUNE_SQR_TOOM2_MAX  0
+#ifndef TUNE_SQR_KARATSUBA_MAX
+#define TUNE_SQR_KARATSUBA_MAX  0
 #endif
 
-mp_size_t  mul_toom22_threshold         = MP_SIZE_T_MAX;
-mp_size_t  mul_toom33_threshold         = MUL_TOOM33_THRESHOLD_LIMIT;
+mp_size_t  mul_karatsuba_threshold      = MP_SIZE_T_MAX;
+mp_size_t  mul_toom3_threshold          = MUL_TOOM3_THRESHOLD_LIMIT;
 mp_size_t  mul_toom44_threshold         = MUL_TOOM44_THRESHOLD_LIMIT;
-mp_size_t  mul_toom6h_threshold         = MUL_TOOM6H_THRESHOLD_LIMIT;
-mp_size_t  mul_toom8h_threshold         = MUL_TOOM8H_THRESHOLD_LIMIT;
-mp_size_t  mul_toom32_to_toom43_threshold = MP_SIZE_T_MAX;
-mp_size_t  mul_toom32_to_toom53_threshold = MP_SIZE_T_MAX;
-mp_size_t  mul_toom42_to_toom53_threshold = MP_SIZE_T_MAX;
-mp_size_t  mul_toom42_to_toom63_threshold = MP_SIZE_T_MAX;
-mp_size_t  mul_toom43_to_toom54_threshold = MP_SIZE_T_MAX;
 mp_size_t  mul_fft_threshold            = MP_SIZE_T_MAX;
 mp_size_t  mul_fft_modf_threshold       = MP_SIZE_T_MAX;
 mp_size_t  sqr_basecase_threshold       = MP_SIZE_T_MAX;
-mp_size_t  sqr_toom2_threshold
-  = (TUNE_SQR_TOOM2_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_TOOM2_MAX);
+mp_size_t  sqr_karatsuba_threshold
+  = (TUNE_SQR_KARATSUBA_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_KARATSUBA_MAX);
 mp_size_t  sqr_toom3_threshold          = SQR_TOOM3_THRESHOLD_LIMIT;
 mp_size_t  sqr_toom4_threshold          = SQR_TOOM4_THRESHOLD_LIMIT;
-mp_size_t  sqr_toom6_threshold          = SQR_TOOM6_THRESHOLD_LIMIT;
-mp_size_t  sqr_toom8_threshold          = SQR_TOOM8_THRESHOLD_LIMIT;
 mp_size_t  sqr_fft_threshold            = MP_SIZE_T_MAX;
 mp_size_t  sqr_fft_modf_threshold       = MP_SIZE_T_MAX;
-mp_size_t  mullo_basecase_threshold     = MP_SIZE_T_MAX;
-mp_size_t  mullo_dc_threshold           = MP_SIZE_T_MAX;
-mp_size_t  mullo_mul_n_threshold        = MP_SIZE_T_MAX;
-mp_size_t  mulmid_toom42_threshold      = MP_SIZE_T_MAX;
-mp_size_t  mulmod_bnm1_threshold        = MP_SIZE_T_MAX;
-mp_size_t  sqrmod_bnm1_threshold        = MP_SIZE_T_MAX;
-mp_size_t  div_qr_2_pi2_threshold       = MP_SIZE_T_MAX;
-mp_size_t  dc_div_qr_threshold          = MP_SIZE_T_MAX;
-mp_size_t  dc_divappr_q_threshold       = MP_SIZE_T_MAX;
-mp_size_t  mu_div_qr_threshold          = MP_SIZE_T_MAX;
-mp_size_t  mu_divappr_q_threshold       = MP_SIZE_T_MAX;
-mp_size_t  mupi_div_qr_threshold        = MP_SIZE_T_MAX;
-mp_size_t  mu_div_q_threshold           = MP_SIZE_T_MAX;
-mp_size_t  dc_bdiv_qr_threshold         = MP_SIZE_T_MAX;
-mp_size_t  dc_bdiv_q_threshold          = MP_SIZE_T_MAX;
-mp_size_t  mu_bdiv_qr_threshold         = MP_SIZE_T_MAX;
-mp_size_t  mu_bdiv_q_threshold          = MP_SIZE_T_MAX;
-mp_size_t  inv_mulmod_bnm1_threshold    = MP_SIZE_T_MAX;
-mp_size_t  inv_newton_threshold         = MP_SIZE_T_MAX;
-mp_size_t  inv_appr_threshold           = MP_SIZE_T_MAX;
-mp_size_t  binv_newton_threshold        = MP_SIZE_T_MAX;
-mp_size_t  redc_1_to_redc_2_threshold   = MP_SIZE_T_MAX;
-mp_size_t  redc_1_to_redc_n_threshold   = MP_SIZE_T_MAX;
-mp_size_t  redc_2_to_redc_n_threshold   = MP_SIZE_T_MAX;
+mp_size_t  mullow_basecase_threshold    = MP_SIZE_T_MAX;
+mp_size_t  mullow_dc_threshold          = MP_SIZE_T_MAX;
+mp_size_t  mullow_mul_n_threshold       = MP_SIZE_T_MAX;
+mp_size_t  div_sb_preinv_threshold      = MP_SIZE_T_MAX;
+mp_size_t  div_dc_threshold             = MP_SIZE_T_MAX;
+mp_size_t  powm_threshold               = MP_SIZE_T_MAX;
 mp_size_t  matrix22_strassen_threshold  = MP_SIZE_T_MAX;
 mp_size_t  hgcd_threshold               = MP_SIZE_T_MAX;
-mp_size_t  hgcd_appr_threshold          = MP_SIZE_T_MAX;
-mp_size_t  hgcd_reduce_threshold        = MP_SIZE_T_MAX;
+mp_size_t  gcd_accel_threshold          = MP_SIZE_T_MAX;
 mp_size_t  gcd_dc_threshold             = MP_SIZE_T_MAX;
 mp_size_t  gcdext_dc_threshold          = MP_SIZE_T_MAX;
-int	   div_qr_1n_pi1_method		= 0;
-mp_size_t  div_qr_1_norm_threshold      = MP_SIZE_T_MAX;
-mp_size_t  div_qr_1_unnorm_threshold    = MP_SIZE_T_MAX;
 mp_size_t  divrem_1_norm_threshold      = MP_SIZE_T_MAX;
 mp_size_t  divrem_1_unnorm_threshold    = MP_SIZE_T_MAX;
 mp_size_t  mod_1_norm_threshold         = MP_SIZE_T_MAX;
 mp_size_t  mod_1_unnorm_threshold       = MP_SIZE_T_MAX;
-int	   mod_1_1p_method		= 0;
-mp_size_t  mod_1n_to_mod_1_1_threshold  = MP_SIZE_T_MAX;
-mp_size_t  mod_1u_to_mod_1_1_threshold  = MP_SIZE_T_MAX;
-mp_size_t  mod_1_1_to_mod_1_2_threshold = MP_SIZE_T_MAX;
-mp_size_t  mod_1_2_to_mod_1_4_threshold = MP_SIZE_T_MAX;
-mp_size_t  preinv_mod_1_to_mod_1_threshold = MP_SIZE_T_MAX;
+mp_size_t  mod_1_1_threshold            = MP_SIZE_T_MAX;
+mp_size_t  mod_1_2_threshold            = MP_SIZE_T_MAX;
+mp_size_t  mod_1_3_threshold            = MP_SIZE_T_MAX;
+mp_size_t  mod_1_4_threshold            = MP_SIZE_T_MAX;
 mp_size_t  divrem_2_threshold           = MP_SIZE_T_MAX;
 mp_size_t  get_str_dc_threshold         = MP_SIZE_T_MAX;
 mp_size_t  get_str_precompute_threshold = MP_SIZE_T_MAX;
 mp_size_t  set_str_dc_threshold         = MP_SIZE_T_MAX;
 mp_size_t  set_str_precompute_threshold = MP_SIZE_T_MAX;
-mp_size_t  fac_odd_threshold            = 0;
-mp_size_t  fac_dsc_threshold            = FAC_DSC_THRESHOLD_LIMIT;
 
 mp_size_t  fft_modf_sqr_threshold = MP_SIZE_T_MAX;
 mp_size_t  fft_modf_mul_threshold = MP_SIZE_T_MAX;
@@ -236,8 +190,7 @@ struct param_t {
   const char        *name;
   speed_function_t  function;
   speed_function_t  function2;
-  double            step_factor;    /* how much to step relatively */
-  int               step;           /* how much to step absolutely */
+  double            step_factor;    /* how much to step sizes (rounded down) */
   double            function_fudge; /* multiplier for "function" speeds */
   int               stop_since_change;
   double            stop_factor;
@@ -263,9 +216,6 @@ struct param_t {
 #ifndef HAVE_NATIVE_mpn_divexact_1
 #define HAVE_NATIVE_mpn_divexact_1 0
 #endif
-#ifndef HAVE_NATIVE_mpn_div_qr_1n_pi1
-#define HAVE_NATIVE_mpn_div_qr_1n_pi1 0
-#endif
 #ifndef HAVE_NATIVE_mpn_divrem_1
 #define HAVE_NATIVE_mpn_divrem_1 0
 #endif
@@ -275,9 +225,6 @@ struct param_t {
 #ifndef HAVE_NATIVE_mpn_mod_1
 #define HAVE_NATIVE_mpn_mod_1 0
 #endif
-#ifndef HAVE_NATIVE_mpn_mod_1_1p
-#define HAVE_NATIVE_mpn_mod_1_1p 0
-#endif
 #ifndef HAVE_NATIVE_mpn_modexact_1_odd
 #define HAVE_NATIVE_mpn_modexact_1_odd 0
 #endif
@@ -383,13 +330,12 @@ analyze_dat (int final)
 }
 
 
-/* Measuring for recompiled mpn/generic/div_qr_1.c,
- * mpn/generic/divrem_1.c, mpn/generic/mod_1.c and mpz/fac_ui.c */
+/* Measuring for recompiled mpn/generic/divrem_1.c and mpn/generic/mod_1.c */
 
-mp_limb_t mpn_div_qr_1_tune (mp_ptr, mp_limb_t *, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_divrem_1_tune (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_mod_1_tune (mp_srcptr, mp_size_t, mp_limb_t);
-void mpz_fac_ui_tune (mpz_ptr, unsigned long);
+mp_limb_t mpn_divrem_1_tune
+  __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_mod_1_tune
+   __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
 
 double
 speed_mpn_mod_1_tune (struct speed_params *s)
@@ -401,16 +347,7 @@ speed_mpn_divrem_1_tune (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_tune);
 }
-double
-speed_mpz_fac_ui_tune (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui_tune);
-}
-double
-speed_mpn_div_qr_1_tune (struct speed_params *s)
-{
-  SPEED_ROUTINE_MPN_DIV_QR_1 (mpn_div_qr_1_tune);
-}
+
 
 double
 tuneup_measure (speed_function_t fun,
@@ -453,7 +390,7 @@ tuneup_measure (speed_function_t fun,
 }
 
 
-#define PRINT_WIDTH  31
+#define PRINT_WIDTH  28
 
 void
 print_define_start (const char *name)
@@ -477,7 +414,6 @@ print_define_end_remark (const char *name, mp_size_t value, const char *remark)
   if (remark != NULL)
     printf ("  /* %s */", remark);
   printf ("\n");
-  fflush (stdout);
 }
 
 void
@@ -519,7 +455,6 @@ one (mp_size_t *threshold, struct param_t *param)
   DEFAULT (param->function_fudge, 1.0);
   DEFAULT (param->function2, param->function);
   DEFAULT (param->step_factor, 0.01);  /* small steps by default */
-  DEFAULT (param->step, 1);            /* small steps by default */
   DEFAULT (param->stop_since_change, 80);
   DEFAULT (param->stop_factor, 1.2);
   DEFAULT (param->min_size, 10);
@@ -575,10 +510,21 @@ one (mp_size_t *threshold, struct param_t *param)
 
   for (s.size = param->min_size;
        s.size < param->max_size;
-       s.size += MAX ((mp_size_t) floor (s.size * param->step_factor), param->step))
+       s.size += MAX ((mp_size_t) floor (s.size * param->step_factor), 1))
     {
       double   ti, tiplus1, d;
 
+      /* If there's a size limit and it's reached then it should still
+         be sensible to analyze the data since we want the threshold put
+         either at or near the limit.  */
+      if (s.size >= param->max_size)
+        {
+          if (option_trace)
+            printf ("Reached maximum size (%ld) without otherwise stopping\n",
+                    (long) param->max_size);
+          break;
+        }
+
       /*
         FIXME: check minimum size requirements are met, possibly by just
         checking for the -1 returns from the speed functions.
@@ -638,7 +584,7 @@ one (mp_size_t *threshold, struct param_t *param)
         }
 
       /* Stop if the threshold implied hasn't changed in a certain
-         number of measurements.  (It's this condition that usually
+         number of measurements.  (It's this condition that usually
          stops the loop.) */
       if (thresh_idx != new_thresh_idx)
         since_thresh_change = 0, thresh_idx = new_thresh_idx;
@@ -713,7 +659,6 @@ struct fft_param_t {
   mp_size_t         first_size;
   mp_size_t         max_size;
   speed_function_t  function;
-  speed_function_t  mul_modf_function;
   speed_function_t  mul_function;
   mp_size_t         sqr;
 };
@@ -728,7 +673,7 @@ fft_step_size (int k)
 {
   mp_size_t  step;
 
-  step = MAX ((mp_size_t) 1 << (k-1), GMP_LIMB_BITS) / GMP_LIMB_BITS;
+  step = MAX ((mp_size_t) 1 << (k-1), BITS_PER_MP_LIMB) / BITS_PER_MP_LIMB;
   step *= (mp_size_t) 1 << k;
 
   if (step <= 0)
@@ -754,435 +699,126 @@ fft_next_size (mp_size_t pl, int k)
   return pl;
 }
 
-#define NMAX_DEFAULT 1000000
-#define MAX_REPS 25
-#define MIN_REPS 5
-
-static inline size_t
-mpn_mul_fft_lcm (size_t a, unsigned int k)
-{
-  unsigned int l = k;
-
-  while (a % 2 == 0 && k > 0)
-    {
-      a >>= 1;
-      k--;
-    }
-  return a << l;
-}
-
-mp_size_t
-fftfill (mp_size_t pl, int k, int sqr)
-{
-  mp_size_t maxLK;
-  mp_bitcnt_t N, Nprime, nprime, M;
-
-  N = pl * GMP_NUMB_BITS;
-  M = N >> k;
-
-  maxLK = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k);
-
-  Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK;
-  nprime = Nprime / GMP_NUMB_BITS;
-  if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
-    {
-      size_t K2;
-      for (;;)
-	{
-	  K2 = 1L << mpn_fft_best_k (nprime, sqr);
-	  if ((nprime & (K2 - 1)) == 0)
-	    break;
-	  nprime = (nprime + K2 - 1) & -K2;
-	  Nprime = nprime * GMP_LIMB_BITS;
-	}
-    }
-  ASSERT_ALWAYS (nprime < pl);
-
-  return Nprime;
-}
-
-static int
-compare_double (const void *ap, const void *bp)
-{
-  double a = * (const double *) ap;
-  double b = * (const double *) bp;
-
-  if (a < b)
-    return -1;
-  else if (a > b)
-    return 1;
-  else
-    return 0;
-}
-
-double
-median (double *times, int n)
-{
-  qsort (times, n, sizeof (double), compare_double);
-  return times[n/2];
-}
-
-#define FFT_CACHE_SIZE 25
-typedef struct fft_cache
-{
-  mp_size_t n;
-  double time;
-} fft_cache_t;
-
-fft_cache_t fft_cache[FFT_CACHE_SIZE];
-
-double
-cached_measure (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n, int k,
-		int n_measurements)
-{
-  int i;
-  double t, ttab[MAX_REPS];
-
-  if (fft_cache[k].n == n)
-    return fft_cache[k].time;
-
-  for (i = 0; i < n_measurements; i++)
-    {
-      speed_starttime ();
-      mpn_mul_fft (rp, n, ap, n, bp, n, k);
-      ttab[i] = speed_endtime ();
-    }
-
-  t = median (ttab, n_measurements);
-  fft_cache[k].n = n;
-  fft_cache[k].time = t;
-  return t;
-}
-
-#define INSERT_FFTTAB(idx, nval, kval)					\
-  do {									\
-    fft_tab[idx].n = nval;						\
-    fft_tab[idx].k = kval;						\
-    fft_tab[idx+1].n = -1;	/* sentinel */				\
-    fft_tab[idx+1].k = -1;						\
-  } while (0)
-
-int
-fftmes (mp_size_t nmin, mp_size_t nmax, int initial_k, struct fft_param_t *p, int idx, int print)
-{
-  mp_size_t n, n1, prev_n1;
-  int k, best_k, last_best_k, kmax;
-  int eff, prev_eff;
-  double t0, t1;
-  int n_measurements;
-  mp_limb_t *ap, *bp, *rp;
-  mp_size_t alloc;
-  char *linepref;
-  struct fft_table_nk *fft_tab;
-
-  fft_tab = mpn_fft_table3[p->sqr];
-
-  for (k = 0; k < FFT_CACHE_SIZE; k++)
-    fft_cache[k].n = 0;
-
-  if (nmin < (p->sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
-    {
-      nmin = (p->sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD);
-    }
-
-  if (print)
-    printf ("#define %s%*s", p->table_name, 38, "");
-
-  if (idx == 0)
-    {
-      INSERT_FFTTAB (0, nmin, initial_k);
-
-      if (print)
-	{
-	  printf ("\\\n  { ");
-	  printf ("{%7u,%2u}", fft_tab[0].n, fft_tab[0].k);
-	  linepref = "    ";
-	}
-
-      idx = 1;
-    }
-
-  ap = malloc (sizeof (mp_limb_t));
-  if (p->sqr)
-    bp = ap;
-  else
-    bp = malloc (sizeof (mp_limb_t));
-  rp = malloc (sizeof (mp_limb_t));
-  alloc = 1;
-
-  /* Round n to comply to initial k value */
-  n = (nmin + ((1ul << initial_k) - 1)) & (MP_SIZE_T_MAX << initial_k);
-
-  n_measurements = (18 - initial_k) | 1;
-  n_measurements = MAX (n_measurements, MIN_REPS);
-  n_measurements = MIN (n_measurements, MAX_REPS);
-
-  last_best_k = initial_k;
-  best_k = initial_k;
-
-  while (n < nmax)
-    {
-      int start_k, end_k;
-
-      /* Assume the current best k is best until we hit its next FFT step.  */
-      t0 = 99999;
-
-      prev_n1 = n + 1;
-
-      start_k = MAX (4, best_k - 4);
-      end_k = MIN (24, best_k + 4);
-      for (k = start_k; k <= end_k; k++)
-	{
-          n1 = mpn_fft_next_size (prev_n1, k);
-
-	  eff = 200 * (n1 * GMP_NUMB_BITS >> k) / fftfill (n1, k, p->sqr);
-
-	  if (eff < 70)		/* avoid measuring too slow fft:s */
-	    continue;
-
-	  if (n1 > alloc)
-	    {
-	      alloc = n1;
-	      if (p->sqr)
-		{
-		  ap = realloc (ap, sizeof (mp_limb_t));
-		  rp = realloc (rp, sizeof (mp_limb_t));
-		  ap = bp = realloc (ap, alloc * sizeof (mp_limb_t));
-		  mpn_random (ap, alloc);
-		  rp = realloc (rp, alloc * sizeof (mp_limb_t));
-		}
-	      else
-		{
-		  ap = realloc (ap, sizeof (mp_limb_t));
-		  bp = realloc (bp, sizeof (mp_limb_t));
-		  rp = realloc (rp, sizeof (mp_limb_t));
-		  ap = realloc (ap, alloc * sizeof (mp_limb_t));
-		  mpn_random (ap, alloc);
-		  bp = realloc (bp, alloc * sizeof (mp_limb_t));
-		  mpn_random (bp, alloc);
-		  rp = realloc (rp, alloc * sizeof (mp_limb_t));
-		}
-	    }
-
-	  t1 = cached_measure (rp, ap, bp, n1, k, n_measurements);
-
-	  if (t1 * n_measurements > 0.3)
-	    n_measurements -= 2;
-	  n_measurements = MAX (n_measurements, MIN_REPS);
-
-	  if (t1 < t0)
-	    {
-	      best_k = k;
-	      t0 = t1;
-	    }
-	}
-
-      n1 = mpn_fft_next_size (prev_n1, best_k);
-
-      if (last_best_k != best_k)
-	{
-	  ASSERT_ALWAYS ((prev_n1 & ((1ul << last_best_k) - 1)) == 1);
-
-	  if (idx >= FFT_TABLE3_SIZE)
-	    {
-	      printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
-	      abort ();
-	    }
-	  INSERT_FFTTAB (idx, prev_n1 >> last_best_k, best_k);
-
-	  if (print)
-	    {
-	      printf (", ");
-	      if (idx % 4 == 0)
-		printf ("\\\n    ");
-	      printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
-	    }
-
-	  if (option_trace >= 2)
-	    {
-	      printf ("{%lu,%u}\n", prev_n1, best_k);
-	      fflush (stdout);
-	    }
-
-	  last_best_k = best_k;
-	  idx++;
-	}
-
-      for (;;)
-	{
-	  prev_n1 = n1;
-	  prev_eff = fftfill (prev_n1, best_k, p->sqr);
-	  n1 = mpn_fft_next_size (prev_n1 + 1, best_k);
-	  eff = fftfill (n1, best_k, p->sqr);
-
-	  if (eff != prev_eff)
-	    break;
-	}
-
-      n = prev_n1;
-    }
-
-  kmax = sizeof (mp_size_t) * 4;	/* GMP_MP_SIZE_T_BITS / 2 */
-  kmax = MIN (kmax, 25-1);
-  for (k = last_best_k + 1; k <= kmax; k++)
-    {
-      if (idx >= FFT_TABLE3_SIZE)
-	{
-	  printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
-	  abort ();
-	}
-      INSERT_FFTTAB (idx, ((1ul << (2*k-2)) + 1) >> (k-1), k);
-
-      if (print)
-	{
-	  printf (", ");
-	  if (idx % 4 == 0)
-	    printf ("\\\n    ");
-	  printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
-	}
-
-      idx++;
-    }
-
-  if (print)
-    printf (" }\n");
-
-  free (ap);
-  if (! p->sqr)
-    free (bp);
-  free (rp);
-
-  return idx;
-}
-
 void
 fft (struct fft_param_t *p)
 {
   mp_size_t  size;
-  int        k, idx, initial_k;
-
-  /*** Generate MUL_FFT_MODF_THRESHOLD / SQR_FFT_MODF_THRESHOLD ***/
+  int        i, k;
 
-#if 1
-  {
-    /* Use plain one() mechanism, for some reasonable initial values of k.  The
-       advantage is that we don't depend on mpn_fft_table3, which can therefore
-       leave it completely uninitialized.  */
+  for (i = 0; i < numberof (mpn_fft_table[p->sqr]); i++)
+    mpn_fft_table[p->sqr][i] = MP_SIZE_T_MAX;
 
-    static struct param_t param;
-    mp_size_t thres, best_thres;
-    int best_k;
-    char buf[20];
+  *p->p_threshold = MP_SIZE_T_MAX;
+  *p->p_modf_threshold = MP_SIZE_T_MAX;
 
-    best_thres = MP_SIZE_T_MAX;
-    best_k = -1;
+  option_trace = MAX (option_trace, option_fft_trace);
 
-    for (k = 5; k <= 7; k++)
-      {
-	param.name = p->modf_threshold_name;
-	param.min_size = 100;
-	param.max_size = 2000;
-	param.function  = p->mul_function;
-	param.step_factor = 0.0;
-	param.step = 4;
-	param.function2 = p->mul_modf_function;
-	param.noprint = 1;
-	s.r = k;
-	one (&thres, &param);
-	if (thres < best_thres)
-	  {
-	    best_thres = thres;
-	    best_k = k;
-	  }
-      }
+  printf ("#define %s  {", p->table_name);
+  if (option_trace >= 2)
+    printf ("\n");
 
-    *(p->p_modf_threshold) = best_thres;
-    sprintf (buf, "k = %d", best_k);
-    print_define_remark (p->modf_threshold_name, best_thres, buf);
-    initial_k = best_k;
-  }
-#else
+  k = FFT_FIRST_K;
   size = p->first_size;
   for (;;)
     {
-      double  tk, tm;
+      double  tk, tk1;
 
-      size = mpn_fft_next_size (size+1, mpn_fft_best_k (size+1, p->sqr));
-      k = mpn_fft_best_k (size, p->sqr);
+      size = fft_next_size (size+1, k+1);
 
       if (size >= p->max_size)
         break;
+      if (k >= FFT_FIRST_K + numberof (mpn_fft_table[p->sqr]))
+        break;
 
-      s.size = size + fft_step_size (k) / 2;
+      /* compare k to k+1 in the middle of the current k+1 step */
+      s.size = size + fft_step_size (k+1) / 2;
       s.r = k;
-      tk = tuneup_measure (p->mul_modf_function, NULL, &s);
+      tk = tuneup_measure (p->function, NULL, &s);
       if (tk == -1.0)
         abort ();
 
-      tm = tuneup_measure (p->mul_function, NULL, &s);
-      if (tm == -1.0)
+      s.r = k+1;
+      tk1 = tuneup_measure (p->function, NULL, &s);
+      if (tk1 == -1.0)
         abort ();
 
       if (option_trace >= 2)
-        printf ("at %ld   size=%ld  k=%d  %.9f   size=%ld modf %.9f\n",
-                (long) size,
-                (long) size + fft_step_size (k) / 2, k, tk,
-                (long) s.size, tm);
+        printf ("at %ld   size=%ld  k=%d  %.9f   k=%d %.9f\n",
+                (long) size, (long) s.size, k, tk, k+1, tk1);
 
-      if (tk < tm)
+      /* declare the k+1 threshold as soon as it's faster at its midpoint */
+      if (tk1 < tk)
         {
-	  *p->p_modf_threshold = s.size;
-	  print_define (p->modf_threshold_name, *p->p_modf_threshold);
-	  break;
+          mpn_fft_table[p->sqr][k-FFT_FIRST_K] = s.size;
+          printf (" %ld,", (long) s.size);
+          if (option_trace >= 2) printf ("\n");
+          k++;
         }
     }
-  initial_k = ?;
-#endif
 
-  /*** Generate MUL_FFT_TABLE3 / SQR_FFT_TABLE3 ***/
+  mpn_fft_table[p->sqr][k-FFT_FIRST_K] = 0;
+  printf (" 0 }\n");
 
-  idx = fftmes (*p->p_modf_threshold, p->max_size, initial_k, p, 0, 1);
-  printf ("#define %s_SIZE %d\n", p->table_name, idx);
 
-  /*** Generate MUL_FFT_THRESHOLD / SQR_FFT_THRESHOLD ***/
+  size = p->first_size;
 
-  size = 2 * *p->p_modf_threshold;	/* OK? */
+  /* Declare the FFT faster than a plain toom4 etc multiplication as soon
+     as one faster measurement is obtained.  A multiplication in the
+     middle of the FFT step is tested.  */
   for (;;)
     {
+      int     modf = (*p->p_modf_threshold == MP_SIZE_T_MAX);
       double  tk, tm;
-      mp_size_t mulmod_size, mul_size;;
+
+      /* k=7 should be the first FFT which can beat toom4 on a full
+         multiply, so jump to that threshold and save some probing after the
+         modf threshold is found.  */
+      if (!modf && size < mpn_fft_table[p->sqr][2])
+        {
+          size = mpn_fft_table[p->sqr][2];
+          if (option_trace >= 2)
+            printf ("jump to size=%ld\n", (long) size);
+        }
+
+      size = fft_next_size (size+1, mpn_fft_best_k (size, p->sqr));
+      k = mpn_fft_best_k (size, p->sqr);
 
       if (size >= p->max_size)
         break;
 
-      mulmod_size = mpn_mulmod_bnm1_next_size (2 * (size + 1)) / 2;
-      mul_size = (size + mulmod_size) / 2;	/* middle of step */
-
-      s.size = mulmod_size;
+      s.size = size + fft_step_size (k) / 2;
+      s.r = k;
       tk = tuneup_measure (p->function, NULL, &s);
       if (tk == -1.0)
         abort ();
 
-      s.size = mul_size;
+      if (!modf)  s.size /= 2;
       tm = tuneup_measure (p->mul_function, NULL, &s);
       if (tm == -1.0)
         abort ();
 
       if (option_trace >= 2)
-        printf ("at %ld   size=%ld  %.9f   size=%ld mul %.9f\n",
+        printf ("at %ld   size=%ld   k=%d  %.9f   size=%ld %s mul %.9f\n",
                 (long) size,
-                (long) mulmod_size, tk,
-                (long) mul_size, tm);
-
-      size = mulmod_size;
+                (long) size + fft_step_size (k) / 2, k, tk,
+                (long) s.size, modf ? "modf" : "full", tm);
 
       if (tk < tm)
         {
-	  *p->p_threshold = s.size;
-	  print_define (p->threshold_name, *p->p_threshold);
-	  break;
+          if (modf)
+            {
+              *p->p_modf_threshold = s.size;
+              print_define (p->modf_threshold_name, *p->p_modf_threshold);
+            }
+          else
+            {
+              *p->p_threshold = s.size;
+              print_define (p->threshold_name,      *p->p_threshold);
+              break;
+            }
         }
     }
+
 }
 
 
@@ -1190,232 +826,58 @@ fft (struct fft_param_t *p)
 /* Start karatsuba from 4, since the Cray t90 ieee code is much faster at 2,
    giving wrong results.  */
 void
-tune_mul_n (void)
+tune_mul (void)
 {
   static struct param_t  param;
-  mp_size_t next_toom_start;
-  int something_changed;
 
   param.function = speed_mpn_mul_n;
 
-  param.name = "MUL_TOOM22_THRESHOLD";
-  param.min_size = MAX (4, MPN_TOOM22_MUL_MINSIZE);
-  param.max_size = MUL_TOOM22_THRESHOLD_LIMIT-1;
-  one (&mul_toom22_threshold, &param);
+  param.name = "MUL_KARATSUBA_THRESHOLD";
+  param.min_size = MAX (4, MPN_KARA_MUL_N_MINSIZE);
+  param.max_size = MUL_KARATSUBA_THRESHOLD_LIMIT-1;
+  one (&mul_karatsuba_threshold, &param);
 
-  param.noprint = 1;
-
-  /* Threshold sequence loop.  Disable functions that would be used in a very
-     narrow range, re-measuring things when that happens.  */
-  something_changed = 1;
-  while (something_changed)
-    {
-      something_changed = 0;
-
-	next_toom_start = mul_toom22_threshold;
-
-	if (mul_toom33_threshold != 0)
-	  {
-	    param.name = "MUL_TOOM33_THRESHOLD";
-	    param.min_size = MAX (next_toom_start, MPN_TOOM33_MUL_MINSIZE);
-	    param.max_size = MUL_TOOM33_THRESHOLD_LIMIT-1;
-	    one (&mul_toom33_threshold, &param);
-
-	    if (next_toom_start * 1.05 >= mul_toom33_threshold)
-	      {
-		mul_toom33_threshold = 0;
-		something_changed = 1;
-	      }
-	  }
-
-	next_toom_start = MAX (next_toom_start, mul_toom33_threshold);
-
-	if (mul_toom44_threshold != 0)
-	  {
-	    param.name = "MUL_TOOM44_THRESHOLD";
-	    param.min_size = MAX (next_toom_start, MPN_TOOM44_MUL_MINSIZE);
-	    param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
-	    one (&mul_toom44_threshold, &param);
-
-	    if (next_toom_start * 1.05 >= mul_toom44_threshold)
-	      {
-		mul_toom44_threshold = 0;
-		something_changed = 1;
-	      }
-	  }
-
-	next_toom_start = MAX (next_toom_start, mul_toom44_threshold);
-
-	if (mul_toom6h_threshold != 0)
-	  {
-	    param.name = "MUL_TOOM6H_THRESHOLD";
-	    param.min_size = MAX (next_toom_start, MPN_TOOM6H_MUL_MINSIZE);
-	    param.max_size = MUL_TOOM6H_THRESHOLD_LIMIT-1;
-	    one (&mul_toom6h_threshold, &param);
-
-	    if (next_toom_start * 1.05 >= mul_toom6h_threshold)
-	      {
-		mul_toom6h_threshold = 0;
-		something_changed = 1;
-	      }
-	  }
-
-	next_toom_start = MAX (next_toom_start, mul_toom6h_threshold);
-
-	if (mul_toom8h_threshold != 0)
-	  {
-	    param.name = "MUL_TOOM8H_THRESHOLD";
-	    param.min_size = MAX (next_toom_start, MPN_TOOM8H_MUL_MINSIZE);
-	    param.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1;
-	    one (&mul_toom8h_threshold, &param);
-
-	    if (next_toom_start * 1.05 >= mul_toom8h_threshold)
-	      {
-		mul_toom8h_threshold = 0;
-		something_changed = 1;
-	      }
-	  }
-    }
+  param.name = "MUL_TOOM3_THRESHOLD";
+  param.min_size = MAX (mul_karatsuba_threshold, MPN_TOOM3_MUL_N_MINSIZE);
+  param.max_size = MUL_TOOM3_THRESHOLD_LIMIT-1;
+  one (&mul_toom3_threshold, &param);
 
-    print_define ("MUL_TOOM33_THRESHOLD", MUL_TOOM33_THRESHOLD);
-    print_define ("MUL_TOOM44_THRESHOLD", MUL_TOOM44_THRESHOLD);
-    print_define ("MUL_TOOM6H_THRESHOLD", MUL_TOOM6H_THRESHOLD);
-    print_define ("MUL_TOOM8H_THRESHOLD", MUL_TOOM8H_THRESHOLD);
+  param.name = "MUL_TOOM44_THRESHOLD";
+  param.min_size = MAX (mul_toom3_threshold, MPN_TOOM44_MUL_N_MINSIZE);
+  param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
+  one (&mul_toom44_threshold, &param);
 
   /* disabled until tuned */
   MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
 }
 
-void
-tune_mul (void)
-{
-  static struct param_t  param;
-  mp_size_t thres;
-
-  param.noprint = 1;
-
-  param.function = speed_mpn_toom32_for_toom43_mul;
-  param.function2 = speed_mpn_toom43_for_toom32_mul;
-  param.name = "MUL_TOOM32_TO_TOOM43_THRESHOLD";
-  param.min_size = MPN_TOOM43_MUL_MINSIZE * 24 / 17;
-  one (&thres, &param);
-  mul_toom32_to_toom43_threshold = thres * 17 / 24;
-  print_define ("MUL_TOOM32_TO_TOOM43_THRESHOLD", mul_toom32_to_toom43_threshold);
-
-  param.function = speed_mpn_toom32_for_toom53_mul;
-  param.function2 = speed_mpn_toom53_for_toom32_mul;
-  param.name = "MUL_TOOM32_TO_TOOM53_THRESHOLD";
-  param.min_size = MPN_TOOM53_MUL_MINSIZE * 30 / 19;
-  one (&thres, &param);
-  mul_toom32_to_toom53_threshold = thres * 19 / 30;
-  print_define ("MUL_TOOM32_TO_TOOM53_THRESHOLD", mul_toom32_to_toom53_threshold);
-
-  param.function = speed_mpn_toom42_for_toom53_mul;
-  param.function2 = speed_mpn_toom53_for_toom42_mul;
-  param.name = "MUL_TOOM42_TO_TOOM53_THRESHOLD";
-  param.min_size = MPN_TOOM53_MUL_MINSIZE * 20 / 11;
-  one (&thres, &param);
-  mul_toom42_to_toom53_threshold = thres * 11 / 20;
-  print_define ("MUL_TOOM42_TO_TOOM53_THRESHOLD", mul_toom42_to_toom53_threshold);
-
-  param.function = speed_mpn_toom42_mul;
-  param.function2 = speed_mpn_toom63_mul;
-  param.name = "MUL_TOOM42_TO_TOOM63_THRESHOLD";
-  param.min_size = MPN_TOOM63_MUL_MINSIZE * 2;
-  one (&thres, &param);
-  mul_toom42_to_toom63_threshold = thres / 2;
-  print_define ("MUL_TOOM42_TO_TOOM63_THRESHOLD", mul_toom42_to_toom63_threshold);
-
-  /* Use ratio 5/6 when measuring, the middle of the range 2/3 to 1. */
-  param.function = speed_mpn_toom43_for_toom54_mul;
-  param.function2 = speed_mpn_toom54_for_toom43_mul;
-  param.name = "MUL_TOOM43_TO_TOOM54_THRESHOLD";
-  param.min_size = MPN_TOOM54_MUL_MINSIZE * 6 / 5;
-  one (&thres, &param);
-  mul_toom43_to_toom54_threshold = thres * 5 / 6;
-  print_define ("MUL_TOOM43_TO_TOOM54_THRESHOLD", mul_toom43_to_toom54_threshold);
-}
-
 
+/* This was written by the tuneup challenged tege.  Kevin, please delete
+   this comment when you've reviewed/rewritten this.  :-) */
 void
-tune_mullo (void)
+tune_mullow (void)
 {
   static struct param_t  param;
 
-  param.function = speed_mpn_mullo_n;
+  param.function = speed_mpn_mullow_n;
 
-  param.name = "MULLO_BASECASE_THRESHOLD";
-  param.min_size = 1;
+  param.name = "MULLOW_BASECASE_THRESHOLD";
+  param.min_size = 3;
   param.min_is_always = 1;
-  param.max_size = MULLO_BASECASE_THRESHOLD_LIMIT-1;
-  param.stop_factor = 1.5;
-  param.noprint = 1;
-  one (&mullo_basecase_threshold, &param);
-
-  param.name = "MULLO_DC_THRESHOLD";
-  param.min_size = 8;
-  param.min_is_always = 0;
-  param.max_size = 1000;
-  one (&mullo_dc_threshold, &param);
+  param.max_size = MULLOW_BASECASE_THRESHOLD_LIMIT-1;
+  one (&mullow_basecase_threshold, &param);
 
-  if (mullo_basecase_threshold >= mullo_dc_threshold)
-    {
-      print_define ("MULLO_BASECASE_THRESHOLD", mullo_dc_threshold);
-      print_define_remark ("MULLO_DC_THRESHOLD", 0, "never mpn_mullo_basecase");
-    }
-  else
-    {
-      print_define ("MULLO_BASECASE_THRESHOLD", mullo_basecase_threshold);
-      print_define ("MULLO_DC_THRESHOLD", mullo_dc_threshold);
-    }
-
-#if WANT_FFT
-  param.name = "MULLO_MUL_N_THRESHOLD";
-  param.min_size = mullo_dc_threshold;
-  param.max_size = 2 * mul_fft_threshold;
-  param.noprint = 0;
-  param.step_factor = 0.03;
-  one (&mullo_mul_n_threshold, &param);
-#else
-  print_define_remark ("MULLO_MUL_N_THRESHOLD", MP_SIZE_T_MAX,
-                           "without FFT use mullo forever");
-#endif
-}
-
-void
-tune_mulmid (void)
-{
-  static struct param_t  param;
+  param.min_is_always = 0;	/* ??? */
 
-  param.name = "MULMID_TOOM42_THRESHOLD";
-  param.function = speed_mpn_mulmid_n;
-  param.min_size = 4;
-  param.max_size = 100;
-  one (&mulmid_toom42_threshold, &param);
-}
-
-void
-tune_mulmod_bnm1 (void)
-{
-  static struct param_t  param;
-
-  param.name = "MULMOD_BNM1_THRESHOLD";
-  param.function = speed_mpn_mulmod_bnm1;
-  param.min_size = 4;
-  param.max_size = 100;
-  one (&mulmod_bnm1_threshold, &param);
-}
-
-void
-tune_sqrmod_bnm1 (void)
-{
-  static struct param_t  param;
+  param.name = "MULLOW_DC_THRESHOLD";
+  param.min_size = mul_karatsuba_threshold;
+  param.max_size = 1000;
+  one (&mullow_dc_threshold, &param);
 
-  param.name = "SQRMOD_BNM1_THRESHOLD";
-  param.function = speed_mpn_sqrmod_bnm1;
-  param.min_size = 4;
-  param.max_size = 100;
-  one (&sqrmod_bnm1_threshold, &param);
+  param.name = "MULLOW_MUL_N_THRESHOLD";
+  param.min_size = mullow_dc_threshold;
+  param.max_size = 2000;
+  one (&mullow_mul_n_threshold, &param);
 }
 
 
@@ -1438,345 +900,130 @@ tune_sqr (void)
     {
       static struct param_t  param;
       param.name = "SQR_BASECASE_THRESHOLD";
-      param.function = speed_mpn_sqr;
+      param.function = speed_mpn_sqr_n;
       param.min_size = 3;
       param.min_is_always = 1;
-      param.max_size = TUNE_SQR_TOOM2_MAX;
+      param.max_size = TUNE_SQR_KARATSUBA_MAX;
       param.noprint = 1;
       one (&sqr_basecase_threshold, &param);
     }
 
   {
     static struct param_t  param;
-    param.name = "SQR_TOOM2_THRESHOLD";
-    param.function = speed_mpn_sqr;
-    param.min_size = MAX (4, MPN_TOOM2_SQR_MINSIZE);
-    param.max_size = TUNE_SQR_TOOM2_MAX;
+    param.name = "SQR_KARATSUBA_THRESHOLD";
+    param.function = speed_mpn_sqr_n;
+    param.min_size = MAX (4, MPN_KARA_SQR_N_MINSIZE);
+    param.max_size = TUNE_SQR_KARATSUBA_MAX;
     param.noprint = 1;
-    one (&sqr_toom2_threshold, &param);
+    one (&sqr_karatsuba_threshold, &param);
 
     if (! HAVE_NATIVE_mpn_sqr_basecase
-        && sqr_toom2_threshold < sqr_basecase_threshold)
+        && sqr_karatsuba_threshold < sqr_basecase_threshold)
       {
         /* Karatsuba becomes faster than mul_basecase before
            sqr_basecase does.  Arrange for the expression
-           "BELOW_THRESHOLD (un, SQR_TOOM2_THRESHOLD))" which
-           selects mpn_sqr_basecase in mpn_sqr to be false, by setting
-           SQR_TOOM2_THRESHOLD to zero, making
-           SQR_BASECASE_THRESHOLD the toom2 threshold.  */
+           "BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD))" which
+           selects mpn_sqr_basecase in mpn_sqr_n to be false, by setting
+           SQR_KARATSUBA_THRESHOLD to zero, making
+           SQR_BASECASE_THRESHOLD the karatsuba threshold.  */
 
-        sqr_basecase_threshold = SQR_TOOM2_THRESHOLD;
-        SQR_TOOM2_THRESHOLD = 0;
+        sqr_basecase_threshold = SQR_KARATSUBA_THRESHOLD;
+        SQR_KARATSUBA_THRESHOLD = 0;
 
         print_define_remark ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold,
-                             "toom2");
-        print_define_remark ("SQR_TOOM2_THRESHOLD",SQR_TOOM2_THRESHOLD,
+                             "karatsuba");
+        print_define_remark ("SQR_KARATSUBA_THRESHOLD",SQR_KARATSUBA_THRESHOLD,
                              "never sqr_basecase");
       }
     else
       {
         if (! HAVE_NATIVE_mpn_sqr_basecase)
           print_define ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold);
-        print_define ("SQR_TOOM2_THRESHOLD", SQR_TOOM2_THRESHOLD);
+        print_define ("SQR_KARATSUBA_THRESHOLD", SQR_KARATSUBA_THRESHOLD);
       }
   }
 
   {
     static struct param_t  param;
-    mp_size_t next_toom_start;
-    int something_changed;
+    mp_size_t toom3_start = MAX (sqr_karatsuba_threshold, sqr_basecase_threshold);
 
-    param.function = speed_mpn_sqr;
-    param.noprint = 1;
+    param.function = speed_mpn_sqr_n;
 
-  /* Threshold sequence loop.  Disable functions that would be used in a very
-     narrow range, re-measuring things when that happens.  */
-    something_changed = 1;
-    while (something_changed)
-      {
-	something_changed = 0;
-
-	next_toom_start = MAX (sqr_toom2_threshold, sqr_basecase_threshold);
-
-	sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT;
-	param.name = "SQR_TOOM3_THRESHOLD";
-	param.min_size = MAX (next_toom_start, MPN_TOOM3_SQR_MINSIZE);
-	param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1;
-	one (&sqr_toom3_threshold, &param);
-
-	next_toom_start = MAX (next_toom_start, sqr_toom3_threshold);
-
-	if (sqr_toom4_threshold != 0)
-	  {
-	    param.name = "SQR_TOOM4_THRESHOLD";
-	    sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT;
-	    param.min_size = MAX (next_toom_start, MPN_TOOM4_SQR_MINSIZE);
-	    param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
-	    one (&sqr_toom4_threshold, &param);
-
-	    if (next_toom_start * 1.05 >= sqr_toom4_threshold)
-	      {
-		sqr_toom4_threshold = 0;
-		something_changed = 1;
-	      }
-	  }
-
-	next_toom_start = MAX (next_toom_start, sqr_toom4_threshold);
-
-	if (sqr_toom6_threshold != 0)
-	  {
-	    param.name = "SQR_TOOM6_THRESHOLD";
-	    sqr_toom6_threshold = SQR_TOOM6_THRESHOLD_LIMIT;
-	    param.min_size = MAX (next_toom_start, MPN_TOOM6_SQR_MINSIZE);
-	    param.max_size = SQR_TOOM6_THRESHOLD_LIMIT-1;
-	    one (&sqr_toom6_threshold, &param);
-
-	    if (next_toom_start * 1.05 >= sqr_toom6_threshold)
-	      {
-		sqr_toom6_threshold = 0;
-		something_changed = 1;
-	      }
-	  }
-
-	next_toom_start = MAX (next_toom_start, sqr_toom6_threshold);
-
-	if (sqr_toom8_threshold != 0)
-	  {
-	    param.name = "SQR_TOOM8_THRESHOLD";
-	    sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT;
-	    param.min_size = MAX (next_toom_start, MPN_TOOM8_SQR_MINSIZE);
-	    param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1;
-	    one (&sqr_toom8_threshold, &param);
-
-	    if (next_toom_start * 1.05 >= sqr_toom8_threshold)
-	      {
-		sqr_toom8_threshold = 0;
-		something_changed = 1;
-	      }
-	  }
-      }
+    param.name = "SQR_TOOM3_THRESHOLD";
+    param.min_size = MAX (toom3_start, MPN_TOOM3_SQR_N_MINSIZE);
+    param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1;
+    one (&sqr_toom3_threshold, &param);
 
-    print_define ("SQR_TOOM3_THRESHOLD", SQR_TOOM3_THRESHOLD);
-    print_define ("SQR_TOOM4_THRESHOLD", SQR_TOOM4_THRESHOLD);
-    print_define ("SQR_TOOM6_THRESHOLD", SQR_TOOM6_THRESHOLD);
-    print_define ("SQR_TOOM8_THRESHOLD", SQR_TOOM8_THRESHOLD);
+    param.name = "SQR_TOOM4_THRESHOLD";
+    param.min_size = MAX (sqr_toom3_threshold, MPN_TOOM4_SQR_N_MINSIZE);
+    param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
+    one (&sqr_toom4_threshold, &param);
   }
 }
 
 
 void
-tune_dc_div (void)
+tune_sb_preinv (void)
 {
-  s.r = 0;		/* clear to make speed function do 2n/n */
-  {
-    static struct param_t  param;
-    param.name = "DC_DIV_QR_THRESHOLD";
-    param.function = speed_mpn_sbpi1_div_qr;
-    param.function2 = speed_mpn_dcpi1_div_qr;
-    param.min_size = 6;
-    one (&dc_div_qr_threshold, &param);
-  }
-  {
-    static struct param_t  param;
-    param.name = "DC_DIVAPPR_Q_THRESHOLD";
-    param.function = speed_mpn_sbpi1_divappr_q;
-    param.function2 = speed_mpn_dcpi1_divappr_q;
-    param.min_size = 6;
-    one (&dc_divappr_q_threshold, &param);
-  }
-}
+  static struct param_t  param;
 
-static double
-speed_mpn_sbordcpi1_div_qr (struct speed_params *s)
-{
-  if (s->size < DC_DIV_QR_THRESHOLD)
-    return speed_mpn_sbpi1_div_qr (s);
-  else
-    return speed_mpn_dcpi1_div_qr (s);
-}
+  if (GMP_NAIL_BITS != 0)
+    {
+      DIV_SB_PREINV_THRESHOLD = MP_SIZE_T_MAX;
+      print_define_remark ("DIV_SB_PREINV_THRESHOLD", MP_SIZE_T_MAX,
+                           "no preinv with nails");
+      return;
+    }
 
-void
-tune_mu_div (void)
-{
-  s.r = 0;		/* clear to make speed function do 2n/n */
-  {
-    static struct param_t  param;
-    param.name = "MU_DIV_QR_THRESHOLD";
-    param.function = speed_mpn_dcpi1_div_qr;
-    param.function2 = speed_mpn_mu_div_qr;
-    param.min_size = mul_toom22_threshold;
-    param.max_size = 5000;
-    param.step_factor = 0.02;
-    one (&mu_div_qr_threshold, &param);
-  }
-  {
-    static struct param_t  param;
-    param.name = "MU_DIVAPPR_Q_THRESHOLD";
-    param.function = speed_mpn_dcpi1_divappr_q;
-    param.function2 = speed_mpn_mu_divappr_q;
-    param.min_size = mul_toom22_threshold;
-    param.max_size = 5000;
-    param.step_factor = 0.02;
-    one (&mu_divappr_q_threshold, &param);
-  }
-  {
-    static struct param_t  param;
-    param.name = "MUPI_DIV_QR_THRESHOLD";
-    param.function = speed_mpn_sbordcpi1_div_qr;
-    param.function2 = speed_mpn_mupi_div_qr;
-    param.min_size = 6;
-    param.min_is_always = 1;
-    param.max_size = 1000;
-    param.step_factor = 0.02;
-    one (&mupi_div_qr_threshold, &param);
-  }
-}
+  if (UDIV_PREINV_ALWAYS)
+    {
+      print_define_remark ("DIV_SB_PREINV_THRESHOLD", 0L, "preinv always");
+      return;
+    }
 
-void
-tune_dc_bdiv (void)
-{
-  s.r = 0;		/* clear to make speed function do 2n/n*/
-  {
-    static struct param_t  param;
-    param.name = "DC_BDIV_QR_THRESHOLD";
-    param.function = speed_mpn_sbpi1_bdiv_qr;
-    param.function2 = speed_mpn_dcpi1_bdiv_qr;
-    param.min_size = 4;
-    one (&dc_bdiv_qr_threshold, &param);
-  }
-  {
-    static struct param_t  param;
-    param.name = "DC_BDIV_Q_THRESHOLD";
-    param.function = speed_mpn_sbpi1_bdiv_q;
-    param.function2 = speed_mpn_dcpi1_bdiv_q;
-    param.min_size = 4;
-    one (&dc_bdiv_q_threshold, &param);
-  }
+  param.check_size = 256;
+  param.min_size = 3;
+  param.min_is_always = 1;
+  param.size_extra = 3;
+  param.stop_factor = 2.0;
+  param.name = "DIV_SB_PREINV_THRESHOLD";
+  param.function = speed_mpn_sb_divrem_m3;
+  one (&div_sb_preinv_threshold, &param);
 }
 
-void
-tune_mu_bdiv (void)
-{
-  s.r = 0;		/* clear to make speed function do 2n/n*/
-  {
-    static struct param_t  param;
-    param.name = "MU_BDIV_QR_THRESHOLD";
-    param.function = speed_mpn_dcpi1_bdiv_qr;
-    param.function2 = speed_mpn_mu_bdiv_qr;
-    param.min_size = mul_toom22_threshold;
-    param.max_size = 5000;
-    param.step_factor = 0.02;
-    one (&mu_bdiv_qr_threshold, &param);
-  }
-  {
-    static struct param_t  param;
-    param.name = "MU_BDIV_Q_THRESHOLD";
-    param.function = speed_mpn_dcpi1_bdiv_q;
-    param.function2 = speed_mpn_mu_bdiv_q;
-    param.min_size = mul_toom22_threshold;
-    param.max_size = 5000;
-    param.step_factor = 0.02;
-    one (&mu_bdiv_q_threshold, &param);
-  }
-}
 
 void
-tune_invertappr (void)
+tune_dc (void)
 {
   static struct param_t  param;
-
-  param.function = speed_mpn_ni_invertappr;
-  param.name = "INV_MULMOD_BNM1_THRESHOLD";
-  param.min_size = 4;
-  one (&inv_mulmod_bnm1_threshold, &param);
-
-  param.function = speed_mpn_invertappr;
-  param.name = "INV_NEWTON_THRESHOLD";
-  param.min_size = 3;
-  one (&inv_newton_threshold, &param);
+  param.name = "DIV_DC_THRESHOLD";
+  param.function = speed_mpn_dc_tdiv_qr;
+  param.step_factor = 0.02;
+  one (&div_dc_threshold, &param);
 }
 
-void
-tune_invert (void)
-{
-  static struct param_t  param;
 
-  param.function = speed_mpn_invert;
-  param.name = "INV_APPR_THRESHOLD";
-  param.min_size = 3;
-  one (&inv_appr_threshold, &param);
-}
+/* This is an indirect determination, based on a comparison between redc and
+   mpz_mod.  A fudge factor of 1.04 is applied to redc, to represent
+   additional overheads it gets in mpz_powm.
+
+   stop_factor is 1.1 to hopefully help cray vector systems, where otherwise
+   currently it hits the 1000 limb limit with only a factor of about 1.18
+   (threshold should be around 650).  */
 
 void
-tune_binvert (void)
+tune_powm (void)
 {
   static struct param_t  param;
-
-  param.function = speed_mpn_binvert;
-  param.name = "BINV_NEWTON_THRESHOLD";
-  param.min_size = 8;		/* pointless with smaller operands */
-  one (&binv_newton_threshold, &param);
+  param.name = "POWM_THRESHOLD";
+  param.function = speed_mpn_redc_1;
+  param.function2 = speed_mpz_mod;
+  param.step_factor = 0.03;
+  param.stop_factor = 1.1;
+  param.function_fudge = 1.04;
+  one (&powm_threshold, &param);
 }
 
-void
-tune_redc (void)
-{
-#define TUNE_REDC_2_MAX 100
-#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
-#define WANT_REDC_2 1
-#endif
-
-#if WANT_REDC_2
-  {
-    static struct param_t  param;
-    param.name = "REDC_1_TO_REDC_2_THRESHOLD";
-    param.function = speed_mpn_redc_1;
-    param.function2 = speed_mpn_redc_2;
-    param.min_size = 1;
-    param.min_is_always = 1;
-    param.max_size = TUNE_REDC_2_MAX;
-    param.noprint = 1;
-    param.stop_factor = 1.5;
-    one (&redc_1_to_redc_2_threshold, &param);
-  }
-  {
-    static struct param_t  param;
-    param.name = "REDC_2_TO_REDC_N_THRESHOLD";
-    param.function = speed_mpn_redc_2;
-    param.function2 = speed_mpn_redc_n;
-    param.min_size = 16;
-    param.noprint = 1;
-    one (&redc_2_to_redc_n_threshold, &param);
-  }
-  if (redc_1_to_redc_2_threshold >= redc_2_to_redc_n_threshold)
-    {
-      redc_2_to_redc_n_threshold = 0;	/* disable redc_2 */
-
-      /* Never use redc2, measure redc_1 -> redc_n cutoff, store result as
-	 REDC_1_TO_REDC_2_THRESHOLD.  */
-      {
-	static struct param_t  param;
-	param.name = "REDC_1_TO_REDC_2_THRESHOLD";
-	param.function = speed_mpn_redc_1;
-	param.function2 = speed_mpn_redc_n;
-	param.min_size = 16;
-	param.noprint = 1;
-	one (&redc_1_to_redc_2_threshold, &param);
-      }
-    }
-  print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_1_TO_REDC_2_THRESHOLD);
-  print_define ("REDC_2_TO_REDC_N_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD);
-#else
-  {
-    static struct param_t  param;
-    param.name = "REDC_1_TO_REDC_N_THRESHOLD";
-    param.function = speed_mpn_redc_1;
-    param.function2 = speed_mpn_redc_n;
-    param.min_size = 16;
-    one (&redc_1_to_redc_n_threshold, &param);
-  }
-#endif
-}
 
 void
 tune_matrix22_mul (void)
@@ -1799,30 +1046,17 @@ tune_hgcd (void)
   one (&hgcd_threshold, &param);
 }
 
+#if 0
 void
-tune_hgcd_appr (void)
-{
-  static struct param_t  param;
-  param.name = "HGCD_APPR_THRESHOLD";
-  param.function = speed_mpn_hgcd_appr;
-  /* We seem to get strange results for small sizes */
-  param.min_size = 50;
-  param.stop_since_change = 150;
-  one (&hgcd_appr_threshold, &param);
-}
-
-void
-tune_hgcd_reduce (void)
+tune_gcd_accel (void)
 {
   static struct param_t  param;
-  param.name = "HGCD_REDUCE_THRESHOLD";
-  param.function = speed_mpn_hgcd_reduce;
-  param.min_size = 30;
-  param.max_size = 7000;
-  param.step_factor = 0.04;
-  one (&hgcd_reduce_threshold, &param);
+  param.name = "GCD_ACCEL_THRESHOLD";
+  param.function = speed_mpn_gcd;
+  param.min_size = 1;
+  one (&gcd_accel_threshold, &param);
 }
-
+#endif
 void
 tune_gcd_dc (void)
 {
@@ -1847,139 +1081,6 @@ tune_gcdext_dc (void)
   one (&gcdext_dc_threshold, &param);
 }
 
-/* In tune_powm_sec we compute the table used by the win_size function.  The
-   cutoff points are in exponent bits, disregarding other operand sizes.  It is
-   not possible to use the one framework since it currently uses a granularity
-   of full limbs.
-*/
-
-/* This win_size replaces the variant in the powm code, allowing us to
-   control k in the k-ary algorithms.  */
-int winsize;
-int
-win_size (mp_bitcnt_t eb)
-{
-  return winsize;
-}
-
-void
-tune_powm_sec (void)
-{
-  mp_size_t n;
-  int k, i;
-  mp_size_t itch;
-  mp_bitcnt_t nbits, nbits_next, possible_nbits_cutoff;
-  const int n_max = 3000 / GMP_NUMB_BITS;
-  const int n_measurements = 5;
-  mp_ptr rp, bp, ep, mp, tp;
-  double ttab[n_measurements], tk, tkp1;
-  TMP_DECL;
-  TMP_MARK;
-
-  possible_nbits_cutoff = 0;
-
-  k = 1;
-
-  winsize = 10;			/* the itch function needs this */
-  itch = mpn_sec_powm_itch (n_max, n_max * GMP_NUMB_BITS, n_max);
-
-  rp = TMP_ALLOC_LIMBS (n_max);
-  bp = TMP_ALLOC_LIMBS (n_max);
-  ep = TMP_ALLOC_LIMBS (n_max);
-  mp = TMP_ALLOC_LIMBS (n_max);
-  tp = TMP_ALLOC_LIMBS (itch);
-
-  mpn_random (bp, n_max);
-  mpn_random (mp, n_max);
-  mp[0] |= 1;
-
-/* How about taking the M operand size into account?
-
-   An operation R=powm(B,E,N) will take time O(log(E)*M(log(N))) (assuming
-   B = O(M)).
-
-   Using k-ary and no sliding window, the precomputation will need time
-   O(2^(k-1)*M(log(N))) and the main computation will need O(log(E)*S(N)) +
-   O(log(E)/k*M(N)), for the squarings, multiplications, respectively.
-
-   An operation R=powm_sec(B,E,N) will take time like powm.
-
-   Using k-ary, the precomputation will need time O(2^k*M(log(N))) and the
-   main computation will need O(log(E)*S(N)) + O(log(E)/k*M(N)) +
-   O(log(E)/k*2^k*log(N)), for the squarings, multiplications, and full
-   table reads, respectively.  */
-
-  printf ("#define POWM_SEC_TABLE  ");
-
-  /* For nbits == 1, we should always use k == 1, so no need to tune
-     that. Starting with nbits == 2 also ensure that nbits always is
-     larger than the windowsize k+1. */
-  for (nbits = 2; nbits <= n_max * GMP_NUMB_BITS; )
-    {
-      n = (nbits - 1) / GMP_NUMB_BITS + 1;
-
-      /* Generate E such that sliding-window for k and k+1 works equally
-	 well/poorly (but sliding is not used in powm_sec, of course). */
-      for (i = 0; i < n; i++)
-	ep[i] = ~CNST_LIMB(0);
-
-      winsize = k;
-      for (i = 0; i < n_measurements; i++)
-	{
-	  speed_starttime ();
-	  mpn_sec_powm (rp, bp, n, ep, nbits, mp, n, tp);
-	  ttab[i] = speed_endtime ();
-	}
-      tk = median (ttab, n_measurements);
-
-      winsize = k + 1;
-      speed_starttime ();
-      for (i = 0; i < n_measurements; i++)
-	{
-	  speed_starttime ();
-	  mpn_sec_powm (rp, bp, n, ep, nbits, mp, n, tp);
-	  ttab[i] = speed_endtime ();
-	}
-      tkp1 = median (ttab, n_measurements);
-/*
-      printf ("testing: %ld, %d", nbits, k, ep[n-1]);
-      printf ("   %10.5f  %10.5f\n", tk, tkp1);
-*/
-      if (tkp1 < tk)
-	{
-	  if (possible_nbits_cutoff)
-	    {
-	      /* Two consecutive sizes indicate k increase, obey.  */
-
-	      /* Must always have x[k] >= k */
-	      ASSERT_ALWAYS (possible_nbits_cutoff >= k);
-
-	      if (k > 1)
-		printf (",");
-	      printf ("%ld", (long) possible_nbits_cutoff);
-	      k++;
-	      possible_nbits_cutoff = 0;
-	    }
-	  else
-	    {
-	      /* One measurement indicate k increase, save nbits for further
-		 consideration.  */
-	      /* The new larger k gets used for sizes > the cutoff
-		 value, hence the cutoff should be one less than the
-		 smallest size where it gives a speedup. */
-	      possible_nbits_cutoff = nbits - 1;
-	    }
-	}
-      else
-	possible_nbits_cutoff = 0;
-
-      nbits_next = nbits * 65 / 64;
-      nbits = nbits_next + (nbits_next == nbits);
-    }
-  printf ("\n");
-  TMP_FREE;
-}
-
 
 /* size_extra==1 reflects the fact that with high<divisor one division is
    always skipped.  Forcing high<divisor while testing ensures consistency
@@ -1999,7 +1100,7 @@ tune_powm_sec (void)
   param.stop_factor = 2.0;
 
 
-double (*tuned_speed_mpn_divrem_1) (struct speed_params *);
+double (*tuned_speed_mpn_divrem_1) __GMP_PROTO ((struct speed_params *));
 
 void
 tune_divrem_1 (void)
@@ -2052,59 +1153,15 @@ tune_divrem_1 (void)
   }
 }
 
-void
-tune_div_qr_1 (void)
-{
-  static struct param_t  param;
-  double            t1, t2;
-
-  if (!HAVE_NATIVE_mpn_div_qr_1n_pi1)
-    {
-      static struct param_t  param;
-      double   t1, t2;
-
-      s.size = 10;
-      s.r = randlimb_norm ();
-
-      t1 = tuneup_measure (speed_mpn_div_qr_1n_pi1_1, &param, &s);
-      t2 = tuneup_measure (speed_mpn_div_qr_1n_pi1_2, &param, &s);
-
-      if (t1 == -1.0 || t2 == -1.0)
-	{
-	  printf ("Oops, can't measure all mpn_div_qr_1n_pi1 methods at %ld\n",
-		  (long) s.size);
-	  abort ();
-	}
-      div_qr_1n_pi1_method = (t1 < t2) ? 1 : 2;
-      print_define ("DIV_QR_1N_PI1_METHOD", div_qr_1n_pi1_method);
-    }
-
-  {
-    static struct param_t  param;
-    param.name = "DIV_QR_1_NORM_THRESHOLD";
-    DIV_1_PARAMS;
-    param.min_size = 1;
-    param.min_is_always = 0;
-    s.r = randlimb_norm ();
-    param.function = speed_mpn_div_qr_1_tune;
-    one (&div_qr_1_norm_threshold, &param);
-  }
-  {
-    static struct param_t  param;
-    param.name = "DIV_QR_1_UNNORM_THRESHOLD";
-    DIV_1_PARAMS;
-    param.min_size = 1;
-    param.min_is_always = 0;
-    s.r = randlimb_half();
-    param.function = speed_mpn_div_qr_1_tune;
-    one (&div_qr_1_unnorm_threshold, &param);
-  }
-}
 
+double (*tuned_speed_mpn_mod_1) __GMP_PROTO ((struct speed_params *));
 
 void
 tune_mod_1 (void)
 {
+  /* plain version by default */
+  tuned_speed_mpn_mod_1 = speed_mpn_mod_1;
+
   /* No support for tuning native assembler code, do that by hand and put
      the results in the .asm file, there's no need for such thresholds to
      appear in gmp-mparam.h.  */
@@ -2120,27 +1177,6 @@ tune_mod_1 (void)
       return;
     }
 
-  if (!HAVE_NATIVE_mpn_mod_1_1p)
-    {
-      static struct param_t  param;
-      double   t1, t2;
-
-      s.size = 10;
-      s.r = randlimb_half ();
-
-      t1 = tuneup_measure (speed_mpn_mod_1_1_1, &param, &s);
-      t2 = tuneup_measure (speed_mpn_mod_1_1_2, &param, &s);
-
-      if (t1 == -1.0 || t2 == -1.0)
-	{
-	  printf ("Oops, can't measure all mpn_mod_1_1 methods at %ld\n",
-		  (long) s.size);
-	  abort ();
-	}
-      mod_1_1p_method = (t1 < t2) ? 1 : 2;
-      print_define ("MOD_1_1P_METHOD", mod_1_1p_method);
-    }
-
   if (UDIV_PREINV_ALWAYS)
     {
       print_define ("MOD_1_NORM_THRESHOLD", 0L);
@@ -2148,6 +1184,8 @@ tune_mod_1 (void)
     }
   else
     {
+      tuned_speed_mpn_mod_1 = speed_mpn_mod_1_tune;
+
       {
 	static struct param_t  param;
 	param.name = "MOD_1_NORM_THRESHOLD";
@@ -2168,80 +1206,26 @@ tune_mod_1 (void)
   {
     static struct param_t  param;
 
-    param.check_size = 256;
-
-    s.r = randlimb_norm ();
+    s.r = GMP_NUMB_MASK / 5;
     param.function = speed_mpn_mod_1_tune;
-
-    param.name = "MOD_1N_TO_MOD_1_1_THRESHOLD";
-    param.min_size = 2;
-    one (&mod_1n_to_mod_1_1_threshold, &param);
-  }
-
-  {
-    static struct param_t  param;
-
-    param.check_size = 256;
-    s.r = randlimb_half ();
-    param.noprint = 1;
-
-    param.function = speed_mpn_mod_1_1;
-    param.function2 = speed_mpn_mod_1_2;
-    param.min_is_always = 1;
-    param.name = "MOD_1_1_TO_MOD_1_2_THRESHOLD";
-    param.min_size = 2;
-    one (&mod_1_1_to_mod_1_2_threshold, &param);
-
-    param.function = speed_mpn_mod_1_2;
-    param.function2 = speed_mpn_mod_1_4;
-    param.min_is_always = 1;
-    param.name = "MOD_1_2_TO_MOD_1_4_THRESHOLD";
     param.min_size = 1;
-    one (&mod_1_2_to_mod_1_4_threshold, &param);
 
-    if (mod_1_1_to_mod_1_2_threshold >= mod_1_2_to_mod_1_4_threshold)
-      {
-	/* Never use mod_1_2, measure mod_1_1 -> mod_1_4 */
-	mod_1_2_to_mod_1_4_threshold = 0;
-
-	param.function = speed_mpn_mod_1_1;
-	param.function2 = speed_mpn_mod_1_4;
-	param.min_is_always = 1;
-	param.name = "MOD_1_1_TO_MOD_1_4_THRESHOLD fake";
-	param.min_size = 2;
-	one (&mod_1_1_to_mod_1_2_threshold, &param);
-      }
+    param.name = "MOD_1_1_THRESHOLD";
+    one (&mod_1_1_threshold, &param);
 
-    param.function = speed_mpn_mod_1_tune;
-    param.function2 = NULL;
-    param.name = "MOD_1U_TO_MOD_1_1_THRESHOLD";
-    param.min_size = 2;
-    param.min_is_always = 0;
-    one (&mod_1u_to_mod_1_1_threshold, &param);
-
-    if (mod_1u_to_mod_1_1_threshold >= mod_1_1_to_mod_1_2_threshold)
-      mod_1_1_to_mod_1_2_threshold = 0;
-    if (mod_1u_to_mod_1_1_threshold >= mod_1_2_to_mod_1_4_threshold)
-      mod_1_2_to_mod_1_4_threshold = 0;
-
-    print_define_remark ("MOD_1U_TO_MOD_1_1_THRESHOLD", mod_1u_to_mod_1_1_threshold, NULL);
-    print_define_remark ("MOD_1_1_TO_MOD_1_2_THRESHOLD", mod_1_1_to_mod_1_2_threshold,
-			 mod_1_1_to_mod_1_2_threshold == 0 ? "never mpn_mod_1_1p" : NULL);
-    print_define_remark ("MOD_1_2_TO_MOD_1_4_THRESHOLD", mod_1_2_to_mod_1_4_threshold,
-			 mod_1_2_to_mod_1_4_threshold == 0 ? "never mpn_mod_1s_2p" : NULL);
-  }
+    param.name = "MOD_1_2_THRESHOLD";
+    param.min_size = mod_1_1_threshold + 1;
+    one (&mod_1_2_threshold, &param);
 
-  {
-    static struct param_t  param;
-
-    param.check_size = 256;
+#if 0
+    param.name = "MOD_1_3_THRESHOLD";
+    param.min_size = mod_1_2_threshold + 1;
+    one (&mod_1_3_threshold, &param);
+#endif
 
-    param.name = "PREINV_MOD_1_TO_MOD_1_THRESHOLD";
-    s.r = randlimb_norm ();
-    param.function = speed_mpn_preinv_mod_1;
-    param.function2 = speed_mpn_mod_1_tune;
-    param.min_size = 1;
-    one (&preinv_mod_1_to_mod_1_threshold, &param);
+    param.name = "MOD_1_4_THRESHOLD";
+    param.min_size = mod_1_2_threshold + 1;
+    one (&mod_1_4_threshold, &param);
   }
 }
 
@@ -2318,6 +1302,72 @@ tune_preinv_divrem_1 (void)
 }
 
 
+/* Decide USE_PREINV_MOD_1.  A non-zero MOD_1_UNNORM_THRESHOLD (or
+   MOD_1_NORM_THRESHOLD) would imply that udiv_qrnnd_preinv is worth using,
+   but it seems most straightforward to time the two routines directly.  */
+
+void
+tune_preinv_mod_1 (void)
+{
+  static struct param_t  param;     /* static storage, so starts zeroed */
+  speed_function_t  mod_1;          /* baseline routine timed against preinv */
+  const char        *mod_1_name;    /* baseline's name, for messages */
+  double            t1, t2;         /* t1: preinv time, t2: baseline time */
+
+  /* Any native version of mpn_preinv_mod_1 is assumed to exist because it's
+     faster than mpn_mod_1, so report 1 without taking any measurement.  */
+  if (HAVE_NATIVE_mpn_preinv_mod_1)
+    {
+      print_define_remark ("USE_PREINV_MOD_1", 1, "native");
+      return;
+    }
+
+  if (GMP_NAIL_BITS != 0)           /* nails build: report 0, unmeasured */
+    {
+      print_define_remark ("USE_PREINV_MOD_1", 0, "no preinv with nails");
+      return;
+    }
+
+  /* If udiv_qrnnd_preinv is the only division method then of course
+     mpn_preinv_mod_1 should be used; again nothing needs to be timed.  */
+  if (UDIV_PREINV_ALWAYS)
+    {
+      print_define_remark ("USE_PREINV_MOD_1", 1, "preinv always");
+      return;
+    }
+
+  /* Pick the baseline.  If we've got an assembler version of mpn_mod_1 then
+     compare against that, not the mpn_mod_1_div generic C.  */
+  if (HAVE_NATIVE_mpn_mod_1)
+    {
+      mod_1 = speed_mpn_mod_1;
+      mod_1_name = "mpn_mod_1";
+    }
+  else
+    {
+      mod_1 = speed_mpn_mod_1_div;
+      mod_1_name = "mpn_mod_1_div";
+    }
+
+  param.data_high = DATA_HIGH_LT_R; /* let mpn_mod_1 skip one division */
+  s.size = 200;                     /* generous but not too big */
+  s.r = randlimb_norm();            /* divisor */
+
+  t1 = tuneup_measure (speed_mpn_preinv_mod_1, &param, &s);
+  t2 = tuneup_measure (mod_1, &param, &s);
+  if (t1 == -1.0 || t2 == -1.0)     /* -1.0 is treated as "can't measure" */
+    {
+      printf ("Oops, can't measure mpn_preinv_mod_1 and %s at %ld\n",
+              mod_1_name, (long) s.size);
+      abort ();
+    }
+  if (option_trace >= 1)
+    printf ("size=%ld, mpn_preinv_mod_1 %.9f, %s %.9f\n",
+            (long) s.size, t1, mod_1_name, t2);
+  /* Report 1 only when preinv measured strictly faster than the baseline.  */
+  print_define_remark ("USE_PREINV_MOD_1", (mp_size_t) (t1 < t2), NULL);
+}
+
 
 void
 tune_divrem_2 (void)
@@ -2361,16 +1411,6 @@ tune_divrem_2 (void)
   one (&divrem_2_threshold, &param);
 }
 
-void
-tune_div_qr_2 (void)
-{
-  static struct param_t  param;
-  param.name = "DIV_QR_2_PI2_THRESHOLD";
-  param.function = speed_mpn_div_qr_2n;
-  param.check_size = 500;
-  param.min_size = 4;
-  one (&div_qr_2_pi2_threshold, &param);
-}
 
 /* mpn_divexact_1 is vaguely expected to be used on smallish divisors, so
    tune for that.  Its speed can differ on odd or even divisor, so take an
@@ -2464,22 +1504,22 @@ tune_modexact_1_odd (void)
   static struct param_t  param;
   mp_size_t  thresh_lt, thresh_ge, average;
 
-#if 0
   /* Any native mpn_modexact_1_odd is assumed to incorporate all the speed
      of a full mpn_mod_1.  */
   if (HAVE_NATIVE_mpn_modexact_1_odd)
     {
-      print_define_remark ("BMOD_1_TO_MOD_1_THRESHOLD", MP_SIZE_T_MAX, "always bmod_1");
+      print_define_remark ("MODEXACT_1_ODD_THRESHOLD", 0, "always (native)");
       return;
     }
-#endif
 
-  param.name = "BMOD_1_TO_MOD_1_THRESHOLD";
+  ASSERT_ALWAYS (tuned_speed_mpn_mod_1 != NULL);
+
+  param.name = "MODEXACT_1_ODD_THRESHOLD";
   param.check_size = 256;
   param.min_size = 2;
   param.stop_factor = 1.5;
-  param.function  = speed_mpn_modexact_1c_odd;
-  param.function2 = speed_mpn_mod_1_tune;
+  param.function  = tuned_speed_mpn_mod_1;
+  param.function2 = speed_mpn_modexact_1c_odd;
   param.noprint = 1;
   s.r = randlimb_half () | 1;
 
@@ -2514,10 +1554,10 @@ void
 tune_jacobi_base (void)
 {
   static struct param_t  param;
-  double   t1, t2, t3, t4;
+  double   t1, t2, t3;
   int      method;
 
-  s.size = GMP_LIMB_BITS * 3 / 4;
+  s.size = BITS_PER_MP_LIMB * 3 / 4;
 
   t1 = tuneup_measure (speed_mpn_jacobi_base_1, &param, &s);
   if (option_trace >= 1)
@@ -2531,25 +1571,19 @@ tune_jacobi_base (void)
   if (option_trace >= 1)
     printf ("size=%ld, mpn_jacobi_base_3 %.9f\n", (long) s.size, t3);
 
-  t4 = tuneup_measure (speed_mpn_jacobi_base_4, &param, &s);
-  if (option_trace >= 1)
-    printf ("size=%ld, mpn_jacobi_base_4 %.9f\n", (long) s.size, t4);
-
-  if (t1 == -1.0 || t2 == -1.0 || t3 == -1.0 || t4 == -1.0)
+  if (t1 == -1.0 || t2 == -1.0 || t3 == -1.0)
     {
       printf ("Oops, can't measure all mpn_jacobi_base methods at %ld\n",
               (long) s.size);
       abort ();
     }
 
-  if (t1 < t2 && t1 < t3 && t1 < t4)
+  if (t1 < t2 && t1 < t3)
     method = 1;
-  else if (t2 < t3 && t2 < t4)
+  else if (t2 < t3)
     method = 2;
-  else if (t3 < t4)
-    method = 3;
   else
-    method = 4;
+    method = 3;
 
   print_define ("JACOBI_BASE_METHOD", method);
 }
@@ -2607,7 +1641,8 @@ speed_mpn_pre_set_str (struct speed_params *s)
   for (i = 0; i < s->size; i++)
     str[i] = s->xp[i] % base;
 
-  LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base);
+  wn = ((mp_size_t) (s->size / __mp_bases[base].chars_per_bit_exactly))
+    / BITS_PER_MP_LIMB + 2;
   SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);
 
   /* use this during development to check wn is big enough */
@@ -2615,11 +1650,11 @@ speed_mpn_pre_set_str (struct speed_params *s)
   ASSERT_ALWAYS (mpn_set_str (wp, str, s->size, base) <= wn);
   */
 
-  speed_operand_src (s, (mp_ptr) str, s->size/GMP_LIMB_BYTES);
+  speed_operand_src (s, (mp_ptr) str, s->size/BYTES_PER_MP_LIMB);
   speed_operand_dst (s, wp, wn);
   speed_cache_fill (s);
 
-  chars_per_limb = mp_bases[base].chars_per_limb;
+  chars_per_limb = __mp_bases[base].chars_per_limb;
   un = s->size / chars_per_limb + 1;
   powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un));
   mpn_set_str_compute_powtab (powtab, powtab_mem, un, base);
@@ -2641,6 +1676,8 @@ speed_mpn_pre_set_str (struct speed_params *s)
 void
 tune_set_str (void)
 {
+  static struct param_t  param;
+
   s.r = 10;  /* decimal */
   {
     static struct param_t  param;
@@ -2672,15 +1709,14 @@ tune_fft_mul (void)
   if (option_fft_max_size == 0)
     return;
 
-  param.table_name          = "MUL_FFT_TABLE3";
+  param.table_name          = "MUL_FFT_TABLE";
   param.threshold_name      = "MUL_FFT_THRESHOLD";
   param.p_threshold         = &mul_fft_threshold;
   param.modf_threshold_name = "MUL_FFT_MODF_THRESHOLD";
   param.p_modf_threshold    = &mul_fft_modf_threshold;
-  param.first_size          = MUL_TOOM33_THRESHOLD / 2;
+  param.first_size          = MUL_TOOM3_THRESHOLD / 2;
   param.max_size            = option_fft_max_size;
-  param.function            = speed_mpn_fft_mul;
-  param.mul_modf_function   = speed_mpn_mul_fft;
+  param.function            = speed_mpn_mul_fft;
   param.mul_function        = speed_mpn_mul_n;
   param.sqr = 0;
   fft (&param);
@@ -2695,38 +1731,19 @@ tune_fft_sqr (void)
   if (option_fft_max_size == 0)
     return;
 
-  param.table_name          = "SQR_FFT_TABLE3";
+  param.table_name          = "SQR_FFT_TABLE";
   param.threshold_name      = "SQR_FFT_THRESHOLD";
   param.p_threshold         = &sqr_fft_threshold;
   param.modf_threshold_name = "SQR_FFT_MODF_THRESHOLD";
   param.p_modf_threshold    = &sqr_fft_modf_threshold;
   param.first_size          = SQR_TOOM3_THRESHOLD / 2;
   param.max_size            = option_fft_max_size;
-  param.function            = speed_mpn_fft_sqr;
-  param.mul_modf_function   = speed_mpn_mul_fft_sqr;
-  param.mul_function        = speed_mpn_sqr;
-  param.sqr = 1;
+  param.function            = speed_mpn_mul_fft_sqr;
+  param.mul_function        = speed_mpn_sqr_n;
+  param.sqr = 0;
   fft (&param);
 }
 
-void
-tune_fac_ui (void)
-{
-  static struct param_t  param;
-
-  param.function = speed_mpz_fac_ui_tune;
-
-  param.name = "FAC_DSC_THRESHOLD";
-  param.min_size = 70;
-  param.max_size = FAC_DSC_THRESHOLD_LIMIT;
-  one (&fac_dsc_threshold, &param);
-
-  param.name = "FAC_ODD_THRESHOLD";
-  param.min_size = 22;
-  param.stop_factor = 1.7;
-  param.min_is_always = 1;
-  one (&fac_odd_threshold, &param);
-}
 
 void
 all (void)
@@ -2794,81 +1811,52 @@ all (void)
   }
   printf ("\n");
 
-  tune_divrem_1 ();
-  tune_mod_1 ();
-  tune_preinv_divrem_1 ();
-  tune_div_qr_1 ();
-#if 0
-  tune_divrem_2 ();
-#endif
-  tune_div_qr_2 ();
-  tune_divexact_1 ();
-  tune_modexact_1_odd ();
-  printf("\n");
-
-  tune_mul_n ();
-  printf("\n");
-
   tune_mul ();
   printf("\n");
 
   tune_sqr ();
   printf("\n");
 
-  tune_mulmid ();
+  tune_mullow ();
   printf("\n");
 
-  tune_mulmod_bnm1 ();
-  tune_sqrmod_bnm1 ();
-  printf("\n");
-
-  tune_fft_mul ();
-  printf("\n");
-
-  tune_fft_sqr ();
-  printf ("\n");
-
-  tune_mullo ();
-  printf("\n");
-
-  tune_dc_div ();
-  tune_dc_bdiv ();
-
-  printf("\n");
-  tune_invertappr ();
-  tune_invert ();
-  printf("\n");
-
-  tune_binvert ();
-  tune_redc ();
-  printf("\n");
-
-  tune_mu_div ();
-  tune_mu_bdiv ();
-  printf("\n");
-
-  tune_powm_sec ();
+  tune_sb_preinv ();
+  tune_dc ();
+  tune_powm ();
   printf("\n");
 
   tune_matrix22_mul ();
   tune_hgcd ();
-  tune_hgcd_appr ();
-  tune_hgcd_reduce();
   tune_gcd_dc ();
   tune_gcdext_dc ();
+#if 0
+  tune_gcd_accel ();
+#endif
   tune_jacobi_base ();
   printf("\n");
 
+  tune_divrem_1 ();
+  tune_mod_1 ();
+  tune_preinv_divrem_1 ();
+  tune_preinv_mod_1 ();
+  tune_divrem_2 ();
+  tune_divexact_1 ();
+  tune_modexact_1_odd ();
+  printf("\n");
+
   tune_get_str ();
   tune_set_str ();
   printf("\n");
 
-  tune_fac_ui ();
+  tune_fft_mul ();
   printf("\n");
 
+  tune_fft_sqr ();
+  printf ("\n");
+
   time (&end_time);
   printf ("/* Tuneup completed successfully, took %ld seconds */\n",
-          (long) (end_time - start_time));
+          end_time - start_time);
 
   TMP_FREE;
 }
diff --git a/gmp/tune/x86_64.asm b/gmp/tune/x86_64.asm
index b7ec44c544..509909002a 100644
--- a/gmp/tune/x86_64.asm
+++ b/gmp/tune/x86_64.asm
@@ -1,32 +1,21 @@
 dnl  x86 pentium time stamp counter access routine.
 
-dnl  Copyright 1999, 2000, 2003-2005 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
+dnl  Copyright 1999, 2000, 2003, 2004, 2005 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 
 include(`../config.m4')