summary | refs | log | tree | commit | diff
path: root/gmp/tune
diff options
context:
space:
mode:
Diffstat (limited to 'gmp/tune')
-rw-r--r-- gmp/tune/Makefile.am 134
-rw-r--r-- gmp/tune/Makefile.in 640
-rw-r--r-- gmp/tune/README 68
-rw-r--r-- gmp/tune/alpha.asm 33
-rw-r--r-- gmp/tune/common.c 1218
-rw-r--r-- gmp/tune/div_qr_1_tune.c 47
-rw-r--r-- gmp/tune/div_qr_1n_pi1_1.c 39
-rw-r--r-- gmp/tune/div_qr_1n_pi1_2.c 39
-rw-r--r-- gmp/tune/divrem1div.c 25
-rw-r--r-- gmp/tune/divrem1inv.c 25
-rw-r--r-- gmp/tune/divrem2div.c 25
-rw-r--r-- gmp/tune/divrem2inv.c 25
-rw-r--r-- gmp/tune/freq.c 31
-rw-r--r-- gmp/tune/gcdext_double.c 25
-rw-r--r-- gmp/tune/gcdext_single.c 25
-rw-r--r-- gmp/tune/gcdextod.c 25
-rw-r--r-- gmp/tune/gcdextos.c 25
-rw-r--r-- gmp/tune/hgcd_appr_lehmer.c 40
-rw-r--r-- gmp/tune/hgcd_lehmer.c 40
-rw-r--r-- gmp/tune/hgcd_reduce_1.c 41
-rw-r--r-- gmp/tune/hgcd_reduce_2.c 40
-rw-r--r-- gmp/tune/hppa.asm 33
-rw-r--r-- gmp/tune/hppa2.asm 33
-rw-r--r-- gmp/tune/hppa2w.asm 33
-rw-r--r-- gmp/tune/ia64.asm 33
-rw-r--r-- gmp/tune/jacbase1.c 25
-rw-r--r-- gmp/tune/jacbase2.c 25
-rw-r--r-- gmp/tune/jacbase3.c 25
-rw-r--r-- gmp/tune/jacbase4.c 38
-rwxr-xr-x[-rw-r--r--] gmp/tune/many.pl 48
-rw-r--r-- gmp/tune/mod_1_1-1.c 41
-rw-r--r-- gmp/tune/mod_1_1-2.c 41
-rw-r--r-- gmp/tune/mod_1_div.c 29
-rw-r--r-- gmp/tune/mod_1_inv.c 29
-rw-r--r-- gmp/tune/modlinv.c 35
-rw-r--r-- gmp/tune/noop.c 25
-rw-r--r-- gmp/tune/pentium.asm 33
-rw-r--r-- gmp/tune/powerpc.asm 35
-rw-r--r-- gmp/tune/powerpc64.asm 35
-rw-r--r-- gmp/tune/powm_mod.c 25
-rw-r--r-- gmp/tune/powm_redc.c 25
-rw-r--r-- gmp/tune/pre_divrem_1.c 25
-rw-r--r-- gmp/tune/sb_div.c 30
-rw-r--r-- gmp/tune/sb_inv.c 30
-rw-r--r-- gmp/tune/set_strb.c 25
-rw-r--r-- gmp/tune/set_strp.c 25
-rw-r--r-- gmp/tune/set_strs.c 25
-rw-r--r-- gmp/tune/sparcv9.asm 33
-rw-r--r-- gmp/tune/speed-ext.c 37
-rw-r--r-- gmp/tune/speed.c 249
-rw-r--r-- gmp/tune/speed.h 2050
-rw-r--r-- gmp/tune/time.c 98
-rw-r--r-- gmp/tune/tune-gcd-p.c 225
-rw-r--r-- gmp/tune/tuneup.c 1722
-rw-r--r-- gmp/tune/x86_64.asm 35
55 files changed, 1939 insertions, 5926 deletions
diff --git a/gmp/tune/Makefile.am b/gmp/tune/Makefile.am
index bbe503a201..3c7f62c820 100644
--- a/gmp/tune/Makefile.am
+++ b/gmp/tune/Makefile.am
@@ -1,32 +1,21 @@
## Process this file with automake to generate Makefile.in
-# Copyright 2000-2003, 2005-2011 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
#
-# * the GNU Lesser General Public License as published by the Free
-# Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
#
-# or
-#
-# * the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any
-# later version.
-#
-# or both in parallel, as here.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# for more details.
-#
-# You should have received copies of the GNU General Public License and the
-# GNU Lesser General Public License along with the GNU MP Library. If not,
-# see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
@@ -52,14 +41,13 @@ EXTRA_LTLIBRARIES = libspeed.la
libspeed_la_SOURCES = \
common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \
- div_qr_1n_pi1_1.c div_qr_1n_pi1_2.c div_qr_1_tune.c \
- freq.c \
+ freq.c \
gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
- hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
- jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
- mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
+ jacbase1.c jacbase2.c jacbase3.c \
+ mod_1_div.c mod_1_inv.c modlinv.c \
noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
- set_strb.c set_strs.c set_strp.c time.c
+ set_strb.c set_strs.c set_strp.c time.c \
+ sb_div.c sb_inv.c
libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \
$(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
@@ -81,10 +69,10 @@ $(top_builddir)/tests/libtests.la:
# program. This can always be forced with "make speed_LDFLAGS=-all-static
# ..." if desired, see tune/README.
-EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup tune-gcd-p
+EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup
DEPENDENCIES = libspeed.la
-LDADD = $(DEPENDENCIES) $(TUNE_LIBS)
+LDADD = $(DEPENDENCIES)
speed_SOURCES = speed.c
speed_LDFLAGS = $(STATIC)
@@ -95,15 +83,11 @@ speed_ext_SOURCES = speed-ext.c
speed_ext_LDFLAGS = $(STATIC)
tuneup_SOURCES = tuneup.c
-nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS)
+nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS)
tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
-tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS)
+tuneup_LDADD = $(tuneup_DEPENDENCIES)
tuneup_LDFLAGS = $(STATIC)
-tune_gcd_p_SOURCES = tune-gcd-p.c
-tune_gcd_p_DEPENDENCIES = ../mpn/gcd.c
-tune_gcd_p_LDFLAGS = $(STATIC)
-
tune:
$(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT)
@@ -113,7 +97,7 @@ allprogs: $(EXTRA_PROGRAMS)
# $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
- $(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \
+ $(TUNE_MPN_SRCS) sqr_asm.asm \
stg.gnuplot stg.data \
mtg.gnuplot mtg.data \
fibg.gnuplot fibg.data \
@@ -139,16 +123,9 @@ DISTCLEANFILES = sqr_basecase.c $(MANY_DISTCLEAN)
# recompiled object will be rebuilt if that file changes.
TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
-TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \
- dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
- invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \
- get_str.c set_str.c matrix22_mul.c \
- hgcd.c hgcd_appr.c hgcd_reduce.c \
- mul_n.c sqr.c sec_powm.c \
- mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \
- mulmid.c mulmid_n.c toom42_mulmid.c \
- nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \
- toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
+TUNE_MPN_SRCS_BASIC = dc_divrem_n.c divrem_2.c gcd.c gcdext.c get_str.c \
+ set_str.c matrix22_mul.c hgcd.c mul_n.c toom44_mul.c toom4_sqr.c \
+ mullow_n.c mul_fft.c mul.c sb_divrem_mn.c tdiv_qr.c
$(TUNE_MPN_SRCS_BASIC):
for i in $(TUNE_MPN_SRCS_BASIC); do \
@@ -167,15 +144,60 @@ mod_1.c:
echo "#include \"mpn/generic/mod_1.c\"" >>mod_1.c
sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm
- echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm
+ echo 'define(SQR_KARATSUBA_THRESHOLD_OVERRIDE,SQR_KARATSUBA_THRESHOLD_MAX)' >sqr_asm.asm
echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm
-# FIXME: Should it depend on $(top_builddir)/fac_ui.h too?
-fac_ui.c: $(top_builddir)/mpz/fac_ui.c
- echo "#define TUNE_PROGRAM_BUILD 1" >fac_ui.c
- echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c
- echo "#define __gmpz_oddfac_1 mpz_oddfac_1_tune" >>fac_ui.c
- echo "#include \"mpz/oddfac_1.c\"" >>fac_ui.c
- echo "#include \"mpz/fac_ui.c\"" >>fac_ui.c
include ../mpn/Makeasm.am
+
+
+# "mk" is multiplication in the karatsuba range
+# "st" is squaring in the toom-cook range, etc
+# "g" forms produce graphs
+
+mk:
+ ./speed -s 5-40 -c mpn_mul_basecase mpn_kara_mul_n
+
+MTS = -s 50-150 -c
+mt:
+ ./speed $(MTS) mpn_kara_mul_n mpn_toom3_mul_n
+mtg:
+ ./speed $(MTS) -P mtg mpn_kara_mul_n mpn_toom3_mul_n
+
+sk:
+ ./speed -s 5-40 -c mpn_sqr_basecase mpn_kara_sqr_n
+
+STS = -s 50-150 -c
+st:
+ ./speed $(STS) mpn_kara_sqr_n mpn_toom3_sqr_n
+stg:
+ ./speed $(STS) -P stg mpn_kara_sqr_n mpn_toom3_sqr_n
+
+dc:
+ ./speed -s 5-40 -c mpn_dc_divrem_sb mpn_dc_divrem_n mpn_dc_tdiv_qr
+
+fib:
+ ./speed -s 40-60 -c mpz_fib_ui
+fibg:
+ ./speed -s 10-300 -P fibg mpz_fib_ui
+
+
+gcd:
+ ./speed -s 1-20 -c mpn_gcd
+
+udiv:
+ ./speed -s 1 -c udiv_qrnnd udiv_qrnnd_preinv udiv_qrnnd_preinv2norm invert_limb udiv_qrnnd_c
+
+divn:
+ ./speed -s 1-30 -c mpn_divrem_1_div.-1 mpn_divrem_1_inv.-1
+divun:
+ ./speed -s 1-30 -c mpn_divrem_1_div.12345 mpn_divrem_1_inv.12345
+modn:
+ ./speed -s 1-30 -c mpn_mod_1_div.-1 mpn_mod_1_inv.-1
+modun:
+ ./speed -s 1-30 -c mpn_mod_1_div.12345 mpn_mod_1_inv.12345
+
+
+graph:
+ ./speed -s 1-5000 -f 1.02 -P graph mpn_mul_n mpn_sqr
+ gnuplot graph.gnuplot
diff --git a/gmp/tune/Makefile.in b/gmp/tune/Makefile.in
index 07abb022a3..c3698319d9 100644
--- a/gmp/tune/Makefile.in
+++ b/gmp/tune/Makefile.in
@@ -1,9 +1,8 @@
-# Makefile.in generated by automake 1.11.6 from Makefile.am.
+# Makefile.in generated by automake 1.8.4 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
-# Foundation, Inc.
+# 2003, 2004 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@@ -15,85 +14,52 @@
@SET_MAKE@
-# Copyright 2000-2003, 2005-2011 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
#
-# * the GNU Lesser General Public License as published by the Free
-# Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
#
-# or
-#
-# * the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any
-# later version.
-#
-# or both in parallel, as here.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# for more details.
-#
-# You should have received copies of the GNU General Public License and the
-# GNU Lesser General Public License along with the GNU MP Library. If not,
-# see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-# Copyright 1996, 1998-2002 Free Software Foundation, Inc.
-#
-# This file is part of the GNU MP Library.
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
+# Inc.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of either:
+# This file is part of the GNU MP Library.
#
-# * the GNU Lesser General Public License as published by the Free
-# Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
#
-# or
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
#
-# * the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any
-# later version.
-#
-# or both in parallel, as here.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# for more details.
-#
-# You should have received copies of the GNU General Public License and the
-# GNU Lesser General Public License along with the GNU MP Library. If not,
-# see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) $(speed_dynamic_SOURCES) $(speed_ext_SOURCES) $(tuneup_SOURCES) $(nodist_tuneup_SOURCES)
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
VPATH = @srcdir@
-am__make_dryrun = \
- { \
- am__dry=no; \
- case $$MAKEFLAGS in \
- *\\[\ \ ]*) \
- echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
- | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
- *) \
- for am__flg in $$MAKEFLAGS; do \
- case $$am__flg in \
- *=*|--*) ;; \
- *n*) am__dry=yes; break;; \
- esac; \
- done;; \
- esac; \
- test $$am__dry = yes; \
- }
pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
@@ -105,110 +71,74 @@ POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
-build_triplet = @build@
host_triplet = @host@
+ANSI2KNR = $(top_builddir)/ansi2knr
EXTRA_PROGRAMS = speed$(EXEEXT) speed-dynamic$(EXEEXT) \
- speed-ext$(EXEEXT) tuneup$(EXEEXT) tune-gcd-p$(EXEEXT)
+ speed-ext$(EXEEXT) tuneup$(EXEEXT)
DIST_COMMON = README $(noinst_HEADERS) $(srcdir)/../mpn/Makeasm.am \
$(srcdir)/Makefile.am $(srcdir)/Makefile.in
subdir = tune
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.ac
+ $(top_srcdir)/configure.in
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
+mkinstalldirs = $(mkdir_p)
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
am__DEPENDENCIES_1 =
am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) \
$(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
-am_libspeed_la_OBJECTS = common.lo divrem1div.lo divrem1inv.lo \
- divrem2div.lo divrem2inv.lo div_qr_1n_pi1_1.lo \
- div_qr_1n_pi1_2.lo div_qr_1_tune.lo freq.lo gcdext_single.lo \
- gcdext_double.lo gcdextod.lo gcdextos.lo hgcd_lehmer.lo \
- hgcd_appr_lehmer.lo hgcd_reduce_1.lo hgcd_reduce_2.lo \
- jacbase1.lo jacbase2.lo jacbase3.lo jacbase4.lo mod_1_div.lo \
- mod_1_inv.lo mod_1_1-1.lo mod_1_1-2.lo modlinv.lo noop.lo \
- powm_mod.lo powm_redc.lo pre_divrem_1.lo set_strb.lo \
- set_strs.lo set_strp.lo time.lo
+am_libspeed_la_OBJECTS = common$U.lo divrem1div$U.lo divrem1inv$U.lo \
+ divrem2div$U.lo divrem2inv$U.lo freq$U.lo gcdext_single$U.lo \
+ gcdext_double$U.lo gcdextod$U.lo gcdextos$U.lo jacbase1$U.lo \
+ jacbase2$U.lo jacbase3$U.lo mod_1_div$U.lo mod_1_inv$U.lo \
+ modlinv$U.lo noop$U.lo powm_mod$U.lo powm_redc$U.lo \
+ pre_divrem_1$U.lo set_strb$U.lo set_strs$U.lo set_strp$U.lo \
+ time$U.lo sb_div$U.lo sb_inv$U.lo
libspeed_la_OBJECTS = $(am_libspeed_la_OBJECTS)
-libspeed_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
- $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
- $(libspeed_la_LDFLAGS) $(LDFLAGS) -o $@
-am_speed_OBJECTS = speed.$(OBJEXT)
+am_speed_OBJECTS = speed$U.$(OBJEXT)
speed_OBJECTS = $(am_speed_OBJECTS)
speed_LDADD = $(LDADD)
-speed_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
-speed_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(speed_LDFLAGS) \
- $(LDFLAGS) -o $@
-am_speed_dynamic_OBJECTS = speed.$(OBJEXT)
+am__DEPENDENCIES_3 = libspeed.la
+speed_DEPENDENCIES = $(am__DEPENDENCIES_3)
+am_speed_dynamic_OBJECTS = speed$U.$(OBJEXT)
speed_dynamic_OBJECTS = $(am_speed_dynamic_OBJECTS)
speed_dynamic_LDADD = $(LDADD)
-speed_dynamic_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
-am_speed_ext_OBJECTS = speed-ext.$(OBJEXT)
+speed_dynamic_DEPENDENCIES = $(am__DEPENDENCIES_3)
+am_speed_ext_OBJECTS = speed-ext$U.$(OBJEXT)
speed_ext_OBJECTS = $(am_speed_ext_OBJECTS)
speed_ext_LDADD = $(LDADD)
-speed_ext_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
-speed_ext_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
- $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
- $(speed_ext_LDFLAGS) $(LDFLAGS) -o $@
-am_tune_gcd_p_OBJECTS = tune-gcd-p.$(OBJEXT)
-tune_gcd_p_OBJECTS = $(am_tune_gcd_p_OBJECTS)
-tune_gcd_p_LDADD = $(LDADD)
-tune_gcd_p_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
- $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
- $(tune_gcd_p_LDFLAGS) $(LDFLAGS) -o $@
-am_tuneup_OBJECTS = tuneup.$(OBJEXT)
-am__objects_1 = div_qr_2.$(OBJEXT) bdiv_q.$(OBJEXT) bdiv_qr.$(OBJEXT) \
- dcpi1_div_qr.$(OBJEXT) dcpi1_divappr_q.$(OBJEXT) \
- dcpi1_bdiv_qr.$(OBJEXT) dcpi1_bdiv_q.$(OBJEXT) \
- invertappr.$(OBJEXT) invert.$(OBJEXT) binvert.$(OBJEXT) \
- divrem_2.$(OBJEXT) gcd.$(OBJEXT) gcdext.$(OBJEXT) \
- get_str.$(OBJEXT) set_str.$(OBJEXT) matrix22_mul.$(OBJEXT) \
- hgcd.$(OBJEXT) hgcd_appr.$(OBJEXT) hgcd_reduce.$(OBJEXT) \
- mul_n.$(OBJEXT) sqr.$(OBJEXT) sec_powm.$(OBJEXT) \
- mullo_n.$(OBJEXT) mul_fft.$(OBJEXT) mul.$(OBJEXT) \
- tdiv_qr.$(OBJEXT) mulmod_bnm1.$(OBJEXT) sqrmod_bnm1.$(OBJEXT) \
- mulmid.$(OBJEXT) mulmid_n.$(OBJEXT) toom42_mulmid.$(OBJEXT) \
- nussbaumer_mul.$(OBJEXT) toom6h_mul.$(OBJEXT) \
- toom8h_mul.$(OBJEXT) toom6_sqr.$(OBJEXT) toom8_sqr.$(OBJEXT) \
- toom22_mul.$(OBJEXT) toom2_sqr.$(OBJEXT) toom33_mul.$(OBJEXT) \
- toom3_sqr.$(OBJEXT) toom44_mul.$(OBJEXT) toom4_sqr.$(OBJEXT)
-am__objects_2 = $(am__objects_1) divrem_1.$(OBJEXT) mod_1.$(OBJEXT)
-nodist_tuneup_OBJECTS = sqr_basecase.$(OBJEXT) fac_ui.$(OBJEXT) \
- $(am__objects_2)
+speed_ext_DEPENDENCIES = $(am__DEPENDENCIES_3)
+am_tuneup_OBJECTS = tuneup$U.$(OBJEXT)
+am__objects_1 = dc_divrem_n$U.$(OBJEXT) divrem_2$U.$(OBJEXT) \
+ gcd$U.$(OBJEXT) gcdext$U.$(OBJEXT) get_str$U.$(OBJEXT) \
+ set_str$U.$(OBJEXT) matrix22_mul$U.$(OBJEXT) hgcd$U.$(OBJEXT) \
+ mul_n$U.$(OBJEXT) toom44_mul$U.$(OBJEXT) toom4_sqr$U.$(OBJEXT) \
+ mullow_n$U.$(OBJEXT) mul_fft$U.$(OBJEXT) mul$U.$(OBJEXT) \
+ sb_divrem_mn$U.$(OBJEXT) tdiv_qr$U.$(OBJEXT)
+am__objects_2 = $(am__objects_1) divrem_1$U.$(OBJEXT) \
+ mod_1$U.$(OBJEXT)
+nodist_tuneup_OBJECTS = sqr_basecase$U.$(OBJEXT) $(am__objects_2)
tuneup_OBJECTS = $(am_tuneup_OBJECTS) $(nodist_tuneup_OBJECTS)
-am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) libspeed.la
-tuneup_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(tuneup_LDFLAGS) \
- $(LDFLAGS) -o $@
-DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+am__DEPENDENCIES_4 = $(am__DEPENDENCIES_1) libspeed.la
+DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
depcomp =
am__depfiles_maybe =
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
- $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
-LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
- $(LDFLAGS) -o $@
+LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
$(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
- $(tune_gcd_p_SOURCES) $(tuneup_SOURCES) \
- $(nodist_tuneup_SOURCES)
+ $(tuneup_SOURCES) $(nodist_tuneup_SOURCES)
DIST_SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
$(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
- $(tune_gcd_p_SOURCES) $(tuneup_SOURCES)
-am__can_run_installinfo = \
- case $$AM_UPDATE_INFO_DIR in \
- n|no|NO) false;; \
- *) (install-info --version) >/dev/null 2>&1;; \
- esac
+ $(tuneup_SOURCES)
HEADERS = $(noinst_HEADERS)
ETAGS = etags
CTAGS = ctags
@@ -223,6 +153,7 @@ AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
+BITS_PER_MP_LIMB = @BITS_PER_MP_LIMB@
CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
CC = @CC@
CCAS = @CCAS@
@@ -238,17 +169,16 @@ CYGPATH_W = @CYGPATH_W@
DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
DEFS = @DEFS@
DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
+ECHO = @ECHO@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
+ENABLE_STATIC_FALSE = @ENABLE_STATIC_FALSE@
+ENABLE_STATIC_TRUE = @ENABLE_STATIC_TRUE@
EXEEXT = @EXEEXT@
EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
-FGREP = @FGREP@
GMP_LDFLAGS = @GMP_LDFLAGS@
-GMP_LIMB_BITS = @GMP_LIMB_BITS@
GMP_NAIL_BITS = @GMP_NAIL_BITS@
GREP = @GREP@
HAVE_CLOCK_01 = @HAVE_CLOCK_01@
@@ -262,12 +192,10 @@ HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
HAVE_STACK_T_01 = @HAVE_STACK_T_01@
HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
-INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LD = @LD@
LDFLAGS = @LDFLAGS@
LEX = @LEX@
LEXLIB = @LEXLIB@
@@ -282,26 +210,20 @@ LIBOBJS = @LIBOBJS@
LIBREADLINE = @LIBREADLINE@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
M4 = @M4@
MAINT = @MAINT@
+MAINTAINER_MODE_FALSE = @MAINTAINER_MODE_FALSE@
+MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_TRUE@
MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
@@ -311,31 +233,26 @@ SHELL = @SHELL@
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
-TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
+WANT_CXX_FALSE = @WANT_CXX_FALSE@
+WANT_CXX_TRUE = @WANT_CXX_TRUE@
+WANT_MPBSD_FALSE = @WANT_MPBSD_FALSE@
+WANT_MPBSD_TRUE = @WANT_MPBSD_TRUE@
WITH_READLINE_01 = @WITH_READLINE_01@
YACC = @YACC@
YFLAGS = @YFLAGS@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
-builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
@@ -359,6 +276,7 @@ mandir = @mandir@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
@@ -366,12 +284,8 @@ program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
EXTRA_DIST = alpha.asm pentium.asm sparcv9.asm hppa.asm hppa2.asm hppa2w.asm \
ia64.asm powerpc.asm powerpc64.asm x86_64.asm many.pl
@@ -389,14 +303,13 @@ noinst_HEADERS = speed.h
EXTRA_LTLIBRARIES = libspeed.la
libspeed_la_SOURCES = \
common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \
- div_qr_1n_pi1_1.c div_qr_1n_pi1_2.c div_qr_1_tune.c \
- freq.c \
+ freq.c \
gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
- hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
- jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
- mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
+ jacbase1.c jacbase2.c jacbase3.c \
+ mod_1_div.c mod_1_inv.c modlinv.c \
noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
- set_strb.c set_strs.c set_strp.c time.c
+ set_strb.c set_strs.c set_strp.c time.c \
+ sb_div.c sb_inv.c
libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \
$(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
@@ -404,24 +317,21 @@ libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \
libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(LIBM)
libspeed_la_LDFLAGS = $(STATIC)
DEPENDENCIES = libspeed.la
-LDADD = $(DEPENDENCIES) $(TUNE_LIBS)
+LDADD = $(DEPENDENCIES)
speed_SOURCES = speed.c
speed_LDFLAGS = $(STATIC)
speed_dynamic_SOURCES = speed.c
speed_ext_SOURCES = speed-ext.c
speed_ext_LDFLAGS = $(STATIC)
tuneup_SOURCES = tuneup.c
-nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS)
+nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS)
tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
-tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS)
+tuneup_LDADD = $(tuneup_DEPENDENCIES)
tuneup_LDFLAGS = $(STATIC)
-tune_gcd_p_SOURCES = tune-gcd-p.c
-tune_gcd_p_DEPENDENCIES = ../mpn/gcd.c
-tune_gcd_p_LDFLAGS = $(STATIC)
# $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
- $(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \
+ $(TUNE_MPN_SRCS) sqr_asm.asm \
stg.gnuplot stg.data \
mtg.gnuplot mtg.data \
fibg.gnuplot fibg.data \
@@ -446,16 +356,9 @@ DISTCLEANFILES = sqr_basecase.c $(MANY_DISTCLEAN)
# FIXME: Would like say mul_n.c to depend on $(top_builddir)/mul_n.c so the
# recompiled object will be rebuilt if that file changes.
TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
-TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \
- dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
- invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \
- get_str.c set_str.c matrix22_mul.c \
- hgcd.c hgcd_appr.c hgcd_reduce.c \
- mul_n.c sqr.c sec_powm.c \
- mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \
- mulmid.c mulmid_n.c toom42_mulmid.c \
- nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \
- toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
+TUNE_MPN_SRCS_BASIC = dc_divrem_n.c divrem_2.c gcd.c gcdext.c get_str.c \
+ set_str.c matrix22_mul.c hgcd.c mul_n.c toom44_mul.c toom4_sqr.c \
+ mullow_n.c mul_fft.c mul.c sb_divrem_mn.c tdiv_qr.c
# COMPILE minus CC.
@@ -486,6 +389,8 @@ SUFFIXES = .s .S .asm
# can be overridden during development, eg. "make RM_TMP=: mul_1.lo"
RM_TMP = rm -f
+MTS = -s 50-150 -c
+STS = -s 50-150 -c
all: all-am
.SUFFIXES:
@@ -494,14 +399,14 @@ $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/..
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+ && exit 0; \
exit 1;; \
esac; \
done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tune/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps tune/Makefile
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tune/Makefile'; \
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --gnu --ignore-deps tune/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
@@ -511,7 +416,6 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
-$(srcdir)/../mpn/Makeasm.am:
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
@@ -520,30 +424,31 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-libspeed.la: $(libspeed_la_OBJECTS) $(libspeed_la_DEPENDENCIES) $(EXTRA_libspeed_la_DEPENDENCIES)
- $(libspeed_la_LINK) $(libspeed_la_OBJECTS) $(libspeed_la_LIBADD) $(LIBS)
-speed$(EXEEXT): $(speed_OBJECTS) $(speed_DEPENDENCIES) $(EXTRA_speed_DEPENDENCIES)
+libspeed.la: $(libspeed_la_OBJECTS) $(libspeed_la_DEPENDENCIES)
+ $(LINK) $(libspeed_la_LDFLAGS) $(libspeed_la_OBJECTS) $(libspeed_la_LIBADD) $(LIBS)
+speed$(EXEEXT): $(speed_OBJECTS) $(speed_DEPENDENCIES)
@rm -f speed$(EXEEXT)
- $(speed_LINK) $(speed_OBJECTS) $(speed_LDADD) $(LIBS)
-speed-dynamic$(EXEEXT): $(speed_dynamic_OBJECTS) $(speed_dynamic_DEPENDENCIES) $(EXTRA_speed_dynamic_DEPENDENCIES)
+ $(LINK) $(speed_LDFLAGS) $(speed_OBJECTS) $(speed_LDADD) $(LIBS)
+speed-dynamic$(EXEEXT): $(speed_dynamic_OBJECTS) $(speed_dynamic_DEPENDENCIES)
@rm -f speed-dynamic$(EXEEXT)
- $(LINK) $(speed_dynamic_OBJECTS) $(speed_dynamic_LDADD) $(LIBS)
-speed-ext$(EXEEXT): $(speed_ext_OBJECTS) $(speed_ext_DEPENDENCIES) $(EXTRA_speed_ext_DEPENDENCIES)
+ $(LINK) $(speed_dynamic_LDFLAGS) $(speed_dynamic_OBJECTS) $(speed_dynamic_LDADD) $(LIBS)
+speed-ext$(EXEEXT): $(speed_ext_OBJECTS) $(speed_ext_DEPENDENCIES)
@rm -f speed-ext$(EXEEXT)
- $(speed_ext_LINK) $(speed_ext_OBJECTS) $(speed_ext_LDADD) $(LIBS)
-tune-gcd-p$(EXEEXT): $(tune_gcd_p_OBJECTS) $(tune_gcd_p_DEPENDENCIES) $(EXTRA_tune_gcd_p_DEPENDENCIES)
- @rm -f tune-gcd-p$(EXEEXT)
- $(tune_gcd_p_LINK) $(tune_gcd_p_OBJECTS) $(tune_gcd_p_LDADD) $(LIBS)
-tuneup$(EXEEXT): $(tuneup_OBJECTS) $(tuneup_DEPENDENCIES) $(EXTRA_tuneup_DEPENDENCIES)
+ $(LINK) $(speed_ext_LDFLAGS) $(speed_ext_OBJECTS) $(speed_ext_LDADD) $(LIBS)
+tuneup$(EXEEXT): $(tuneup_OBJECTS) $(tuneup_DEPENDENCIES)
@rm -f tuneup$(EXEEXT)
- $(tuneup_LINK) $(tuneup_OBJECTS) $(tuneup_LDADD) $(LIBS)
+ $(LINK) $(tuneup_LDFLAGS) $(tuneup_OBJECTS) $(tuneup_LDADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
+$(top_builddir)/ansi2knr:
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ansi2knr
+
+mostlyclean-kr:
+ -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
@@ -553,6 +458,127 @@ distclean-compile:
.c.lo:
$(LTCOMPILE) -c -o $@ $<
+common_.c: common.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/common.c; then echo $(srcdir)/common.c; else echo common.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+dc_divrem_n_.c: dc_divrem_n.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dc_divrem_n.c; then echo $(srcdir)/dc_divrem_n.c; else echo dc_divrem_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem1div_.c: divrem1div.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem1div.c; then echo $(srcdir)/divrem1div.c; else echo divrem1div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem1inv_.c: divrem1inv.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem1inv.c; then echo $(srcdir)/divrem1inv.c; else echo divrem1inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem2div_.c: divrem2div.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem2div.c; then echo $(srcdir)/divrem2div.c; else echo divrem2div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem2inv_.c: divrem2inv.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem2inv.c; then echo $(srcdir)/divrem2inv.c; else echo divrem2inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem_1_.c: divrem_1.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_1.c; then echo $(srcdir)/divrem_1.c; else echo divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+divrem_2_.c: divrem_2.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_2.c; then echo $(srcdir)/divrem_2.c; else echo divrem_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+freq_.c: freq.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/freq.c; then echo $(srcdir)/freq.c; else echo freq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcd_.c: gcd.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd.c; then echo $(srcdir)/gcd.c; else echo gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdext_.c: gcdext.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext.c; then echo $(srcdir)/gcdext.c; else echo gcdext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdext_double_.c: gcdext_double.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext_double.c; then echo $(srcdir)/gcdext_double.c; else echo gcdext_double.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdext_single_.c: gcdext_single.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext_single.c; then echo $(srcdir)/gcdext_single.c; else echo gcdext_single.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdextod_.c: gcdextod.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdextod.c; then echo $(srcdir)/gcdextod.c; else echo gcdextod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+gcdextos_.c: gcdextos.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdextos.c; then echo $(srcdir)/gcdextos.c; else echo gcdextos.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+get_str_.c: get_str.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+hgcd_.c: hgcd.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd.c; then echo $(srcdir)/hgcd.c; else echo hgcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+jacbase1_.c: jacbase1.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase1.c; then echo $(srcdir)/jacbase1.c; else echo jacbase1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+jacbase2_.c: jacbase2.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase2.c; then echo $(srcdir)/jacbase2.c; else echo jacbase2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+jacbase3_.c: jacbase3.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase3.c; then echo $(srcdir)/jacbase3.c; else echo jacbase3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+matrix22_mul_.c: matrix22_mul.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/matrix22_mul.c; then echo $(srcdir)/matrix22_mul.c; else echo matrix22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_.c: mod_1.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1.c; then echo $(srcdir)/mod_1.c; else echo mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_div_.c: mod_1_div.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_div.c; then echo $(srcdir)/mod_1_div.c; else echo mod_1_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mod_1_inv_.c: mod_1_inv.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_inv.c; then echo $(srcdir)/mod_1_inv.c; else echo mod_1_inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+modlinv_.c: modlinv.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/modlinv.c; then echo $(srcdir)/modlinv.c; else echo modlinv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_.c: mul.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_fft_.c: mul_fft.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_fft.c; then echo $(srcdir)/mul_fft.c; else echo mul_fft.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mul_n_.c: mul_n.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_n.c; then echo $(srcdir)/mul_n.c; else echo mul_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+mullow_n_.c: mullow_n.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullow_n.c; then echo $(srcdir)/mullow_n.c; else echo mullow_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+noop_.c: noop.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/noop.c; then echo $(srcdir)/noop.c; else echo noop.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+powm_mod_.c: powm_mod.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_mod.c; then echo $(srcdir)/powm_mod.c; else echo powm_mod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+powm_redc_.c: powm_redc.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_redc.c; then echo $(srcdir)/powm_redc.c; else echo powm_redc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+pre_divrem_1_.c: pre_divrem_1.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_divrem_1.c; then echo $(srcdir)/pre_divrem_1.c; else echo pre_divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sb_div_.c: sb_div.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sb_div.c; then echo $(srcdir)/sb_div.c; else echo sb_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sb_divrem_mn_.c: sb_divrem_mn.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sb_divrem_mn.c; then echo $(srcdir)/sb_divrem_mn.c; else echo sb_divrem_mn.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sb_inv_.c: sb_inv.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sb_inv.c; then echo $(srcdir)/sb_inv.c; else echo sb_inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_str_.c: set_str.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_strb_.c: set_strb.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strb.c; then echo $(srcdir)/set_strb.c; else echo set_strb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_strp_.c: set_strp.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strp.c; then echo $(srcdir)/set_strp.c; else echo set_strp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+set_strs_.c: set_strs.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strs.c; then echo $(srcdir)/set_strs.c; else echo set_strs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+speed_.c: speed.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/speed.c; then echo $(srcdir)/speed.c; else echo speed.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+speed-ext_.c: speed-ext.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/speed-ext.c; then echo $(srcdir)/speed-ext.c; else echo speed-ext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+sqr_basecase_.c: sqr_basecase.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_basecase.c; then echo $(srcdir)/sqr_basecase.c; else echo sqr_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tdiv_qr_.c: tdiv_qr.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr.c; then echo $(srcdir)/tdiv_qr.c; else echo tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+time_.c: time.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/time.c; then echo $(srcdir)/time.c; else echo time.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom44_mul_.c: toom44_mul.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom44_mul.c; then echo $(srcdir)/toom44_mul.c; else echo toom44_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+toom4_sqr_.c: toom4_sqr.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom4_sqr.c; then echo $(srcdir)/toom4_sqr.c; else echo toom4_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+tuneup_.c: tuneup.c $(ANSI2KNR)
+ $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tuneup.c; then echo $(srcdir)/tuneup.c; else echo tuneup.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
+common_.$(OBJEXT) common_.lo dc_divrem_n_.$(OBJEXT) dc_divrem_n_.lo \
+divrem1div_.$(OBJEXT) divrem1div_.lo divrem1inv_.$(OBJEXT) \
+divrem1inv_.lo divrem2div_.$(OBJEXT) divrem2div_.lo \
+divrem2inv_.$(OBJEXT) divrem2inv_.lo divrem_1_.$(OBJEXT) divrem_1_.lo \
+divrem_2_.$(OBJEXT) divrem_2_.lo freq_.$(OBJEXT) freq_.lo \
+gcd_.$(OBJEXT) gcd_.lo gcdext_.$(OBJEXT) gcdext_.lo \
+gcdext_double_.$(OBJEXT) gcdext_double_.lo gcdext_single_.$(OBJEXT) \
+gcdext_single_.lo gcdextod_.$(OBJEXT) gcdextod_.lo gcdextos_.$(OBJEXT) \
+gcdextos_.lo get_str_.$(OBJEXT) get_str_.lo hgcd_.$(OBJEXT) hgcd_.lo \
+jacbase1_.$(OBJEXT) jacbase1_.lo jacbase2_.$(OBJEXT) jacbase2_.lo \
+jacbase3_.$(OBJEXT) jacbase3_.lo matrix22_mul_.$(OBJEXT) \
+matrix22_mul_.lo mod_1_.$(OBJEXT) mod_1_.lo mod_1_div_.$(OBJEXT) \
+mod_1_div_.lo mod_1_inv_.$(OBJEXT) mod_1_inv_.lo modlinv_.$(OBJEXT) \
+modlinv_.lo mul_.$(OBJEXT) mul_.lo mul_fft_.$(OBJEXT) mul_fft_.lo \
+mul_n_.$(OBJEXT) mul_n_.lo mullow_n_.$(OBJEXT) mullow_n_.lo \
+noop_.$(OBJEXT) noop_.lo powm_mod_.$(OBJEXT) powm_mod_.lo \
+powm_redc_.$(OBJEXT) powm_redc_.lo pre_divrem_1_.$(OBJEXT) \
+pre_divrem_1_.lo sb_div_.$(OBJEXT) sb_div_.lo sb_divrem_mn_.$(OBJEXT) \
+sb_divrem_mn_.lo sb_inv_.$(OBJEXT) sb_inv_.lo set_str_.$(OBJEXT) \
+set_str_.lo set_strb_.$(OBJEXT) set_strb_.lo set_strp_.$(OBJEXT) \
+set_strp_.lo set_strs_.$(OBJEXT) set_strs_.lo speed_.$(OBJEXT) \
+speed_.lo speed-ext_.$(OBJEXT) speed-ext_.lo sqr_basecase_.$(OBJEXT) \
+sqr_basecase_.lo tdiv_qr_.$(OBJEXT) tdiv_qr_.lo time_.$(OBJEXT) \
+time_.lo toom44_mul_.$(OBJEXT) toom44_mul_.lo toom4_sqr_.$(OBJEXT) \
+toom4_sqr_.lo tuneup_.$(OBJEXT) tuneup_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
@@ -560,85 +586,83 @@ mostlyclean-libtool:
clean-libtool:
-rm -rf .libs _libs
+distclean-libtool:
+ -rm -f libtool
+uninstall-info-am:
+
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
mkid -fID $$unique
tags: TAGS
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
- set x; \
+ tags=; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+ test -z "$$unique" && unique=$$empty_fix; \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique; \
fi
ctags: CTAGS
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
+ $(AWK) ' { files[$$0] = 1; } \
+ END { for (i in files) print i; }'`; \
+ test -z "$(CTAGS_ARGS)$$tags$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
+ $$tags $$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
+ $(mkdir_p) $(distdir)/../mpn
+ @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
+ list='$(DISTFILES)'; for file in $$list; do \
+ case $$file in \
+ $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
+ $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
+ esac; \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+ dir="/$$dir"; \
+ $(mkdir_p) "$(distdir)$$dir"; \
+ else \
+ dir=''; \
+ fi; \
if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
|| exit 1; \
fi; \
done
@@ -656,23 +680,17 @@ install-am: all-am
installcheck: installcheck-am
install-strip:
- if test -z '$(STRIP)'; then \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- install; \
- else \
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
- fi
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
mostlyclean-generic:
clean-generic:
-test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+ -rm -f $(CONFIG_CLEAN_FILES)
-test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES)
maintainer-clean-generic:
@@ -685,7 +703,7 @@ clean-am: clean-generic clean-libtool mostlyclean-am
distclean: distclean-am
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
- distclean-tags
+ distclean-libtool distclean-tags
dvi: dvi-am
@@ -693,38 +711,18 @@ dvi-am:
html: html-am
-html-am:
-
info: info-am
info-am:
install-data-am:
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
install-exec-am:
-install-html: install-html-am
-
-install-html-am:
-
install-info: install-info-am
-install-info-am:
-
install-man:
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
installcheck-am:
maintainer-clean: maintainer-clean-am
@@ -733,7 +731,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
mostlyclean-libtool
pdf: pdf-am
@@ -744,22 +742,18 @@ ps: ps-am
ps-am:
-uninstall-am:
-
-.MAKE: install-am install-strip
+uninstall-am: uninstall-info-am
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool ctags distclean distclean-compile \
distclean-generic distclean-libtool distclean-tags distdir dvi \
dvi-am html html-am info info-am install install-am \
- install-data install-data-am install-dvi install-dvi-am \
- install-exec install-exec-am install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
+ install-data install-data-am install-exec install-exec-am \
+ install-info install-info-am install-man install-strip \
installcheck installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- tags uninstall uninstall-am
+ mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
+ pdf-am ps ps-am tags uninstall uninstall-am uninstall-info-am
$(top_builddir)/tests/libtests.la:
@@ -788,17 +782,9 @@ mod_1.c:
echo "#include \"mpn/generic/mod_1.c\"" >>mod_1.c
sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm
- echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm
+ echo 'define(SQR_KARATSUBA_THRESHOLD_OVERRIDE,SQR_KARATSUBA_THRESHOLD_MAX)' >sqr_asm.asm
echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm
-# FIXME: Should it depend on $(top_builddir)/fac_ui.h too?
-fac_ui.c: $(top_builddir)/mpz/fac_ui.c
- echo "#define TUNE_PROGRAM_BUILD 1" >fac_ui.c
- echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c
- echo "#define __gmpz_oddfac_1 mpz_oddfac_1_tune" >>fac_ui.c
- echo "#include \"mpz/oddfac_1.c\"" >>fac_ui.c
- echo "#include \"mpz/fac_ui.c\"" >>fac_ui.c
-
# .s assembler, no preprocessing.
#
.s.o:
@@ -855,6 +841,50 @@ fac_ui.c: $(top_builddir)/mpz/fac_ui.c
.asm.lo:
$(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/m4-ccas --m4="$(M4)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$<
+# "mk" is multiplication in the karatsuba range
+# "st" is squaring in the toom-cook range, etc
+# "g" forms produce graphs
+
+mk:
+ ./speed -s 5-40 -c mpn_mul_basecase mpn_kara_mul_n
+mt:
+ ./speed $(MTS) mpn_kara_mul_n mpn_toom3_mul_n
+mtg:
+ ./speed $(MTS) -P mtg mpn_kara_mul_n mpn_toom3_mul_n
+
+sk:
+ ./speed -s 5-40 -c mpn_sqr_basecase mpn_kara_sqr_n
+st:
+ ./speed $(STS) mpn_kara_sqr_n mpn_toom3_sqr_n
+stg:
+ ./speed $(STS) -P stg mpn_kara_sqr_n mpn_toom3_sqr_n
+
+dc:
+ ./speed -s 5-40 -c mpn_dc_divrem_sb mpn_dc_divrem_n mpn_dc_tdiv_qr
+
+fib:
+ ./speed -s 40-60 -c mpz_fib_ui
+fibg:
+ ./speed -s 10-300 -P fibg mpz_fib_ui
+
+gcd:
+ ./speed -s 1-20 -c mpn_gcd
+
+udiv:
+ ./speed -s 1 -c udiv_qrnnd udiv_qrnnd_preinv udiv_qrnnd_preinv2norm invert_limb udiv_qrnnd_c
+
+divn:
+ ./speed -s 1-30 -c mpn_divrem_1_div.-1 mpn_divrem_1_inv.-1
+divun:
+ ./speed -s 1-30 -c mpn_divrem_1_div.12345 mpn_divrem_1_inv.12345
+modn:
+ ./speed -s 1-30 -c mpn_mod_1_div.-1 mpn_mod_1_inv.-1
+modun:
+ ./speed -s 1-30 -c mpn_mod_1_div.12345 mpn_mod_1_inv.12345
+
+graph:
+ ./speed -s 1-5000 -f 1.02 -P graph mpn_mul_n mpn_sqr
+ gnuplot graph.gnuplot
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
diff --git a/gmp/tune/README b/gmp/tune/README
index f76407f7ca..994231d3be 100644
--- a/gmp/tune/README
+++ b/gmp/tune/README
@@ -1,30 +1,19 @@
-Copyright 2000-2002, 2004 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
@@ -84,27 +73,6 @@ SCO OpenUNIX 8 /etc/hw
running the speed program repeatedly then set a GMP_CPU_FREQUENCY
environment variable (see TIME BASE section below).
-Timing on GNU/Linux
-
- On Linux, timing currently uses the cycle counter. This is unreliable,
- since the counter is not saved and restored at context switches (unlike
- FreeBSD and Solaris where the cycle counter is "virtualized").
-
- Using the clock_gettime method with CLOCK_PROCESS_CPUTIME_ID (posix) or
- CLOCK_VIRTUAL (BSD) should be more reliable. To get clock_gettime
- with glibc, one has to link with -lrt (which also drags in the pthreads
- threading library). configure.in must be hacked to detect this and
- arrange proper linking. Something like
-
- old_LIBS="$LIBS"
- AC_SEARCH_LIBS(clock_gettime, rt, [AC_DEFINE(HAVE_CLOCK_GETTIME)])
- TUNE_LIBS="$LIBS"
- LIBS="$old_LIBS"
-
- AC_SUBST(TUNE_LIBS)
-
- might work.
-
Low resolution timebase
Parameter tuning can be very time consuming if the only timebase
@@ -117,7 +85,7 @@ Low resolution timebase
PARAMETER TUNING
The "tuneup" program runs some tests designed to find the best settings for
-various thresholds, like MUL_TOOM22_THRESHOLD. Its output can be put
+various thresholds, like MUL_KARATSUBA_THRESHOLD. Its output can be put
into gmp-mparam.h. The program is built and run with
make tune
@@ -298,16 +266,10 @@ mpn_divrem_1, using division by 32 as an example.
EXAMPLE COMPARISONS - MULTIPLICATION
-mul_basecase takes a ".<r>" parameter. If positive, it gives the second
-(smaller) operand size. For example to show speeds for 3x3 up to 20x3 in
-cycles,
-
- ./speed -s 3-20 -c mpn_mul_basecase.3
-
-A negative ".<-r>" parameter fixes the size of the product to the absolute
-value r. For example to show speeds for 10x10 up to 19x1 in cycles,
+mul_basecase takes a ".<r>" parameter which is the first (larger) size
+parameter. For example to show speeds for 20x1 up to 20x15 in cycles,
- ./speed -s 10-19 -c mpn_mul_basecase.-20
+ ./speed -s 1-15 -c mpn_mul_basecase.20
mul_basecase with no parameter does an NxN multiply, so for example to show
speeds in cycles for 1x1, 2x2, 3x3, etc, up to 20x20, in cycles,
@@ -319,7 +281,7 @@ up to twice as fast as mul_basecase. In practice loop overheads and the
products on the diagonal mean it falls short of this. Here's an example
running the two and showing by what factor an NxN mul_basecase is slower
than an NxN sqr_basecase. (Some versions of sqr_basecase only allow sizes
-below SQR_TOOM2_THRESHOLD, so if it crashes at that point don't worry.)
+below SQR_KARATSUBA_THRESHOLD, so if it crashes at that point don't worry.)
./speed -s 1-20 -r mpn_sqr_basecase mpn_mul_basecase
@@ -465,12 +427,12 @@ normal libgmp.la.
Note further that the various routines may recurse into themselves on sizes
far enough above applicable thresholds. For example, mpn_kara_mul_n will
recurse into itself on sizes greater than twice the compiled-in
-MUL_TOOM22_THRESHOLD.
+MUL_KARATSUBA_THRESHOLD.
When doing the above comparison between mul_basecase and kara_mul_n what's
probably of interest is mul_basecase versus a kara_mul_n that does one level
of Karatsuba then calls to mul_basecase, but this only happens on sizes less
-than twice the compiled MUL_TOOM22_THRESHOLD. A larger value for that
+than twice the compiled MUL_KARATSUBA_THRESHOLD. A larger value for that
setting can be compiled-in to avoid the problem if necessary. The same
applies to toom3 and DC, though in a trickier fashion.
diff --git a/gmp/tune/alpha.asm b/gmp/tune/alpha.asm
index 888c77fe9d..b447462083 100644
--- a/gmp/tune/alpha.asm
+++ b/gmp/tune/alpha.asm
@@ -1,32 +1,21 @@
dnl Alpha time stamp counter access routine.
dnl Copyright 2000, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/tune/common.c b/gmp/tune/common.c
index 8d06b4dc02..85a1ddef3f 100644
--- a/gmp/tune/common.c
+++ b/gmp/tune/common.c
@@ -1,32 +1,22 @@
/* Shared speed subroutines.
-Copyright 1999-2006, 2008-2012 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define __GMP_NO_ATTRIBUTE_CONST_PURE
@@ -51,7 +41,6 @@ see https://www.gnu.org/licenses/. */
int speed_option_addrs = 0;
int speed_option_verbose = 0;
-int speed_option_cycles_broken = 0;
/* Provide __clz_tab even if it's not required, for the benefit of new code
@@ -72,9 +61,9 @@ pentium_wbinvd(void)
if (fd == -2)
{
- fd = open ("/dev/wbinvd", O_RDWR);
- if (fd == -1)
- perror ("open /dev/wbinvd");
+ fd = open ("/dev/wbinvd", O_RDWR);
+ if (fd == -1)
+ perror ("open /dev/wbinvd");
}
if (fd != -1)
@@ -132,9 +121,10 @@ double_cmp_ptr (const double *p, const double *q)
s->r, -1.0 should be returned. See the various base routines below. */
double
-speed_measure (double (*fun) (struct speed_params *s), struct speed_params *s)
+speed_measure (double (*fun) __GMP_PROTO ((struct speed_params *s)),
+ struct speed_params *s)
{
-#define TOLERANCE 1.01 /* 1% */
+#define TOLERANCE 1.005 /* 0.5% */
const int max_zeros = 10;
struct speed_params s_dummy;
@@ -157,77 +147,74 @@ speed_measure (double (*fun) (struct speed_params *s), struct speed_params *s)
for (i = 0; i < numberof (t); i++)
{
for (;;)
- {
- s->src_num = 0;
- s->dst_num = 0;
-
- t[i] = (*fun) (s);
-
- if (speed_option_verbose >= 3)
- gmp_printf("size=%ld reps=%u r=%Md attempt=%d %.9f\n",
- (long) s->size, s->reps, s->r, i, t[i]);
-
- if (t[i] == 0.0)
- {
- zeros++;
- if (zeros > max_zeros)
- {
- fprintf (stderr, "Fatal error: too many (%d) failed measurements (0.0)\n", zeros);
- abort ();
- }
- if (s->reps < 10000)
- s->reps *= 2;
-
- continue;
- }
-
- if (t[i] == -1.0)
- return -1.0;
-
- if (t[i] >= speed_unittime * speed_precision)
- break;
-
- /* go to a value of reps to make t[i] >= precision */
- reps_d = ceil (1.1 * s->reps
- * speed_unittime * speed_precision
- / MAX (t[i], speed_unittime));
- if (reps_d > 2e9 || reps_d < 1.0)
- {
- fprintf (stderr, "Fatal error: new reps bad: %.2f\n", reps_d);
- fprintf (stderr, " (old reps %u, unittime %.4g, precision %d, t[i] %.4g)\n",
- s->reps, speed_unittime, speed_precision, t[i]);
- abort ();
- }
- s->reps = (unsigned) reps_d;
- }
+ {
+ s->src_num = 0;
+ s->dst_num = 0;
+
+ t[i] = (*fun) (s);
+
+ if (speed_option_verbose >= 3)
+ gmp_printf("size=%ld reps=%u r=%Md attempt=%d %.9f\n",
+ (long) s->size, s->reps, s->r, i, t[i]);
+
+ if (t[i] == 0.0)
+ {
+ zeros++;
+ if (zeros > max_zeros)
+ {
+ fprintf (stderr, "Fatal error: too many (%d) failed measurements (0.0)\n", zeros);
+ abort ();
+ }
+ continue;
+ }
+
+ if (t[i] == -1.0)
+ return -1.0;
+
+ if (t[i] >= speed_unittime * speed_precision)
+ break;
+
+ /* go to a value of reps to make t[i] >= precision */
+ reps_d = ceil (1.1 * s->reps
+ * speed_unittime * speed_precision
+ / MAX (t[i], speed_unittime));
+ if (reps_d > 2e9 || reps_d < 1.0)
+ {
+ fprintf (stderr, "Fatal error: new reps bad: %.2f\n", reps_d);
+ fprintf (stderr, " (old reps %u, unittime %.4g, precision %d, t[i] %.4g)\n",
+ s->reps, speed_unittime, speed_precision, t[i]);
+ abort ();
+ }
+ s->reps = (unsigned) reps_d;
+ }
t[i] /= s->reps;
t_unsorted[i] = t[i];
if (speed_precision == 0)
- return t[i];
+ return t[i];
/* require 3 values within TOLERANCE when >= 2 secs, 4 when below */
if (t[0] >= 2.0)
- e = 3;
+ e = 3;
else
- e = 4;
+ e = 4;
/* Look for e many t[]'s within TOLERANCE of each other to consider a
- valid measurement. Return smallest among them. */
+ valid measurement. Return smallest among them. */
if (i >= e)
- {
- qsort (t, i+1, sizeof(t[0]), (qsort_function_t) double_cmp_ptr);
- for (j = e-1; j < i; j++)
- if (t[j] <= t[j-e+1] * TOLERANCE)
- return t[j-e+1] / s->time_divisor;
- }
+ {
+ qsort (t, i+1, sizeof(t[0]), (qsort_function_t) double_cmp_ptr);
+ for (j = e-1; j < i; j++)
+ if (t[j] <= t[j-e+1] * TOLERANCE)
+ return t[j-e+1] / s->time_divisor;
+ }
}
fprintf (stderr, "speed_measure() could not get %d results within %.1f%%\n",
- e, (TOLERANCE-1.0)*100.0);
+ e, (TOLERANCE-1.0)*100.0);
fprintf (stderr, " unsorted sorted\n");
fprintf (stderr, " %.12f %.12f is about 0.5%%\n",
- t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0));
+ t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0));
for (i = 0; i < numberof (t); i++)
fprintf (stderr, " %.09f %.09f\n", t_unsorted[i], t[i]);
@@ -318,30 +305,30 @@ speed_cache_fill (struct speed_params *s)
different = (s->dst_num != prev.dst_num || s->src_num != prev.src_num);
for (i = 0; i < s->dst_num; i++)
- different |= (s->dst[i].ptr != prev.dst[i].ptr);
+ different |= (s->dst[i].ptr != prev.dst[i].ptr);
for (i = 0; i < s->src_num; i++)
- different |= (s->src[i].ptr != prev.src[i].ptr);
+ different |= (s->src[i].ptr != prev.src[i].ptr);
if (different)
- {
- if (s->dst_num != 0)
- {
- printf ("dst");
- for (i = 0; i < s->dst_num; i++)
- printf (" %08lX", (unsigned long) s->dst[i].ptr);
- printf (" ");
- }
-
- if (s->src_num != 0)
- {
- printf ("src");
- for (i = 0; i < s->src_num; i++)
- printf (" %08lX", (unsigned long) s->src[i].ptr);
- printf (" ");
- }
- printf (" (cf sp approx %08lX)\n", (unsigned long) &different);
-
- }
+ {
+ if (s->dst_num != 0)
+ {
+ printf ("dst");
+ for (i = 0; i < s->dst_num; i++)
+ printf (" %08lX", (unsigned long) s->dst[i].ptr);
+ printf (" ");
+ }
+
+ if (s->src_num != 0)
+ {
+ printf ("src");
+ for (i = 0; i < s->src_num; i++)
+ printf (" %08lX", (unsigned long) s->src[i].ptr);
+ printf (" ");
+ }
+ printf (" (cf sp approx %08lX)\n", (unsigned long) &different);
+
+ }
memcpy (&prev, s, sizeof(prev));
}
@@ -360,7 +347,7 @@ speed_cache_fill (struct speed_params *s)
}
-/* Miscellaneous options accepted by tune and speed programs under -o. */
+/* Miscellanous options accepted by tune and speed programs under -o. */
void
speed_option_set (const char *s)
@@ -379,10 +366,6 @@ speed_option_set (const char *s)
{
speed_option_verbose = n;
}
- else if (strcmp (s, "cycles-broken") == 0)
- {
- speed_option_cycles_broken = 1;
- }
else
{
printf ("Unrecognised -o option: %s\n", s);
@@ -419,10 +402,10 @@ speed_option_set (const char *s)
code on most CPUs, thereby minimizing overhead in the measurement. It
can always be assumed s->reps >= 1.
- i = s->reps
- do
- foo();
- while (--i != 0);
+ i = s->reps
+ do
+ foo();
+ while (--i != 0);
Additional parameters might be added to "struct speed_params" in the
future. Routines should ignore anything they don't use.
@@ -469,14 +452,9 @@ speed_memcpy (struct speed_params *s)
SPEED_ROUTINE_MPN_COPY_BYTES (memcpy);
}
double
-speed_mpn_com (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_COPY (mpn_com);
-}
-double
-speed_mpn_sec_tabselect (struct speed_params *s)
+speed_mpn_com_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TABSELECT (mpn_sec_tabselect);
+ SPEED_ROUTINE_MPN_COPY (mpn_com_n);
}
@@ -573,20 +551,6 @@ speed_mpn_mul_4 (struct speed_params *s)
SPEED_ROUTINE_MPN_UNARY_4 (mpn_mul_4);
}
#endif
-#if HAVE_NATIVE_mpn_mul_5
-double
-speed_mpn_mul_5 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_UNARY_5 (mpn_mul_5);
-}
-#endif
-#if HAVE_NATIVE_mpn_mul_6
-double
-speed_mpn_mul_6 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_UNARY_6 (mpn_mul_6);
-}
-#endif
double
@@ -595,11 +559,6 @@ speed_mpn_lshift (struct speed_params *s)
SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshift);
}
double
-speed_mpn_lshiftc (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshiftc);
-}
-double
speed_mpn_rshift (struct speed_params *s)
{
SPEED_ROUTINE_MPN_UNARY_1 (mpn_rshift);
@@ -699,39 +658,6 @@ speed_mpn_divrem_2_inv (struct speed_params *s)
}
double
-speed_mpn_div_qr_1n_pi1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1);
-}
-double
-speed_mpn_div_qr_1n_pi1_1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_1);
-}
-double
-speed_mpn_div_qr_1n_pi1_2 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_2);
-}
-
-double
-speed_mpn_div_qr_1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_DIV_QR_1 (mpn_div_qr_1);
-}
-
-double
-speed_mpn_div_qr_2n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 1);
-}
-double
-speed_mpn_div_qr_2u (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 0);
-}
-
-double
speed_mpn_mod_1 (struct speed_params *s)
{
SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1);
@@ -748,36 +674,6 @@ speed_mpn_preinv_mod_1 (struct speed_params *s)
{
SPEED_ROUTINE_MPN_PREINV_MOD_1 (mpn_preinv_mod_1);
}
-double
-speed_mpn_mod_1_1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p,mpn_mod_1_1p_cps);
-}
-double
-speed_mpn_mod_1_1_1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_1,mpn_mod_1_1p_cps_1);
-}
-double
-speed_mpn_mod_1_1_2 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_2,mpn_mod_1_1p_cps_2);
-}
-double
-speed_mpn_mod_1_2 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_2p,mpn_mod_1s_2p_cps,2);
-}
-double
-speed_mpn_mod_1_3 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_3p,mpn_mod_1s_3p_cps,3);
-}
-double
-speed_mpn_mod_1_4 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_4p,mpn_mod_1s_4p_cps,4);
-}
double
speed_mpn_divexact_1 (struct speed_params *s)
@@ -797,18 +693,6 @@ speed_mpn_bdiv_dbm1c (struct speed_params *s)
SPEED_ROUTINE_MPN_BDIV_DBM1C (mpn_bdiv_dbm1c);
}
-double
-speed_mpn_bdiv_q_1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BDIV_Q_1 (mpn_bdiv_q_1);
-}
-
-double
-speed_mpn_pi1_bdiv_q_1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_PI1_BDIV_Q_1 (mpn_pi1_bdiv_q_1);
-}
-
#if HAVE_NATIVE_mpn_modexact_1_odd
double
speed_mpn_modexact_1_odd (struct speed_params *s)
@@ -823,145 +707,59 @@ speed_mpn_modexact_1c_odd (struct speed_params *s)
SPEED_ROUTINE_MPN_MODEXACT_1C_ODD (mpn_modexact_1c_odd);
}
-double
-speed_mpz_mod (struct speed_params *s)
-{
- SPEED_ROUTINE_MPZ_MOD (mpz_mod);
-}
double
-speed_mpn_sbpi1_div_qr (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_div_qr, inv.inv32, 2,0);
-}
-double
-speed_mpn_dcpi1_div_qr (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_div_qr, &inv, 6,3);
-}
-double
-speed_mpn_sbpi1_divappr_q (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_divappr_q, inv.inv32, 2,0);
-}
-double
-speed_mpn_dcpi1_divappr_q (struct speed_params *s)
+speed_mpn_dc_tdiv_qr (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_divappr_q, &inv, 6,3);
+ SPEED_ROUTINE_MPN_DC_TDIV_QR (mpn_tdiv_qr);
}
double
-speed_mpn_mu_div_qr (struct speed_params *s)
+speed_mpn_dc_divrem_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_MU_DIV_QR (mpn_mu_div_qr, mpn_mu_div_qr_itch);
+ SPEED_ROUTINE_MPN_DC_DIVREM_N (mpn_dc_divrem_n);
}
double
-speed_mpn_mu_divappr_q (struct speed_params *s)
+speed_mpn_dc_divrem_sb (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_divappr_q, mpn_mu_divappr_q_itch);
+ SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn);
}
double
-speed_mpn_mu_div_q (struct speed_params *s)
+speed_mpn_dc_divrem_sb_div (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_div_q, mpn_mu_div_q_itch);
+ SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn_div);
}
double
-speed_mpn_mupi_div_qr (struct speed_params *s)
+speed_mpn_dc_divrem_sb_inv (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_MUPI_DIV_QR (mpn_preinv_mu_div_qr, mpn_preinv_mu_div_qr_itch);
+ SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn_inv);
}
double
-speed_mpn_sbpi1_bdiv_qr (struct speed_params *s)
+speed_mpn_sb_divrem_m3 (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_sbpi1_bdiv_qr);
+ SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn);
}
double
-speed_mpn_dcpi1_bdiv_qr (struct speed_params *s)
+speed_mpn_sb_divrem_m3_div (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_dcpi1_bdiv_qr);
+ SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn_div);
}
double
-speed_mpn_sbpi1_bdiv_q (struct speed_params *s)
+speed_mpn_sb_divrem_m3_inv (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_sbpi1_bdiv_q);
-}
-double
-speed_mpn_dcpi1_bdiv_q (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_dcpi1_bdiv_q);
-}
-double
-speed_mpn_mu_bdiv_q (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MU_BDIV_Q (mpn_mu_bdiv_q, mpn_mu_bdiv_q_itch);
-}
-double
-speed_mpn_mu_bdiv_qr (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MU_BDIV_QR (mpn_mu_bdiv_qr, mpn_mu_bdiv_qr_itch);
-}
-
-double
-speed_mpn_broot (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BROOT (mpn_broot);
-}
-double
-speed_mpn_broot_invm1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BROOT (mpn_broot_invm1);
-}
-double
-speed_mpn_brootinv (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BROOTINV (mpn_brootinv, 5*s->size);
+ SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn_inv);
}
double
-speed_mpn_binvert (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINVERT (mpn_binvert, mpn_binvert_itch);
-}
-
-double
-speed_mpn_invert (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_INVERT (mpn_invert, mpn_invert_itch);
-}
-
-double
-speed_mpn_invertappr (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_INVERTAPPR (mpn_invertappr, mpn_invertappr_itch);
-}
-
-double
-speed_mpn_ni_invertappr (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_INVERTAPPR (mpn_ni_invertappr, mpn_invertappr_itch);
-}
-
-double
-speed_mpn_sec_invert (struct speed_params *s)
+speed_mpz_mod (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_SEC_INVERT (mpn_sec_invert, mpn_sec_invert_itch);
+ SPEED_ROUTINE_MPZ_MOD (mpz_mod);
}
-
double
speed_mpn_redc_1 (struct speed_params *s)
{
SPEED_ROUTINE_REDC_1 (mpn_redc_1);
}
-double
-speed_mpn_redc_2 (struct speed_params *s)
-{
- SPEED_ROUTINE_REDC_2 (mpn_redc_2);
-}
-double
-speed_mpn_redc_n (struct speed_params *s)
-{
- SPEED_ROUTINE_REDC_N (mpn_redc_n);
-}
double
@@ -987,172 +785,28 @@ speed_mpn_sub_n (struct speed_params *s)
SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n);
}
+#if HAVE_NATIVE_mpn_addsub_n
double
-speed_mpn_add_err1_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_add_err1_n);
-}
-double
-speed_mpn_sub_err1_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_sub_err1_n);
-}
-double
-speed_mpn_add_err2_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_add_err2_n);
-}
-double
-speed_mpn_sub_err2_n (struct speed_params *s)
+speed_mpn_addsub_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_sub_err2_n);
-}
-double
-speed_mpn_add_err3_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_add_err3_n);
-}
-double
-speed_mpn_sub_err3_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_sub_err3_n);
-}
-
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
-double
-speed_mpn_add_n_sub_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_ADDSUB_N_CALL (mpn_add_n_sub_n (ap, sp, s->xp, s->yp, s->size));
+ SPEED_ROUTINE_MPN_ADDSUB_N_CALL (mpn_addsub_n (ap, sp, s->xp, s->yp, s->size));
}
#endif
-#if HAVE_NATIVE_mpn_addlsh1_n == 1
+#if HAVE_NATIVE_mpn_addlsh1_n
double
speed_mpn_addlsh1_n (struct speed_params *s)
{
SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh1_n);
}
#endif
-#if HAVE_NATIVE_mpn_sublsh1_n == 1
+#if HAVE_NATIVE_mpn_sublsh1_n
double
speed_mpn_sublsh1_n (struct speed_params *s)
{
SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh1_n);
}
#endif
-#if HAVE_NATIVE_mpn_addlsh1_n_ip1
-double
-speed_mpn_addlsh1_n_ip1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip1);
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh1_n_ip2
-double
-speed_mpn_addlsh1_n_ip2 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip2);
-}
-#endif
-#if HAVE_NATIVE_mpn_sublsh1_n_ip1
-double
-speed_mpn_sublsh1_n_ip1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_COPY (mpn_sublsh1_n_ip1);
-}
-#endif
-#if HAVE_NATIVE_mpn_rsblsh1_n == 1
-double
-speed_mpn_rsblsh1_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh1_n);
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh2_n == 1
-double
-speed_mpn_addlsh2_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh2_n);
-}
-#endif
-#if HAVE_NATIVE_mpn_sublsh2_n == 1
-double
-speed_mpn_sublsh2_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh2_n);
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh2_n_ip1
-double
-speed_mpn_addlsh2_n_ip1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip1);
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh2_n_ip2
-double
-speed_mpn_addlsh2_n_ip2 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip2);
-}
-#endif
-#if HAVE_NATIVE_mpn_sublsh2_n_ip1
-double
-speed_mpn_sublsh2_n_ip1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_COPY (mpn_sublsh2_n_ip1);
-}
-#endif
-#if HAVE_NATIVE_mpn_rsblsh2_n == 1
-double
-speed_mpn_rsblsh2_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh2_n);
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh_n
-double
-speed_mpn_addlsh_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addlsh_n (wp, xp, yp, s->size, 7));
-}
-#endif
-#if HAVE_NATIVE_mpn_sublsh_n
-double
-speed_mpn_sublsh_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_sublsh_n (wp, xp, yp, s->size, 7));
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh_n_ip1
-double
-speed_mpn_addlsh_n_ip1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip1 (wp, s->xp, s->size, 7));
-}
-#endif
-#if HAVE_NATIVE_mpn_addlsh_n_ip2
-double
-speed_mpn_addlsh_n_ip2 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip2 (wp, s->xp, s->size, 7));
-}
-#endif
-#if HAVE_NATIVE_mpn_sublsh_n_ip1
-double
-speed_mpn_sublsh_n_ip1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_sublsh_n_ip1 (wp, s->xp, s->size, 7));
-}
-#endif
-#if HAVE_NATIVE_mpn_rsblsh_n
-double
-speed_mpn_rsblsh_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_rsblsh_n (wp, xp, yp, s->size, 7));
-}
-#endif
#if HAVE_NATIVE_mpn_rsh1add_n
double
speed_mpn_rsh1add_n (struct speed_params *s)
@@ -1168,58 +822,47 @@ speed_mpn_rsh1sub_n (struct speed_params *s)
}
#endif
-double
-speed_mpn_cnd_add_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_cnd_add_n (1, wp, xp, yp, s->size));
-}
-double
-speed_mpn_cnd_sub_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_cnd_sub_n (1, wp, xp, yp, s->size));
-}
-
/* mpn_and_n etc can be macros and so have to be handled with
SPEED_ROUTINE_MPN_BINARY_N_CALL forms */
double
speed_mpn_and_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, xp, yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, s->xp, s->yp, s->size));
}
double
speed_mpn_andn_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, xp, yp, s->size));
+SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, s->xp, s->yp, s->size));
}
double
speed_mpn_nand_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, xp, yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, s->xp, s->yp, s->size));
}
double
speed_mpn_ior_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, xp, yp, s->size));
+SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, s->xp, s->yp, s->size));
}
double
speed_mpn_iorn_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, xp, yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, s->xp, s->yp, s->size));
}
double
speed_mpn_nior_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, xp, yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, s->xp, s->yp, s->size));
}
double
speed_mpn_xor_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, xp, yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, s->xp, s->yp, s->size));
}
double
speed_mpn_xnor_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, xp, yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, s->xp, s->yp, s->size));
}
@@ -1229,9 +872,9 @@ speed_mpn_mul_n (struct speed_params *s)
SPEED_ROUTINE_MPN_MUL_N (mpn_mul_n);
}
double
-speed_mpn_sqr (struct speed_params *s)
+speed_mpn_sqr_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_SQR (mpn_sqr);
+ SPEED_ROUTINE_MPN_SQR (mpn_sqr_n);
}
double
speed_mpn_mul_n_sqr (struct speed_params *s)
@@ -1264,141 +907,29 @@ speed_mpn_sqr_diagonal (struct speed_params *s)
}
#endif
-#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
-double
-speed_mpn_sqr_diag_addlsh1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL (mpn_sqr_diag_addlsh1 (wp, tp, s->xp, s->size));
-}
-#endif
-
-double
-speed_mpn_toom2_sqr (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM2_SQR (mpn_toom2_sqr);
-}
-double
-speed_mpn_toom3_sqr (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM3_SQR (mpn_toom3_sqr);
-}
-double
-speed_mpn_toom4_sqr (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM4_SQR (mpn_toom4_sqr);
-}
-double
-speed_mpn_toom6_sqr (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM6_SQR (mpn_toom6_sqr);
-}
-double
-speed_mpn_toom8_sqr (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM8_SQR (mpn_toom8_sqr);
-}
-double
-speed_mpn_toom22_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul);
-}
-double
-speed_mpn_toom33_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM33_MUL_N (mpn_toom33_mul);
-}
double
-speed_mpn_toom44_mul (struct speed_params *s)
+speed_mpn_kara_mul_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul);
+ SPEED_ROUTINE_MPN_KARA_MUL_N (mpn_kara_mul_n);
}
double
-speed_mpn_toom6h_mul (struct speed_params *s)
+speed_mpn_kara_sqr_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul);
-}
-double
-speed_mpn_toom8h_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM8H_MUL_N (mpn_toom8h_mul);
+ SPEED_ROUTINE_MPN_KARA_SQR_N (mpn_kara_sqr_n);
}
double
-speed_mpn_toom32_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM32_MUL (mpn_toom32_mul);
-}
-double
-speed_mpn_toom42_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM42_MUL (mpn_toom42_mul);
-}
-double
-speed_mpn_toom43_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM43_MUL (mpn_toom43_mul);
-}
-double
-speed_mpn_toom63_mul (struct speed_params *s)
+speed_mpn_toom3_mul_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM63_MUL (mpn_toom63_mul);
+ SPEED_ROUTINE_MPN_TOOM3_MUL_N (mpn_toom3_mul_n);
}
double
-speed_mpn_toom32_for_toom43_mul (struct speed_params *s)
+speed_mpn_toom3_sqr_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL (mpn_toom32_mul);
-}
-double
-speed_mpn_toom43_for_toom32_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL (mpn_toom43_mul);
-}
-double
-speed_mpn_toom32_for_toom53_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL (mpn_toom32_mul);
-}
-double
-speed_mpn_toom53_for_toom32_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL (mpn_toom53_mul);
-}
-double
-speed_mpn_toom42_for_toom53_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL (mpn_toom42_mul);
-}
-double
-speed_mpn_toom53_for_toom42_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL (mpn_toom53_mul);
-}
-double
-speed_mpn_toom43_for_toom54_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL (mpn_toom43_mul);
-}
-double
-speed_mpn_toom54_for_toom43_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL (mpn_toom54_mul);
+ SPEED_ROUTINE_MPN_TOOM3_SQR_N (mpn_toom3_sqr_n);
}
double
-speed_mpn_nussbaumer_mul (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MUL_N_CALL
- (mpn_nussbaumer_mul (wp, s->xp, s->size, s->yp, s->size));
-}
-double
-speed_mpn_nussbaumer_mul_sqr (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_SQR_CALL
- (mpn_nussbaumer_mul (wp, s->xp, s->size, s->xp, s->size));
-}
-
-#if WANT_OLD_FFT_FULL
-double
speed_mpn_mul_fft_full (struct speed_params *s)
{
SPEED_ROUTINE_MPN_MUL_N_CALL
@@ -1410,7 +941,7 @@ speed_mpn_mul_fft_full_sqr (struct speed_params *s)
SPEED_ROUTINE_MPN_SQR_CALL
(mpn_mul_fft_full (wp, s->xp, s->size, s->xp, s->size));
}
-#endif
+
/* These are mod 2^N+1 multiplies and squares. If s->r is supplied it's
used as k, otherwise the best k for the size is used. If s->size isn't a
@@ -1424,31 +955,31 @@ speed_mpn_mul_fft_full_sqr (struct speed_params *s)
unsigned i; \
double t; \
TMP_DECL; \
- \
+ \
SPEED_RESTRICT_COND (s->size >= 1); \
- \
+ \
if (s->r != 0) \
k = s->r; \
else \
k = mpn_fft_best_k (s->size, sqr); \
- \
+ \
TMP_MARK; \
pl = mpn_fft_next_size (s->size, k); \
SPEED_TMP_ALLOC_LIMBS (wp, pl+1, s->align_wp); \
- \
+ \
speed_operand_src (s, s->xp, s->size); \
if (!sqr) \
speed_operand_src (s, s->yp, s->size); \
speed_operand_dst (s, wp, pl+1); \
speed_cache_fill (s); \
- \
+ \
speed_starttime (); \
i = s->reps; \
do \
call; \
while (--i != 0); \
t = speed_endtime (); \
- \
+ \
TMP_FREE; \
return t; \
}
@@ -1468,174 +999,179 @@ speed_mpn_mul_fft_sqr (struct speed_params *s)
}
double
-speed_mpn_fft_mul (struct speed_params *s)
+speed_mpn_mullow_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_MUL_N_CALL (mpn_fft_mul (wp, s->xp, s->size, s->yp, s->size));
+ SPEED_ROUTINE_MPN_MULLOW_N (mpn_mullow_n);
}
-
double
-speed_mpn_fft_sqr (struct speed_params *s)
+speed_mpn_mullow_basecase (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_SQR_CALL (mpn_fft_mul (wp, s->xp, s->size, s->xp, s->size));
+ SPEED_ROUTINE_MPN_MULLOW_BASECASE (mpn_mullow_basecase);
}
double
-speed_mpn_mullo_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MULLO_N (mpn_mullo_n);
-}
-double
-speed_mpn_mullo_basecase (struct speed_params *s)
+speed_mpn_matrix22_mul (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_MULLO_BASECASE (mpn_mullo_basecase);
-}
+ /* Speed params only includes 2 inputs, so we have to invent the
+ other 6. */
-double
-speed_mpn_mulmid_basecase (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MULMID (mpn_mulmid_basecase);
-}
+ mp_ptr a1, a2, a3;
+ mp_ptr r0, r1, r2, r3;
+ mp_ptr b1, b2, b3;
+ mp_ptr tp;
+ mp_size_t scratch;
+ unsigned i;
+ double t;
+ TMP_DECL;
-double
-speed_mpn_mulmid (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MULMID (mpn_mulmid);
-}
+ TMP_MARK;
+ SPEED_TMP_ALLOC_LIMBS (a1, s->size, s->align_xp);
+ SPEED_TMP_ALLOC_LIMBS (a2, s->size, s->align_xp);
+ SPEED_TMP_ALLOC_LIMBS (a3, s->size, s->align_xp);
-double
-speed_mpn_mulmid_n (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MULMID_N (mpn_mulmid_n);
-}
+ SPEED_TMP_ALLOC_LIMBS (b1, s->size, s->align_yp);
+ SPEED_TMP_ALLOC_LIMBS (b2, s->size, s->align_yp);
+ SPEED_TMP_ALLOC_LIMBS (b3, s->size, s->align_yp);
-double
-speed_mpn_toom42_mulmid (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_TOOM42_MULMID (mpn_toom42_mulmid);
-}
+ SPEED_TMP_ALLOC_LIMBS (r0, 2 * s->size +1, s->align_xp);
+ SPEED_TMP_ALLOC_LIMBS (r1, 2 * s->size +1, s->align_xp);
+ SPEED_TMP_ALLOC_LIMBS (r2, 2 * s->size +1, s->align_xp);
+ SPEED_TMP_ALLOC_LIMBS (r3, 2 * s->size +1, s->align_xp);
-double
-speed_mpn_mulmod_bnm1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_mulmod_bnm1 (wp, s->size, s->xp, s->size, s->yp, s->size, tp));
-}
+ mpn_random (a1, s->size);
+ mpn_random (a2, s->size);
+ mpn_random (a3, s->size);
+ mpn_random (b1, s->size);
+ mpn_random (b2, s->size);
+ mpn_random (b3, s->size);
-double
-speed_mpn_bc_mulmod_bnm1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_bc_mulmod_bnm1 (wp, s->xp, s->yp, s->size, tp));
-}
+ scratch = mpn_matrix22_mul_itch (s->size, s->size);
+ SPEED_TMP_ALLOC_LIMBS (tp, scratch, s->align_wp);
-double
-speed_mpn_mulmod_bnm1_rounded (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED (mpn_mulmod_bnm1);
-}
-
-double
-speed_mpn_sqrmod_bnm1 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_sqrmod_bnm1 (wp, s->size, s->xp, s->size, tp));
+ speed_starttime ();
+ i = s->reps;
+ do
+ {
+ MPN_COPY (r0, s->xp, s->size);
+ MPN_COPY (r1, a1, s->size);
+ MPN_COPY (r2, a2, s->size);
+ MPN_COPY (r3, a3, s->size);
+ mpn_matrix22_mul (r0, r1, r2, r3, s->size, s->yp, b1, b2, b3, s->size, tp);
+ }
+ while (--i != 0);
+ t = speed_endtime();
+ TMP_FREE;
+ return t;
}
double
-speed_mpn_matrix22_mul (struct speed_params *s)
+speed_mpn_hgcd (struct speed_params *s)
{
- /* Speed params only includes 2 inputs, so we have to invent the
- other 6. */
+ mp_ptr wp;
+ mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
+ mp_size_t hgcd_scratch = mpn_hgcd_itch (s->size);
+ mp_ptr ap;
+ mp_ptr bp;
+ mp_ptr tmp1, tmp2;
- mp_ptr a;
- mp_ptr r;
- mp_ptr b;
- mp_ptr tp;
- mp_size_t itch;
+ struct hgcd_matrix hgcd;
+ int res;
unsigned i;
double t;
TMP_DECL;
+ if (s->size < 2)
+ return -1;
+
TMP_MARK;
- SPEED_TMP_ALLOC_LIMBS (a, 4 * s->size, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (b, 4 * s->size, s->align_yp);
- SPEED_TMP_ALLOC_LIMBS (r, 8 * s->size + 4, s->align_wp);
- MPN_COPY (a, s->xp, s->size);
- mpn_random (a + s->size, 3 * s->size);
- MPN_COPY (b, s->yp, s->size);
- mpn_random (b + s->size, 3 * s->size);
+ SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
+ SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
- itch = mpn_matrix22_mul_itch (s->size, s->size);
- SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2);
+ s->xp[s->size - 1] |= 1;
+ s->yp[s->size - 1] |= 1;
- speed_operand_src (s, a, 4 * s->size);
- speed_operand_src (s, b, 4 * s->size);
- speed_operand_dst (s, r, 8 * s->size + 4);
- speed_operand_dst (s, tp, itch);
- speed_cache_fill (s);
+ SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
+ SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
speed_starttime ();
i = s->reps;
do
{
- mp_size_t sz = s->size;
- MPN_COPY (r + 0 * sz + 0, a + 0 * sz, sz);
- MPN_COPY (r + 2 * sz + 1, a + 1 * sz, sz);
- MPN_COPY (r + 4 * sz + 2, a + 2 * sz, sz);
- MPN_COPY (r + 6 * sz + 3, a + 3 * sz, sz);
- mpn_matrix22_mul (r, r + 2 * sz + 1, r + 4 * sz + 2, r + 6 * sz + 3, sz,
- b, b + 1 * sz, b + 2 * sz, b + 3 * sz, sz,
- tp);
+ MPN_COPY (ap, s->xp, s->size);
+ MPN_COPY (bp, s->yp, s->size);
+ mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
+ res = mpn_hgcd (ap, bp, s->size, &hgcd, wp);
}
while (--i != 0);
- t = speed_endtime();
+ t = speed_endtime ();
TMP_FREE;
return t;
}
double
-speed_mpn_hgcd (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd, mpn_hgcd_itch);
-}
-
-double
speed_mpn_hgcd_lehmer (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, mpn_hgcd_lehmer_itch);
-}
+ mp_ptr wp;
+ mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
+ mp_size_t hgcd_scratch = MPN_HGCD_LEHMER_ITCH (s->size);
+ mp_ptr ap;
+ mp_ptr bp;
+ mp_ptr tmp1, tmp2;
-double
-speed_mpn_hgcd_appr (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr, mpn_hgcd_appr_itch);
-}
+ struct hgcd_matrix hgcd;
+ int res;
+ unsigned i;
+ double t;
+ TMP_DECL;
-double
-speed_mpn_hgcd_appr_lehmer (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr_lehmer, mpn_hgcd_appr_lehmer_itch);
+ if (s->size < 2)
+ return -1;
+
+ TMP_MARK;
+
+ SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
+ SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
+
+ s->xp[s->size - 1] |= 1;
+ s->yp[s->size - 1] |= 1;
+
+ SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
+ SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
+
+ speed_starttime ();
+ i = s->reps;
+ do
+ {
+ MPN_COPY (ap, s->xp, s->size);
+ MPN_COPY (bp, s->yp, s->size);
+ mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
+ res = mpn_hgcd_lehmer (ap, bp, s->size, &hgcd, wp);
+ }
+ while (--i != 0);
+ t = speed_endtime ();
+ TMP_FREE;
+ return t;
}
double
-speed_mpn_hgcd_reduce (struct speed_params *s)
+speed_mpn_gcd (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch);
+ SPEED_ROUTINE_MPN_GCD (mpn_gcd);
}
+#if 0
double
-speed_mpn_hgcd_reduce_1 (struct speed_params *s)
+speed_mpn_gcd_binary (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch);
+ SPEED_ROUTINE_MPN_GCD (mpn_gcd_binary);
}
double
-speed_mpn_hgcd_reduce_2 (struct speed_params *s)
+speed_mpn_gcd_accel (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch);
+ SPEED_ROUTINE_MPN_GCD (mpn_gcd_accel);
}
+#endif
-double
-speed_mpn_gcd (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_GCD (mpn_gcd);
-}
double
speed_mpn_gcdext (struct speed_params *s)
@@ -1706,11 +1242,6 @@ speed_mpn_jacobi_base_3 (struct speed_params *s)
{
SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_3);
}
-double
-speed_mpn_jacobi_base_4 (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_4);
-}
double
@@ -1776,11 +1307,6 @@ speed_mpz_powm_redc (struct speed_params *s)
SPEED_ROUTINE_MPZ_POWM (mpz_powm_redc);
}
double
-speed_mpz_powm_sec (struct speed_params *s)
-{
- SPEED_ROUTINE_MPZ_POWM (mpz_powm_sec);
-}
-double
speed_mpz_powm_ui (struct speed_params *s)
{
SPEED_ROUTINE_MPZ_POWM_UI (mpz_powm_ui);
@@ -1856,12 +1382,12 @@ speed_noop_wxys (struct speed_params *s)
{ \
unsigned i; \
variables; \
- \
+ \
speed_starttime (); \
i = s->reps; \
do \
{ \
- calls; \
+ calls; \
} \
while (--i != 0); \
return speed_endtime (); \
@@ -1876,39 +1402,39 @@ speed_noop_wxys (struct speed_params *s)
double
speed_malloc_free (struct speed_params *s)
{
- size_t bytes = s->size * GMP_LIMB_BYTES;
+ size_t bytes = s->size * BYTES_PER_MP_LIMB;
SPEED_ROUTINE_ALLOC_FREE (void *p,
- p = malloc (bytes);
- free (p));
+ p = malloc (bytes);
+ free (p));
}
double
speed_malloc_realloc_free (struct speed_params *s)
{
- size_t bytes = s->size * GMP_LIMB_BYTES;
+ size_t bytes = s->size * BYTES_PER_MP_LIMB;
SPEED_ROUTINE_ALLOC_FREE (void *p,
- p = malloc (GMP_LIMB_BYTES);
- p = realloc (p, bytes);
- free (p));
+ p = malloc (BYTES_PER_MP_LIMB);
+ p = realloc (p, bytes);
+ free (p));
}
double
speed_gmp_allocate_free (struct speed_params *s)
{
- size_t bytes = s->size * GMP_LIMB_BYTES;
+ size_t bytes = s->size * BYTES_PER_MP_LIMB;
SPEED_ROUTINE_ALLOC_FREE (void *p,
- p = (*__gmp_allocate_func) (bytes);
- (*__gmp_free_func) (p, bytes));
+ p = (*__gmp_allocate_func) (bytes);
+ (*__gmp_free_func) (p, bytes));
}
double
speed_gmp_allocate_reallocate_free (struct speed_params *s)
{
- size_t bytes = s->size * GMP_LIMB_BYTES;
+ size_t bytes = s->size * BYTES_PER_MP_LIMB;
SPEED_ROUTINE_ALLOC_FREE
(void *p,
- p = (*__gmp_allocate_func) (GMP_LIMB_BYTES);
- p = (*__gmp_reallocate_func) (p, bytes, GMP_LIMB_BYTES);
+ p = (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+ p = (*__gmp_reallocate_func) (p, bytes, BYTES_PER_MP_LIMB);
(*__gmp_free_func) (p, bytes));
}
@@ -1916,38 +1442,38 @@ double
speed_mpz_init_clear (struct speed_params *s)
{
SPEED_ROUTINE_ALLOC_FREE (mpz_t z,
- mpz_init (z);
- mpz_clear (z));
+ mpz_init (z);
+ mpz_clear (z));
}
double
speed_mpz_init_realloc_clear (struct speed_params *s)
{
SPEED_ROUTINE_ALLOC_FREE (mpz_t z,
- mpz_init (z);
- _mpz_realloc (z, s->size);
- mpz_clear (z));
+ mpz_init (z);
+ _mpz_realloc (z, s->size);
+ mpz_clear (z));
}
double
speed_mpq_init_clear (struct speed_params *s)
{
SPEED_ROUTINE_ALLOC_FREE (mpq_t q,
- mpq_init (q);
- mpq_clear (q));
+ mpq_init (q);
+ mpq_clear (q));
}
double
speed_mpf_init_clear (struct speed_params *s)
{
SPEED_ROUTINE_ALLOC_FREE (mpf_t f,
- mpf_init (f);
- mpf_clear (f));
+ mpf_init (f);
+ mpf_clear (f));
}
/* Compare this to mpn_add_n to see how much overhead mpz_add adds. Note
- that repeatedly calling mpz_add with the same data gives branch prediction
+ that repeatedly calling mpz_add with the same data gives branch predition
in it an advantage. */
double
@@ -2011,40 +1537,6 @@ speed_mpz_bin_uiui (struct speed_params *s)
return t;
}
-/* If r==0, calculate binomial(2^size,size),
- otherwise calculate binomial(2^size,r). */
-
-double
-speed_mpz_bin_ui (struct speed_params *s)
-{
- mpz_t w, x;
- unsigned long k;
- unsigned i;
- double t;
-
- mpz_init (w);
- mpz_init_set_ui (x, 0);
-
- mpz_setbit (x, s->size);
-
- if (s->r != 0)
- k = s->r;
- else
- k = s->size;
-
- speed_starttime ();
- i = s->reps;
- do
- {
- mpz_bin_ui (w, x, k);
- }
- while (--i != 0);
- t = speed_endtime ();
-
- mpz_clear (w);
- mpz_clear (x);
- return t;
-}
/* The multiplies are successively dependent so the latency is measured, not
the issue rate. There's only 10 per loop so the code doesn't get too big
@@ -2072,41 +1564,41 @@ speed_mpz_bin_ui (struct speed_params *s)
mp_limb_t h, l; \
unsigned i; \
double t; \
- \
+ \
s->time_divisor = 10; \
- \
+ \
h = s->xp[0]; \
l = s->yp[0]; \
- \
+ \
if (s->r == 1) \
{ \
- speed_starttime (); \
- i = s->reps; \
- do \
- {
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ {
#define SPEED_MACRO_UMUL_PPMM_B \
- } \
- while (--i != 0); \
- t = speed_endtime (); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
} \
else \
{ \
- speed_starttime (); \
- i = s->reps; \
- do \
- {
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ {
#define SPEED_MACRO_UMUL_PPMM_C \
- } \
- while (--i != 0); \
- t = speed_endtime (); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
} \
- \
+ \
/* stop the compiler optimizing away the whole calculation! */ \
noop_1 (h); \
noop_1 (l); \
- \
+ \
return t; \
}
@@ -2242,25 +1734,25 @@ speed_mpn_umul_ppmm_r (struct speed_params *s)
unsigned i; \
mp_limb_t q, r, d; \
mp_limb_t dinv; \
- \
+ \
s->time_divisor = 10; \
- \
+ \
/* divisor from "r" parameter, or a default */ \
d = s->r; \
if (d == 0) \
- d = mp_bases[10].big_base; \
- \
+ d = __mp_bases[10].big_base; \
+ \
if (normalize) \
{ \
- unsigned norm; \
- count_leading_zeros (norm, d); \
- d <<= norm; \
- invert_limb (dinv, d); \
+ unsigned norm; \
+ count_leading_zeros (norm, d); \
+ d <<= norm; \
+ invert_limb (dinv, d); \
} \
- \
+ \
q = s->xp[0]; \
r = s->yp[0] % d; \
- \
+ \
speed_starttime (); \
i = s->reps; \
do \
@@ -2270,11 +1762,11 @@ speed_mpn_umul_ppmm_r (struct speed_params *s)
} \
while (--i != 0); \
t = speed_endtime (); \
- \
+ \
/* stop the compiler optimizing away the whole calculation! */ \
noop_1 (q); \
noop_1 (r); \
- \
+ \
return t; \
}
@@ -2298,6 +1790,44 @@ speed_udiv_qrnnd (struct speed_params *s)
}
double
+speed_udiv_qrnnd_preinv1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_UDIV_QRNND_A (1);
+ {
+ udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
+ }
+ SPEED_ROUTINE_UDIV_QRNND_B;
+}
+
+double
+speed_udiv_qrnnd_preinv2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_UDIV_QRNND_A (1);
+ {
+ udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+ udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
+ }
+ SPEED_ROUTINE_UDIV_QRNND_B;
+}
+
+double
speed_udiv_qrnnd_c (struct speed_params *s)
{
SPEED_ROUTINE_UDIV_QRNND_A (1);
@@ -2380,7 +1910,7 @@ speed_operator_div (struct speed_params *s)
/* divisor from "r" parameter, or a default */
d = s->r;
if (d == 0)
- d = mp_bases[10].big_base;
+ d = __mp_bases[10].big_base;
x = s->xp[0];
q = 0;
@@ -2421,7 +1951,7 @@ speed_operator_mod (struct speed_params *s)
/* divisor from "r" parameter, or a default */
d = s->r;
if (d == 0)
- d = mp_bases[10].big_base;
+ d = __mp_bases[10].big_base;
x = s->xp[0];
r = 0;
@@ -2455,12 +1985,12 @@ speed_operator_mod (struct speed_params *s)
be typical for count_trailing_zeros in a GCD etc.
r==1 measures on data with the resultant count uniformly distributed
- between 0 and GMP_LIMB_BITS-1. This is probably sensible for
+ between 0 and BITS_PER_MP_LIMB-1. This is probably sensible for
count_leading_zeros on the high limbs of divisors. */
int
speed_routine_count_zeros_setup (struct speed_params *s,
- mp_ptr xp, int leading, int zero)
+ mp_ptr xp, int leading, int zero)
{
int i, c;
mp_limb_t n;
@@ -2468,26 +1998,26 @@ speed_routine_count_zeros_setup (struct speed_params *s,
if (s->r == 0)
{
/* Make uniformly distributed data. If zero isn't allowed then change
- it to 1 for leading, or 0x800..00 for trailing. */
+ it to 1 for leading, or 0x800..00 for trailing. */
MPN_COPY (xp, s->xp_block, SPEED_BLOCK_SIZE);
if (! zero)
- for (i = 0; i < SPEED_BLOCK_SIZE; i++)
- if (xp[i] == 0)
- xp[i] = leading ? 1 : GMP_LIMB_HIGHBIT;
+ for (i = 0; i < SPEED_BLOCK_SIZE; i++)
+ if (xp[i] == 0)
+ xp[i] = leading ? 1 : GMP_LIMB_HIGHBIT;
}
else if (s->r == 1)
{
/* Make counts uniformly distributed. A randomly chosen bit is set, and
- for leading the rest above it are cleared, or for trailing then the
- rest below. */
+ for leading the rest above it are cleared, or for trailing then the
+ rest below. */
for (i = 0; i < SPEED_BLOCK_SIZE; i++)
- {
- mp_limb_t set = CNST_LIMB(1) << (s->yp_block[i] % GMP_LIMB_BITS);
- mp_limb_t keep_below = set-1;
- mp_limb_t keep_above = MP_LIMB_T_MAX ^ keep_below;
- mp_limb_t keep = (leading ? keep_below : keep_above);
- xp[i] = (s->xp_block[i] & keep) | set;
- }
+ {
+ mp_limb_t set = CNST_LIMB(1) << (s->yp_block[i] % BITS_PER_MP_LIMB);
+ mp_limb_t keep_below = set-1;
+ mp_limb_t keep_above = MP_LIMB_T_MAX ^ keep_below;
+ mp_limb_t keep = (leading ? keep_below : keep_above);
+ xp[i] = (s->xp_block[i] & keep) | set;
+ }
}
else
{
@@ -2502,9 +2032,9 @@ speed_routine_count_zeros_setup (struct speed_params *s,
xp[i] ^= c;
if (leading)
- count_leading_zeros (c, n);
+ count_leading_zeros (c, n);
else
- count_trailing_zeros (c, n);
+ count_trailing_zeros (c, n);
}
return 1;
@@ -2620,7 +2150,7 @@ speed_gmp_randseed_ui (struct speed_params *s)
gmp_randseed_ui (rstate, (unsigned long) s->xp_block[j]);
j++;
if (j >= SPEED_BLOCK_SIZE)
- j = 0;
+ j = 0;
}
while (--i != 0);
t = speed_endtime ();
diff --git a/gmp/tune/div_qr_1_tune.c b/gmp/tune/div_qr_1_tune.c
deleted file mode 100644
index 7e928dcce9..0000000000
--- a/gmp/tune/div_qr_1_tune.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/* mpn/generic/div_qr_1, using tuned threshold and method.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define TUNE_PROGRAM_BUILD 1
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-mp_limb_t mpn_div_qr_1n_pi1_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
-mp_limb_t mpn_div_qr_1n_pi1_2 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
-
-#if !HAVE_NATIVE_mpn_div_qr_1n_pi1
-#define __gmpn_div_qr_1n_pi1 \
- (div_qr_1n_pi1_method == 1 ? mpn_div_qr_1n_pi1_1 : mpn_div_qr_1n_pi1_2)
-#endif
-
-#undef mpn_div_qr_1
-#define mpn_div_qr_1 mpn_div_qr_1_tune
-
-#include "mpn/generic/div_qr_1.c"
diff --git a/gmp/tune/div_qr_1n_pi1_1.c b/gmp/tune/div_qr_1n_pi1_1.c
deleted file mode 100644
index 6dd8ceb438..0000000000
--- a/gmp/tune/div_qr_1n_pi1_1.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/* mpn/generic/div_qr_1n_pi1.c method 1.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef DIV_QR_1N_METHOD
-#define DIV_QR_1N_METHOD 1
-#undef mpn_div_qr_1n_pi1
-#define mpn_div_qr_1n_pi1 mpn_div_qr_1n_pi1_1
-
-#include "mpn/generic/div_qr_1n_pi1.c"
diff --git a/gmp/tune/div_qr_1n_pi1_2.c b/gmp/tune/div_qr_1n_pi1_2.c
deleted file mode 100644
index acc80d4695..0000000000
--- a/gmp/tune/div_qr_1n_pi1_2.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/* mpn/generic/div_qr_1n_pi1.c method 2.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef DIV_QR_1N_METHOD
-#define DIV_QR_1N_METHOD 2
-#undef mpn_div_qr_1n_pi1
-#define mpn_div_qr_1n_pi1 mpn_div_qr_1n_pi1_2
-
-#include "mpn/generic/div_qr_1n_pi1.c"
diff --git a/gmp/tune/divrem1div.c b/gmp/tune/divrem1div.c
index b680f9d222..5580f80578 100644
--- a/gmp/tune/divrem1div.c
+++ b/gmp/tune/divrem1div.c
@@ -5,28 +5,17 @@ Copyright 2000, 2003 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define OPERATION_divrem_1
diff --git a/gmp/tune/divrem1inv.c b/gmp/tune/divrem1inv.c
index 598c03c739..73ed57f411 100644
--- a/gmp/tune/divrem1inv.c
+++ b/gmp/tune/divrem1inv.c
@@ -5,28 +5,17 @@ Copyright 2000, 2003 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define OPERATION_divrem_1
diff --git a/gmp/tune/divrem2div.c b/gmp/tune/divrem2div.c
index cd7f3f5a88..10b50e2f83 100644
--- a/gmp/tune/divrem2div.c
+++ b/gmp/tune/divrem2div.c
@@ -6,28 +6,17 @@ Copyright 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/tune/divrem2inv.c b/gmp/tune/divrem2inv.c
index bd7c4268f7..05644b2560 100644
--- a/gmp/tune/divrem2inv.c
+++ b/gmp/tune/divrem2inv.c
@@ -6,28 +6,17 @@ Copyright 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/tune/freq.c b/gmp/tune/freq.c
index 210f42564e..f1092e2640 100644
--- a/gmp/tune/freq.c
+++ b/gmp/tune/freq.c
@@ -1,32 +1,21 @@
/* CPU frequency determination.
-Copyright 1999-2004 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/* Currently we don't get a CPU frequency on the following systems,
@@ -470,7 +459,7 @@ freq_sunos_sysinfo (int help)
/* "/etc/hw -r cpu" for SCO OpenUnix 8, printing a line like
- The speed of the CPU is approximately 450MHz
+ The speed of the CPU is approximately 450Mhz
*/
static int
freq_sco_etchw (int help)
@@ -491,7 +480,7 @@ freq_sco_etchw (int help)
while (fgets (buf, sizeof (buf), fp) != NULL)
{
end = 0;
- if (sscanf (buf, " The speed of the CPU is approximately %lfMHz%n",
+ if (sscanf (buf, " The speed of the CPU is approximately %lfMhz%n",
&val, &end) == 1 && end != 0)
{
speed_cycletime = 1e-6 / val;
diff --git a/gmp/tune/gcdext_double.c b/gmp/tune/gcdext_double.c
index c72f07ea9f..5470f1aff5 100644
--- a/gmp/tune/gcdext_double.c
+++ b/gmp/tune/gcdext_double.c
@@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/tune/gcdext_single.c b/gmp/tune/gcdext_single.c
index 292e9e87e0..1bc47e786e 100644
--- a/gmp/tune/gcdext_single.c
+++ b/gmp/tune/gcdext_single.c
@@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/tune/gcdextod.c b/gmp/tune/gcdextod.c
index c08087d480..957864c5e9 100644
--- a/gmp/tune/gcdextod.c
+++ b/gmp/tune/gcdextod.c
@@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/tune/gcdextos.c b/gmp/tune/gcdextos.c
index fb8af29279..afde776f7d 100644
--- a/gmp/tune/gcdextos.c
+++ b/gmp/tune/gcdextos.c
@@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/tune/hgcd_appr_lehmer.c b/gmp/tune/hgcd_appr_lehmer.c
deleted file mode 100644
index 790e61e3cb..0000000000
--- a/gmp/tune/hgcd_appr_lehmer.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/* mpn/generic/hgcd_appr.c forced to use Lehmer's quadratic algorithm. */
-
-/*
-Copyright 2010, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef HGCD_APPR_THRESHOLD
-#define HGCD_APPR_THRESHOLD MP_SIZE_T_MAX
-#define __gmpn_hgcd_appr mpn_hgcd_appr_lehmer
-#define __gmpn_hgcd_appr_itch mpn_hgcd_appr_lehmer_itch
-
-#include "../mpn/generic/hgcd_appr.c"
diff --git a/gmp/tune/hgcd_lehmer.c b/gmp/tune/hgcd_lehmer.c
deleted file mode 100644
index 11d0ef8821..0000000000
--- a/gmp/tune/hgcd_lehmer.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/* mpn/generic/hgcd.c forced to use Lehmer's quadratic algorithm. */
-
-/*
-Copyright 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef HGCD_THRESHOLD
-#define HGCD_THRESHOLD MP_SIZE_T_MAX
-#define __gmpn_hgcd mpn_hgcd_lehmer
-#define __gmpn_hgcd_itch mpn_hgcd_lehmer_itch
-
-#include "../mpn/generic/hgcd.c"
diff --git a/gmp/tune/hgcd_reduce_1.c b/gmp/tune/hgcd_reduce_1.c
deleted file mode 100644
index 383c2d7009..0000000000
--- a/gmp/tune/hgcd_reduce_1.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* mpn/generic/hgcd_reduce.c forced to use hgcd. */
-
-/*
-Copyright 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef HGCD_REDUCE_THRESHOLD
-#define HGCD_REDUCE_THRESHOLD MP_SIZE_T_MAX
-#define __gmpn_hgcd_reduce mpn_hgcd_reduce_1
-#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_1_itch
-
-
-#include "../mpn/generic/hgcd_reduce.c"
diff --git a/gmp/tune/hgcd_reduce_2.c b/gmp/tune/hgcd_reduce_2.c
deleted file mode 100644
index ac18b6033a..0000000000
--- a/gmp/tune/hgcd_reduce_2.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/* mpn/generic/hgcd_reduce.c forced to use hgcd_appr. */
-
-/*
-Copyright 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef HGCD_REDUCE_THRESHOLD
-#define HGCD_REDUCE_THRESHOLD 0
-#define __gmpn_hgcd_reduce mpn_hgcd_reduce_2
-#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_2_itch
-
-#include "../mpn/generic/hgcd_reduce.c"
diff --git a/gmp/tune/hppa.asm b/gmp/tune/hppa.asm
index fc9d62e3b2..e99a399e4a 100644
--- a/gmp/tune/hppa.asm
+++ b/gmp/tune/hppa.asm
@@ -1,32 +1,21 @@
dnl HPPA 32-bit time stamp counter access routine.
dnl Copyright 2000, 2002, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/tune/hppa2.asm b/gmp/tune/hppa2.asm
index 57ef4c4683..9755c907d2 100644
--- a/gmp/tune/hppa2.asm
+++ b/gmp/tune/hppa2.asm
@@ -1,32 +1,21 @@
dnl HPPA 64-bit time stamp counter access routine.
dnl Copyright 2000, 2002, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/tune/hppa2w.asm b/gmp/tune/hppa2w.asm
index 215a0cc5c2..ddf0ea9b0a 100644
--- a/gmp/tune/hppa2w.asm
+++ b/gmp/tune/hppa2w.asm
@@ -1,32 +1,21 @@
dnl HPPA 64-bit time stamp counter access routine.
dnl Copyright 2000, 2002, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/tune/ia64.asm b/gmp/tune/ia64.asm
index 0651111031..ef487db8f6 100644
--- a/gmp/tune/ia64.asm
+++ b/gmp/tune/ia64.asm
@@ -1,32 +1,21 @@
dnl IA-64 time stamp counter access routine.
dnl Copyright 2000, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/tune/jacbase1.c b/gmp/tune/jacbase1.c
index a73df8b723..2a0b859c2c 100644
--- a/gmp/tune/jacbase1.c
+++ b/gmp/tune/jacbase1.c
@@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/tune/jacbase2.c b/gmp/tune/jacbase2.c
index b99ebe9061..6bbe7e9e93 100644
--- a/gmp/tune/jacbase2.c
+++ b/gmp/tune/jacbase2.c
@@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/tune/jacbase3.c b/gmp/tune/jacbase3.c
index 408b0fed6b..f8f89d49be 100644
--- a/gmp/tune/jacbase3.c
+++ b/gmp/tune/jacbase3.c
@@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/tune/jacbase4.c b/gmp/tune/jacbase4.c
deleted file mode 100644
index 70d535240b..0000000000
--- a/gmp/tune/jacbase4.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/* mpn/generic/jacbase.c method 4.
-
-Copyright 2002, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef JACOBI_BASE_METHOD
-#define JACOBI_BASE_METHOD 4
-#define __gmpn_jacobi_base mpn_jacobi_base_4
-
-#include "mpn/generic/jacbase.c"
diff --git a/gmp/tune/many.pl b/gmp/tune/many.pl
index 524a67dd1e..11b5cf4521 100644..100755
--- a/gmp/tune/many.pl
+++ b/gmp/tune/many.pl
@@ -1,32 +1,21 @@
#! /usr/bin/perl -w
-# Copyright 2000-2002 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
#
-# * the GNU Lesser General Public License as published by the Free
-# Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
#
-# or
-#
-# * the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any
-# later version.
-#
-# or both in parallel, as here.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# for more details.
-#
-# You should have received copies of the GNU General Public License and the
-# GNU Lesser General Public License along with the GNU MP Library. If not,
-# see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
# Usage: cd $builddir/tune
@@ -380,14 +369,23 @@ my @table =
},
{
- 'regexp'=> 'add_n_sub_n',
+ 'regexp'=> 'addsub_n',
'ret' => 'mp_limb_t',
'args' => 'mp_ptr sum, mp_ptr diff, mp_srcptr xp, mp_srcptr yp, mp_size_t size',
'speed_flags'=> 'FLAG_R_OPTIONAL',
},
{
- 'regexp'=> 'com|copyi|copyd',
+ 'regexp'=> 'bdivmod',
+ 'ret' => 'mp_limb_t',
+ 'args' => 'mp_ptr qp, mp_ptr up, mp_size_t usize, mp_srcptr vp, mp_size_t vsize, unsigned long int d',
+ 'carrys'=> [''],
+ 'try' => 'none',
+ 'speed' => 'none',
+ },
+
+ {
+ 'regexp'=> 'com_n|copyi|copyd',
'ret' => 'void',
'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size',
'speed' => 'SPEED_ROUTINE_MPN_COPY',
diff --git a/gmp/tune/mod_1_1-1.c b/gmp/tune/mod_1_1-1.c
deleted file mode 100644
index 7eb7fcdf79..0000000000
--- a/gmp/tune/mod_1_1-1.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* mpn/generic/mod_1_1.c method 1.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef MOD_1_1P_METHOD
-#define MOD_1_1P_METHOD 1
-#undef mpn_mod_1_1p
-#undef mpn_mod_1_1p_cps
-#define mpn_mod_1_1p mpn_mod_1_1p_1
-#define mpn_mod_1_1p_cps mpn_mod_1_1p_cps_1
-
-#include "mpn/generic/mod_1_1.c"
diff --git a/gmp/tune/mod_1_1-2.c b/gmp/tune/mod_1_1-2.c
deleted file mode 100644
index 52ca57749b..0000000000
--- a/gmp/tune/mod_1_1-2.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* mpn/generic/mod_1_1.c method 2.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef MOD_1_1P_METHOD
-#define MOD_1_1P_METHOD 2
-#undef mpn_mod_1_1p
-#undef mpn_mod_1_1p_cps
-#define mpn_mod_1_1p mpn_mod_1_1p_2
-#define mpn_mod_1_1p_cps mpn_mod_1_1p_cps_2
-
-#include "mpn/generic/mod_1_1.c"
diff --git a/gmp/tune/mod_1_div.c b/gmp/tune/mod_1_div.c
index a0663be055..1c01e8c692 100644
--- a/gmp/tune/mod_1_div.c
+++ b/gmp/tune/mod_1_div.c
@@ -5,28 +5,17 @@ Copyright 2000, 2003 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define OPERATION_mod_1
@@ -35,12 +24,8 @@ see https://www.gnu.org/licenses/. */
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
-#undef MOD_1N_TO_MOD_1_1_THRESHOLD
-#undef MOD_1U_TO_MOD_1_1_THRESHOLD
#define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX
#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
#define __gmpn_mod_1 mpn_mod_1_div
#include "mpn/generic/mod_1.c"
diff --git a/gmp/tune/mod_1_inv.c b/gmp/tune/mod_1_inv.c
index 92c936ddcf..448d093f31 100644
--- a/gmp/tune/mod_1_inv.c
+++ b/gmp/tune/mod_1_inv.c
@@ -5,28 +5,17 @@ Copyright 2000 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define OPERATION_mod_1
@@ -35,12 +24,8 @@ see https://www.gnu.org/licenses/. */
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
-#undef MOD_1N_TO_MOD_1_1_THRESHOLD
-#undef MOD_1U_TO_MOD_1_1_THRESHOLD
#define MOD_1_NORM_THRESHOLD 0
#define MOD_1_UNNORM_THRESHOLD 0
-#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
#define __gmpn_mod_1 mpn_mod_1_inv
#include "mpn/generic/mod_1.c"
diff --git a/gmp/tune/modlinv.c b/gmp/tune/modlinv.c
index e3f2063e07..3ed0c8a03f 100644
--- a/gmp/tune/modlinv.c
+++ b/gmp/tune/modlinv.c
@@ -6,28 +6,17 @@ Copyright 2000, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include "gmp.h"
@@ -41,7 +30,7 @@ see https://www.gnu.org/licenses/. */
dependent chain, whereas the "2*" in the standard version isn't.
Depending on the CPU this should be the same or a touch slower. */
-#if GMP_LIMB_BITS <= 32
+#if BITS_PER_MP_LIMB <= 32
#define binvert_limb_mul1(inv,n) \
do { \
mp_limb_t __n = (n); \
@@ -55,7 +44,7 @@ see https://www.gnu.org/licenses/. */
} while (0)
#endif
-#if GMP_LIMB_BITS > 32 && GMP_LIMB_BITS <= 64
+#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
#define binvert_limb_mul1(inv,n) \
do { \
mp_limb_t __n = (n); \
@@ -111,7 +100,7 @@ see https://www.gnu.org/licenses/. */
\
ASSERT ((__n & 1) == 1); \
\
- __count = GMP_LIMB_BITS-1; \
+ __count = BITS_PER_MP_LIMB-1; \
do \
{ \
__inv >>= 1; \
@@ -142,11 +131,11 @@ see https://www.gnu.org/licenses/. */
\
ASSERT ((__n & 1) == 1); \
\
- __count = GMP_LIMB_BITS-1; \
+ __count = BITS_PER_MP_LIMB-1; \
do \
{ \
__lowbit = __rem & 1; \
- __inv = (__inv >> 1) | (__lowbit << (GMP_LIMB_BITS-1)); \
+ __inv = (__inv >> 1) | (__lowbit << (BITS_PER_MP_LIMB-1)); \
__rem = (__rem - (__n & -__lowbit)) >> 1; \
} \
while (-- __count); \
diff --git a/gmp/tune/noop.c b/gmp/tune/noop.c
index 5c13c96ee3..7c7f1b5fe6 100644
--- a/gmp/tune/noop.c
+++ b/gmp/tune/noop.c
@@ -9,28 +9,17 @@ Copyright 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/tune/pentium.asm b/gmp/tune/pentium.asm
index fb1e8332c8..369a8ea76f 100644
--- a/gmp/tune/pentium.asm
+++ b/gmp/tune/pentium.asm
@@ -1,32 +1,21 @@
dnl x86 pentium time stamp counter access routine.
dnl Copyright 1999, 2000, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/tune/powerpc.asm b/gmp/tune/powerpc.asm
index 2f4ac27bea..19ab901386 100644
--- a/gmp/tune/powerpc.asm
+++ b/gmp/tune/powerpc.asm
@@ -1,32 +1,21 @@
dnl PowerPC mftb_function -- read time base registers.
dnl Copyright 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -43,7 +32,7 @@ L(again):
mftbu r4
mftb r5
mftbu r6
- cmpw cr0, r4, r6
+ cmp cr0, r4, r6
bne L(again)
stw r5, 0(r3)
diff --git a/gmp/tune/powerpc64.asm b/gmp/tune/powerpc64.asm
index 1ade99638a..eb705466da 100644
--- a/gmp/tune/powerpc64.asm
+++ b/gmp/tune/powerpc64.asm
@@ -1,32 +1,21 @@
dnl PowerPC mftb_function -- read time base registers, 64-bit integer.
-dnl Copyright 2002-2004 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/tune/powm_mod.c b/gmp/tune/powm_mod.c
index 7c20f53e70..e65f512e85 100644
--- a/gmp/tune/powm_mod.c
+++ b/gmp/tune/powm_mod.c
@@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/tune/powm_redc.c b/gmp/tune/powm_redc.c
index c34bb2e0a5..a9e4bb502a 100644
--- a/gmp/tune/powm_redc.c
+++ b/gmp/tune/powm_redc.c
@@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/tune/pre_divrem_1.c b/gmp/tune/pre_divrem_1.c
index 388ca4150a..2b3fb79e22 100644
--- a/gmp/tune/pre_divrem_1.c
+++ b/gmp/tune/pre_divrem_1.c
@@ -5,28 +5,17 @@ Copyright 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/tune/sb_div.c b/gmp/tune/sb_div.c
new file mode 100644
index 0000000000..4d52a24ce3
--- /dev/null
+++ b/gmp/tune/sb_div.c
@@ -0,0 +1,30 @@
+/* mpn/generic/sb_divrem_mn.c forced to use plain udiv_qrnnd. */
+
+/*
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifdef DIV_SB_PREINV_THRESHOLD
+#undef DIV_SB_PREINV_THRESHOLD
+#endif
+#define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX
+#define __gmpn_sb_divrem_mn mpn_sb_divrem_mn_div
+
+#include "mpn/generic/sb_divrem_mn.c"
diff --git a/gmp/tune/sb_inv.c b/gmp/tune/sb_inv.c
new file mode 100644
index 0000000000..f8d798b43b
--- /dev/null
+++ b/gmp/tune/sb_inv.c
@@ -0,0 +1,30 @@
+/* mpn/generic/sb_divrem_mn.c forced to use udiv_qrnnd_preinv. */
+
+/*
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifdef DIV_SB_PREINV_THRESHOLD
+#undef DIV_SB_PREINV_THRESHOLD
+#endif
+#define DIV_SB_PREINV_THRESHOLD 0
+#define __gmpn_sb_divrem_mn mpn_sb_divrem_mn_inv
+
+#include "mpn/generic/sb_divrem_mn.c"
diff --git a/gmp/tune/set_strb.c b/gmp/tune/set_strb.c
index 842ec4cd44..c67b09ccaa 100644
--- a/gmp/tune/set_strb.c
+++ b/gmp/tune/set_strb.c
@@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define __gmpn_set_str mpn_set_str_basecase
#define __gmpn_bc_set_str mpn_bc_set_str_basecase
diff --git a/gmp/tune/set_strp.c b/gmp/tune/set_strp.c
index 5520f28696..701ab2bf2b 100644
--- a/gmp/tune/set_strp.c
+++ b/gmp/tune/set_strp.c
@@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define TUNE_PROGRAM_BUILD 1 /* for gmp-impl.h */
diff --git a/gmp/tune/set_strs.c b/gmp/tune/set_strs.c
index 75b6f39b4d..d8edc7dfde 100644
--- a/gmp/tune/set_strs.c
+++ b/gmp/tune/set_strs.c
@@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define __gmpn_set_str mpn_set_str_subquad
#define __gmpn_bc_set_str mpn_bc_set_str_subquad
diff --git a/gmp/tune/sparcv9.asm b/gmp/tune/sparcv9.asm
index f0981c70fe..b951ff3de2 100644
--- a/gmp/tune/sparcv9.asm
+++ b/gmp/tune/sparcv9.asm
@@ -1,32 +1,21 @@
dnl Sparc v9 32-bit time stamp counter access routine.
dnl Copyright 2000, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/tune/speed-ext.c b/gmp/tune/speed-ext.c
index e7fb8b9f60..04760a5eab 100644
--- a/gmp/tune/speed-ext.c
+++ b/gmp/tune/speed-ext.c
@@ -5,28 +5,17 @@ Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/* The extension here is three versions of an mpn arithmetic mean. These
@@ -68,9 +57,9 @@ see https://www.gnu.org/licenses/. */
#define SPEED_EXTRA_PROTOS \
- double speed_mean_calls (struct speed_params *s); \
- double speed_mean_open (struct speed_params *s); \
- double speed_mean_open2 (struct speed_params *s);
+ double speed_mean_calls __GMP_PROTO ((struct speed_params *s)); \
+ double speed_mean_open __GMP_PROTO ((struct speed_params *s)); \
+ double speed_mean_open2 __GMP_PROTO ((struct speed_params *s));
#define SPEED_EXTRA_ROUTINES \
{ "mean_calls", speed_mean_calls }, \
@@ -93,8 +82,8 @@ mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
ASSERT (size >= 1);
c = mpn_add_n (wp, xp, yp, size);
- ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1);
- wp[size-1] |= (c << (GMP_LIMB_BITS-1));
+ ret = mpn_rshift (wp, wp, size, 1) >> (BITS_PER_MP_LIMB-1);
+ wp[size-1] |= (c << (BITS_PER_MP_LIMB-1));
return ret;
}
@@ -118,7 +107,7 @@ mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
c = (wprev < x);
ret = (wprev & 1);
-#define RSHIFT(hi,lo) (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1)))
+#define RSHIFT(hi,lo) (((lo) >> 1) | ((hi) << (BITS_PER_MP_LIMB-1)))
for (i = 1; i < size; i++)
{
diff --git a/gmp/tune/speed.c b/gmp/tune/speed.c
index 12d53bcaa3..2bb7f7d933 100644
--- a/gmp/tune/speed.c
+++ b/gmp/tune/speed.c
@@ -1,32 +1,21 @@
/* Speed measuring program.
-Copyright 1999-2003, 2005, 2006, 2008-2012 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/* Usage message is in the code below, run with no arguments to print it.
See README for interesting applications.
@@ -100,10 +89,10 @@ SPEED_EXTRA_PROTOS2
} while (0)
-#if GMP_LIMB_BITS == 32
+#if BITS_PER_MP_LIMB == 32
#define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK)
#endif
-#if GMP_LIMB_BITS == 64
+#if BITS_PER_MP_LIMB == 64
#define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK)
#endif
@@ -163,15 +152,8 @@ const struct routine_t {
{ "mpn_add_n", speed_mpn_add_n, FLAG_R_OPTIONAL },
{ "mpn_sub_n", speed_mpn_sub_n, FLAG_R_OPTIONAL },
- { "mpn_add_err1_n", speed_mpn_add_err1_n },
- { "mpn_add_err2_n", speed_mpn_add_err2_n },
- { "mpn_add_err3_n", speed_mpn_add_err3_n },
- { "mpn_sub_err1_n", speed_mpn_sub_err1_n },
- { "mpn_sub_err2_n", speed_mpn_sub_err2_n },
- { "mpn_sub_err3_n", speed_mpn_sub_err3_n },
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- { "mpn_add_n_sub_n", speed_mpn_add_n_sub_n, FLAG_R_OPTIONAL },
+#if HAVE_NATIVE_mpn_addsub_n
+ { "mpn_addsub_n", speed_mpn_addsub_n, FLAG_R_OPTIONAL },
#endif
{ "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R },
@@ -208,12 +190,6 @@ const struct routine_t {
#if HAVE_NATIVE_mpn_mul_4
{ "mpn_mul_4", speed_mpn_mul_4, FLAG_R_OPTIONAL },
#endif
-#if HAVE_NATIVE_mpn_mul_5
- { "mpn_mul_5", speed_mpn_mul_5, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_mul_6
- { "mpn_mul_6", speed_mpn_mul_6, FLAG_R_OPTIONAL },
-#endif
{ "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R },
{ "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R },
@@ -229,13 +205,6 @@ const struct routine_t {
{ "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
{ "mpn_preinv_mod_1", speed_mpn_preinv_mod_1, FLAG_R },
- { "mpn_mod_1_1", speed_mpn_mod_1_1, FLAG_R },
- { "mpn_mod_1_1_1", speed_mpn_mod_1_1_1, FLAG_R },
- { "mpn_mod_1_1_2", speed_mpn_mod_1_1_2, FLAG_R },
- { "mpn_mod_1s_2", speed_mpn_mod_1_2, FLAG_R },
- { "mpn_mod_1s_3", speed_mpn_mod_1_3, FLAG_R },
- { "mpn_mod_1s_4", speed_mpn_mod_1_4, FLAG_R },
-
{ "mpn_divrem_1_div", speed_mpn_divrem_1_div, FLAG_R },
{ "mpn_divrem_1_inv", speed_mpn_divrem_1_inv, FLAG_R },
{ "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
@@ -247,19 +216,9 @@ const struct routine_t {
{ "mpn_divrem_2_div", speed_mpn_divrem_2_div, },
{ "mpn_divrem_2_inv", speed_mpn_divrem_2_inv, },
- { "mpn_div_qr_1n_pi1", speed_mpn_div_qr_1n_pi1, FLAG_R },
- { "mpn_div_qr_1n_pi1_1",speed_mpn_div_qr_1n_pi1_1, FLAG_R },
- { "mpn_div_qr_1n_pi1_2",speed_mpn_div_qr_1n_pi1_2, FLAG_R },
- { "mpn_div_qr_1", speed_mpn_div_qr_1, FLAG_R },
-
- { "mpn_div_qr_2n", speed_mpn_div_qr_2n, },
- { "mpn_div_qr_2u", speed_mpn_div_qr_2u, },
-
{ "mpn_divexact_1", speed_mpn_divexact_1, FLAG_R },
{ "mpn_divexact_by3", speed_mpn_divexact_by3 },
- { "mpn_bdiv_q_1", speed_mpn_bdiv_q_1, FLAG_R },
- { "mpn_pi1_bdiv_q_1", speed_mpn_pi1_bdiv_q_1, FLAG_R_OPTIONAL },
{ "mpn_bdiv_dbm1c", speed_mpn_bdiv_dbm1c, FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_modexact_1_odd
@@ -271,8 +230,17 @@ const struct routine_t {
{ "mpn_mod_34lsub1", speed_mpn_mod_34lsub1 },
#endif
+ { "mpn_dc_tdiv_qr", speed_mpn_dc_tdiv_qr },
+ { "mpn_dc_divrem_n", speed_mpn_dc_divrem_n },
+ { "mpn_dc_divrem_sb", speed_mpn_dc_divrem_sb },
+ { "mpn_dc_divrem_sb_div", speed_mpn_dc_divrem_sb_div },
+ { "mpn_dc_divrem_sb_inv", speed_mpn_dc_divrem_sb_inv },
+
+ { "mpn_sb_divrem_m3", speed_mpn_sb_divrem_m3 },
+ { "mpn_sb_divrem_m3_div", speed_mpn_sb_divrem_m3_div },
+ { "mpn_sb_divrem_m3_inv", speed_mpn_sb_divrem_m3_inv },
+
{ "mpn_lshift", speed_mpn_lshift, FLAG_R },
- { "mpn_lshiftc", speed_mpn_lshiftc, FLAG_R },
{ "mpn_rshift", speed_mpn_rshift, FLAG_R },
{ "mpn_and_n", speed_mpn_and_n, FLAG_R_OPTIONAL },
@@ -283,7 +251,7 @@ const struct routine_t {
{ "mpn_nior_n", speed_mpn_nior_n, FLAG_R_OPTIONAL },
{ "mpn_xor_n", speed_mpn_xor_n, FLAG_R_OPTIONAL },
{ "mpn_xnor_n", speed_mpn_xnor_n, FLAG_R_OPTIONAL },
- { "mpn_com", speed_mpn_com },
+ { "mpn_com_n", speed_mpn_com_n },
{ "mpn_popcount", speed_mpn_popcount },
{ "mpn_hamdist", speed_mpn_hamdist },
@@ -292,17 +260,16 @@ const struct routine_t {
{ "mpn_hgcd", speed_mpn_hgcd },
{ "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer },
- { "mpn_hgcd_appr", speed_mpn_hgcd_appr },
- { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },
-
- { "mpn_hgcd_reduce", speed_mpn_hgcd_reduce },
- { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1 },
- { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2 },
{ "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL },
{ "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
{ "mpn_gcd", speed_mpn_gcd },
+#if 0
+ { "mpn_gcd_binary", speed_mpn_gcd_binary },
+ { "mpn_gcd_accel", speed_mpn_gcd_accel },
+ { "find_a", speed_find_a, FLAG_NODATA },
+#endif
{ "mpn_gcdext", speed_mpn_gcdext },
{ "mpn_gcdext_single", speed_mpn_gcdext_single },
@@ -317,7 +284,6 @@ const struct routine_t {
{ "mpn_jacobi_base_1", speed_mpn_jacobi_base_1 },
{ "mpn_jacobi_base_2", speed_mpn_jacobi_base_2 },
{ "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 },
- { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 },
{ "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL },
{ "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
@@ -325,74 +291,27 @@ const struct routine_t {
#if HAVE_NATIVE_mpn_sqr_diagonal
{ "mpn_sqr_diagonal", speed_mpn_sqr_diagonal },
#endif
-#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
- { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 },
-#endif
{ "mpn_mul_n", speed_mpn_mul_n },
- { "mpn_sqr", speed_mpn_sqr },
-
- { "mpn_toom2_sqr", speed_mpn_toom2_sqr },
- { "mpn_toom3_sqr", speed_mpn_toom3_sqr },
- { "mpn_toom4_sqr", speed_mpn_toom4_sqr },
- { "mpn_toom6_sqr", speed_mpn_toom6_sqr },
- { "mpn_toom8_sqr", speed_mpn_toom8_sqr },
- { "mpn_toom22_mul", speed_mpn_toom22_mul },
- { "mpn_toom33_mul", speed_mpn_toom33_mul },
- { "mpn_toom44_mul", speed_mpn_toom44_mul },
- { "mpn_toom6h_mul", speed_mpn_toom6h_mul },
- { "mpn_toom8h_mul", speed_mpn_toom8h_mul },
- { "mpn_toom32_mul", speed_mpn_toom32_mul },
- { "mpn_toom42_mul", speed_mpn_toom42_mul },
- { "mpn_toom43_mul", speed_mpn_toom43_mul },
- { "mpn_toom63_mul", speed_mpn_toom63_mul },
- { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul },
- { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
-#if WANT_OLD_FFT_FULL
+ { "mpn_sqr", speed_mpn_sqr_n },
+
+ { "mpn_kara_mul_n", speed_mpn_kara_mul_n },
+ { "mpn_kara_sqr_n", speed_mpn_kara_sqr_n },
+ { "mpn_toom3_mul_n", speed_mpn_toom3_mul_n },
+ { "mpn_toom3_sqr_n", speed_mpn_toom3_sqr_n },
{ "mpn_mul_fft_full", speed_mpn_mul_fft_full },
{ "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr },
-#endif
+
{ "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL },
{ "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
- { "mpn_mullo_n", speed_mpn_mullo_n },
- { "mpn_mullo_basecase", speed_mpn_mullo_basecase },
-
- { "mpn_mulmid_basecase", speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },
- { "mpn_toom42_mulmid", speed_mpn_toom42_mulmid },
- { "mpn_mulmid_n", speed_mpn_mulmid_n },
- { "mpn_mulmid", speed_mpn_mulmid, FLAG_R_OPTIONAL },
-
- { "mpn_bc_mulmod_bnm1", speed_mpn_bc_mulmod_bnm1 },
- { "mpn_mulmod_bnm1", speed_mpn_mulmod_bnm1 },
- { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
- { "mpn_sqrmod_bnm1", speed_mpn_sqrmod_bnm1 },
-
- { "mpn_invert", speed_mpn_invert },
- { "mpn_invertappr", speed_mpn_invertappr },
- { "mpn_ni_invertappr", speed_mpn_ni_invertappr },
- { "mpn_binvert", speed_mpn_binvert },
- { "mpn_sec_invert", speed_mpn_sec_invert },
-
- { "mpn_sbpi1_div_qr", speed_mpn_sbpi1_div_qr, FLAG_R_OPTIONAL},
- { "mpn_dcpi1_div_qr", speed_mpn_dcpi1_div_qr, FLAG_R_OPTIONAL},
- { "mpn_mu_div_qr", speed_mpn_mu_div_qr, FLAG_R_OPTIONAL},
- { "mpn_mupi_div_qr", speed_mpn_mupi_div_qr, FLAG_R_OPTIONAL},
- { "mpn_sbpi1_divappr_q", speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL},
- { "mpn_dcpi1_divappr_q", speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL},
-
- { "mpn_sbpi1_bdiv_qr", speed_mpn_sbpi1_bdiv_qr },
- { "mpn_dcpi1_bdiv_qr", speed_mpn_dcpi1_bdiv_qr },
- { "mpn_sbpi1_bdiv_q", speed_mpn_sbpi1_bdiv_q },
- { "mpn_dcpi1_bdiv_q", speed_mpn_dcpi1_bdiv_q },
-
- { "mpn_broot", speed_mpn_broot, FLAG_R },
- { "mpn_broot_invm1", speed_mpn_broot_invm1, FLAG_R },
- { "mpn_brootinv", speed_mpn_brootinv, FLAG_R },
-
- { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL },
- { "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL },
- { "mpn_set_str_basecase", speed_mpn_bc_set_str, FLAG_R_OPTIONAL },
+ { "mpn_mullow_n", speed_mpn_mullow_n },
+ { "mpn_mullow_basecase", speed_mpn_mullow_basecase},
+
+ { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL },
+
+ { "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL },
+ { "mpn_set_str_basecase",speed_mpn_bc_set_str, FLAG_R_OPTIONAL },
{ "mpn_sqrtrem", speed_mpn_sqrtrem },
{ "mpn_rootrem", speed_mpn_rootrem, FLAG_R },
@@ -405,18 +324,14 @@ const struct routine_t {
{ "mpz_add", speed_mpz_add },
{ "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
- { "mpz_bin_ui", speed_mpz_bin_ui, FLAG_NODATA | FLAG_R_OPTIONAL },
{ "mpz_fac_ui", speed_mpz_fac_ui, FLAG_NODATA },
{ "mpz_powm", speed_mpz_powm },
{ "mpz_powm_mod", speed_mpz_powm_mod },
{ "mpz_powm_redc", speed_mpz_powm_redc },
- { "mpz_powm_sec", speed_mpz_powm_sec },
{ "mpz_powm_ui", speed_mpz_powm_ui, FLAG_R_OPTIONAL },
{ "mpz_mod", speed_mpz_mod },
{ "mpn_redc_1", speed_mpn_redc_1 },
- { "mpn_redc_2", speed_mpn_redc_2 },
- { "mpn_redc_n", speed_mpn_redc_n },
{ "MPN_COPY", speed_MPN_COPY },
{ "MPN_COPY_INCR", speed_MPN_COPY_INCR },
@@ -428,71 +343,19 @@ const struct routine_t {
#if HAVE_NATIVE_mpn_copyd
{ "mpn_copyd", speed_mpn_copyd },
#endif
- { "mpn_sec_tabselect", speed_mpn_sec_tabselect, FLAG_R_OPTIONAL },
-#if HAVE_NATIVE_mpn_addlsh1_n == 1
- { "mpn_addlsh1_n", speed_mpn_addlsh1_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_sublsh1_n == 1
- { "mpn_sublsh1_n", speed_mpn_sublsh1_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_addlsh1_n_ip1
- { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1 },
-#endif
-#if HAVE_NATIVE_mpn_addlsh1_n_ip2
- { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2 },
-#endif
-#if HAVE_NATIVE_mpn_sublsh1_n_ip1
- { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1 },
-#endif
-#if HAVE_NATIVE_mpn_rsblsh1_n == 1
- { "mpn_rsblsh1_n", speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_addlsh2_n == 1
- { "mpn_addlsh2_n", speed_mpn_addlsh2_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_sublsh2_n == 1
- { "mpn_sublsh2_n", speed_mpn_sublsh2_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_addlsh2_n_ip1
- { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1 },
+#if HAVE_NATIVE_mpn_addlsh1_n
+ { "mpn_addlsh1_n", speed_mpn_addlsh1_n },
#endif
-#if HAVE_NATIVE_mpn_addlsh2_n_ip2
- { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2 },
-#endif
-#if HAVE_NATIVE_mpn_sublsh2_n_ip1
- { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1 },
-#endif
-#if HAVE_NATIVE_mpn_rsblsh2_n == 1
- { "mpn_rsblsh2_n", speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_addlsh_n
- { "mpn_addlsh_n", speed_mpn_addlsh_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_sublsh_n
- { "mpn_sublsh_n", speed_mpn_sublsh_n, FLAG_R_OPTIONAL },
-#endif
-#if HAVE_NATIVE_mpn_addlsh_n_ip1
- { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1 },
-#endif
-#if HAVE_NATIVE_mpn_addlsh_n_ip2
- { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2 },
-#endif
-#if HAVE_NATIVE_mpn_sublsh_n_ip1
- { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1 },
-#endif
-#if HAVE_NATIVE_mpn_rsblsh_n
- { "mpn_rsblsh_n", speed_mpn_rsblsh_n, FLAG_R_OPTIONAL },
+#if HAVE_NATIVE_mpn_sublsh1_n
+ { "mpn_sublsh1_n", speed_mpn_sublsh1_n },
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
- { "mpn_rsh1add_n", speed_mpn_rsh1add_n, FLAG_R_OPTIONAL },
+ { "mpn_rsh1add_n", speed_mpn_rsh1add_n },
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
- { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL },
+ { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n },
#endif
- { "mpn_cnd_add_n", speed_mpn_cnd_add_n, FLAG_R_OPTIONAL },
- { "mpn_cnd_sub_n", speed_mpn_cnd_sub_n, FLAG_R_OPTIONAL },
-
{ "MPN_ZERO", speed_MPN_ZERO },
{ "binvert_limb", speed_binvert_limb, FLAG_NODATA },
@@ -522,6 +385,8 @@ const struct routine_t {
{ "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
{ "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL },
+ { "udiv_qrnnd_preinv1", speed_udiv_qrnnd_preinv1, FLAG_R_OPTIONAL },
+ { "udiv_qrnnd_preinv2", speed_udiv_qrnnd_preinv2, FLAG_R_OPTIONAL },
{ "udiv_qrnnd_c", speed_udiv_qrnnd_c, FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_udiv_qrnnd
{ "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL },
@@ -884,7 +749,7 @@ run_gnuplot (int argc, char *argv[])
fprintf (fp, "set key left\n");
/* designed to make it possible to see crossovers easily */
- fprintf (fp, "set style data lines\n");
+ fprintf (fp, "set data style lines\n");
fprintf (fp, "plot ");
for (i = 0; i < num_choices; i++)
@@ -914,7 +779,7 @@ run_gnuplot (int argc, char *argv[])
/* Return a limb with n many one bits (starting from the least significant) */
#define LIMB_ONES(n) \
- ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX \
+ ((n) == BITS_PER_MP_LIMB ? MP_LIMB_T_MAX \
: (n) == 0 ? CNST_LIMB(0) \
: (CNST_LIMB(1) << (n)) - 1)
@@ -941,7 +806,7 @@ r_string (const char *s)
{
if (siz > 1 || siz < -1)
printf ("Warning, r parameter %s truncated to %d bits\n",
- s_orig, GMP_LIMB_BITS);
+ s_orig, BITS_PER_MP_LIMB);
return l;
}
}
@@ -954,10 +819,10 @@ r_string (const char *s)
if (strcmp (s, "bits") == 0)
{
mp_limb_t l;
- if (n > GMP_LIMB_BITS)
+ if (n > BITS_PER_MP_LIMB)
{
fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
- n, GMP_LIMB_BITS);
+ n, BITS_PER_MP_LIMB);
exit (1);
}
mpn_random (&l, 1);
@@ -965,10 +830,10 @@ r_string (const char *s)
}
else if (strcmp (s, "ones") == 0)
{
- if (n > GMP_LIMB_BITS)
+ if (n > BITS_PER_MP_LIMB)
{
fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
- n, GMP_LIMB_BITS);
+ n, BITS_PER_MP_LIMB);
exit (1);
}
return LIMB_ONES (n);
diff --git a/gmp/tune/speed.h b/gmp/tune/speed.h
index d9474adb35..ca021409ef 100644
--- a/gmp/tune/speed.h
+++ b/gmp/tune/speed.h
@@ -1,32 +1,22 @@
/* Header for speed and threshold things.
-Copyright 1999-2003, 2005, 2006, 2008-2013 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#ifndef __SPEED_H__
#define __SPEED_H__
@@ -41,9 +31,9 @@ see https://www.gnu.org/licenses/. */
} while (0)
/* A mask of the least significant n bits. Note 1<<32 doesn't give zero on
- x86 family CPUs, hence the separate case for GMP_LIMB_BITS. */
+ x86 family CPUs, hence the separate case for BITS_PER_MP_LIMB. */
#define MP_LIMB_T_LOWBITMASK(n) \
- ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX : ((mp_limb_t) 1 << (n)) - 1)
+ ((n) == BITS_PER_MP_LIMB ? MP_LIMB_T_MAX : ((mp_limb_t) 1 << (n)) - 1)
/* align must be a power of 2 here, usually CACHE_LINE_SIZE is a good choice */
@@ -66,7 +56,7 @@ see https://www.gnu.org/licenses/. */
*/
#define CACHE_LINE_SIZE 64 /* bytes */
-#define SPEED_TMP_ALLOC_ADJUST_MASK (CACHE_LINE_SIZE/GMP_LIMB_BYTES - 1)
+#define SPEED_TMP_ALLOC_ADJUST_MASK (CACHE_LINE_SIZE/BYTES_PER_MP_LIMB - 1)
/* Set ptr to a TMP_ALLOC block of the given limbs, with the given limb
alignment. */
@@ -75,7 +65,7 @@ see https://www.gnu.org/licenses/. */
mp_ptr __ptr; \
mp_size_t __ptr_align, __ptr_add; \
\
- ASSERT ((CACHE_LINE_SIZE % GMP_LIMB_BYTES) == 0); \
+ ASSERT ((CACHE_LINE_SIZE % BYTES_PER_MP_LIMB) == 0); \
__ptr = TMP_ALLOC_LIMBS ((limbs) + SPEED_TMP_ALLOC_ADJUST_MASK); \
__ptr_align = (__ptr - (mp_ptr) NULL); \
__ptr_add = ((align) - __ptr_align) & SPEED_TMP_ALLOC_ADJUST_MASK; \
@@ -97,13 +87,13 @@ extern double speed_unittime;
extern double speed_cycletime;
extern int speed_precision;
extern char speed_time_string[];
-void speed_time_init (void);
-void speed_cycletime_fail (const char *str);
-void speed_cycletime_init (void);
-void speed_cycletime_need_cycles (void);
-void speed_cycletime_need_seconds (void);
-void speed_starttime (void);
-double speed_endtime (void);
+void speed_time_init __GMP_PROTO ((void));
+void speed_cycletime_fail __GMP_PROTO ((const char *str));
+void speed_cycletime_init __GMP_PROTO ((void));
+void speed_cycletime_need_cycles __GMP_PROTO ((void));
+void speed_cycletime_need_seconds __GMP_PROTO ((void));
+void speed_starttime __GMP_PROTO ((void));
+double speed_endtime __GMP_PROTO ((void));
struct speed_params {
@@ -127,304 +117,203 @@ struct speed_params {
struct {
mp_ptr ptr;
mp_size_t size;
- } src[5], dst[4];
+ } src[2], dst[3];
};
-typedef double (*speed_function_t) (struct speed_params *);
+typedef double (*speed_function_t) __GMP_PROTO ((struct speed_params *s));
-double speed_measure (speed_function_t fun, struct speed_params *);
+double speed_measure __GMP_PROTO ((speed_function_t fun, struct speed_params *s));
/* Prototypes for speed measuring routines */
-double speed_back_to_back (struct speed_params *);
-double speed_count_leading_zeros (struct speed_params *);
-double speed_count_trailing_zeros (struct speed_params *);
-double speed_find_a (struct speed_params *);
-double speed_gmp_allocate_free (struct speed_params *);
-double speed_gmp_allocate_reallocate_free (struct speed_params *);
-double speed_invert_limb (struct speed_params *);
-double speed_malloc_free (struct speed_params *);
-double speed_malloc_realloc_free (struct speed_params *);
-double speed_memcpy (struct speed_params *);
-double speed_binvert_limb (struct speed_params *);
-double speed_binvert_limb_mul1 (struct speed_params *);
-double speed_binvert_limb_loop (struct speed_params *);
-double speed_binvert_limb_cond (struct speed_params *);
-double speed_binvert_limb_arith (struct speed_params *);
-
-double speed_mpf_init_clear (struct speed_params *);
-
-double speed_mpn_add_n (struct speed_params *);
-double speed_mpn_add_err1_n (struct speed_params *);
-double speed_mpn_add_err2_n (struct speed_params *);
-double speed_mpn_add_err3_n (struct speed_params *);
-double speed_mpn_addlsh_n (struct speed_params *);
-double speed_mpn_addlsh1_n (struct speed_params *);
-double speed_mpn_addlsh2_n (struct speed_params *);
-double speed_mpn_addlsh_n_ip1 (struct speed_params *);
-double speed_mpn_addlsh1_n_ip1 (struct speed_params *);
-double speed_mpn_addlsh2_n_ip1 (struct speed_params *);
-double speed_mpn_addlsh_n_ip2 (struct speed_params *);
-double speed_mpn_addlsh1_n_ip2 (struct speed_params *);
-double speed_mpn_addlsh2_n_ip2 (struct speed_params *);
-double speed_mpn_add_n_sub_n (struct speed_params *);
-double speed_mpn_and_n (struct speed_params *);
-double speed_mpn_andn_n (struct speed_params *);
-double speed_mpn_addmul_1 (struct speed_params *);
-double speed_mpn_addmul_2 (struct speed_params *);
-double speed_mpn_addmul_3 (struct speed_params *);
-double speed_mpn_addmul_4 (struct speed_params *);
-double speed_mpn_addmul_5 (struct speed_params *);
-double speed_mpn_addmul_6 (struct speed_params *);
-double speed_mpn_addmul_7 (struct speed_params *);
-double speed_mpn_addmul_8 (struct speed_params *);
-double speed_mpn_cnd_add_n (struct speed_params *);
-double speed_mpn_cnd_sub_n (struct speed_params *);
-double speed_mpn_com (struct speed_params *);
-double speed_mpn_copyd (struct speed_params *);
-double speed_mpn_copyi (struct speed_params *);
-double speed_MPN_COPY (struct speed_params *);
-double speed_MPN_COPY_DECR (struct speed_params *);
-double speed_MPN_COPY_INCR (struct speed_params *);
-double speed_mpn_sec_tabselect (struct speed_params *);
-double speed_mpn_divexact_1 (struct speed_params *);
-double speed_mpn_divexact_by3 (struct speed_params *);
-double speed_mpn_bdiv_q_1 (struct speed_params *);
-double speed_mpn_pi1_bdiv_q_1 (struct speed_params *);
-double speed_mpn_bdiv_dbm1c (struct speed_params *);
-double speed_mpn_divrem_1 (struct speed_params *);
-double speed_mpn_divrem_1f (struct speed_params *);
-double speed_mpn_divrem_1c (struct speed_params *);
-double speed_mpn_divrem_1cf (struct speed_params *);
-double speed_mpn_divrem_1_div (struct speed_params *);
-double speed_mpn_divrem_1f_div (struct speed_params *);
-double speed_mpn_divrem_1_inv (struct speed_params *);
-double speed_mpn_divrem_1f_inv (struct speed_params *);
-double speed_mpn_divrem_2 (struct speed_params *);
-double speed_mpn_divrem_2_div (struct speed_params *);
-double speed_mpn_divrem_2_inv (struct speed_params *);
-double speed_mpn_div_qr_1n_pi1 (struct speed_params *);
-double speed_mpn_div_qr_1n_pi1_1 (struct speed_params *);
-double speed_mpn_div_qr_1n_pi1_2 (struct speed_params *);
-double speed_mpn_div_qr_1 (struct speed_params *);
-double speed_mpn_div_qr_2n (struct speed_params *);
-double speed_mpn_div_qr_2u (struct speed_params *);
-double speed_mpn_fib2_ui (struct speed_params *);
-double speed_mpn_matrix22_mul (struct speed_params *);
-double speed_mpn_hgcd (struct speed_params *);
-double speed_mpn_hgcd_lehmer (struct speed_params *);
-double speed_mpn_hgcd_appr (struct speed_params *);
-double speed_mpn_hgcd_appr_lehmer (struct speed_params *);
-double speed_mpn_hgcd_reduce (struct speed_params *);
-double speed_mpn_hgcd_reduce_1 (struct speed_params *);
-double speed_mpn_hgcd_reduce_2 (struct speed_params *);
-double speed_mpn_gcd (struct speed_params *);
-double speed_mpn_gcd_1 (struct speed_params *);
-double speed_mpn_gcd_1N (struct speed_params *);
-double speed_mpn_gcdext (struct speed_params *);
-double speed_mpn_gcdext_double (struct speed_params *);
-double speed_mpn_gcdext_one_double (struct speed_params *);
-double speed_mpn_gcdext_one_single (struct speed_params *);
-double speed_mpn_gcdext_single (struct speed_params *);
-double speed_mpn_get_str (struct speed_params *);
-double speed_mpn_hamdist (struct speed_params *);
-double speed_mpn_ior_n (struct speed_params *);
-double speed_mpn_iorn_n (struct speed_params *);
-double speed_mpn_jacobi_base (struct speed_params *);
-double speed_mpn_jacobi_base_1 (struct speed_params *);
-double speed_mpn_jacobi_base_2 (struct speed_params *);
-double speed_mpn_jacobi_base_3 (struct speed_params *);
-double speed_mpn_jacobi_base_4 (struct speed_params *);
-double speed_mpn_lshift (struct speed_params *);
-double speed_mpn_lshiftc (struct speed_params *);
-double speed_mpn_mod_1 (struct speed_params *);
-double speed_mpn_mod_1c (struct speed_params *);
-double speed_mpn_mod_1_div (struct speed_params *);
-double speed_mpn_mod_1_inv (struct speed_params *);
-double speed_mpn_mod_1_1 (struct speed_params *);
-double speed_mpn_mod_1_1_1 (struct speed_params *);
-double speed_mpn_mod_1_1_2 (struct speed_params *);
-double speed_mpn_mod_1_2 (struct speed_params *);
-double speed_mpn_mod_1_3 (struct speed_params *);
-double speed_mpn_mod_1_4 (struct speed_params *);
-double speed_mpn_mod_34lsub1 (struct speed_params *);
-double speed_mpn_modexact_1_odd (struct speed_params *);
-double speed_mpn_modexact_1c_odd (struct speed_params *);
-double speed_mpn_mul_1 (struct speed_params *);
-double speed_mpn_mul_1_inplace (struct speed_params *);
-double speed_mpn_mul_2 (struct speed_params *);
-double speed_mpn_mul_3 (struct speed_params *);
-double speed_mpn_mul_4 (struct speed_params *);
-double speed_mpn_mul_5 (struct speed_params *);
-double speed_mpn_mul_6 (struct speed_params *);
-double speed_mpn_mul (struct speed_params *);
-double speed_mpn_mul_basecase (struct speed_params *);
-double speed_mpn_mulmid (struct speed_params *);
-double speed_mpn_mulmid_basecase (struct speed_params *);
-double speed_mpn_mul_fft (struct speed_params *);
-double speed_mpn_mul_fft_sqr (struct speed_params *);
-double speed_mpn_fft_mul (struct speed_params *);
-double speed_mpn_fft_sqr (struct speed_params *);
-#if WANT_OLD_FFT_FULL
-double speed_mpn_mul_fft_full (struct speed_params *);
-double speed_mpn_mul_fft_full_sqr (struct speed_params *);
-#endif
-double speed_mpn_nussbaumer_mul (struct speed_params *);
-double speed_mpn_nussbaumer_mul_sqr (struct speed_params *);
-double speed_mpn_mul_n (struct speed_params *);
-double speed_mpn_mul_n_sqr (struct speed_params *);
-double speed_mpn_mulmid_n (struct speed_params *);
-double speed_mpn_mullo_n (struct speed_params *);
-double speed_mpn_mullo_basecase (struct speed_params *);
-double speed_mpn_nand_n (struct speed_params *);
-double speed_mpn_nior_n (struct speed_params *);
-double speed_mpn_popcount (struct speed_params *);
-double speed_mpn_preinv_divrem_1 (struct speed_params *);
-double speed_mpn_preinv_divrem_1f (struct speed_params *);
-double speed_mpn_preinv_mod_1 (struct speed_params *);
-double speed_mpn_sbpi1_div_qr (struct speed_params *);
-double speed_mpn_dcpi1_div_qr (struct speed_params *);
-double speed_mpn_sbpi1_divappr_q (struct speed_params *);
-double speed_mpn_dcpi1_divappr_q (struct speed_params *);
-double speed_mpn_mu_div_qr (struct speed_params *);
-double speed_mpn_mu_divappr_q (struct speed_params *);
-double speed_mpn_mupi_div_qr (struct speed_params *);
-double speed_mpn_mu_div_q (struct speed_params *);
-double speed_mpn_sbpi1_bdiv_qr (struct speed_params *);
-double speed_mpn_dcpi1_bdiv_qr (struct speed_params *);
-double speed_mpn_sbpi1_bdiv_q (struct speed_params *);
-double speed_mpn_dcpi1_bdiv_q (struct speed_params *);
-double speed_mpn_mu_bdiv_q (struct speed_params *);
-double speed_mpn_mu_bdiv_qr (struct speed_params *);
-double speed_mpn_broot (struct speed_params *);
-double speed_mpn_broot_invm1 (struct speed_params *);
-double speed_mpn_brootinv (struct speed_params *);
-double speed_mpn_invert (struct speed_params *);
-double speed_mpn_invertappr (struct speed_params *);
-double speed_mpn_ni_invertappr (struct speed_params *);
-double speed_mpn_sec_invert (struct speed_params *s);
-double speed_mpn_binvert (struct speed_params *);
-double speed_mpn_redc_1 (struct speed_params *);
-double speed_mpn_redc_2 (struct speed_params *);
-double speed_mpn_redc_n (struct speed_params *);
-double speed_mpn_rsblsh_n (struct speed_params *);
-double speed_mpn_rsblsh1_n (struct speed_params *);
-double speed_mpn_rsblsh2_n (struct speed_params *);
-double speed_mpn_rsh1add_n (struct speed_params *);
-double speed_mpn_rsh1sub_n (struct speed_params *);
-double speed_mpn_rshift (struct speed_params *);
-double speed_mpn_sb_divrem_m3 (struct speed_params *);
-double speed_mpn_sb_divrem_m3_div (struct speed_params *);
-double speed_mpn_sb_divrem_m3_inv (struct speed_params *);
-double speed_mpn_set_str (struct speed_params *);
-double speed_mpn_bc_set_str (struct speed_params *);
-double speed_mpn_dc_set_str (struct speed_params *);
-double speed_mpn_set_str_pre (struct speed_params *);
-double speed_mpn_sqr_basecase (struct speed_params *);
-double speed_mpn_sqr_diag_addlsh1 (struct speed_params *);
-double speed_mpn_sqr_diagonal (struct speed_params *);
-double speed_mpn_sqr (struct speed_params *);
-double speed_mpn_sqrtrem (struct speed_params *);
-double speed_mpn_rootrem (struct speed_params *);
-double speed_mpn_sub_n (struct speed_params *);
-double speed_mpn_sub_err1_n (struct speed_params *);
-double speed_mpn_sub_err2_n (struct speed_params *);
-double speed_mpn_sub_err3_n (struct speed_params *);
-double speed_mpn_sublsh_n (struct speed_params *);
-double speed_mpn_sublsh1_n (struct speed_params *);
-double speed_mpn_sublsh2_n (struct speed_params *);
-double speed_mpn_sublsh_n_ip1 (struct speed_params *);
-double speed_mpn_sublsh1_n_ip1 (struct speed_params *);
-double speed_mpn_sublsh2_n_ip1 (struct speed_params *);
-double speed_mpn_submul_1 (struct speed_params *);
-double speed_mpn_toom2_sqr (struct speed_params *);
-double speed_mpn_toom3_sqr (struct speed_params *);
-double speed_mpn_toom4_sqr (struct speed_params *);
-double speed_mpn_toom6_sqr (struct speed_params *);
-double speed_mpn_toom8_sqr (struct speed_params *);
-double speed_mpn_toom22_mul (struct speed_params *);
-double speed_mpn_toom33_mul (struct speed_params *);
-double speed_mpn_toom44_mul (struct speed_params *);
-double speed_mpn_toom6h_mul (struct speed_params *);
-double speed_mpn_toom8h_mul (struct speed_params *);
-double speed_mpn_toom32_mul (struct speed_params *);
-double speed_mpn_toom42_mul (struct speed_params *);
-double speed_mpn_toom43_mul (struct speed_params *);
-double speed_mpn_toom63_mul (struct speed_params *);
-double speed_mpn_toom32_for_toom43_mul (struct speed_params *);
-double speed_mpn_toom43_for_toom32_mul (struct speed_params *);
-double speed_mpn_toom32_for_toom53_mul (struct speed_params *);
-double speed_mpn_toom53_for_toom32_mul (struct speed_params *);
-double speed_mpn_toom42_for_toom53_mul (struct speed_params *);
-double speed_mpn_toom53_for_toom42_mul (struct speed_params *);
-double speed_mpn_toom43_for_toom54_mul (struct speed_params *);
-double speed_mpn_toom54_for_toom43_mul (struct speed_params *);
-double speed_mpn_toom42_mulmid (struct speed_params *);
-double speed_mpn_mulmod_bnm1 (struct speed_params *);
-double speed_mpn_bc_mulmod_bnm1 (struct speed_params *);
-double speed_mpn_mulmod_bnm1_rounded (struct speed_params *);
-double speed_mpn_sqrmod_bnm1 (struct speed_params *);
-double speed_mpn_udiv_qrnnd (struct speed_params *);
-double speed_mpn_udiv_qrnnd_r (struct speed_params *);
-double speed_mpn_umul_ppmm (struct speed_params *);
-double speed_mpn_umul_ppmm_r (struct speed_params *);
-double speed_mpn_xnor_n (struct speed_params *);
-double speed_mpn_xor_n (struct speed_params *);
-double speed_MPN_ZERO (struct speed_params *);
-
-double speed_mpq_init_clear (struct speed_params *);
-
-double speed_mpz_add (struct speed_params *);
-double speed_mpz_bin_uiui (struct speed_params *);
-double speed_mpz_bin_ui (struct speed_params *);
-double speed_mpz_fac_ui (struct speed_params *);
-double speed_mpz_fib_ui (struct speed_params *);
-double speed_mpz_fib2_ui (struct speed_params *);
-double speed_mpz_init_clear (struct speed_params *);
-double speed_mpz_init_realloc_clear (struct speed_params *);
-double speed_mpz_jacobi (struct speed_params *);
-double speed_mpz_lucnum_ui (struct speed_params *);
-double speed_mpz_lucnum2_ui (struct speed_params *);
-double speed_mpz_mod (struct speed_params *);
-double speed_mpz_powm (struct speed_params *);
-double speed_mpz_powm_mod (struct speed_params *);
-double speed_mpz_powm_redc (struct speed_params *);
-double speed_mpz_powm_sec (struct speed_params *);
-double speed_mpz_powm_ui (struct speed_params *);
-double speed_mpz_urandomb (struct speed_params *);
-
-double speed_gmp_randseed (struct speed_params *);
-double speed_gmp_randseed_ui (struct speed_params *);
-
-double speed_noop (struct speed_params *);
-double speed_noop_wxs (struct speed_params *);
-double speed_noop_wxys (struct speed_params *);
-
-double speed_operator_div (struct speed_params *);
-double speed_operator_mod (struct speed_params *);
-
-double speed_udiv_qrnnd (struct speed_params *);
-double speed_udiv_qrnnd_preinv1 (struct speed_params *);
-double speed_udiv_qrnnd_preinv2 (struct speed_params *);
-double speed_udiv_qrnnd_preinv3 (struct speed_params *);
-double speed_udiv_qrnnd_c (struct speed_params *);
-double speed_umul_ppmm (struct speed_params *);
+double speed_back_to_back __GMP_PROTO ((struct speed_params *s));
+double speed_count_leading_zeros __GMP_PROTO ((struct speed_params *s));
+double speed_count_trailing_zeros __GMP_PROTO ((struct speed_params *s));
+double speed_find_a __GMP_PROTO ((struct speed_params *s));
+double speed_gmp_allocate_free __GMP_PROTO ((struct speed_params *s));
+double speed_gmp_allocate_reallocate_free __GMP_PROTO ((struct speed_params *s));
+double speed_invert_limb __GMP_PROTO ((struct speed_params *s));
+double speed_malloc_free __GMP_PROTO ((struct speed_params *s));
+double speed_malloc_realloc_free __GMP_PROTO ((struct speed_params *s));
+double speed_memcpy __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb_mul1 __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb_loop __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb_cond __GMP_PROTO ((struct speed_params *s));
+double speed_binvert_limb_arith __GMP_PROTO ((struct speed_params *s));
+
+double speed_mpf_init_clear __GMP_PROTO ((struct speed_params *s));
+
+double speed_mpn_add_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addlsh1_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addsub_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_and_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_andn_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_4 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_5 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_6 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_7 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_addmul_8 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_com_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_copyd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_copyi __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dc_divrem_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dc_divrem_sb __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dc_divrem_sb_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dc_divrem_sb_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dc_tdiv_qr __GMP_PROTO ((struct speed_params *s));
+double speed_MPN_COPY __GMP_PROTO ((struct speed_params *s));
+double speed_MPN_COPY_DECR __GMP_PROTO ((struct speed_params *s));
+double speed_MPN_COPY_INCR __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divexact_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divexact_by3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_bdiv_dbm1c __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1f __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1c __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1cf __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1f_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_1f_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_2_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_divrem_2_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_fib2_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_matrix22_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcd_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcd_1N __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcd_binary __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcd_accel __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext_double __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext_one_double __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext_one_single __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_gcdext_single __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_get_str __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_hamdist __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_ior_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_iorn_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_jacobi_base __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_jacobi_base_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_jacobi_base_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_jacobi_base_3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_kara_mul_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_kara_sqr_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_lshift __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1c __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_1_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mod_34lsub1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_modexact_1_odd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_modexact_1c_odd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_1_inplace __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_2 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_4 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_basecase __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_fft __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_fft_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_fft_full __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_fft_full_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_n_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mullow_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mullow_basecase __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_nand_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_nior_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_popcount __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_preinv_divrem_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_preinv_divrem_1f __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_preinv_mod_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_redc_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rsh1add_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rsh1sub_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rshift __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sb_divrem_m3 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sb_divrem_m3_div __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sb_divrem_m3_inv __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_set_str __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_bc_set_str __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_dc_set_str __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_set_str_pre __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqr_basecase __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqr_diagonal __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqr_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sqrtrem __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_rootrem __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sub_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_sublsh1_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_submul_1 __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom3_mul_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_toom3_sqr_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_udiv_qrnnd __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_udiv_qrnnd_r __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_umul_ppmm __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_umul_ppmm_r __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_xnor_n __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_xor_n __GMP_PROTO ((struct speed_params *s));
+double speed_MPN_ZERO __GMP_PROTO ((struct speed_params *s));
+
+double speed_mpq_init_clear __GMP_PROTO ((struct speed_params *s));
+
+double speed_mpz_add __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_bin_uiui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_fac_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_fib_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_fib2_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_init_clear __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_init_realloc_clear __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_jacobi __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_lucnum_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_lucnum2_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_mod __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_powm __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_powm_mod __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_powm_redc __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_powm_ui __GMP_PROTO ((struct speed_params *s));
+double speed_mpz_urandomb __GMP_PROTO ((struct speed_params *s));
+
+double speed_gmp_randseed __GMP_PROTO ((struct speed_params *s));
+double speed_gmp_randseed_ui __GMP_PROTO ((struct speed_params *s));
+
+double speed_noop __GMP_PROTO ((struct speed_params *s));
+double speed_noop_wxs __GMP_PROTO ((struct speed_params *s));
+double speed_noop_wxys __GMP_PROTO ((struct speed_params *s));
+
+double speed_operator_div __GMP_PROTO ((struct speed_params *s));
+double speed_operator_mod __GMP_PROTO ((struct speed_params *s));
+
+double speed_udiv_qrnnd __GMP_PROTO ((struct speed_params *s));
+double speed_udiv_qrnnd_preinv1 __GMP_PROTO ((struct speed_params *s));
+double speed_udiv_qrnnd_preinv2 __GMP_PROTO ((struct speed_params *s));
+double speed_udiv_qrnnd_c __GMP_PROTO ((struct speed_params *s));
+double speed_umul_ppmm __GMP_PROTO ((struct speed_params *s));
/* Prototypes for other routines */
/* low 32-bits in p[0], high 32-bits in p[1] */
-void speed_cyclecounter (unsigned p[2]);
+void speed_cyclecounter __GMP_PROTO ((unsigned p[2]));
-void mftb_function (unsigned p[2]);
+void mftb_function __GMP_PROTO ((unsigned p[2]));
/* In i386 gcc -fPIC, ebx is a fixed register and can't be declared a dummy
output or a clobber for the cpuid, hence an explicit save and restore. A
clobber as such doesn't provoke an error unfortunately (gcc 3.0), so use
the dummy output style in non-PIC, so there's an error if somehow -fPIC
- is used without a -DPIC to tell us about it. */
+ is used without a -DPIC to tell us about it. */
#if defined(__GNUC__) && ! defined (NO_ASM) \
&& (defined (__i386__) || defined (__i486__))
-#if defined (PIC) || defined (__APPLE_CC__)
+#ifdef PIC
#define speed_cyclecounter(p) \
do { \
int __speed_cyclecounter__save_ebx; \
@@ -453,80 +342,71 @@ void mftb_function (unsigned p[2]);
#endif
#endif
-double speed_cyclecounter_diff (const unsigned [2], const unsigned [2]);
-int gettimeofday_microseconds_p (void);
-int getrusage_microseconds_p (void);
-int cycles_works_p (void);
-long clk_tck (void);
-double freq_measure (const char *, double (*)(void));
-
-int double_cmp_ptr (const double *, const double *);
-void pentium_wbinvd (void);
-typedef int (*qsort_function_t) (const void *, const void *);
-
-void noop (void);
-void noop_1 (mp_limb_t);
-void noop_wxs (mp_ptr, mp_srcptr, mp_size_t);
-void noop_wxys (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
-void mpn_cache_fill (mp_srcptr, mp_size_t);
-void mpn_cache_fill_dummy (mp_limb_t);
-void speed_cache_fill (struct speed_params *);
-void speed_operand_src (struct speed_params *, mp_ptr, mp_size_t);
-void speed_operand_dst (struct speed_params *, mp_ptr, mp_size_t);
+double speed_cyclecounter_diff __GMP_PROTO ((const unsigned [2], const unsigned [2]));
+int gettimeofday_microseconds_p __GMP_PROTO ((void));
+int getrusage_microseconds_p __GMP_PROTO ((void));
+int cycles_works_p __GMP_PROTO ((void));
+long clk_tck __GMP_PROTO ((void));
+double freq_measure __GMP_PROTO ((const char *, double (*)(void)));
+
+int double_cmp_ptr __GMP_PROTO ((const double *, const double *));
+void pentium_wbinvd __GMP_PROTO ((void));
+typedef int (*qsort_function_t) __GMP_PROTO ((const void *, const void *));
+
+void noop __GMP_PROTO ((void));
+void noop_1 __GMP_PROTO ((mp_limb_t));
+void noop_wxs __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+void noop_wxys __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+void mpn_cache_fill __GMP_PROTO ((mp_srcptr, mp_size_t));
+void mpn_cache_fill_dummy __GMP_PROTO ((mp_limb_t));
+void speed_cache_fill __GMP_PROTO ((struct speed_params *));
+void speed_operand_src __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t));
+void speed_operand_dst __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t));
extern int speed_option_addrs;
extern int speed_option_verbose;
-extern int speed_option_cycles_broken;
-void speed_option_set (const char *);
-
-mp_limb_t mpn_div_qr_1n_pi1_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
-mp_limb_t mpn_div_qr_1n_pi1_2 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
-
-mp_limb_t mpn_divrem_1_div (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_divrem_1_inv (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_divrem_2_div (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
-mp_limb_t mpn_divrem_2_inv (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
+void speed_option_set __GMP_PROTO((const char *));
-int mpn_jacobi_base_1 (mp_limb_t, mp_limb_t, int);
-int mpn_jacobi_base_2 (mp_limb_t, mp_limb_t, int);
-int mpn_jacobi_base_3 (mp_limb_t, mp_limb_t, int);
-int mpn_jacobi_base_4 (mp_limb_t, mp_limb_t, int);
+mp_limb_t mpn_divrem_1_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_divrem_1_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_divrem_2_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
+mp_limb_t mpn_divrem_2_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
-mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t);
+int mpn_jacobi_base_1 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
+int mpn_jacobi_base_2 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
+int mpn_jacobi_base_3 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
-mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
-mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+mp_limb_t mpn_mod_1_div __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_mod_1_inv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
-void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
-void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
+mp_size_t mpn_gcd_binary
+ __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcd_accel
+ __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcdext_one_double
+ __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcdext_one_single
+ __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcdext_single
+ __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_size_t mpn_gcdext_double
+ __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
-mp_size_t mpn_gcdext_one_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
-mp_size_t mpn_gcdext_one_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
-mp_size_t mpn_gcdext_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
-mp_size_t mpn_gcdext_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
-mp_size_t mpn_hgcd_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
-mp_size_t mpn_hgcd_lehmer_itch (mp_size_t);
+mp_limb_t mpn_sb_divrem_mn_div __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+mp_limb_t mpn_sb_divrem_mn_inv __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
-mp_size_t mpn_hgcd_appr_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
-mp_size_t mpn_hgcd_appr_lehmer_itch (mp_size_t);
+mp_size_t mpn_set_str_basecase __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
-mp_size_t mpn_hgcd_reduce_1 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
-mp_size_t mpn_hgcd_reduce_1_itch (mp_size_t, mp_size_t);
+void mpn_toom3_mul_n_open __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+void mpn_toom3_sqr_n_open __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+void mpn_toom3_mul_n_mpn __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+void mpn_toom3_sqr_n_mpn __GMP_PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
-mp_size_t mpn_hgcd_reduce_2 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
-mp_size_t mpn_hgcd_reduce_2_itch (mp_size_t, mp_size_t);
+void mpz_powm_mod __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+void mpz_powm_redc __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
-mp_limb_t mpn_sb_divrem_mn_div (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
-mp_limb_t mpn_sb_divrem_mn_inv (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
-
-mp_size_t mpn_set_str_basecase (mp_ptr, const unsigned char *, size_t, int);
-void mpn_pre_set_str (mp_ptr, unsigned char *, size_t, powers_t *, mp_ptr);
-
-void mpz_powm_mod (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
-void mpz_powm_redc (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
-
-int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
+int speed_routine_count_zeros_setup
+ __GMP_PROTO ((struct speed_params *, mp_ptr, int, int));
/* "get" is called repeatedly until it ticks over, just in case on a fast
@@ -623,7 +503,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
#define SPEED_RESTRICT_COND(cond) if (!(cond)) return -1.0;
/* For mpn_copy or similar. */
-#define SPEED_ROUTINE_MPN_COPY_CALL(call) \
+#define SPEED_ROUTINE_MPN_COPY(function) \
{ \
mp_ptr wp; \
unsigned i; \
@@ -642,47 +522,13 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
speed_starttime (); \
i = s->reps; \
do \
- call; \
+ function (wp, s->xp, s->size); \
while (--i != 0); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
-#define SPEED_ROUTINE_MPN_COPY(function) \
- SPEED_ROUTINE_MPN_COPY_CALL (function (wp, s->xp, s->size))
-
-#define SPEED_ROUTINE_MPN_TABSELECT(function) \
- { \
- mp_ptr xp, wp; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 0); \
- \
- if (s->r == 0) \
- s->r = s->size; /* default to a quadratic shape */ \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (xp, s->size * s->r, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
- \
- speed_operand_src (s, xp, s->size * s->r); \
- speed_operand_dst (s, wp, s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- function (wp, xp, s->size, s->r, (s->r) / 2); \
- while (--i != 0); \
- t = speed_endtime () / s->r; \
- \
- TMP_FREE; \
- return t; \
- }
-
#define SPEED_ROUTINE_MPN_COPYC(function) \
{ \
@@ -712,7 +558,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
}
/* s->size is still in limbs, and it's limbs which are copied, but
- "function" takes a size in bytes not limbs. */
+ "function" takes a size in bytes not limbs. */
#define SPEED_ROUTINE_MPN_COPY_BYTES(function) \
{ \
mp_ptr wp; \
@@ -732,7 +578,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
speed_starttime (); \
i = s->reps; \
do \
- function (wp, s->xp, s->size * GMP_LIMB_BYTES); \
+ function (wp, s->xp, s->size * BYTES_PER_MP_LIMB); \
while (--i != 0); \
t = speed_endtime (); \
\
@@ -788,72 +634,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
return t; \
}
-
-/* For mpn_aors_errK_n, where 1 <= K <= 3. */
-#define SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL(call, K) \
- { \
- mp_ptr wp; \
- mp_ptr xp, yp; \
- mp_ptr zp[K]; \
- mp_limb_t ep[2*K]; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
- \
- /* (don't have a mechanism to specify zp alignments) */ \
- for (i = 0; i < K; i++) \
- SPEED_TMP_ALLOC_LIMBS (zp[i], s->size, 0); \
- \
- xp = s->xp; \
- yp = s->yp; \
- \
- if (s->r == 0) ; \
- else if (s->r == 1) { xp = wp; } \
- else if (s->r == 2) { yp = wp; } \
- else if (s->r == 3) { xp = wp; yp = wp; } \
- else if (s->r == 4) { yp = xp; } \
- else { \
- TMP_FREE; \
- return -1.0; \
- } \
- \
- /* initialize wp if operand overlap */ \
- if (xp == wp || yp == wp) \
- MPN_COPY (wp, s->xp, s->size); \
- \
- speed_operand_src (s, xp, s->size); \
- speed_operand_src (s, yp, s->size); \
- for (i = 0; i < K; i++) \
- speed_operand_src (s, zp[i], s->size); \
- speed_operand_dst (s, wp, s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- call; \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-
-#define SPEED_ROUTINE_MPN_BINARY_ERR1_N(function) \
- SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], s->size, 0), 1)
-
-#define SPEED_ROUTINE_MPN_BINARY_ERR2_N(function) \
- SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], s->size, 0), 2)
-
-#define SPEED_ROUTINE_MPN_BINARY_ERR3_N(function) \
- SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], zp[2], s->size, 0), 3)
-
-
/* For mpn_add_n, mpn_sub_n, or similar. */
#define SPEED_ROUTINE_MPN_ADDSUB_N_CALL(call) \
{ \
@@ -954,26 +734,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
#define SPEED_ROUTINE_MPN_DIVEXACT_1(function) \
SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))
-#define SPEED_ROUTINE_MPN_BDIV_Q_1(function) \
- SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r))
-
-#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL(call) \
- { \
- unsigned shift; \
- mp_limb_t dinv; \
- \
- SPEED_RESTRICT_COND (s->size > 0); \
- SPEED_RESTRICT_COND (s->r != 0); \
- \
- count_trailing_zeros (shift, s->r); \
- binvert_limb (dinv, s->r >> shift); \
- \
- SPEED_ROUTINE_MPN_UNARY_1_CALL (call); \
- }
-#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1(function) \
- SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL \
- ((*function) (wp, s->xp, s->size, s->r, dinv, shift))
-
#define SPEED_ROUTINE_MPN_BDIV_DBM1C(function) \
SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r, 0))
@@ -1069,30 +829,30 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
/* For mpn_mul, mpn_mul_basecase, xsize=r, ysize=s->size. */
#define SPEED_ROUTINE_MPN_MUL(function) \
{ \
- mp_ptr wp; \
+ mp_ptr wp, xp; \
mp_size_t size1; \
unsigned i; \
double t; \
TMP_DECL; \
\
size1 = (s->r == 0 ? s->size : s->r); \
- if (size1 < 0) size1 = -size1 - s->size; \
\
- SPEED_RESTRICT_COND (size1 >= 1); \
- SPEED_RESTRICT_COND (s->size >= size1); \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ SPEED_RESTRICT_COND (size1 >= s->size); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, size1 + s->size, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
\
- speed_operand_src (s, s->xp, s->size); \
- speed_operand_src (s, s->yp, size1); \
+ speed_operand_src (s, xp, size1); \
+ speed_operand_src (s, s->yp, s->size); \
speed_operand_dst (s, wp, size1 + s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
- function (wp, s->xp, s->size, s->yp, size1); \
+ function (wp, xp, size1, s->yp, s->size); \
while (--i != 0); \
t = speed_endtime (); \
\
@@ -1132,7 +892,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
#define SPEED_ROUTINE_MPN_MUL_N(function) \
SPEED_ROUTINE_MPN_MUL_N_CALL (function (wp, s->xp, s->yp, s->size));
-#define SPEED_ROUTINE_MPN_MULLO_N_CALL(call) \
+#define SPEED_ROUTINE_MPN_MULLOW_N_CALL(call) \
{ \
mp_ptr wp; \
unsigned i; \
@@ -1160,11 +920,11 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
return t; \
}
-#define SPEED_ROUTINE_MPN_MULLO_N(function) \
- SPEED_ROUTINE_MPN_MULLO_N_CALL (function (wp, s->xp, s->yp, s->size));
+#define SPEED_ROUTINE_MPN_MULLOW_N(function) \
+ SPEED_ROUTINE_MPN_MULLOW_N_CALL (function (wp, s->xp, s->yp, s->size));
/* For mpn_mul_basecase, xsize=r, ysize=s->size. */
-#define SPEED_ROUTINE_MPN_MULLO_BASECASE(function) \
+#define SPEED_ROUTINE_MPN_MULLOW_BASECASE(function) \
{ \
mp_ptr wp; \
unsigned i; \
@@ -1192,172 +952,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
return t; \
}
-/* For mpn_mulmid, mpn_mulmid_basecase, xsize=r, ysize=s->size. */
-#define SPEED_ROUTINE_MPN_MULMID(function) \
- { \
- mp_ptr wp, xp; \
- mp_size_t size1; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- \
- size1 = (s->r == 0 ? (2 * s->size - 1) : s->r); \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- SPEED_RESTRICT_COND (size1 >= s->size); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
- \
- speed_operand_src (s, xp, size1); \
- speed_operand_src (s, s->yp, s->size); \
- speed_operand_dst (s, wp, size1 - s->size + 3); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- function (wp, xp, size1, s->yp, s->size); \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-
-#define SPEED_ROUTINE_MPN_MULMID_N(function) \
- { \
- mp_ptr wp, xp; \
- mp_size_t size1; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- \
- size1 = 2 * s->size - 1; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
- \
- speed_operand_src (s, xp, size1); \
- speed_operand_src (s, s->yp, s->size); \
- speed_operand_dst (s, wp, size1 - s->size + 3); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- function (wp, xp, s->yp, s->size); \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-
-#define SPEED_ROUTINE_MPN_TOOM42_MULMID(function) \
- { \
- mp_ptr wp, xp, scratch; \
- mp_size_t size1, scratch_size; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- \
- size1 = 2 * s->size - 1; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
- scratch_size = mpn_toom42_mulmid_itch (s->size); \
- SPEED_TMP_ALLOC_LIMBS (scratch, scratch_size, 0); \
- \
- speed_operand_src (s, xp, size1); \
- speed_operand_src (s, s->yp, s->size); \
- speed_operand_dst (s, wp, size1 - s->size + 3); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- function (wp, xp, s->yp, s->size, scratch); \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-
-#define SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL(call) \
- { \
- mp_ptr wp, tp; \
- unsigned i; \
- double t; \
- mp_size_t itch; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- itch = mpn_mulmod_bnm1_itch (s->size, s->size, s->size); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); \
- \
- speed_operand_src (s, s->xp, s->size); \
- speed_operand_src (s, s->yp, s->size); \
- speed_operand_dst (s, wp, 2 * s->size); \
- speed_operand_dst (s, tp, itch); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- call; \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-#define SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED(function) \
- { \
- mp_ptr wp, tp; \
- unsigned i; \
- double t; \
- mp_size_t size, itch; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- size = mpn_mulmod_bnm1_next_size (s->size); \
- itch = mpn_mulmod_bnm1_itch (size, size, size); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (wp, size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); \
- \
- speed_operand_src (s, s->xp, s->size); \
- speed_operand_src (s, s->yp, s->size); \
- speed_operand_dst (s, wp, size); \
- speed_operand_dst (s, tp, itch); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- function (wp, size, s->xp, s->size, s->yp, s->size, tp); \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-
#define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize) \
{ \
mp_ptr wp, tspace; \
@@ -1388,104 +982,17 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
return t; \
}
-#define SPEED_ROUTINE_MPN_TOOM22_MUL_N(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
- mpn_toom22_mul_itch (s->size, s->size), \
- MPN_TOOM22_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM33_MUL_N(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
- mpn_toom33_mul_itch (s->size, s->size), \
- MPN_TOOM33_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM44_MUL_N(function) \
+#define SPEED_ROUTINE_MPN_KARA_MUL_N(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
- mpn_toom44_mul_itch (s->size, s->size), \
- MPN_TOOM44_MUL_MINSIZE)
+ (function (wp, s->xp, s->yp, s->size, tspace), /* x*y, same operands as TOOM3_MUL_N */ \
+ MPN_KARA_MUL_N_TSIZE (s->size), \
+ MPN_KARA_MUL_N_MINSIZE)
-#define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function) \
+#define SPEED_ROUTINE_MPN_TOOM3_MUL_N(function) \
SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
- mpn_toom6h_mul_itch (s->size, s->size), \
- MPN_TOOM6H_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM8H_MUL_N(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size, tspace), \
- mpn_toom8h_mul_itch (s->size, s->size), \
- MPN_TOOM8H_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM32_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace), \
- mpn_toom32_mul_itch (s->size, 2*s->size/3), \
- MPN_TOOM32_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM42_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \
- mpn_toom42_mul_itch (s->size, s->size/2), \
- MPN_TOOM42_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM43_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size*3/4, tspace), \
- mpn_toom43_mul_itch (s->size, s->size*3/4), \
- MPN_TOOM43_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM63_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \
- mpn_toom63_mul_itch (s->size, s->size/2), \
- MPN_TOOM63_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace), \
- mpn_toom32_mul_itch (s->size, 17*s->size/24), \
- MPN_TOOM32_MUL_MINSIZE)
-#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace), \
- mpn_toom43_mul_itch (s->size, 17*s->size/24), \
- MPN_TOOM43_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace), \
- mpn_toom32_mul_itch (s->size, 19*s->size/30), \
- MPN_TOOM32_MUL_MINSIZE)
-#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace), \
- mpn_toom53_mul_itch (s->size, 19*s->size/30), \
- MPN_TOOM53_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace), \
- mpn_toom42_mul_itch (s->size, 11*s->size/20), \
- MPN_TOOM42_MUL_MINSIZE)
-#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace), \
- mpn_toom53_mul_itch (s->size, 11*s->size/20), \
- MPN_TOOM53_MUL_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace), \
- mpn_toom42_mul_itch (s->size, 5*s->size/6), \
- MPN_TOOM54_MUL_MINSIZE)
-#define SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL(function) \
- SPEED_ROUTINE_MPN_MUL_N_TSPACE \
- (function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace), \
- mpn_toom54_mul_itch (s->size, 5*s->size/6), \
- MPN_TOOM54_MUL_MINSIZE)
-
+ (function (wp, s->xp, s->yp, s->size, tspace), \
+ MPN_TOOM3_MUL_N_TSIZE (s->size), \
+ MPN_TOOM3_MUL_N_MINSIZE)
#define SPEED_ROUTINE_MPN_SQR_CALL(call) \
@@ -1518,34 +1025,9 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
#define SPEED_ROUTINE_MPN_SQR(function) \
SPEED_ROUTINE_MPN_SQR_CALL (function (wp, s->xp, s->size))
-#define SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL(call) \
- { \
- mp_ptr wp, tp; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 2); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \
- \
- speed_operand_src (s, s->xp, s->size); \
- speed_operand_src (s, tp, 2 * s->size); \
- speed_operand_dst (s, wp, 2 * s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- call; \
- while (--i != 0); \
- t = speed_endtime () / 2; \
- \
- TMP_FREE; \
- return t; \
- }
+#define SPEED_ROUTINE_MPN_SQR_DIAGONAL(function) \
+ SPEED_ROUTINE_MPN_SQR (function)
+
#define SPEED_ROUTINE_MPN_SQR_TSPACE(call, tsize, minsize) \
{ \
@@ -1576,32 +1058,17 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
return t; \
}
-#define SPEED_ROUTINE_MPN_TOOM2_SQR(function) \
+#define SPEED_ROUTINE_MPN_KARA_SQR_N(function) \
SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
- mpn_toom2_sqr_itch (s->size), \
- MPN_TOOM2_SQR_MINSIZE)
+ MPN_KARA_SQR_N_TSIZE (s->size), \
+ MPN_KARA_SQR_N_MINSIZE)
-#define SPEED_ROUTINE_MPN_TOOM3_SQR(function) \
+#define SPEED_ROUTINE_MPN_TOOM3_SQR_N(function) \
SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
- mpn_toom3_sqr_itch (s->size), \
- MPN_TOOM3_SQR_MINSIZE)
+ MPN_TOOM3_SQR_N_TSIZE (s->size), \
+ MPN_TOOM3_SQR_N_MINSIZE)
-#define SPEED_ROUTINE_MPN_TOOM4_SQR(function) \
- SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
- mpn_toom4_sqr_itch (s->size), \
- MPN_TOOM4_SQR_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM6_SQR(function) \
- SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
- mpn_toom6_sqr_itch (s->size), \
- MPN_TOOM6_SQR_MINSIZE)
-
-#define SPEED_ROUTINE_MPN_TOOM8_SQR(function) \
- SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \
- mpn_toom8_sqr_itch (s->size), \
- MPN_TOOM8_SQR_MINSIZE)
-
#define SPEED_ROUTINE_MPN_MOD_CALL(call) \
{ \
unsigned i; \
@@ -1656,47 +1123,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
return speed_endtime (); \
}
-#define SPEED_ROUTINE_MPN_MOD_1_1(function,pfunc) \
- { \
- unsigned i; \
- mp_limb_t inv[4]; \
- \
- SPEED_RESTRICT_COND (s->size >= 2); \
- \
- mpn_mod_1_1p_cps (inv, s->r); \
- speed_operand_src (s, s->xp, s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do { \
- pfunc (inv, s->r); \
- function (s->xp, s->size, s->r << inv[1], inv); \
- } while (--i != 0); \
- \
- return speed_endtime (); \
- }
-#define SPEED_ROUTINE_MPN_MOD_1_N(function,pfunc,N) \
- { \
- unsigned i; \
- mp_limb_t inv[N+3]; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- SPEED_RESTRICT_COND (s->r <= ~(mp_limb_t)0 / N); \
- \
- speed_operand_src (s, s->xp, s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do { \
- pfunc (inv, s->r); \
- function (s->xp, s->size, s->r, inv); \
- } while (--i != 0); \
- \
- return speed_endtime (); \
- }
-
/* A division of 2*s->size by s->size limbs */
@@ -1705,7 +1131,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
unsigned i; \
mp_ptr a, d, q, r; \
double t; \
- gmp_pi1_t dinv; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 1); \
@@ -1725,8 +1150,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
d[s->size-1] |= GMP_NUMB_HIGHBIT; \
a[2*s->size-1] = d[s->size-1] - 1; \
\
- invert_pi1 (dinv, d[s->size-1], d[s->size-2]); \
- \
speed_operand_src (s, a, 2*s->size); \
speed_operand_src (s, d, s->size); \
speed_operand_dst (s, q, s->size+1); \
@@ -1744,522 +1167,52 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
return t; \
}
+#define SPEED_ROUTINE_MPN_DC_DIVREM_N(function) \
+ SPEED_ROUTINE_MPN_DC_DIVREM_CALL((*function) (q, a, d, s->size))
-/* A remainder 2*s->size by s->size limbs */
+#define SPEED_ROUTINE_MPN_DC_DIVREM_SB(function) \
+ SPEED_ROUTINE_MPN_DC_DIVREM_CALL \
+ ((*function) (q, a, 2*s->size, d, s->size))
-#define SPEED_ROUTINE_MPZ_MOD(function) \
- { \
- unsigned i; \
- mpz_t a, d, r; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- mpz_init_set_n (d, s->yp, s->size); \
- \
- /* high part less than d, low part a duplicate copied in */ \
- mpz_init_set_n (a, s->xp, s->size); \
- mpz_mod (a, a, d); \
- mpz_mul_2exp (a, a, GMP_LIMB_BITS * s->size); \
- MPN_COPY (PTR(a), s->xp, s->size); \
- \
- mpz_init (r); \
- \
- speed_operand_src (s, PTR(a), SIZ(a)); \
- speed_operand_src (s, PTR(d), SIZ(d)); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- function (r, a, d); \
- while (--i != 0); \
- return speed_endtime (); \
- }
+#define SPEED_ROUTINE_MPN_DC_TDIV_QR(function) \
+ SPEED_ROUTINE_MPN_DC_DIVREM_CALL \
+ ((*function) (q, r, 0, a, 2*s->size, d, s->size))
-#define SPEED_ROUTINE_MPN_PI1_DIV(function, INV, DMIN, QMIN) \
- { \
- unsigned i; \
- mp_ptr dp, tp, ap, qp; \
- gmp_pi1_t inv; \
- double t; \
- mp_size_t size1; \
- TMP_DECL; \
- \
- size1 = (s->r == 0 ? 2 * s->size : s->r); \
- \
- SPEED_RESTRICT_COND (s->size >= DMIN); \
- SPEED_RESTRICT_COND (size1 - s->size >= QMIN); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (ap, size1, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_wp2); \
- \
- /* we don't fill in dividend completely when size1 > s->size */ \
- MPN_COPY (ap, s->xp, s->size); \
- MPN_COPY (ap + size1 - s->size, s->xp, s->size); \
- \
- MPN_COPY (dp, s->yp, s->size); \
- \
- /* normalize the data */ \
- dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
- ap[size1 - 1] = dp[s->size - 1] - 1; \
- \
- invert_pi1 (inv, dp[s->size-1], dp[s->size-2]); \
- \
- speed_operand_src (s, ap, size1); \
- speed_operand_dst (s, tp, size1); \
- speed_operand_src (s, dp, s->size); \
- speed_operand_dst (s, qp, size1 - s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do { \
- MPN_COPY (tp, ap, size1); \
- function (qp, tp, size1, dp, s->size, INV); \
- } while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-#define SPEED_ROUTINE_MPN_MU_DIV_Q(function,itchfn) \
- { \
- unsigned i; \
- mp_ptr dp, tp, qp, scratch; \
- double t; \
- mp_size_t itch; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 2); \
- \
- itch = itchfn (2 * s->size, s->size, 0); \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
- \
- MPN_COPY (tp, s->xp, s->size); \
- MPN_COPY (tp+s->size, s->xp, s->size); \
- \
- /* normalize the data */ \
- dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
- tp[2*s->size-1] = dp[s->size-1] - 1; \
- \
- speed_operand_dst (s, qp, s->size); \
- speed_operand_src (s, tp, 2 * s->size); \
- speed_operand_src (s, dp, s->size); \
- speed_operand_dst (s, scratch, itch); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do { \
- function (qp, tp, 2 * s->size, dp, s->size, scratch); \
- } while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-#define SPEED_ROUTINE_MPN_MU_DIV_QR(function,itchfn) \
- { \
- unsigned i; \
- mp_ptr dp, tp, qp, rp, scratch; \
- double t; \
- mp_size_t size1, itch; \
- TMP_DECL; \
- \
- size1 = (s->r == 0 ? 2 * s->size : s->r); \
- \
- SPEED_RESTRICT_COND (s->size >= 2); \
- SPEED_RESTRICT_COND (size1 >= s->size); \
- \
- itch = itchfn (size1, s->size, 0); \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
- SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \
- \
- /* we don't fill in dividend completely when size1 > s->size */ \
- MPN_COPY (tp, s->xp, s->size); \
- MPN_COPY (tp + size1 - s->size, s->xp, s->size); \
- \
- MPN_COPY (dp, s->yp, s->size); \
- \
- /* normalize the data */ \
- dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
- tp[size1 - 1] = dp[s->size - 1] - 1; \
- \
- speed_operand_dst (s, qp, size1 - s->size); \
- speed_operand_dst (s, rp, s->size); \
- speed_operand_src (s, tp, size1); \
- speed_operand_src (s, dp, s->size); \
- speed_operand_dst (s, scratch, itch); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do { \
- function (qp, rp, tp, size1, dp, s->size, scratch); \
- } while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-#define SPEED_ROUTINE_MPN_MUPI_DIV_QR(function,itchfn) \
- { \
- unsigned i; \
- mp_ptr dp, tp, qp, rp, ip, scratch, tmp; \
- double t; \
- mp_size_t size1, itch; \
- TMP_DECL; \
- \
- size1 = (s->r == 0 ? 2 * s->size : s->r); \
- \
- SPEED_RESTRICT_COND (s->size >= 2); \
- SPEED_RESTRICT_COND (size1 >= s->size); \
- \
- itch = itchfn (size1, s->size, s->size); \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
- SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \
- SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_wp2); /* alignment? */ \
- \
- /* we don't fill in dividend completely when size1 > s->size */ \
- MPN_COPY (tp, s->xp, s->size); \
- MPN_COPY (tp + size1 - s->size, s->xp, s->size); \
- \
- MPN_COPY (dp, s->yp, s->size); \
- \
- /* normalize the data */ \
- dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
- tp[size1 - 1] = dp[s->size-1] - 1; \
- \
- tmp = TMP_ALLOC_LIMBS (mpn_invert_itch (s->size)); \
- mpn_invert (ip, dp, s->size, tmp); \
- \
- speed_operand_dst (s, qp, size1 - s->size); \
- speed_operand_dst (s, rp, s->size); \
- speed_operand_src (s, tp, size1); \
- speed_operand_src (s, dp, s->size); \
- speed_operand_src (s, ip, s->size); \
- speed_operand_dst (s, scratch, itch); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do { \
- function (qp, rp, tp, size1, dp, s->size, ip, s->size, scratch); \
- } while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-#define SPEED_ROUTINE_MPN_PI1_BDIV_QR(function) \
- { \
- unsigned i; \
- mp_ptr dp, tp, ap, qp; \
- mp_limb_t inv; \
- double t; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2); \
- \
- MPN_COPY (ap, s->xp, s->size); \
- MPN_COPY (ap+s->size, s->xp, s->size); \
- \
- /* divisor must be odd */ \
- MPN_COPY (dp, s->yp, s->size); \
- dp[0] |= 1; \
- binvert_limb (inv, dp[0]); \
- inv = -inv; \
- \
- speed_operand_src (s, ap, 2*s->size); \
- speed_operand_dst (s, tp, 2*s->size); \
- speed_operand_src (s, dp, s->size); \
- speed_operand_dst (s, qp, s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do { \
- MPN_COPY (tp, ap, 2*s->size); \
- function (qp, tp, 2*s->size, dp, s->size, inv); \
- } while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-#define SPEED_ROUTINE_MPN_PI1_BDIV_Q(function) \
- { \
- unsigned i; \
- mp_ptr dp, tp, qp; \
- mp_limb_t inv; \
- double t; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, s->size, s->align_wp2); \
- \
- /* divisor must be odd */ \
- MPN_COPY (dp, s->yp, s->size); \
- dp[0] |= 1; \
- binvert_limb (inv, dp[0]); \
- inv = -inv; \
- \
- speed_operand_src (s, s->xp, s->size); \
- speed_operand_dst (s, tp, s->size); \
- speed_operand_src (s, dp, s->size); \
- speed_operand_dst (s, qp, s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do { \
- MPN_COPY (tp, s->xp, s->size); \
- function (qp, tp, s->size, dp, s->size, inv); \
- } while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-#define SPEED_ROUTINE_MPN_MU_BDIV_Q(function,itchfn) \
- { \
- unsigned i; \
- mp_ptr dp, qp, scratch; \
- double t; \
- mp_size_t itch; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 2); \
- \
- itch = itchfn (s->size, s->size); \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
- \
- /* divisor must be odd */ \
- MPN_COPY (dp, s->yp, s->size); \
- dp[0] |= 1; \
- \
- speed_operand_dst (s, qp, s->size); \
- speed_operand_src (s, s->xp, s->size); \
- speed_operand_src (s, dp, s->size); \
- speed_operand_dst (s, scratch, itch); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do { \
- function (qp, s->xp, s->size, dp, s->size, scratch); \
- } while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-#define SPEED_ROUTINE_MPN_MU_BDIV_QR(function,itchfn) \
+/* A division of s->size by 3 limbs */
+
+#define SPEED_ROUTINE_MPN_SB_DIVREM_M3(function) \
{ \
unsigned i; \
- mp_ptr dp, tp, qp, rp, scratch; \
+ mp_ptr a, d, q; \
+ mp_size_t qsize; \
double t; \
- mp_size_t itch; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 2); \
- \
- itch = itchfn (2 * s->size, s->size); \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \
- SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \
- \
- MPN_COPY (tp, s->xp, s->size); \
- MPN_COPY (tp+s->size, s->xp, s->size); \
- \
- /* divisor must be odd */ \
- MPN_COPY (dp, s->yp, s->size); \
- dp[0] |= 1; \
- \
- speed_operand_dst (s, qp, s->size); \
- speed_operand_dst (s, rp, s->size); \
- speed_operand_src (s, tp, 2 * s->size); \
- speed_operand_src (s, dp, s->size); \
- speed_operand_dst (s, scratch, itch); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do { \
- function (qp, rp, tp, 2 * s->size, dp, s->size, scratch); \
- } while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-
-#define SPEED_ROUTINE_MPN_BROOT(function) \
- { \
- SPEED_RESTRICT_COND (s->r & 1); \
- s->xp[0] |= 1; \
- SPEED_ROUTINE_MPN_UNARY_1_CALL \
- ((*function) (wp, s->xp, s->size, s->r)); \
- }
-
-#define SPEED_ROUTINE_MPN_BROOTINV(function, itch) \
- { \
- mp_ptr wp, tp; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- TMP_MARK; \
- SPEED_RESTRICT_COND (s->size >= 1); \
- SPEED_RESTRICT_COND (s->r & 1); \
- wp = TMP_ALLOC_LIMBS (s->size); \
- tp = TMP_ALLOC_LIMBS ( (itch)); \
- s->xp[0] |= 1; \
- \
- speed_operand_src (s, s->xp, s->size); \
- speed_operand_dst (s, wp, s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- (*function) (wp, s->xp, s->size, s->r, tp); \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-
-#define SPEED_ROUTINE_MPN_INVERT(function,itchfn) \
- { \
- long i; \
- mp_ptr up, tp, ip; \
- double t; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
- \
- MPN_COPY (up, s->xp, s->size); \
- \
- /* normalize the data */ \
- up[s->size-1] |= GMP_NUMB_HIGHBIT; \
- \
- speed_operand_src (s, up, s->size); \
- speed_operand_dst (s, tp, s->size); \
- speed_operand_dst (s, ip, s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- function (ip, up, s->size, tp); \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-
-#define SPEED_ROUTINE_MPN_INVERTAPPR(function,itchfn) \
- { \
- long i; \
- mp_ptr up, tp, ip; \
- double t; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
- \
- MPN_COPY (up, s->xp, s->size); \
- \
- /* normalize the data */ \
- up[s->size-1] |= GMP_NUMB_HIGHBIT; \
- \
- speed_operand_src (s, up, s->size); \
- speed_operand_dst (s, tp, s->size); \
- speed_operand_dst (s, ip, s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- function (ip, up, s->size, tp); \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-
-#define SPEED_ROUTINE_MPN_NI_INVERTAPPR(function,itchfn) \
- { \
- long i; \
- mp_ptr up, tp, ip; \
- double t; \
TMP_DECL; \
\
SPEED_RESTRICT_COND (s->size >= 3); \
\
TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (a, s->size, s->align_xp); \
\
- MPN_COPY (up, s->xp, s->size); \
+ SPEED_TMP_ALLOC_LIMBS (d, 3, s->align_yp); \
+ MPN_COPY (d, s->yp, 3); \
+ d[2] |= GMP_NUMB_HIGHBIT; \
\
- /* normalize the data */ \
- up[s->size-1] |= GMP_NUMB_HIGHBIT; \
+ qsize = s->size - 3; \
+ SPEED_TMP_ALLOC_LIMBS (q, qsize, s->align_wp); \
\
- speed_operand_src (s, up, s->size); \
- speed_operand_dst (s, tp, s->size); \
- speed_operand_dst (s, ip, s->size); \
+ speed_operand_dst (s, a, s->size); \
+ speed_operand_src (s, d, 3); \
+ speed_operand_dst (s, q, qsize); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
- function (ip, up, s->size, tp); \
+ { \
+ MPN_COPY (a, s->xp, s->size); \
+ function (q, a, s->size, d, 3); \
+ } \
while (--i != 0); \
t = speed_endtime (); \
\
@@ -2267,83 +1220,43 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
return t; \
}
-#define SPEED_ROUTINE_MPN_BINVERT(function,itchfn) \
+/* A remainder 2*s->size by s->size limbs */
+
+#define SPEED_ROUTINE_MPZ_MOD(function) \
{ \
- long i; \
- mp_ptr up, tp, ip; \
- double t; \
- TMP_DECL; \
+ unsigned i; \
+ mpz_t a, d, r; \
\
SPEED_RESTRICT_COND (s->size >= 1); \
\
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
+ mpz_init_set_n (d, s->yp, s->size); \
\
- MPN_COPY (up, s->xp, s->size); \
+ /* high part less than d, low part a duplicate copied in */ \
+ mpz_init_set_n (a, s->xp, s->size); \
+ mpz_mod (a, a, d); \
+ mpz_mul_2exp (a, a, BITS_PER_MP_LIMB * s->size); \
+ MPN_COPY (PTR(a), s->xp, s->size); \
\
- /* normalize the data */ \
- up[0] |= 1; \
+ mpz_init (r); \
\
- speed_operand_src (s, up, s->size); \
- speed_operand_dst (s, tp, s->size); \
- speed_operand_dst (s, ip, s->size); \
+ speed_operand_src (s, PTR(a), SIZ(a)); \
+ speed_operand_src (s, PTR(d), SIZ(d)); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
- function (ip, up, s->size, tp); \
+ function (r, a, d); \
while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
+ return speed_endtime (); \
}
-#define SPEED_ROUTINE_MPN_SEC_INVERT(function,itchfn) \
- { \
- long i; \
- mp_ptr up, mp, tp, ip; \
- double t; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \
- \
- speed_operand_src (s, up, s->size); \
- speed_operand_dst (s, tp, s->size); \
- speed_operand_dst (s, ip, s->size); \
- speed_cache_fill (s); \
- \
- MPN_COPY (mp, s->yp, s->size); \
- /* Must be odd */ \
- mp[0] |= 1; \
- speed_starttime (); \
- i = s->reps; \
- do \
- { \
- MPN_COPY (up, s->xp, s->size); \
- function (ip, up, mp, s->size, 2*s->size*GMP_NUMB_BITS, tp); \
- } \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
#define SPEED_ROUTINE_REDC_1(function) \
{ \
unsigned i; \
mp_ptr cp, mp, tp, ap; \
- mp_limb_t inv; \
+ mp_limb_t Nprim; \
double t; \
TMP_DECL; \
\
@@ -2361,8 +1274,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
/* modulus must be odd */ \
MPN_COPY (mp, s->yp, s->size); \
mp[0] |= 1; \
- binvert_limb (inv, mp[0]); \
- inv = -inv; \
+ binvert_limb (Nprim, mp[0]); \
\
speed_operand_src (s, ap, 2*s->size+1); \
speed_operand_dst (s, tp, 2*s->size+1); \
@@ -2374,90 +1286,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
i = s->reps; \
do { \
MPN_COPY (tp, ap, 2*s->size); \
- function (cp, tp, mp, s->size, inv); \
- } while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-#define SPEED_ROUTINE_REDC_2(function) \
- { \
- unsigned i; \
- mp_ptr cp, mp, tp, ap; \
- mp_limb_t invp[2]; \
- double t; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2); \
- \
- MPN_COPY (ap, s->xp, s->size); \
- MPN_COPY (ap+s->size, s->xp, s->size); \
- \
- /* modulus must be odd */ \
- MPN_COPY (mp, s->yp, s->size); \
- mp[0] |= 1; \
- mpn_binvert (invp, mp, 2, tp); \
- invp[0] = -invp[0]; invp[1] = ~invp[1]; \
- \
- speed_operand_src (s, ap, 2*s->size+1); \
- speed_operand_dst (s, tp, 2*s->size+1); \
- speed_operand_src (s, mp, s->size); \
- speed_operand_dst (s, cp, s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do { \
- MPN_COPY (tp, ap, 2*s->size); \
- function (cp, tp, mp, s->size, invp); \
- } while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-#define SPEED_ROUTINE_REDC_N(function) \
- { \
- unsigned i; \
- mp_ptr cp, mp, tp, ap, invp; \
- double t; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size > 8); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \
- SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2); \
- SPEED_TMP_ALLOC_LIMBS (invp, s->size, s->align_wp2); /* align? */ \
- \
- MPN_COPY (ap, s->xp, s->size); \
- MPN_COPY (ap+s->size, s->xp, s->size); \
- \
- /* modulus must be odd */ \
- MPN_COPY (mp, s->yp, s->size); \
- mp[0] |= 1; \
- mpn_binvert (invp, mp, s->size, tp); \
- \
- speed_operand_src (s, ap, 2*s->size+1); \
- speed_operand_dst (s, tp, 2*s->size+1); \
- speed_operand_src (s, mp, s->size); \
- speed_operand_dst (s, cp, s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do { \
- MPN_COPY (tp, ap, 2*s->size); \
- function (cp, tp, mp, s->size, invp); \
+ function (cp, tp, mp, s->size, Nprim); \
} while (--i != 0); \
t = speed_endtime (); \
\
@@ -2805,107 +1634,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
function (px[j-1], py[j-1], 0))
-#define SPEED_ROUTINE_MPN_HGCD_CALL(func, itchfunc) \
- { \
- mp_size_t hgcd_init_itch, hgcd_itch; \
- mp_ptr ap, bp, wp, tmp1; \
- struct hgcd_matrix hgcd; \
- int res; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- \
- if (s->size < 2) \
- return -1; \
- \
- TMP_MARK; \
- \
- SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); \
- \
- s->xp[s->size - 1] |= 1; \
- s->yp[s->size - 1] |= 1; \
- \
- hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \
- hgcd_itch = itchfunc (s->size); \
- \
- SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (wp, hgcd_itch, s->align_wp); \
- \
- speed_operand_src (s, s->xp, s->size); \
- speed_operand_src (s, s->yp, s->size); \
- speed_operand_dst (s, ap, s->size + 1); \
- speed_operand_dst (s, bp, s->size + 1); \
- speed_operand_dst (s, wp, hgcd_itch); \
- speed_operand_dst (s, tmp1, hgcd_init_itch); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- { \
- MPN_COPY (ap, s->xp, s->size); \
- MPN_COPY (bp, s->yp, s->size); \
- mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \
- res = func (ap, bp, s->size, &hgcd, wp); \
- } \
- while (--i != 0); \
- t = speed_endtime (); \
- TMP_FREE; \
- return t; \
- }
-
-#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc) \
- { \
- mp_size_t hgcd_init_itch, hgcd_step_itch; \
- mp_ptr ap, bp, wp, tmp1; \
- struct hgcd_matrix hgcd; \
- mp_size_t p = s->size/2; \
- int res; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- \
- if (s->size < 2) \
- return -1; \
- \
- TMP_MARK; \
- \
- SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); \
- SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); \
- \
- s->xp[s->size - 1] |= 1; \
- s->yp[s->size - 1] |= 1; \
- \
- hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \
- hgcd_step_itch = itchfunc (s->size, p); \
- \
- SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp); \
- \
- speed_operand_src (s, s->xp, s->size); \
- speed_operand_src (s, s->yp, s->size); \
- speed_operand_dst (s, ap, s->size + 1); \
- speed_operand_dst (s, bp, s->size + 1); \
- speed_operand_dst (s, wp, hgcd_step_itch); \
- speed_operand_dst (s, tmp1, hgcd_init_itch); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- { \
- MPN_COPY (ap, s->xp, s->size); \
- MPN_COPY (bp, s->yp, s->size); \
- mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \
- res = func (&hgcd, ap, bp, s->size, p, wp); \
- } \
- while (--i != 0); \
- t = speed_endtime (); \
- TMP_FREE; \
- return t; \
- }
-
/* Run some GCDs of s->size limbs each. The number of different data values
is decreased as s->size**2, since GCD is a quadratic algorithm.
SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
@@ -3169,111 +1897,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
return t; \
}
-#define SPEED_ROUTINE_MPN_DIV_QR_1(function) \
- { \
- mp_ptr wp, xp; \
- mp_limb_t d; \
- mp_limb_t r; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
- \
- d = s->r; \
- if (d == 0) \
- d = 1; \
- speed_operand_src (s, s->xp, s->size); \
- speed_operand_dst (s, wp, s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- r = function (wp, wp+s->size-1, s->xp, s->size, d); \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-
-#define SPEED_ROUTINE_MPN_DIV_QR_1N_PI1(function) \
- { \
- mp_ptr wp, xp; \
- mp_limb_t d, dinv; \
- mp_limb_t r; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 1); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
- \
- d = s->r; \
- /* divisor must be normalized */ \
- SPEED_RESTRICT_COND (d & GMP_NUMB_HIGHBIT); \
- invert_limb (dinv, d); \
- speed_operand_src (s, s->xp, s->size); \
- speed_operand_dst (s, wp, s->size); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- r = function (wp, s->xp, s->size, 0, d, dinv); \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
-
-#define SPEED_ROUTINE_MPN_DIV_QR_2(function, norm) \
- { \
- mp_ptr wp, xp; \
- mp_limb_t yp[2]; \
- mp_limb_t rp[2]; \
- unsigned i; \
- double t; \
- TMP_DECL; \
- \
- SPEED_RESTRICT_COND (s->size >= 2); \
- \
- TMP_MARK; \
- SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
- \
- /* divisor must be normalized */ \
- MPN_COPY (yp, s->yp_block, 2); \
- if (norm) \
- yp[1] |= GMP_NUMB_HIGHBIT; \
- else \
- { \
- yp[1] &= ~GMP_NUMB_HIGHBIT; \
- if (yp[1] == 0) \
- yp[1] = 1; \
- } \
- speed_operand_src (s, s->xp, s->size); \
- speed_operand_src (s, yp, 2); \
- speed_operand_dst (s, wp, s->size); \
- speed_operand_dst (s, rp, 2); \
- speed_cache_fill (s); \
- \
- speed_starttime (); \
- i = s->reps; \
- do \
- function (wp, rp, s->xp, s->size, yp); \
- while (--i != 0); \
- t = speed_endtime (); \
- \
- TMP_FREE; \
- return t; \
- }
#define SPEED_ROUTINE_MODLIMB_INVERT(function) \
{ \
@@ -3402,7 +2025,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
\
speed_operand_src (s, s->xp, s->size); \
speed_operand_dst (s, xp, s->size); \
- speed_operand_dst (s, (mp_ptr) wp, wn/GMP_LIMB_BYTES); \
+ speed_operand_dst (s, (mp_ptr) wp, wn/BYTES_PER_MP_LIMB); \
speed_cache_fill (s); \
\
speed_starttime (); \
@@ -3442,7 +2065,8 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
for (i = 0; i < s->size; i++) \
xp[i] = s->xp[i] % base; \
\
- LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base); \
+ wn = ((mp_size_t) (s->size / __mp_bases[base].chars_per_bit_exactly)) \
+ / BITS_PER_MP_LIMB + 2; \
SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp); \
\
/* use this during development to check wn is big enough */ \
@@ -3450,7 +2074,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
ASSERT_ALWAYS (mpn_set_str (wp, xp, s->size, base) <= wn); \
*/ \
\
- speed_operand_src (s, (mp_ptr) xp, s->size/GMP_LIMB_BYTES); \
+ speed_operand_src (s, (mp_ptr) xp, s->size/BYTES_PER_MP_LIMB); \
speed_operand_dst (s, wp, wn); \
speed_cache_fill (s); \
\
@@ -3466,7 +2090,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
}
-/* Run an accel gcd find_a() function over various data values. A set of
+/* Run an accel gcd find_a() function over various data values. A set of
values is used in case some run particularly fast or slow. The size
parameter is ignored, the amount of data tested is fixed. */
@@ -3602,6 +2226,9 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
}
+#endif
+
+
#define SPEED_ROUTINE_MPN_BACK_TO_BACK(function) \
{ \
unsigned i; \
@@ -3641,6 +2268,3 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
#define SPEED_ROUTINE_MPN_ZERO(function) \
SPEED_ROUTINE_MPN_ZERO_CALL (function (wp, s->size))
-
-
-#endif
diff --git a/gmp/tune/time.c b/gmp/tune/time.c
index 0178b345af..865a92c758 100644
--- a/gmp/tune/time.c
+++ b/gmp/tune/time.c
@@ -1,32 +1,21 @@
-/* Time routines for speed measurements.
+/* Time routines for speed measurements.
-Copyright 1999-2004, 2010-2012 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/* Usage:
@@ -266,7 +255,7 @@ static const int use_stck = 1; /* always use when available */
typedef uint64_t stck_t; /* gcc for s390 is quite new, always has uint64_t */
#define STCK(timestamp) \
do { \
- asm ("stck %0" : "=Q" (timestamp)); \
+ asm ("stck %0" : "=m" (timestamp)); \
} while (0)
#else
static const int have_stck = 0;
@@ -467,22 +456,9 @@ cycles_works_p (void)
if (result != -1)
goto done;
- /* FIXME: On linux, the cycle counter is not saved and restored over
- * context switches, making it almost useless for precise cputime
- * measurements. When available, it's better to use clock_gettime,
- * which seems to have reasonable accuracy (tested on x86_32,
- * linux-2.6.26, glibc-2.7). However, there are also some linux
- * systems where clock_gettime is broken in one way or the other,
- * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or
- * kind-of implemented but broken (needs code to detect that), and
- * on those systems a wall-clock cycle counter is the least bad
- * fallback.
- *
- * So we need some code to disable the cycle counter on some but not
- * all linux systems. */
#ifdef SIGILL
{
- RETSIGTYPE (*old_handler) (int);
+ RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
unsigned cycles[2];
old_handler = signal (SIGILL, cycles_works_handler);
@@ -695,8 +671,8 @@ getrusage_backwards_p (void)
if (speed_option_verbose)
printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
i,
- (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec,
- (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec);
+ prev.ru_utime.tv_sec, prev.ru_utime.tv_usec,
+ next.ru_utime.tv_sec, next.ru_utime.tv_usec);
result = 1;
break;
}
@@ -733,8 +709,6 @@ const int have_cgt_id = 0;
# define CGT_ID (ASSERT_FAIL (CGT_ID not determined), -1)
#endif
-#define CGT_DELAY_COUNT 1000
-
int
cgt_works_p (void)
{
@@ -776,44 +750,6 @@ cgt_works_p (void)
cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
printf ("clock_gettime is %s accurate\n",
unittime_string (cgt_unittime));
-
- if (cgt_unittime < 10e-9)
- {
- /* Do we believe this? */
- struct timespec start, end;
- static volatile int counter;
- double duration;
- if (clock_gettime (CGT_ID, &start))
- {
- if (speed_option_verbose)
- printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
- result = 0;
- return result;
- }
- /* Loop of at least 1000 memory accesses, ought to take at
- least 100 ns*/
- for (counter = 0; counter < CGT_DELAY_COUNT; counter++)
- ;
- if (clock_gettime (CGT_ID, &end))
- {
- if (speed_option_verbose)
- printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
- result = 0;
- return result;
- }
- duration = (end.tv_sec + end.tv_nsec * 1e-9
- - start.tv_sec - start.tv_nsec * 1e-9);
- if (speed_option_verbose)
- printf ("delay loop of %d rounds took %s (according to clock_gettime)\n",
- CGT_DELAY_COUNT, unittime_string (duration));
- if (duration < 100e-9)
- {
- if (speed_option_verbose)
- printf ("clock_gettime id=%d not believable\n", CGT_ID);
- result = 0;
- return result;
- }
- }
result = 1;
return result;
}
@@ -843,7 +779,7 @@ int
mftb_works_p (void)
{
unsigned a[2];
- RETSIGTYPE (*old_handler) (int);
+ RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
double cycletime;
/* suppress a warning about a[] unused */
@@ -1005,7 +941,7 @@ speed_time_init (void)
speed_cycletime_init ();
- if (!speed_option_cycles_broken && have_cycles && cycles_works_p ())
+ if (have_cycles && cycles_works_p ())
{
use_cycles = 1;
DEFAULT (speed_cycletime, 1.0);
@@ -1136,7 +1072,7 @@ speed_time_init (void)
use_cgt = 1;
speed_unittime = cgt_unittime;
DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
- strcpy (speed_time_string, "microsecond accurate clock_gettime()");
+ strcpy (speed_time_string, "microsecond accurate getrusage()");
}
else if (have_times && clk_tck() > 1000000)
{
@@ -1353,7 +1289,7 @@ speed_mftb_diff (const unsigned end[2], const unsigned start[2])
psecs might overflow. 2^32 microseconds is only a bit over an hour, or
2^32 nanoseconds only about 4 seconds.
- The casts to "long" are for the benefit of timebasestruct_t, where the
+ The casts to "long" are for the benefit of timebasestruct_t, where the
fields are only "unsigned int", but we want a signed difference. */
#define DIFF_SECS_ROUTINE(sec, psec, punit) \
diff --git a/gmp/tune/tune-gcd-p.c b/gmp/tune/tune-gcd-p.c
deleted file mode 100644
index 4d52f5610c..0000000000
--- a/gmp/tune/tune-gcd-p.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/* tune-gcd-p
-
- Tune the choice for splitting p in divide-and-conquer gcd.
-
-Copyright 2008, 2010, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define TUNE_GCD_P 1
-
-#include "../mpn/gcd.c"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include "speed.h"
-
-/* Search for minimum over a range. FIXME: Implement golden-section /
- fibonacci search*/
-static int
-search (double *minp, double (*f)(void *, int), void *ctx, int start, int end)
-{
- int x[4];
- double y[4];
-
- int best_i;
-
- x[0] = start;
- x[3] = end;
-
- y[0] = f(ctx, x[0]);
- y[3] = f(ctx, x[3]);
-
- for (;;)
- {
- int i;
- int length = x[3] - x[0];
-
- x[1] = x[0] + length/3;
- x[2] = x[0] + 2*length/3;
-
- y[1] = f(ctx, x[1]);
- y[2] = f(ctx, x[2]);
-
-#if 0
- printf("%d: %f, %d: %f, %d:, %f %d: %f\n",
- x[0], y[0], x[1], y[1], x[2], y[2], x[3], y[3]);
-#endif
- for (best_i = 0, i = 1; i < 4; i++)
- if (y[i] < y[best_i])
- best_i = i;
-
- if (length <= 4)
- break;
-
- if (best_i >= 2)
- {
- x[0] = x[1];
- y[0] = y[1];
- }
- else
- {
- x[3] = x[2];
- y[3] = y[2];
- }
- }
- *minp = y[best_i];
- return x[best_i];
-}
-
-static int
-compare_double(const void *ap, const void *bp)
-{
- double a = * (const double *) ap;
- double b = * (const double *) bp;
-
- if (a < b)
- return -1;
- else if (a > b)
- return 1;
- else
- return 0;
-}
-
-static double
-median (double *v, size_t n)
-{
- qsort(v, n, sizeof(*v), compare_double);
-
- return v[n/2];
-}
-
-#define TIME(res, code) do { \
- double time_measurement[5]; \
- unsigned time_i; \
- \
- for (time_i = 0; time_i < 5; time_i++) \
- { \
- speed_starttime(); \
- code; \
- time_measurement[time_i] = speed_endtime(); \
- } \
- res = median(time_measurement, 5); \
-} while (0)
-
-struct bench_data
-{
- mp_size_t n;
- mp_ptr ap;
- mp_ptr bp;
- mp_ptr up;
- mp_ptr vp;
- mp_ptr gp;
-};
-
-static double
-bench_gcd (void *ctx, int p)
-{
- struct bench_data *data = ctx;
- double t;
-
- p_table[data->n] = p;
- TIME(t, {
- MPN_COPY (data->up, data->ap, data->n);
- MPN_COPY (data->vp, data->bp, data->n);
- mpn_gcd (data->gp, data->up, data->n, data->vp, data->n);
- });
-
- return t;
-}
-
-int
-main(int argc, char **argv)
-{
- gmp_randstate_t rands; struct bench_data data;
- mp_size_t n;
-
- TMP_DECL;
-
- /* Unbuffered so if output is redirected to a file it isn't lost if the
- program is killed part way through. */
- setbuf (stdout, NULL);
- setbuf (stderr, NULL);
-
- gmp_randinit_default (rands);
-
- TMP_MARK;
-
- data.ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
- data.bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
- data.up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
- data.vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
- data.gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
-
- mpn_random (data.ap, P_TABLE_SIZE);
- mpn_random (data.bp, P_TABLE_SIZE);
-
- memset (p_table, 0, sizeof(p_table));
-
- for (n = 100; n < P_TABLE_SIZE; n++)
- {
- mp_size_t p;
- mp_size_t best_p;
- double best_time;
- double lehmer_time;
-
- if (data.ap[n-1] == 0)
- data.ap[n-1] = 1;
-
- if (data.bp[n-1] == 0)
- data.bp[n-1] = 1;
-
- data.n = n;
-
- lehmer_time = bench_gcd (&data, 0);
-
- best_p = search (&best_time, bench_gcd, &data, n/5, 4*n/5);
- if (best_time > lehmer_time)
- best_p = 0;
-
- printf("%6d %6d %5.3g", n, best_p, (double) best_p / n);
- if (best_p > 0)
- {
- double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
- printf(" %5.3g%%", speedup);
- if (speedup < 1.0)
- {
- printf(" (ignored)");
- best_p = 0;
- }
- }
- printf("\n");
-
- p_table[n] = best_p;
- }
- TMP_FREE;
- gmp_randclear(rands);
- return 0;
-}
diff --git a/gmp/tune/tuneup.c b/gmp/tune/tuneup.c
index 2fba6b2955..06cb03c583 100644
--- a/gmp/tune/tuneup.c
+++ b/gmp/tune/tuneup.c
@@ -1,32 +1,21 @@
/* Create tuned thresholds for various algorithms.
-Copyright 1999-2003, 2005, 2006, 2008-2012 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/* Usage: tuneup [-t] [-t] [-p precision]
@@ -75,11 +64,11 @@ see https://www.gnu.org/licenses/. */
instead. #define TUNE_PROGRAM_BUILD does this, with help from code at
the end of gmp-impl.h, and rules in tune/Makefile.am.
- MUL_TOOM22_THRESHOLD for example uses a recompiled mpn_mul_n. The
+ MUL_KARATSUBA_THRESHOLD for example uses a recompiled mpn_mul_n. The
threshold is set to "size+1" to avoid karatsuba, or to "size" to use one
level, but recurse into the basecase.
- MUL_TOOM33_THRESHOLD makes use of the tuned MUL_TOOM22_THRESHOLD value.
+ MUL_TOOM3_THRESHOLD makes use of the tuned MUL_KARATSUBA_THRESHOLD value.
Other routines in turn will make use of both of those. Naturally the
dependants must be tuned first.
@@ -98,7 +87,7 @@ see https://www.gnu.org/licenses/. */
DIVREM_1_NORM_THRESHOLD. An assembler mpn_divrem_1 is expected to be
written and tuned all by hand. Assembler routines that might have hard
limits are recompiled though, to make them accept a bigger range of sizes
- than normal, eg. mpn_sqr_basecase to compare against mpn_toom2_sqr.
+ than normal, eg. mpn_sqr_basecase to compare against mpn_kara_sqr_n.
Limitations:
@@ -153,81 +142,46 @@ int allocdat = 0;
/* This is not defined if mpn_sqr_basecase doesn't declare a limit. In that
case use zero here, which for params.max_size means no limit. */
-#ifndef TUNE_SQR_TOOM2_MAX
-#define TUNE_SQR_TOOM2_MAX 0
+#ifndef TUNE_SQR_KARATSUBA_MAX
+#define TUNE_SQR_KARATSUBA_MAX 0
#endif
-mp_size_t mul_toom22_threshold = MP_SIZE_T_MAX;
-mp_size_t mul_toom33_threshold = MUL_TOOM33_THRESHOLD_LIMIT;
+mp_size_t mul_karatsuba_threshold = MP_SIZE_T_MAX;
+mp_size_t mul_toom3_threshold = MUL_TOOM3_THRESHOLD_LIMIT;
mp_size_t mul_toom44_threshold = MUL_TOOM44_THRESHOLD_LIMIT;
-mp_size_t mul_toom6h_threshold = MUL_TOOM6H_THRESHOLD_LIMIT;
-mp_size_t mul_toom8h_threshold = MUL_TOOM8H_THRESHOLD_LIMIT;
-mp_size_t mul_toom32_to_toom43_threshold = MP_SIZE_T_MAX;
-mp_size_t mul_toom32_to_toom53_threshold = MP_SIZE_T_MAX;
-mp_size_t mul_toom42_to_toom53_threshold = MP_SIZE_T_MAX;
-mp_size_t mul_toom42_to_toom63_threshold = MP_SIZE_T_MAX;
-mp_size_t mul_toom43_to_toom54_threshold = MP_SIZE_T_MAX;
mp_size_t mul_fft_threshold = MP_SIZE_T_MAX;
mp_size_t mul_fft_modf_threshold = MP_SIZE_T_MAX;
mp_size_t sqr_basecase_threshold = MP_SIZE_T_MAX;
-mp_size_t sqr_toom2_threshold
- = (TUNE_SQR_TOOM2_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_TOOM2_MAX);
+mp_size_t sqr_karatsuba_threshold
+ = (TUNE_SQR_KARATSUBA_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_KARATSUBA_MAX);
mp_size_t sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT;
mp_size_t sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT;
-mp_size_t sqr_toom6_threshold = SQR_TOOM6_THRESHOLD_LIMIT;
-mp_size_t sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT;
mp_size_t sqr_fft_threshold = MP_SIZE_T_MAX;
mp_size_t sqr_fft_modf_threshold = MP_SIZE_T_MAX;
-mp_size_t mullo_basecase_threshold = MP_SIZE_T_MAX;
-mp_size_t mullo_dc_threshold = MP_SIZE_T_MAX;
-mp_size_t mullo_mul_n_threshold = MP_SIZE_T_MAX;
-mp_size_t mulmid_toom42_threshold = MP_SIZE_T_MAX;
-mp_size_t mulmod_bnm1_threshold = MP_SIZE_T_MAX;
-mp_size_t sqrmod_bnm1_threshold = MP_SIZE_T_MAX;
-mp_size_t div_qr_2_pi2_threshold = MP_SIZE_T_MAX;
-mp_size_t dc_div_qr_threshold = MP_SIZE_T_MAX;
-mp_size_t dc_divappr_q_threshold = MP_SIZE_T_MAX;
-mp_size_t mu_div_qr_threshold = MP_SIZE_T_MAX;
-mp_size_t mu_divappr_q_threshold = MP_SIZE_T_MAX;
-mp_size_t mupi_div_qr_threshold = MP_SIZE_T_MAX;
-mp_size_t mu_div_q_threshold = MP_SIZE_T_MAX;
-mp_size_t dc_bdiv_qr_threshold = MP_SIZE_T_MAX;
-mp_size_t dc_bdiv_q_threshold = MP_SIZE_T_MAX;
-mp_size_t mu_bdiv_qr_threshold = MP_SIZE_T_MAX;
-mp_size_t mu_bdiv_q_threshold = MP_SIZE_T_MAX;
-mp_size_t inv_mulmod_bnm1_threshold = MP_SIZE_T_MAX;
-mp_size_t inv_newton_threshold = MP_SIZE_T_MAX;
-mp_size_t inv_appr_threshold = MP_SIZE_T_MAX;
-mp_size_t binv_newton_threshold = MP_SIZE_T_MAX;
-mp_size_t redc_1_to_redc_2_threshold = MP_SIZE_T_MAX;
-mp_size_t redc_1_to_redc_n_threshold = MP_SIZE_T_MAX;
-mp_size_t redc_2_to_redc_n_threshold = MP_SIZE_T_MAX;
+mp_size_t mullow_basecase_threshold = MP_SIZE_T_MAX;
+mp_size_t mullow_dc_threshold = MP_SIZE_T_MAX;
+mp_size_t mullow_mul_n_threshold = MP_SIZE_T_MAX;
+mp_size_t div_sb_preinv_threshold = MP_SIZE_T_MAX;
+mp_size_t div_dc_threshold = MP_SIZE_T_MAX;
+mp_size_t powm_threshold = MP_SIZE_T_MAX;
mp_size_t matrix22_strassen_threshold = MP_SIZE_T_MAX;
mp_size_t hgcd_threshold = MP_SIZE_T_MAX;
-mp_size_t hgcd_appr_threshold = MP_SIZE_T_MAX;
-mp_size_t hgcd_reduce_threshold = MP_SIZE_T_MAX;
+mp_size_t gcd_accel_threshold = MP_SIZE_T_MAX;
mp_size_t gcd_dc_threshold = MP_SIZE_T_MAX;
mp_size_t gcdext_dc_threshold = MP_SIZE_T_MAX;
-int div_qr_1n_pi1_method = 0;
-mp_size_t div_qr_1_norm_threshold = MP_SIZE_T_MAX;
-mp_size_t div_qr_1_unnorm_threshold = MP_SIZE_T_MAX;
mp_size_t divrem_1_norm_threshold = MP_SIZE_T_MAX;
mp_size_t divrem_1_unnorm_threshold = MP_SIZE_T_MAX;
mp_size_t mod_1_norm_threshold = MP_SIZE_T_MAX;
mp_size_t mod_1_unnorm_threshold = MP_SIZE_T_MAX;
-int mod_1_1p_method = 0;
-mp_size_t mod_1n_to_mod_1_1_threshold = MP_SIZE_T_MAX;
-mp_size_t mod_1u_to_mod_1_1_threshold = MP_SIZE_T_MAX;
-mp_size_t mod_1_1_to_mod_1_2_threshold = MP_SIZE_T_MAX;
-mp_size_t mod_1_2_to_mod_1_4_threshold = MP_SIZE_T_MAX;
-mp_size_t preinv_mod_1_to_mod_1_threshold = MP_SIZE_T_MAX;
+mp_size_t mod_1_1_threshold = MP_SIZE_T_MAX;
+mp_size_t mod_1_2_threshold = MP_SIZE_T_MAX;
+mp_size_t mod_1_3_threshold = MP_SIZE_T_MAX;
+mp_size_t mod_1_4_threshold = MP_SIZE_T_MAX;
mp_size_t divrem_2_threshold = MP_SIZE_T_MAX;
mp_size_t get_str_dc_threshold = MP_SIZE_T_MAX;
mp_size_t get_str_precompute_threshold = MP_SIZE_T_MAX;
mp_size_t set_str_dc_threshold = MP_SIZE_T_MAX;
mp_size_t set_str_precompute_threshold = MP_SIZE_T_MAX;
-mp_size_t fac_odd_threshold = 0;
-mp_size_t fac_dsc_threshold = FAC_DSC_THRESHOLD_LIMIT;
mp_size_t fft_modf_sqr_threshold = MP_SIZE_T_MAX;
mp_size_t fft_modf_mul_threshold = MP_SIZE_T_MAX;
@@ -236,8 +190,7 @@ struct param_t {
const char *name;
speed_function_t function;
speed_function_t function2;
- double step_factor; /* how much to step relatively */
- int step; /* how much to step absolutely */
+ double step_factor; /* how much to step sizes (rounded down) */
double function_fudge; /* multiplier for "function" speeds */
int stop_since_change;
double stop_factor;
@@ -263,9 +216,6 @@ struct param_t {
#ifndef HAVE_NATIVE_mpn_divexact_1
#define HAVE_NATIVE_mpn_divexact_1 0
#endif
-#ifndef HAVE_NATIVE_mpn_div_qr_1n_pi1
-#define HAVE_NATIVE_mpn_div_qr_1n_pi1 0
-#endif
#ifndef HAVE_NATIVE_mpn_divrem_1
#define HAVE_NATIVE_mpn_divrem_1 0
#endif
@@ -275,9 +225,6 @@ struct param_t {
#ifndef HAVE_NATIVE_mpn_mod_1
#define HAVE_NATIVE_mpn_mod_1 0
#endif
-#ifndef HAVE_NATIVE_mpn_mod_1_1p
-#define HAVE_NATIVE_mpn_mod_1_1p 0
-#endif
#ifndef HAVE_NATIVE_mpn_modexact_1_odd
#define HAVE_NATIVE_mpn_modexact_1_odd 0
#endif
@@ -383,13 +330,12 @@ analyze_dat (int final)
}
-/* Measuring for recompiled mpn/generic/div_qr_1.c,
- * mpn/generic/divrem_1.c, mpn/generic/mod_1.c and mpz/fac_ui.c */
+/* Measuring for recompiled mpn/generic/divrem_1.c and mpn/generic/mod_1.c */
-mp_limb_t mpn_div_qr_1_tune (mp_ptr, mp_limb_t *, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_divrem_1_tune (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
-mp_limb_t mpn_mod_1_tune (mp_srcptr, mp_size_t, mp_limb_t);
-void mpz_fac_ui_tune (mpz_ptr, unsigned long);
+mp_limb_t mpn_divrem_1_tune
+ __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_mod_1_tune
+ __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
double
speed_mpn_mod_1_tune (struct speed_params *s)
@@ -401,16 +347,7 @@ speed_mpn_divrem_1_tune (struct speed_params *s)
{
SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_tune);
}
-double
-speed_mpz_fac_ui_tune (struct speed_params *s)
-{
- SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui_tune);
-}
-double
-speed_mpn_div_qr_1_tune (struct speed_params *s)
-{
- SPEED_ROUTINE_MPN_DIV_QR_1 (mpn_div_qr_1_tune);
-}
+
double
tuneup_measure (speed_function_t fun,
@@ -453,7 +390,7 @@ tuneup_measure (speed_function_t fun,
}
-#define PRINT_WIDTH 31
+#define PRINT_WIDTH 28
void
print_define_start (const char *name)
@@ -477,7 +414,6 @@ print_define_end_remark (const char *name, mp_size_t value, const char *remark)
if (remark != NULL)
printf (" /* %s */", remark);
printf ("\n");
- fflush (stdout);
}
void
@@ -519,7 +455,6 @@ one (mp_size_t *threshold, struct param_t *param)
DEFAULT (param->function_fudge, 1.0);
DEFAULT (param->function2, param->function);
DEFAULT (param->step_factor, 0.01); /* small steps by default */
- DEFAULT (param->step, 1); /* small steps by default */
DEFAULT (param->stop_since_change, 80);
DEFAULT (param->stop_factor, 1.2);
DEFAULT (param->min_size, 10);
@@ -575,10 +510,21 @@ one (mp_size_t *threshold, struct param_t *param)
for (s.size = param->min_size;
s.size < param->max_size;
- s.size += MAX ((mp_size_t) floor (s.size * param->step_factor), param->step))
+ s.size += MAX ((mp_size_t) floor (s.size * param->step_factor), 1))
{
double ti, tiplus1, d;
+ /* If there's a size limit and it's reached then it should still
+ be sensible to analyze the data since we want the threshold put
+ either at or near the limit. */
+ if (s.size >= param->max_size)
+ {
+ if (option_trace)
+ printf ("Reached maximum size (%ld) without otherwise stopping\n",
+ (long) param->max_size);
+ break;
+ }
+
/*
FIXME: check minimum size requirements are met, possibly by just
checking for the -1 returns from the speed functions.
@@ -638,7 +584,7 @@ one (mp_size_t *threshold, struct param_t *param)
}
/* Stop if the threshold implied hasn't changed in a certain
- number of measurements. (It's this condition that usually
+ number of measurements. (It's this condition that usually
stops the loop.) */
if (thresh_idx != new_thresh_idx)
since_thresh_change = 0, thresh_idx = new_thresh_idx;
@@ -713,7 +659,6 @@ struct fft_param_t {
mp_size_t first_size;
mp_size_t max_size;
speed_function_t function;
- speed_function_t mul_modf_function;
speed_function_t mul_function;
mp_size_t sqr;
};
@@ -728,7 +673,7 @@ fft_step_size (int k)
{
mp_size_t step;
- step = MAX ((mp_size_t) 1 << (k-1), GMP_LIMB_BITS) / GMP_LIMB_BITS;
+ step = MAX ((mp_size_t) 1 << (k-1), BITS_PER_MP_LIMB) / BITS_PER_MP_LIMB;
step *= (mp_size_t) 1 << k;
if (step <= 0)
@@ -754,435 +699,126 @@ fft_next_size (mp_size_t pl, int k)
return pl;
}
-#define NMAX_DEFAULT 1000000
-#define MAX_REPS 25
-#define MIN_REPS 5
-
-static inline size_t
-mpn_mul_fft_lcm (size_t a, unsigned int k)
-{
- unsigned int l = k;
-
- while (a % 2 == 0 && k > 0)
- {
- a >>= 1;
- k--;
- }
- return a << l;
-}
-
-mp_size_t
-fftfill (mp_size_t pl, int k, int sqr)
-{
- mp_size_t maxLK;
- mp_bitcnt_t N, Nprime, nprime, M;
-
- N = pl * GMP_NUMB_BITS;
- M = N >> k;
-
- maxLK = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k);
-
- Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK;
- nprime = Nprime / GMP_NUMB_BITS;
- if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
- {
- size_t K2;
- for (;;)
- {
- K2 = 1L << mpn_fft_best_k (nprime, sqr);
- if ((nprime & (K2 - 1)) == 0)
- break;
- nprime = (nprime + K2 - 1) & -K2;
- Nprime = nprime * GMP_LIMB_BITS;
- }
- }
- ASSERT_ALWAYS (nprime < pl);
-
- return Nprime;
-}
-
-static int
-compare_double (const void *ap, const void *bp)
-{
- double a = * (const double *) ap;
- double b = * (const double *) bp;
-
- if (a < b)
- return -1;
- else if (a > b)
- return 1;
- else
- return 0;
-}
-
-double
-median (double *times, int n)
-{
- qsort (times, n, sizeof (double), compare_double);
- return times[n/2];
-}
-
-#define FFT_CACHE_SIZE 25
-typedef struct fft_cache
-{
- mp_size_t n;
- double time;
-} fft_cache_t;
-
-fft_cache_t fft_cache[FFT_CACHE_SIZE];
-
-double
-cached_measure (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n, int k,
- int n_measurements)
-{
- int i;
- double t, ttab[MAX_REPS];
-
- if (fft_cache[k].n == n)
- return fft_cache[k].time;
-
- for (i = 0; i < n_measurements; i++)
- {
- speed_starttime ();
- mpn_mul_fft (rp, n, ap, n, bp, n, k);
- ttab[i] = speed_endtime ();
- }
-
- t = median (ttab, n_measurements);
- fft_cache[k].n = n;
- fft_cache[k].time = t;
- return t;
-}
-
-#define INSERT_FFTTAB(idx, nval, kval) \
- do { \
- fft_tab[idx].n = nval; \
- fft_tab[idx].k = kval; \
- fft_tab[idx+1].n = -1; /* sentinel */ \
- fft_tab[idx+1].k = -1; \
- } while (0)
-
-int
-fftmes (mp_size_t nmin, mp_size_t nmax, int initial_k, struct fft_param_t *p, int idx, int print)
-{
- mp_size_t n, n1, prev_n1;
- int k, best_k, last_best_k, kmax;
- int eff, prev_eff;
- double t0, t1;
- int n_measurements;
- mp_limb_t *ap, *bp, *rp;
- mp_size_t alloc;
- char *linepref;
- struct fft_table_nk *fft_tab;
-
- fft_tab = mpn_fft_table3[p->sqr];
-
- for (k = 0; k < FFT_CACHE_SIZE; k++)
- fft_cache[k].n = 0;
-
- if (nmin < (p->sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
- {
- nmin = (p->sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD);
- }
-
- if (print)
- printf ("#define %s%*s", p->table_name, 38, "");
-
- if (idx == 0)
- {
- INSERT_FFTTAB (0, nmin, initial_k);
-
- if (print)
- {
- printf ("\\\n { ");
- printf ("{%7u,%2u}", fft_tab[0].n, fft_tab[0].k);
- linepref = " ";
- }
-
- idx = 1;
- }
-
- ap = malloc (sizeof (mp_limb_t));
- if (p->sqr)
- bp = ap;
- else
- bp = malloc (sizeof (mp_limb_t));
- rp = malloc (sizeof (mp_limb_t));
- alloc = 1;
-
- /* Round n to comply to initial k value */
- n = (nmin + ((1ul << initial_k) - 1)) & (MP_SIZE_T_MAX << initial_k);
-
- n_measurements = (18 - initial_k) | 1;
- n_measurements = MAX (n_measurements, MIN_REPS);
- n_measurements = MIN (n_measurements, MAX_REPS);
-
- last_best_k = initial_k;
- best_k = initial_k;
-
- while (n < nmax)
- {
- int start_k, end_k;
-
- /* Assume the current best k is best until we hit its next FFT step. */
- t0 = 99999;
-
- prev_n1 = n + 1;
-
- start_k = MAX (4, best_k - 4);
- end_k = MIN (24, best_k + 4);
- for (k = start_k; k <= end_k; k++)
- {
- n1 = mpn_fft_next_size (prev_n1, k);
-
- eff = 200 * (n1 * GMP_NUMB_BITS >> k) / fftfill (n1, k, p->sqr);
-
- if (eff < 70) /* avoid measuring too slow fft:s */
- continue;
-
- if (n1 > alloc)
- {
- alloc = n1;
- if (p->sqr)
- {
- ap = realloc (ap, sizeof (mp_limb_t));
- rp = realloc (rp, sizeof (mp_limb_t));
- ap = bp = realloc (ap, alloc * sizeof (mp_limb_t));
- mpn_random (ap, alloc);
- rp = realloc (rp, alloc * sizeof (mp_limb_t));
- }
- else
- {
- ap = realloc (ap, sizeof (mp_limb_t));
- bp = realloc (bp, sizeof (mp_limb_t));
- rp = realloc (rp, sizeof (mp_limb_t));
- ap = realloc (ap, alloc * sizeof (mp_limb_t));
- mpn_random (ap, alloc);
- bp = realloc (bp, alloc * sizeof (mp_limb_t));
- mpn_random (bp, alloc);
- rp = realloc (rp, alloc * sizeof (mp_limb_t));
- }
- }
-
- t1 = cached_measure (rp, ap, bp, n1, k, n_measurements);
-
- if (t1 * n_measurements > 0.3)
- n_measurements -= 2;
- n_measurements = MAX (n_measurements, MIN_REPS);
-
- if (t1 < t0)
- {
- best_k = k;
- t0 = t1;
- }
- }
-
- n1 = mpn_fft_next_size (prev_n1, best_k);
-
- if (last_best_k != best_k)
- {
- ASSERT_ALWAYS ((prev_n1 & ((1ul << last_best_k) - 1)) == 1);
-
- if (idx >= FFT_TABLE3_SIZE)
- {
- printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
- abort ();
- }
- INSERT_FFTTAB (idx, prev_n1 >> last_best_k, best_k);
-
- if (print)
- {
- printf (", ");
- if (idx % 4 == 0)
- printf ("\\\n ");
- printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
- }
-
- if (option_trace >= 2)
- {
- printf ("{%lu,%u}\n", prev_n1, best_k);
- fflush (stdout);
- }
-
- last_best_k = best_k;
- idx++;
- }
-
- for (;;)
- {
- prev_n1 = n1;
- prev_eff = fftfill (prev_n1, best_k, p->sqr);
- n1 = mpn_fft_next_size (prev_n1 + 1, best_k);
- eff = fftfill (n1, best_k, p->sqr);
-
- if (eff != prev_eff)
- break;
- }
-
- n = prev_n1;
- }
-
- kmax = sizeof (mp_size_t) * 4; /* GMP_MP_SIZE_T_BITS / 2 */
- kmax = MIN (kmax, 25-1);
- for (k = last_best_k + 1; k <= kmax; k++)
- {
- if (idx >= FFT_TABLE3_SIZE)
- {
- printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n");
- abort ();
- }
- INSERT_FFTTAB (idx, ((1ul << (2*k-2)) + 1) >> (k-1), k);
-
- if (print)
- {
- printf (", ");
- if (idx % 4 == 0)
- printf ("\\\n ");
- printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k);
- }
-
- idx++;
- }
-
- if (print)
- printf (" }\n");
-
- free (ap);
- if (! p->sqr)
- free (bp);
- free (rp);
-
- return idx;
-}
-
void
fft (struct fft_param_t *p)
{
mp_size_t size;
- int k, idx, initial_k;
-
- /*** Generate MUL_FFT_MODF_THRESHOLD / SQR_FFT_MODF_THRESHOLD ***/
+ int i, k;
-#if 1
- {
- /* Use plain one() mechanism, for some reasonable initial values of k. The
- advantage is that we don't depend on mpn_fft_table3, which can therefore
- leave it completely uninitialized. */
+ for (i = 0; i < numberof (mpn_fft_table[p->sqr]); i++)
+ mpn_fft_table[p->sqr][i] = MP_SIZE_T_MAX;
- static struct param_t param;
- mp_size_t thres, best_thres;
- int best_k;
- char buf[20];
+ *p->p_threshold = MP_SIZE_T_MAX;
+ *p->p_modf_threshold = MP_SIZE_T_MAX;
- best_thres = MP_SIZE_T_MAX;
- best_k = -1;
+ option_trace = MAX (option_trace, option_fft_trace);
- for (k = 5; k <= 7; k++)
- {
- param.name = p->modf_threshold_name;
- param.min_size = 100;
- param.max_size = 2000;
- param.function = p->mul_function;
- param.step_factor = 0.0;
- param.step = 4;
- param.function2 = p->mul_modf_function;
- param.noprint = 1;
- s.r = k;
- one (&thres, &param);
- if (thres < best_thres)
- {
- best_thres = thres;
- best_k = k;
- }
- }
+ printf ("#define %s {", p->table_name);
+ if (option_trace >= 2)
+ printf ("\n");
- *(p->p_modf_threshold) = best_thres;
- sprintf (buf, "k = %d", best_k);
- print_define_remark (p->modf_threshold_name, best_thres, buf);
- initial_k = best_k;
- }
-#else
+ k = FFT_FIRST_K;
size = p->first_size;
for (;;)
{
- double tk, tm;
+ double tk, tk1;
- size = mpn_fft_next_size (size+1, mpn_fft_best_k (size+1, p->sqr));
- k = mpn_fft_best_k (size, p->sqr);
+ size = fft_next_size (size+1, k+1);
if (size >= p->max_size)
break;
+ if (k >= FFT_FIRST_K + numberof (mpn_fft_table[p->sqr]))
+ break;
- s.size = size + fft_step_size (k) / 2;
+ /* compare k to k+1 in the middle of the current k+1 step */
+ s.size = size + fft_step_size (k+1) / 2;
s.r = k;
- tk = tuneup_measure (p->mul_modf_function, NULL, &s);
+ tk = tuneup_measure (p->function, NULL, &s);
if (tk == -1.0)
abort ();
- tm = tuneup_measure (p->mul_function, NULL, &s);
- if (tm == -1.0)
+ s.r = k+1;
+ tk1 = tuneup_measure (p->function, NULL, &s);
+ if (tk1 == -1.0)
abort ();
if (option_trace >= 2)
- printf ("at %ld size=%ld k=%d %.9f size=%ld modf %.9f\n",
- (long) size,
- (long) size + fft_step_size (k) / 2, k, tk,
- (long) s.size, tm);
+ printf ("at %ld size=%ld k=%d %.9f k=%d %.9f\n",
+ (long) size, (long) s.size, k, tk, k+1, tk1);
- if (tk < tm)
+ /* declare the k+1 threshold as soon as it's faster at its midpoint */
+ if (tk1 < tk)
{
- *p->p_modf_threshold = s.size;
- print_define (p->modf_threshold_name, *p->p_modf_threshold);
- break;
+ mpn_fft_table[p->sqr][k-FFT_FIRST_K] = s.size;
+ printf (" %ld,", (long) s.size);
+ if (option_trace >= 2) printf ("\n");
+ k++;
}
}
- initial_k = ?;
-#endif
- /*** Generate MUL_FFT_TABLE3 / SQR_FFT_TABLE3 ***/
+ mpn_fft_table[p->sqr][k-FFT_FIRST_K] = 0;
+ printf (" 0 }\n");
- idx = fftmes (*p->p_modf_threshold, p->max_size, initial_k, p, 0, 1);
- printf ("#define %s_SIZE %d\n", p->table_name, idx);
- /*** Generate MUL_FFT_THRESHOLD / SQR_FFT_THRESHOLD ***/
+ size = p->first_size;
- size = 2 * *p->p_modf_threshold; /* OK? */
+ /* Declare an FFT faster than a plain toom4 etc multiplication found as
+ soon as one faster measurement obtained. A multiplication in the
+ middle of the FFT step is tested. */
for (;;)
{
+ int modf = (*p->p_modf_threshold == MP_SIZE_T_MAX);
double tk, tm;
- mp_size_t mulmod_size, mul_size;;
+
+ /* k=7 should be the first FFT which can beat toom4 on a full
+ multiply, so jump to that threshold and save some probing after the
+ modf threshold is found. */
+ if (!modf && size < mpn_fft_table[p->sqr][2])
+ {
+ size = mpn_fft_table[p->sqr][2];
+ if (option_trace >= 2)
+ printf ("jump to size=%ld\n", (long) size);
+ }
+
+ size = fft_next_size (size+1, mpn_fft_best_k (size, p->sqr));
+ k = mpn_fft_best_k (size, p->sqr);
if (size >= p->max_size)
break;
- mulmod_size = mpn_mulmod_bnm1_next_size (2 * (size + 1)) / 2;
- mul_size = (size + mulmod_size) / 2; /* middle of step */
-
- s.size = mulmod_size;
+ s.size = size + fft_step_size (k) / 2;
+ s.r = k;
tk = tuneup_measure (p->function, NULL, &s);
if (tk == -1.0)
abort ();
- s.size = mul_size;
+ if (!modf) s.size /= 2;
tm = tuneup_measure (p->mul_function, NULL, &s);
if (tm == -1.0)
abort ();
if (option_trace >= 2)
- printf ("at %ld size=%ld %.9f size=%ld mul %.9f\n",
+ printf ("at %ld size=%ld k=%d %.9f size=%ld %s mul %.9f\n",
(long) size,
- (long) mulmod_size, tk,
- (long) mul_size, tm);
-
- size = mulmod_size;
+ (long) size + fft_step_size (k) / 2, k, tk,
+ (long) s.size, modf ? "modf" : "full", tm);
if (tk < tm)
{
- *p->p_threshold = s.size;
- print_define (p->threshold_name, *p->p_threshold);
- break;
+ if (modf)
+ {
+ *p->p_modf_threshold = s.size;
+ print_define (p->modf_threshold_name, *p->p_modf_threshold);
+ }
+ else
+ {
+ *p->p_threshold = s.size;
+ print_define (p->threshold_name, *p->p_threshold);
+ break;
+ }
}
}
+
}
@@ -1190,232 +826,58 @@ fft (struct fft_param_t *p)
/* Start karatsuba from 4, since the Cray t90 ieee code is much faster at 2,
giving wrong results. */
void
-tune_mul_n (void)
+tune_mul (void)
{
static struct param_t param;
- mp_size_t next_toom_start;
- int something_changed;
param.function = speed_mpn_mul_n;
- param.name = "MUL_TOOM22_THRESHOLD";
- param.min_size = MAX (4, MPN_TOOM22_MUL_MINSIZE);
- param.max_size = MUL_TOOM22_THRESHOLD_LIMIT-1;
- one (&mul_toom22_threshold, &param);
+ param.name = "MUL_KARATSUBA_THRESHOLD";
+ param.min_size = MAX (4, MPN_KARA_MUL_N_MINSIZE);
+ param.max_size = MUL_KARATSUBA_THRESHOLD_LIMIT-1;
+ one (&mul_karatsuba_threshold, &param);
- param.noprint = 1;
-
- /* Threshold sequence loop. Disable functions that would be used in a very
- narrow range, re-measuring things when that happens. */
- something_changed = 1;
- while (something_changed)
- {
- something_changed = 0;
-
- next_toom_start = mul_toom22_threshold;
-
- if (mul_toom33_threshold != 0)
- {
- param.name = "MUL_TOOM33_THRESHOLD";
- param.min_size = MAX (next_toom_start, MPN_TOOM33_MUL_MINSIZE);
- param.max_size = MUL_TOOM33_THRESHOLD_LIMIT-1;
- one (&mul_toom33_threshold, &param);
-
- if (next_toom_start * 1.05 >= mul_toom33_threshold)
- {
- mul_toom33_threshold = 0;
- something_changed = 1;
- }
- }
-
- next_toom_start = MAX (next_toom_start, mul_toom33_threshold);
-
- if (mul_toom44_threshold != 0)
- {
- param.name = "MUL_TOOM44_THRESHOLD";
- param.min_size = MAX (next_toom_start, MPN_TOOM44_MUL_MINSIZE);
- param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
- one (&mul_toom44_threshold, &param);
-
- if (next_toom_start * 1.05 >= mul_toom44_threshold)
- {
- mul_toom44_threshold = 0;
- something_changed = 1;
- }
- }
-
- next_toom_start = MAX (next_toom_start, mul_toom44_threshold);
-
- if (mul_toom6h_threshold != 0)
- {
- param.name = "MUL_TOOM6H_THRESHOLD";
- param.min_size = MAX (next_toom_start, MPN_TOOM6H_MUL_MINSIZE);
- param.max_size = MUL_TOOM6H_THRESHOLD_LIMIT-1;
- one (&mul_toom6h_threshold, &param);
-
- if (next_toom_start * 1.05 >= mul_toom6h_threshold)
- {
- mul_toom6h_threshold = 0;
- something_changed = 1;
- }
- }
-
- next_toom_start = MAX (next_toom_start, mul_toom6h_threshold);
-
- if (mul_toom8h_threshold != 0)
- {
- param.name = "MUL_TOOM8H_THRESHOLD";
- param.min_size = MAX (next_toom_start, MPN_TOOM8H_MUL_MINSIZE);
- param.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1;
- one (&mul_toom8h_threshold, &param);
-
- if (next_toom_start * 1.05 >= mul_toom8h_threshold)
- {
- mul_toom8h_threshold = 0;
- something_changed = 1;
- }
- }
- }
+ param.name = "MUL_TOOM3_THRESHOLD";
+ param.min_size = MAX (mul_karatsuba_threshold, MPN_TOOM3_MUL_N_MINSIZE);
+ param.max_size = MUL_TOOM3_THRESHOLD_LIMIT-1;
+ one (&mul_toom3_threshold, &param);
- print_define ("MUL_TOOM33_THRESHOLD", MUL_TOOM33_THRESHOLD);
- print_define ("MUL_TOOM44_THRESHOLD", MUL_TOOM44_THRESHOLD);
- print_define ("MUL_TOOM6H_THRESHOLD", MUL_TOOM6H_THRESHOLD);
- print_define ("MUL_TOOM8H_THRESHOLD", MUL_TOOM8H_THRESHOLD);
+ param.name = "MUL_TOOM44_THRESHOLD";
+ param.min_size = MAX (mul_toom3_threshold, MPN_TOOM44_MUL_N_MINSIZE);
+ param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
+ one (&mul_toom44_threshold, &param);
/* disabled until tuned */
MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
}
-void
-tune_mul (void)
-{
- static struct param_t param;
- mp_size_t thres;
-
- param.noprint = 1;
-
- param.function = speed_mpn_toom32_for_toom43_mul;
- param.function2 = speed_mpn_toom43_for_toom32_mul;
- param.name = "MUL_TOOM32_TO_TOOM43_THRESHOLD";
- param.min_size = MPN_TOOM43_MUL_MINSIZE * 24 / 17;
- one (&thres, &param);
- mul_toom32_to_toom43_threshold = thres * 17 / 24;
- print_define ("MUL_TOOM32_TO_TOOM43_THRESHOLD", mul_toom32_to_toom43_threshold);
-
- param.function = speed_mpn_toom32_for_toom53_mul;
- param.function2 = speed_mpn_toom53_for_toom32_mul;
- param.name = "MUL_TOOM32_TO_TOOM53_THRESHOLD";
- param.min_size = MPN_TOOM53_MUL_MINSIZE * 30 / 19;
- one (&thres, &param);
- mul_toom32_to_toom53_threshold = thres * 19 / 30;
- print_define ("MUL_TOOM32_TO_TOOM53_THRESHOLD", mul_toom32_to_toom53_threshold);
-
- param.function = speed_mpn_toom42_for_toom53_mul;
- param.function2 = speed_mpn_toom53_for_toom42_mul;
- param.name = "MUL_TOOM42_TO_TOOM53_THRESHOLD";
- param.min_size = MPN_TOOM53_MUL_MINSIZE * 20 / 11;
- one (&thres, &param);
- mul_toom42_to_toom53_threshold = thres * 11 / 20;
- print_define ("MUL_TOOM42_TO_TOOM53_THRESHOLD", mul_toom42_to_toom53_threshold);
-
- param.function = speed_mpn_toom42_mul;
- param.function2 = speed_mpn_toom63_mul;
- param.name = "MUL_TOOM42_TO_TOOM63_THRESHOLD";
- param.min_size = MPN_TOOM63_MUL_MINSIZE * 2;
- one (&thres, &param);
- mul_toom42_to_toom63_threshold = thres / 2;
- print_define ("MUL_TOOM42_TO_TOOM63_THRESHOLD", mul_toom42_to_toom63_threshold);
-
- /* Use ratio 5/6 when measuring, the middle of the range 2/3 to 1. */
- param.function = speed_mpn_toom43_for_toom54_mul;
- param.function2 = speed_mpn_toom54_for_toom43_mul;
- param.name = "MUL_TOOM43_TO_TOOM54_THRESHOLD";
- param.min_size = MPN_TOOM54_MUL_MINSIZE * 6 / 5;
- one (&thres, &param);
- mul_toom43_to_toom54_threshold = thres * 5 / 6;
- print_define ("MUL_TOOM43_TO_TOOM54_THRESHOLD", mul_toom43_to_toom54_threshold);
-}
-
+/* This was written by the tuneup challenged tege. Kevin, please delete
+ this comment when you've reviewed/rewritten this. :-) */
void
-tune_mullo (void)
+tune_mullow (void)
{
static struct param_t param;
- param.function = speed_mpn_mullo_n;
+ param.function = speed_mpn_mullow_n;
- param.name = "MULLO_BASECASE_THRESHOLD";
- param.min_size = 1;
+ param.name = "MULLOW_BASECASE_THRESHOLD";
+ param.min_size = 3;
param.min_is_always = 1;
- param.max_size = MULLO_BASECASE_THRESHOLD_LIMIT-1;
- param.stop_factor = 1.5;
- param.noprint = 1;
- one (&mullo_basecase_threshold, &param);
-
- param.name = "MULLO_DC_THRESHOLD";
- param.min_size = 8;
- param.min_is_always = 0;
- param.max_size = 1000;
- one (&mullo_dc_threshold, &param);
+ param.max_size = MULLOW_BASECASE_THRESHOLD_LIMIT-1;
+ one (&mullow_basecase_threshold, &param);
- if (mullo_basecase_threshold >= mullo_dc_threshold)
- {
- print_define ("MULLO_BASECASE_THRESHOLD", mullo_dc_threshold);
- print_define_remark ("MULLO_DC_THRESHOLD", 0, "never mpn_mullo_basecase");
- }
- else
- {
- print_define ("MULLO_BASECASE_THRESHOLD", mullo_basecase_threshold);
- print_define ("MULLO_DC_THRESHOLD", mullo_dc_threshold);
- }
-
-#if WANT_FFT
- param.name = "MULLO_MUL_N_THRESHOLD";
- param.min_size = mullo_dc_threshold;
- param.max_size = 2 * mul_fft_threshold;
- param.noprint = 0;
- param.step_factor = 0.03;
- one (&mullo_mul_n_threshold, &param);
-#else
- print_define_remark ("MULLO_MUL_N_THRESHOLD", MP_SIZE_T_MAX,
- "without FFT use mullo forever");
-#endif
-}
-
-void
-tune_mulmid (void)
-{
- static struct param_t param;
+ param.min_is_always = 0; /* ??? */
- param.name = "MULMID_TOOM42_THRESHOLD";
- param.function = speed_mpn_mulmid_n;
- param.min_size = 4;
- param.max_size = 100;
- one (&mulmid_toom42_threshold, &param);
-}
-
-void
-tune_mulmod_bnm1 (void)
-{
- static struct param_t param;
-
- param.name = "MULMOD_BNM1_THRESHOLD";
- param.function = speed_mpn_mulmod_bnm1;
- param.min_size = 4;
- param.max_size = 100;
- one (&mulmod_bnm1_threshold, &param);
-}
-
-void
-tune_sqrmod_bnm1 (void)
-{
- static struct param_t param;
+ param.name = "MULLOW_DC_THRESHOLD";
+ param.min_size = mul_karatsuba_threshold;
+ param.max_size = 1000;
+ one (&mullow_dc_threshold, &param);
- param.name = "SQRMOD_BNM1_THRESHOLD";
- param.function = speed_mpn_sqrmod_bnm1;
- param.min_size = 4;
- param.max_size = 100;
- one (&sqrmod_bnm1_threshold, &param);
+ param.name = "MULLOW_MUL_N_THRESHOLD";
+ param.min_size = mullow_dc_threshold;
+ param.max_size = 2000;
+ one (&mullow_mul_n_threshold, &param);
}
@@ -1438,345 +900,130 @@ tune_sqr (void)
{
static struct param_t param;
param.name = "SQR_BASECASE_THRESHOLD";
- param.function = speed_mpn_sqr;
+ param.function = speed_mpn_sqr_n;
param.min_size = 3;
param.min_is_always = 1;
- param.max_size = TUNE_SQR_TOOM2_MAX;
+ param.max_size = TUNE_SQR_KARATSUBA_MAX;
param.noprint = 1;
one (&sqr_basecase_threshold, &param);
}
{
static struct param_t param;
- param.name = "SQR_TOOM2_THRESHOLD";
- param.function = speed_mpn_sqr;
- param.min_size = MAX (4, MPN_TOOM2_SQR_MINSIZE);
- param.max_size = TUNE_SQR_TOOM2_MAX;
+ param.name = "SQR_KARATSUBA_THRESHOLD";
+ param.function = speed_mpn_sqr_n;
+ param.min_size = MAX (4, MPN_KARA_SQR_N_MINSIZE);
+ param.max_size = TUNE_SQR_KARATSUBA_MAX;
param.noprint = 1;
- one (&sqr_toom2_threshold, &param);
+ one (&sqr_karatsuba_threshold, &param);
if (! HAVE_NATIVE_mpn_sqr_basecase
- && sqr_toom2_threshold < sqr_basecase_threshold)
+ && sqr_karatsuba_threshold < sqr_basecase_threshold)
{
/* Karatsuba becomes faster than mul_basecase before
sqr_basecase does. Arrange for the expression
- "BELOW_THRESHOLD (un, SQR_TOOM2_THRESHOLD))" which
- selects mpn_sqr_basecase in mpn_sqr to be false, by setting
- SQR_TOOM2_THRESHOLD to zero, making
- SQR_BASECASE_THRESHOLD the toom2 threshold. */
+ "BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD))" which
+ selects mpn_sqr_basecase in mpn_sqr_n to be false, by setting
+ SQR_KARATSUBA_THRESHOLD to zero, making
+ SQR_BASECASE_THRESHOLD the karatsuba threshold. */
- sqr_basecase_threshold = SQR_TOOM2_THRESHOLD;
- SQR_TOOM2_THRESHOLD = 0;
+ sqr_basecase_threshold = SQR_KARATSUBA_THRESHOLD;
+ SQR_KARATSUBA_THRESHOLD = 0;
print_define_remark ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold,
- "toom2");
- print_define_remark ("SQR_TOOM2_THRESHOLD",SQR_TOOM2_THRESHOLD,
+ "karatsuba");
+ print_define_remark ("SQR_KARATSUBA_THRESHOLD",SQR_KARATSUBA_THRESHOLD,
"never sqr_basecase");
}
else
{
if (! HAVE_NATIVE_mpn_sqr_basecase)
print_define ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold);
- print_define ("SQR_TOOM2_THRESHOLD", SQR_TOOM2_THRESHOLD);
+ print_define ("SQR_KARATSUBA_THRESHOLD", SQR_KARATSUBA_THRESHOLD);
}
}
{
static struct param_t param;
- mp_size_t next_toom_start;
- int something_changed;
+ mp_size_t toom3_start = MAX (sqr_karatsuba_threshold, sqr_basecase_threshold);
- param.function = speed_mpn_sqr;
- param.noprint = 1;
+ param.function = speed_mpn_sqr_n;
- /* Threshold sequence loop. Disable functions that would be used in a very
- narrow range, re-measuring things when that happens. */
- something_changed = 1;
- while (something_changed)
- {
- something_changed = 0;
-
- next_toom_start = MAX (sqr_toom2_threshold, sqr_basecase_threshold);
-
- sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT;
- param.name = "SQR_TOOM3_THRESHOLD";
- param.min_size = MAX (next_toom_start, MPN_TOOM3_SQR_MINSIZE);
- param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1;
- one (&sqr_toom3_threshold, &param);
-
- next_toom_start = MAX (next_toom_start, sqr_toom3_threshold);
-
- if (sqr_toom4_threshold != 0)
- {
- param.name = "SQR_TOOM4_THRESHOLD";
- sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT;
- param.min_size = MAX (next_toom_start, MPN_TOOM4_SQR_MINSIZE);
- param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
- one (&sqr_toom4_threshold, &param);
-
- if (next_toom_start * 1.05 >= sqr_toom4_threshold)
- {
- sqr_toom4_threshold = 0;
- something_changed = 1;
- }
- }
-
- next_toom_start = MAX (next_toom_start, sqr_toom4_threshold);
-
- if (sqr_toom6_threshold != 0)
- {
- param.name = "SQR_TOOM6_THRESHOLD";
- sqr_toom6_threshold = SQR_TOOM6_THRESHOLD_LIMIT;
- param.min_size = MAX (next_toom_start, MPN_TOOM6_SQR_MINSIZE);
- param.max_size = SQR_TOOM6_THRESHOLD_LIMIT-1;
- one (&sqr_toom6_threshold, &param);
-
- if (next_toom_start * 1.05 >= sqr_toom6_threshold)
- {
- sqr_toom6_threshold = 0;
- something_changed = 1;
- }
- }
-
- next_toom_start = MAX (next_toom_start, sqr_toom6_threshold);
-
- if (sqr_toom8_threshold != 0)
- {
- param.name = "SQR_TOOM8_THRESHOLD";
- sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT;
- param.min_size = MAX (next_toom_start, MPN_TOOM8_SQR_MINSIZE);
- param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1;
- one (&sqr_toom8_threshold, &param);
-
- if (next_toom_start * 1.05 >= sqr_toom8_threshold)
- {
- sqr_toom8_threshold = 0;
- something_changed = 1;
- }
- }
- }
+ param.name = "SQR_TOOM3_THRESHOLD";
+ param.min_size = MAX (toom3_start, MPN_TOOM3_SQR_N_MINSIZE);
+ param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1;
+ one (&sqr_toom3_threshold, &param);
- print_define ("SQR_TOOM3_THRESHOLD", SQR_TOOM3_THRESHOLD);
- print_define ("SQR_TOOM4_THRESHOLD", SQR_TOOM4_THRESHOLD);
- print_define ("SQR_TOOM6_THRESHOLD", SQR_TOOM6_THRESHOLD);
- print_define ("SQR_TOOM8_THRESHOLD", SQR_TOOM8_THRESHOLD);
+ param.name = "SQR_TOOM4_THRESHOLD";
+ param.min_size = MAX (sqr_toom3_threshold, MPN_TOOM4_SQR_N_MINSIZE);
+ param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
+ one (&sqr_toom4_threshold, &param);
}
}
void
-tune_dc_div (void)
+tune_sb_preinv (void)
{
- s.r = 0; /* clear to make speed function do 2n/n */
- {
- static struct param_t param;
- param.name = "DC_DIV_QR_THRESHOLD";
- param.function = speed_mpn_sbpi1_div_qr;
- param.function2 = speed_mpn_dcpi1_div_qr;
- param.min_size = 6;
- one (&dc_div_qr_threshold, &param);
- }
- {
- static struct param_t param;
- param.name = "DC_DIVAPPR_Q_THRESHOLD";
- param.function = speed_mpn_sbpi1_divappr_q;
- param.function2 = speed_mpn_dcpi1_divappr_q;
- param.min_size = 6;
- one (&dc_divappr_q_threshold, &param);
- }
-}
+ static struct param_t param;
-static double
-speed_mpn_sbordcpi1_div_qr (struct speed_params *s)
-{
- if (s->size < DC_DIV_QR_THRESHOLD)
- return speed_mpn_sbpi1_div_qr (s);
- else
- return speed_mpn_dcpi1_div_qr (s);
-}
+ if (GMP_NAIL_BITS != 0)
+ {
+ DIV_SB_PREINV_THRESHOLD = MP_SIZE_T_MAX;
+ print_define_remark ("DIV_SB_PREINV_THRESHOLD", MP_SIZE_T_MAX,
+ "no preinv with nails");
+ return;
+ }
-void
-tune_mu_div (void)
-{
- s.r = 0; /* clear to make speed function do 2n/n */
- {
- static struct param_t param;
- param.name = "MU_DIV_QR_THRESHOLD";
- param.function = speed_mpn_dcpi1_div_qr;
- param.function2 = speed_mpn_mu_div_qr;
- param.min_size = mul_toom22_threshold;
- param.max_size = 5000;
- param.step_factor = 0.02;
- one (&mu_div_qr_threshold, &param);
- }
- {
- static struct param_t param;
- param.name = "MU_DIVAPPR_Q_THRESHOLD";
- param.function = speed_mpn_dcpi1_divappr_q;
- param.function2 = speed_mpn_mu_divappr_q;
- param.min_size = mul_toom22_threshold;
- param.max_size = 5000;
- param.step_factor = 0.02;
- one (&mu_divappr_q_threshold, &param);
- }
- {
- static struct param_t param;
- param.name = "MUPI_DIV_QR_THRESHOLD";
- param.function = speed_mpn_sbordcpi1_div_qr;
- param.function2 = speed_mpn_mupi_div_qr;
- param.min_size = 6;
- param.min_is_always = 1;
- param.max_size = 1000;
- param.step_factor = 0.02;
- one (&mupi_div_qr_threshold, &param);
- }
-}
+ if (UDIV_PREINV_ALWAYS)
+ {
+ print_define_remark ("DIV_SB_PREINV_THRESHOLD", 0L, "preinv always");
+ return;
+ }
-void
-tune_dc_bdiv (void)
-{
- s.r = 0; /* clear to make speed function do 2n/n*/
- {
- static struct param_t param;
- param.name = "DC_BDIV_QR_THRESHOLD";
- param.function = speed_mpn_sbpi1_bdiv_qr;
- param.function2 = speed_mpn_dcpi1_bdiv_qr;
- param.min_size = 4;
- one (&dc_bdiv_qr_threshold, &param);
- }
- {
- static struct param_t param;
- param.name = "DC_BDIV_Q_THRESHOLD";
- param.function = speed_mpn_sbpi1_bdiv_q;
- param.function2 = speed_mpn_dcpi1_bdiv_q;
- param.min_size = 4;
- one (&dc_bdiv_q_threshold, &param);
- }
+ param.check_size = 256;
+ param.min_size = 3;
+ param.min_is_always = 1;
+ param.size_extra = 3;
+ param.stop_factor = 2.0;
+ param.name = "DIV_SB_PREINV_THRESHOLD";
+ param.function = speed_mpn_sb_divrem_m3;
+ one (&div_sb_preinv_threshold, &param);
}
-void
-tune_mu_bdiv (void)
-{
- s.r = 0; /* clear to make speed function do 2n/n*/
- {
- static struct param_t param;
- param.name = "MU_BDIV_QR_THRESHOLD";
- param.function = speed_mpn_dcpi1_bdiv_qr;
- param.function2 = speed_mpn_mu_bdiv_qr;
- param.min_size = mul_toom22_threshold;
- param.max_size = 5000;
- param.step_factor = 0.02;
- one (&mu_bdiv_qr_threshold, &param);
- }
- {
- static struct param_t param;
- param.name = "MU_BDIV_Q_THRESHOLD";
- param.function = speed_mpn_dcpi1_bdiv_q;
- param.function2 = speed_mpn_mu_bdiv_q;
- param.min_size = mul_toom22_threshold;
- param.max_size = 5000;
- param.step_factor = 0.02;
- one (&mu_bdiv_q_threshold, &param);
- }
-}
void
-tune_invertappr (void)
+tune_dc (void)
{
static struct param_t param;
-
- param.function = speed_mpn_ni_invertappr;
- param.name = "INV_MULMOD_BNM1_THRESHOLD";
- param.min_size = 4;
- one (&inv_mulmod_bnm1_threshold, &param);
-
- param.function = speed_mpn_invertappr;
- param.name = "INV_NEWTON_THRESHOLD";
- param.min_size = 3;
- one (&inv_newton_threshold, &param);
+ param.name = "DIV_DC_THRESHOLD";
+ param.function = speed_mpn_dc_tdiv_qr;
+ param.step_factor = 0.02;
+ one (&div_dc_threshold, &param);
}
-void
-tune_invert (void)
-{
- static struct param_t param;
- param.function = speed_mpn_invert;
- param.name = "INV_APPR_THRESHOLD";
- param.min_size = 3;
- one (&inv_appr_threshold, &param);
-}
+/* This is an indirect determination, based on a comparison between redc and
+ mpz_mod. A fudge factor of 1.04 is applied to redc, to represent
+ additional overheads it gets in mpz_powm.
+
+ stop_factor is 1.1 to hopefully help cray vector systems, where otherwise
+ currently it hits the 1000 limb limit with only a factor of about 1.18
+ (threshold should be around 650). */
void
-tune_binvert (void)
+tune_powm (void)
{
static struct param_t param;
-
- param.function = speed_mpn_binvert;
- param.name = "BINV_NEWTON_THRESHOLD";
- param.min_size = 8; /* pointless with smaller operands */
- one (&binv_newton_threshold, &param);
+ param.name = "POWM_THRESHOLD";
+ param.function = speed_mpn_redc_1;
+ param.function2 = speed_mpz_mod;
+ param.step_factor = 0.03;
+ param.stop_factor = 1.1;
+ param.function_fudge = 1.04;
+ one (&powm_threshold, &param);
}
-void
-tune_redc (void)
-{
-#define TUNE_REDC_2_MAX 100
-#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
-#define WANT_REDC_2 1
-#endif
-
-#if WANT_REDC_2
- {
- static struct param_t param;
- param.name = "REDC_1_TO_REDC_2_THRESHOLD";
- param.function = speed_mpn_redc_1;
- param.function2 = speed_mpn_redc_2;
- param.min_size = 1;
- param.min_is_always = 1;
- param.max_size = TUNE_REDC_2_MAX;
- param.noprint = 1;
- param.stop_factor = 1.5;
- one (&redc_1_to_redc_2_threshold, &param);
- }
- {
- static struct param_t param;
- param.name = "REDC_2_TO_REDC_N_THRESHOLD";
- param.function = speed_mpn_redc_2;
- param.function2 = speed_mpn_redc_n;
- param.min_size = 16;
- param.noprint = 1;
- one (&redc_2_to_redc_n_threshold, &param);
- }
- if (redc_1_to_redc_2_threshold >= redc_2_to_redc_n_threshold)
- {
- redc_2_to_redc_n_threshold = 0; /* disable redc_2 */
-
- /* Never use redc2, measure redc_1 -> redc_n cutoff, store result as
- REDC_1_TO_REDC_2_THRESHOLD. */
- {
- static struct param_t param;
- param.name = "REDC_1_TO_REDC_2_THRESHOLD";
- param.function = speed_mpn_redc_1;
- param.function2 = speed_mpn_redc_n;
- param.min_size = 16;
- param.noprint = 1;
- one (&redc_1_to_redc_2_threshold, &param);
- }
- }
- print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_1_TO_REDC_2_THRESHOLD);
- print_define ("REDC_2_TO_REDC_N_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD);
-#else
- {
- static struct param_t param;
- param.name = "REDC_1_TO_REDC_N_THRESHOLD";
- param.function = speed_mpn_redc_1;
- param.function2 = speed_mpn_redc_n;
- param.min_size = 16;
- one (&redc_1_to_redc_n_threshold, &param);
- }
-#endif
-}
void
tune_matrix22_mul (void)
@@ -1799,30 +1046,17 @@ tune_hgcd (void)
one (&hgcd_threshold, &param);
}
+#if 0
void
-tune_hgcd_appr (void)
-{
- static struct param_t param;
- param.name = "HGCD_APPR_THRESHOLD";
- param.function = speed_mpn_hgcd_appr;
- /* We seem to get strange results for small sizes */
- param.min_size = 50;
- param.stop_since_change = 150;
- one (&hgcd_appr_threshold, &param);
-}
-
-void
-tune_hgcd_reduce (void)
+tune_gcd_accel (void)
{
static struct param_t param;
- param.name = "HGCD_REDUCE_THRESHOLD";
- param.function = speed_mpn_hgcd_reduce;
- param.min_size = 30;
- param.max_size = 7000;
- param.step_factor = 0.04;
- one (&hgcd_reduce_threshold, &param);
+ param.name = "GCD_ACCEL_THRESHOLD";
+ param.function = speed_mpn_gcd;
+ param.min_size = 1;
+ one (&gcd_accel_threshold, &param);
}
-
+#endif
void
tune_gcd_dc (void)
{
@@ -1847,139 +1081,6 @@ tune_gcdext_dc (void)
one (&gcdext_dc_threshold, &param);
}
-/* In tune_powm_sec we compute the table used by the win_size function. The
- cutoff points are in exponent bits, disregarding other operand sizes. It is
- not possible to use the one framework since it currently uses a granularity
- of full limbs.
-*/
-
-/* This win_size replaces the variant in the powm code, allowing us to
- control k in the k-ary algorithms. */
-int winsize;
-int
-win_size (mp_bitcnt_t eb)
-{
- return winsize;
-}
-
-void
-tune_powm_sec (void)
-{
- mp_size_t n;
- int k, i;
- mp_size_t itch;
- mp_bitcnt_t nbits, nbits_next, possible_nbits_cutoff;
- const int n_max = 3000 / GMP_NUMB_BITS;
- const int n_measurements = 5;
- mp_ptr rp, bp, ep, mp, tp;
- double ttab[n_measurements], tk, tkp1;
- TMP_DECL;
- TMP_MARK;
-
- possible_nbits_cutoff = 0;
-
- k = 1;
-
- winsize = 10; /* the itch function needs this */
- itch = mpn_sec_powm_itch (n_max, n_max * GMP_NUMB_BITS, n_max);
-
- rp = TMP_ALLOC_LIMBS (n_max);
- bp = TMP_ALLOC_LIMBS (n_max);
- ep = TMP_ALLOC_LIMBS (n_max);
- mp = TMP_ALLOC_LIMBS (n_max);
- tp = TMP_ALLOC_LIMBS (itch);
-
- mpn_random (bp, n_max);
- mpn_random (mp, n_max);
- mp[0] |= 1;
-
-/* How about taking the M operand size into account?
-
- An operation R=powm(B,E,N) will take time O(log(E)*M(log(N))) (assuming
- B = O(M)).
-
- Using k-ary and no sliding window, the precomputation will need time
- O(2^(k-1)*M(log(N))) and the main computation will need O(log(E)*S(N)) +
- O(log(E)/k*M(N)), for the squarings, multiplications, respectively.
-
- An operation R=powm_sec(B,E,N) will take time like powm.
-
- Using k-ary, the precomputation will need time O(2^k*M(log(N))) and the
- main computation will need O(log(E)*S(N)) + O(log(E)/k*M(N)) +
- O(log(E)/k*2^k*log(N)), for the squarings, multiplications, and full
- table reads, respectively. */
-
- printf ("#define POWM_SEC_TABLE ");
-
- /* For nbits == 1, we should always use k == 1, so no need to tune
- that. Starting with nbits == 2 also ensure that nbits always is
- larger than the windowsize k+1. */
- for (nbits = 2; nbits <= n_max * GMP_NUMB_BITS; )
- {
- n = (nbits - 1) / GMP_NUMB_BITS + 1;
-
- /* Generate E such that sliding-window for k and k+1 works equally
- well/poorly (but sliding is not used in powm_sec, of course). */
- for (i = 0; i < n; i++)
- ep[i] = ~CNST_LIMB(0);
-
- winsize = k;
- for (i = 0; i < n_measurements; i++)
- {
- speed_starttime ();
- mpn_sec_powm (rp, bp, n, ep, nbits, mp, n, tp);
- ttab[i] = speed_endtime ();
- }
- tk = median (ttab, n_measurements);
-
- winsize = k + 1;
- speed_starttime ();
- for (i = 0; i < n_measurements; i++)
- {
- speed_starttime ();
- mpn_sec_powm (rp, bp, n, ep, nbits, mp, n, tp);
- ttab[i] = speed_endtime ();
- }
- tkp1 = median (ttab, n_measurements);
-/*
- printf ("testing: %ld, %d", nbits, k, ep[n-1]);
- printf (" %10.5f %10.5f\n", tk, tkp1);
-*/
- if (tkp1 < tk)
- {
- if (possible_nbits_cutoff)
- {
- /* Two consecutive sizes indicate k increase, obey. */
-
- /* Must always have x[k] >= k */
- ASSERT_ALWAYS (possible_nbits_cutoff >= k);
-
- if (k > 1)
- printf (",");
- printf ("%ld", (long) possible_nbits_cutoff);
- k++;
- possible_nbits_cutoff = 0;
- }
- else
- {
- /* One measurement indicate k increase, save nbits for further
- consideration. */
- /* The new larger k gets used for sizes > the cutoff
- value, hence the cutoff should be one less than the
- smallest size where it gives a speedup. */
- possible_nbits_cutoff = nbits - 1;
- }
- }
- else
- possible_nbits_cutoff = 0;
-
- nbits_next = nbits * 65 / 64;
- nbits = nbits_next + (nbits_next == nbits);
- }
- printf ("\n");
- TMP_FREE;
-}
-
/* size_extra==1 reflects the fact that with high<divisor one division is
always skipped. Forcing high<divisor while testing ensures consistency
@@ -1999,7 +1100,7 @@ tune_powm_sec (void)
param.stop_factor = 2.0;
-double (*tuned_speed_mpn_divrem_1) (struct speed_params *);
+double (*tuned_speed_mpn_divrem_1) __GMP_PROTO ((struct speed_params *));
void
tune_divrem_1 (void)
@@ -2052,59 +1153,15 @@ tune_divrem_1 (void)
}
}
-void
-tune_div_qr_1 (void)
-{
- static struct param_t param;
- double t1, t2;
-
- if (!HAVE_NATIVE_mpn_div_qr_1n_pi1)
- {
- static struct param_t param;
- double t1, t2;
-
- s.size = 10;
- s.r = randlimb_norm ();
-
- t1 = tuneup_measure (speed_mpn_div_qr_1n_pi1_1, &param, &s);
- t2 = tuneup_measure (speed_mpn_div_qr_1n_pi1_2, &param, &s);
-
- if (t1 == -1.0 || t2 == -1.0)
- {
- printf ("Oops, can't measure all mpn_div_qr_1n_pi1 methods at %ld\n",
- (long) s.size);
- abort ();
- }
- div_qr_1n_pi1_method = (t1 < t2) ? 1 : 2;
- print_define ("DIV_QR_1N_PI1_METHOD", div_qr_1n_pi1_method);
- }
-
- {
- static struct param_t param;
- param.name = "DIV_QR_1_NORM_THRESHOLD";
- DIV_1_PARAMS;
- param.min_size = 1;
- param.min_is_always = 0;
- s.r = randlimb_norm ();
- param.function = speed_mpn_div_qr_1_tune;
- one (&div_qr_1_norm_threshold, &param);
- }
- {
- static struct param_t param;
- param.name = "DIV_QR_1_UNNORM_THRESHOLD";
- DIV_1_PARAMS;
- param.min_size = 1;
- param.min_is_always = 0;
- s.r = randlimb_half();
- param.function = speed_mpn_div_qr_1_tune;
- one (&div_qr_1_unnorm_threshold, &param);
- }
-}
+double (*tuned_speed_mpn_mod_1) __GMP_PROTO ((struct speed_params *));
void
tune_mod_1 (void)
{
+ /* plain version by default */
+ tuned_speed_mpn_mod_1 = speed_mpn_mod_1;
+
/* No support for tuning native assembler code, do that by hand and put
the results in the .asm file, there's no need for such thresholds to
appear in gmp-mparam.h. */
@@ -2120,27 +1177,6 @@ tune_mod_1 (void)
return;
}
- if (!HAVE_NATIVE_mpn_mod_1_1p)
- {
- static struct param_t param;
- double t1, t2;
-
- s.size = 10;
- s.r = randlimb_half ();
-
- t1 = tuneup_measure (speed_mpn_mod_1_1_1, &param, &s);
- t2 = tuneup_measure (speed_mpn_mod_1_1_2, &param, &s);
-
- if (t1 == -1.0 || t2 == -1.0)
- {
- printf ("Oops, can't measure all mpn_mod_1_1 methods at %ld\n",
- (long) s.size);
- abort ();
- }
- mod_1_1p_method = (t1 < t2) ? 1 : 2;
- print_define ("MOD_1_1P_METHOD", mod_1_1p_method);
- }
-
if (UDIV_PREINV_ALWAYS)
{
print_define ("MOD_1_NORM_THRESHOLD", 0L);
@@ -2148,6 +1184,8 @@ tune_mod_1 (void)
}
else
{
+ tuned_speed_mpn_mod_1 = speed_mpn_mod_1_tune;
+
{
static struct param_t param;
param.name = "MOD_1_NORM_THRESHOLD";
@@ -2168,80 +1206,26 @@ tune_mod_1 (void)
{
static struct param_t param;
- param.check_size = 256;
-
- s.r = randlimb_norm ();
+ s.r = GMP_NUMB_MASK / 5;
param.function = speed_mpn_mod_1_tune;
-
- param.name = "MOD_1N_TO_MOD_1_1_THRESHOLD";
- param.min_size = 2;
- one (&mod_1n_to_mod_1_1_threshold, &param);
- }
-
- {
- static struct param_t param;
-
- param.check_size = 256;
- s.r = randlimb_half ();
- param.noprint = 1;
-
- param.function = speed_mpn_mod_1_1;
- param.function2 = speed_mpn_mod_1_2;
- param.min_is_always = 1;
- param.name = "MOD_1_1_TO_MOD_1_2_THRESHOLD";
- param.min_size = 2;
- one (&mod_1_1_to_mod_1_2_threshold, &param);
-
- param.function = speed_mpn_mod_1_2;
- param.function2 = speed_mpn_mod_1_4;
- param.min_is_always = 1;
- param.name = "MOD_1_2_TO_MOD_1_4_THRESHOLD";
param.min_size = 1;
- one (&mod_1_2_to_mod_1_4_threshold, &param);
- if (mod_1_1_to_mod_1_2_threshold >= mod_1_2_to_mod_1_4_threshold)
- {
- /* Never use mod_1_2, measure mod_1_1 -> mod_1_4 */
- mod_1_2_to_mod_1_4_threshold = 0;
-
- param.function = speed_mpn_mod_1_1;
- param.function2 = speed_mpn_mod_1_4;
- param.min_is_always = 1;
- param.name = "MOD_1_1_TO_MOD_1_4_THRESHOLD fake";
- param.min_size = 2;
- one (&mod_1_1_to_mod_1_2_threshold, &param);
- }
+ param.name = "MOD_1_1_THRESHOLD";
+ one (&mod_1_1_threshold, &param);
- param.function = speed_mpn_mod_1_tune;
- param.function2 = NULL;
- param.name = "MOD_1U_TO_MOD_1_1_THRESHOLD";
- param.min_size = 2;
- param.min_is_always = 0;
- one (&mod_1u_to_mod_1_1_threshold, &param);
-
- if (mod_1u_to_mod_1_1_threshold >= mod_1_1_to_mod_1_2_threshold)
- mod_1_1_to_mod_1_2_threshold = 0;
- if (mod_1u_to_mod_1_1_threshold >= mod_1_2_to_mod_1_4_threshold)
- mod_1_2_to_mod_1_4_threshold = 0;
-
- print_define_remark ("MOD_1U_TO_MOD_1_1_THRESHOLD", mod_1u_to_mod_1_1_threshold, NULL);
- print_define_remark ("MOD_1_1_TO_MOD_1_2_THRESHOLD", mod_1_1_to_mod_1_2_threshold,
- mod_1_1_to_mod_1_2_threshold == 0 ? "never mpn_mod_1_1p" : NULL);
- print_define_remark ("MOD_1_2_TO_MOD_1_4_THRESHOLD", mod_1_2_to_mod_1_4_threshold,
- mod_1_2_to_mod_1_4_threshold == 0 ? "never mpn_mod_1s_2p" : NULL);
- }
+ param.name = "MOD_1_2_THRESHOLD";
+ param.min_size = mod_1_1_threshold + 1;
+ one (&mod_1_2_threshold, &param);
- {
- static struct param_t param;
-
- param.check_size = 256;
+#if 0
+ param.name = "MOD_1_3_THRESHOLD";
+ param.min_size = mod_1_2_threshold + 1;
+ one (&mod_1_3_threshold, &param);
+#endif
- param.name = "PREINV_MOD_1_TO_MOD_1_THRESHOLD";
- s.r = randlimb_norm ();
- param.function = speed_mpn_preinv_mod_1;
- param.function2 = speed_mpn_mod_1_tune;
- param.min_size = 1;
- one (&preinv_mod_1_to_mod_1_threshold, &param);
+ param.name = "MOD_1_4_THRESHOLD";
+ param.min_size = mod_1_2_threshold + 1;
+ one (&mod_1_4_threshold, &param);
}
}
@@ -2318,6 +1302,72 @@ tune_preinv_divrem_1 (void)
}
+/* A non-zero MOD_1_UNNORM_THRESHOLD (or MOD_1_NORM_THRESHOLD) would imply
+ that udiv_qrnnd_preinv is worth using, but it seems most straightforward
+ to compare mpn_preinv_mod_1 and mpn_mod_1_div directly. */
+
+void
+tune_preinv_mod_1 (void)
+{
+ static struct param_t param;
+ speed_function_t mod_1;
+ const char *mod_1_name;
+ double t1, t2;
+
+ /* Any native version of mpn_preinv_mod_1 is assumed to exist because it's
+ faster than mpn_mod_1. */
+ if (HAVE_NATIVE_mpn_preinv_mod_1)
+ {
+ print_define_remark ("USE_PREINV_MOD_1", 1, "native");
+ return;
+ }
+
+ if (GMP_NAIL_BITS != 0)
+ {
+ print_define_remark ("USE_PREINV_MOD_1", 0, "no preinv with nails");
+ return;
+ }
+
+ /* If udiv_qrnnd_preinv is the only division method then of course
+ mpn_preinv_mod_1 should be used. */
+ if (UDIV_PREINV_ALWAYS)
+ {
+ print_define_remark ("USE_PREINV_MOD_1", 1, "preinv always");
+ return;
+ }
+
+ /* If we've got an assembler version of mpn_mod_1, then compare against
+ that, not the mpn_mod_1_div generic C. */
+ if (HAVE_NATIVE_mpn_mod_1)
+ {
+ mod_1 = speed_mpn_mod_1;
+ mod_1_name = "mpn_mod_1";
+ }
+ else
+ {
+ mod_1 = speed_mpn_mod_1_div;
+ mod_1_name = "mpn_mod_1_div";
+ }
+
+ param.data_high = DATA_HIGH_LT_R; /* let mpn_mod_1 skip one division */
+ s.size = 200; /* generous but not too big */
+ s.r = randlimb_norm(); /* divisor */
+
+ t1 = tuneup_measure (speed_mpn_preinv_mod_1, &param, &s);
+ t2 = tuneup_measure (mod_1, &param, &s);
+ if (t1 == -1.0 || t2 == -1.0)
+ {
+ printf ("Oops, can't measure mpn_preinv_mod_1 and %s at %ld\n",
+ mod_1_name, (long) s.size);
+ abort ();
+ }
+ if (option_trace >= 1)
+ printf ("size=%ld, mpn_preinv_mod_1 %.9f, %s %.9f\n",
+ (long) s.size, t1, mod_1_name, t2);
+
+ print_define_remark ("USE_PREINV_MOD_1", (mp_size_t) (t1 < t2), NULL);
+}
+
void
tune_divrem_2 (void)
@@ -2361,16 +1411,6 @@ tune_divrem_2 (void)
one (&divrem_2_threshold, &param);
}
-void
-tune_div_qr_2 (void)
-{
- static struct param_t param;
- param.name = "DIV_QR_2_PI2_THRESHOLD";
- param.function = speed_mpn_div_qr_2n;
- param.check_size = 500;
- param.min_size = 4;
- one (&div_qr_2_pi2_threshold, &param);
-}
/* mpn_divexact_1 is vaguely expected to be used on smallish divisors, so
tune for that. Its speed can differ on odd or even divisor, so take an
@@ -2464,22 +1504,22 @@ tune_modexact_1_odd (void)
static struct param_t param;
mp_size_t thresh_lt, thresh_ge, average;
-#if 0
/* Any native mpn_modexact_1_odd is assumed to incorporate all the speed
of a full mpn_mod_1. */
if (HAVE_NATIVE_mpn_modexact_1_odd)
{
- print_define_remark ("BMOD_1_TO_MOD_1_THRESHOLD", MP_SIZE_T_MAX, "always bmod_1");
+ print_define_remark ("MODEXACT_1_ODD_THRESHOLD", 0, "always (native)");
return;
}
-#endif
- param.name = "BMOD_1_TO_MOD_1_THRESHOLD";
+ ASSERT_ALWAYS (tuned_speed_mpn_mod_1 != NULL);
+
+ param.name = "MODEXACT_1_ODD_THRESHOLD";
param.check_size = 256;
param.min_size = 2;
param.stop_factor = 1.5;
- param.function = speed_mpn_modexact_1c_odd;
- param.function2 = speed_mpn_mod_1_tune;
+ param.function = tuned_speed_mpn_mod_1;
+ param.function2 = speed_mpn_modexact_1c_odd;
param.noprint = 1;
s.r = randlimb_half () | 1;
@@ -2514,10 +1554,10 @@ void
tune_jacobi_base (void)
{
static struct param_t param;
- double t1, t2, t3, t4;
+ double t1, t2, t3;
int method;
- s.size = GMP_LIMB_BITS * 3 / 4;
+ s.size = BITS_PER_MP_LIMB * 3 / 4;
t1 = tuneup_measure (speed_mpn_jacobi_base_1, &param, &s);
if (option_trace >= 1)
@@ -2531,25 +1571,19 @@ tune_jacobi_base (void)
if (option_trace >= 1)
printf ("size=%ld, mpn_jacobi_base_3 %.9f\n", (long) s.size, t3);
- t4 = tuneup_measure (speed_mpn_jacobi_base_4, &param, &s);
- if (option_trace >= 1)
- printf ("size=%ld, mpn_jacobi_base_4 %.9f\n", (long) s.size, t4);
-
- if (t1 == -1.0 || t2 == -1.0 || t3 == -1.0 || t4 == -1.0)
+ if (t1 == -1.0 || t2 == -1.0 || t3 == -1.0)
{
printf ("Oops, can't measure all mpn_jacobi_base methods at %ld\n",
(long) s.size);
abort ();
}
- if (t1 < t2 && t1 < t3 && t1 < t4)
+ if (t1 < t2 && t1 < t3)
method = 1;
- else if (t2 < t3 && t2 < t4)
+ else if (t2 < t3)
method = 2;
- else if (t3 < t4)
- method = 3;
else
- method = 4;
+ method = 3;
print_define ("JACOBI_BASE_METHOD", method);
}
@@ -2607,7 +1641,8 @@ speed_mpn_pre_set_str (struct speed_params *s)
for (i = 0; i < s->size; i++)
str[i] = s->xp[i] % base;
- LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base);
+ wn = ((mp_size_t) (s->size / __mp_bases[base].chars_per_bit_exactly))
+ / BITS_PER_MP_LIMB + 2;
SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);
/* use this during development to check wn is big enough */
@@ -2615,11 +1650,11 @@ speed_mpn_pre_set_str (struct speed_params *s)
ASSERT_ALWAYS (mpn_set_str (wp, str, s->size, base) <= wn);
*/
- speed_operand_src (s, (mp_ptr) str, s->size/GMP_LIMB_BYTES);
+ speed_operand_src (s, (mp_ptr) str, s->size/BYTES_PER_MP_LIMB);
speed_operand_dst (s, wp, wn);
speed_cache_fill (s);
- chars_per_limb = mp_bases[base].chars_per_limb;
+ chars_per_limb = __mp_bases[base].chars_per_limb;
un = s->size / chars_per_limb + 1;
powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un));
mpn_set_str_compute_powtab (powtab, powtab_mem, un, base);
@@ -2641,6 +1676,8 @@ speed_mpn_pre_set_str (struct speed_params *s)
void
tune_set_str (void)
{
+ static struct param_t param;
+
s.r = 10; /* decimal */
{
static struct param_t param;
@@ -2672,15 +1709,14 @@ tune_fft_mul (void)
if (option_fft_max_size == 0)
return;
- param.table_name = "MUL_FFT_TABLE3";
+ param.table_name = "MUL_FFT_TABLE";
param.threshold_name = "MUL_FFT_THRESHOLD";
param.p_threshold = &mul_fft_threshold;
param.modf_threshold_name = "MUL_FFT_MODF_THRESHOLD";
param.p_modf_threshold = &mul_fft_modf_threshold;
- param.first_size = MUL_TOOM33_THRESHOLD / 2;
+ param.first_size = MUL_TOOM3_THRESHOLD / 2;
param.max_size = option_fft_max_size;
- param.function = speed_mpn_fft_mul;
- param.mul_modf_function = speed_mpn_mul_fft;
+ param.function = speed_mpn_mul_fft;
param.mul_function = speed_mpn_mul_n;
param.sqr = 0;
fft (&param);
@@ -2695,38 +1731,19 @@ tune_fft_sqr (void)
if (option_fft_max_size == 0)
return;
- param.table_name = "SQR_FFT_TABLE3";
+ param.table_name = "SQR_FFT_TABLE";
param.threshold_name = "SQR_FFT_THRESHOLD";
param.p_threshold = &sqr_fft_threshold;
param.modf_threshold_name = "SQR_FFT_MODF_THRESHOLD";
param.p_modf_threshold = &sqr_fft_modf_threshold;
param.first_size = SQR_TOOM3_THRESHOLD / 2;
param.max_size = option_fft_max_size;
- param.function = speed_mpn_fft_sqr;
- param.mul_modf_function = speed_mpn_mul_fft_sqr;
- param.mul_function = speed_mpn_sqr;
- param.sqr = 1;
+ param.function = speed_mpn_mul_fft_sqr;
+ param.mul_function = speed_mpn_sqr_n;
+ param.sqr = 0;
fft (&param);
}
-void
-tune_fac_ui (void)
-{
- static struct param_t param;
-
- param.function = speed_mpz_fac_ui_tune;
-
- param.name = "FAC_DSC_THRESHOLD";
- param.min_size = 70;
- param.max_size = FAC_DSC_THRESHOLD_LIMIT;
- one (&fac_dsc_threshold, &param);
-
- param.name = "FAC_ODD_THRESHOLD";
- param.min_size = 22;
- param.stop_factor = 1.7;
- param.min_is_always = 1;
- one (&fac_odd_threshold, &param);
-}
void
all (void)
@@ -2794,81 +1811,52 @@ all (void)
}
printf ("\n");
- tune_divrem_1 ();
- tune_mod_1 ();
- tune_preinv_divrem_1 ();
- tune_div_qr_1 ();
-#if 0
- tune_divrem_2 ();
-#endif
- tune_div_qr_2 ();
- tune_divexact_1 ();
- tune_modexact_1_odd ();
- printf("\n");
-
- tune_mul_n ();
- printf("\n");
-
tune_mul ();
printf("\n");
tune_sqr ();
printf("\n");
- tune_mulmid ();
+ tune_mullow ();
printf("\n");
- tune_mulmod_bnm1 ();
- tune_sqrmod_bnm1 ();
- printf("\n");
-
- tune_fft_mul ();
- printf("\n");
-
- tune_fft_sqr ();
- printf ("\n");
-
- tune_mullo ();
- printf("\n");
-
- tune_dc_div ();
- tune_dc_bdiv ();
-
- printf("\n");
- tune_invertappr ();
- tune_invert ();
- printf("\n");
-
- tune_binvert ();
- tune_redc ();
- printf("\n");
-
- tune_mu_div ();
- tune_mu_bdiv ();
- printf("\n");
-
- tune_powm_sec ();
+ tune_sb_preinv ();
+ tune_dc ();
+ tune_powm ();
printf("\n");
tune_matrix22_mul ();
tune_hgcd ();
- tune_hgcd_appr ();
- tune_hgcd_reduce();
tune_gcd_dc ();
tune_gcdext_dc ();
+#if 0
+ tune_gcd_accel ();
+#endif
tune_jacobi_base ();
printf("\n");
+ tune_divrem_1 ();
+ tune_mod_1 ();
+ tune_preinv_divrem_1 ();
+ tune_preinv_mod_1 ();
+ tune_divrem_2 ();
+ tune_divexact_1 ();
+ tune_modexact_1_odd ();
+ printf("\n");
+
tune_get_str ();
tune_set_str ();
printf("\n");
- tune_fac_ui ();
+ tune_fft_mul ();
printf("\n");
+ tune_fft_sqr ();
+ printf ("\n");
+
time (&end_time);
printf ("/* Tuneup completed successfully, took %ld seconds */\n",
- (long) (end_time - start_time));
+ end_time - start_time);
TMP_FREE;
}
diff --git a/gmp/tune/x86_64.asm b/gmp/tune/x86_64.asm
index b7ec44c544..509909002a 100644
--- a/gmp/tune/x86_64.asm
+++ b/gmp/tune/x86_64.asm
@@ -1,32 +1,21 @@
dnl x86 pentium time stamp counter access routine.
-dnl Copyright 1999, 2000, 2003-2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2003, 2004, 2005 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')