diff options
Diffstat (limited to 'gmp/tune')
55 files changed, 1939 insertions, 5926 deletions
diff --git a/gmp/tune/Makefile.am b/gmp/tune/Makefile.am index bbe503a201..3c7f62c820 100644 --- a/gmp/tune/Makefile.am +++ b/gmp/tune/Makefile.am @@ -1,32 +1,21 @@ ## Process this file with automake to generate Makefile.in -# Copyright 2000-2003, 2005-2011 Free Software Foundation, Inc. +# Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc. # -# This file is part of the GNU MP Library. +# This file is part of the GNU MP Library. # -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of either: +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. # -# * the GNU Lesser General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your -# option) any later version. +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. # -# or -# -# * the GNU General Public License as published by the Free Software -# Foundation; either version 2 of the License, or (at your option) any -# later version. -# -# or both in parallel, as here. -# -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# for more details. -# -# You should have received copies of the GNU General Public License and the -# GNU Lesser General Public License along with the GNU MP Library. If not, -# see https://www.gnu.org/licenses/. 
+# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests @@ -52,14 +41,13 @@ EXTRA_LTLIBRARIES = libspeed.la libspeed_la_SOURCES = \ common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \ - div_qr_1n_pi1_1.c div_qr_1n_pi1_2.c div_qr_1_tune.c \ - freq.c \ + freq.c \ gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \ - hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \ - jacbase1.c jacbase2.c jacbase3.c jacbase4.c \ - mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \ + jacbase1.c jacbase2.c jacbase3.c \ + mod_1_div.c mod_1_inv.c modlinv.c \ noop.c powm_mod.c powm_redc.c pre_divrem_1.c \ - set_strb.c set_strs.c set_strp.c time.c + set_strb.c set_strs.c set_strp.c time.c \ + sb_div.c sb_inv.c libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \ $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la @@ -81,10 +69,10 @@ $(top_builddir)/tests/libtests.la: # program. This can always be forced with "make speed_LDFLAGS=-all-static # ..." if desired, see tune/README. 
-EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup tune-gcd-p +EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup DEPENDENCIES = libspeed.la -LDADD = $(DEPENDENCIES) $(TUNE_LIBS) +LDADD = $(DEPENDENCIES) speed_SOURCES = speed.c speed_LDFLAGS = $(STATIC) @@ -95,15 +83,11 @@ speed_ext_SOURCES = speed-ext.c speed_ext_LDFLAGS = $(STATIC) tuneup_SOURCES = tuneup.c -nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS) +nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS) tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la -tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS) +tuneup_LDADD = $(tuneup_DEPENDENCIES) tuneup_LDFLAGS = $(STATIC) -tune_gcd_p_SOURCES = tune-gcd-p.c -tune_gcd_p_DEPENDENCIES = ../mpn/gcd.c -tune_gcd_p_LDFLAGS = $(STATIC) - tune: $(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT) @@ -113,7 +97,7 @@ allprogs: $(EXTRA_PROGRAMS) # $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \ - $(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \ + $(TUNE_MPN_SRCS) sqr_asm.asm \ stg.gnuplot stg.data \ mtg.gnuplot mtg.data \ fibg.gnuplot fibg.data \ @@ -139,16 +123,9 @@ DISTCLEANFILES = sqr_basecase.c $(MANY_DISTCLEAN) # recompiled object will be rebuilt if that file changes. 
TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c -TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \ - dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \ - invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \ - get_str.c set_str.c matrix22_mul.c \ - hgcd.c hgcd_appr.c hgcd_reduce.c \ - mul_n.c sqr.c sec_powm.c \ - mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \ - mulmid.c mulmid_n.c toom42_mulmid.c \ - nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \ - toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c +TUNE_MPN_SRCS_BASIC = dc_divrem_n.c divrem_2.c gcd.c gcdext.c get_str.c \ + set_str.c matrix22_mul.c hgcd.c mul_n.c toom44_mul.c toom4_sqr.c \ + mullow_n.c mul_fft.c mul.c sb_divrem_mn.c tdiv_qr.c $(TUNE_MPN_SRCS_BASIC): for i in $(TUNE_MPN_SRCS_BASIC); do \ @@ -167,15 +144,60 @@ mod_1.c: echo "#include \"mpn/generic/mod_1.c\"" >>mod_1.c sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm - echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm + echo 'define(SQR_KARATSUBA_THRESHOLD_OVERRIDE,SQR_KARATSUBA_THRESHOLD_MAX)' >sqr_asm.asm echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm -# FIXME: Should it depend on $(top_builddir)/fac_ui.h too? 
-fac_ui.c: $(top_builddir)/mpz/fac_ui.c - echo "#define TUNE_PROGRAM_BUILD 1" >fac_ui.c - echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c - echo "#define __gmpz_oddfac_1 mpz_oddfac_1_tune" >>fac_ui.c - echo "#include \"mpz/oddfac_1.c\"" >>fac_ui.c - echo "#include \"mpz/fac_ui.c\"" >>fac_ui.c include ../mpn/Makeasm.am + + +# "mk" is multiplication in the karatsuba range +# "st" is squaring in the toom-cook range, etc +# "g" forms produce graphs + +mk: + ./speed -s 5-40 -c mpn_mul_basecase mpn_kara_mul_n + +MTS = -s 50-150 -c +mt: + ./speed $(MTS) mpn_kara_mul_n mpn_toom3_mul_n +mtg: + ./speed $(MTS) -P mtg mpn_kara_mul_n mpn_toom3_mul_n + +sk: + ./speed -s 5-40 -c mpn_sqr_basecase mpn_kara_sqr_n + +STS = -s 50-150 -c +st: + ./speed $(STS) mpn_kara_sqr_n mpn_toom3_sqr_n +stg: + ./speed $(STS) -P stg mpn_kara_sqr_n mpn_toom3_sqr_n + +dc: + ./speed -s 5-40 -c mpn_dc_divrem_sb mpn_dc_divrem_n mpn_dc_tdiv_qr + +fib: + ./speed -s 40-60 -c mpz_fib_ui +fibg: + ./speed -s 10-300 -P fibg mpz_fib_ui + + +gcd: + ./speed -s 1-20 -c mpn_gcd + +udiv: + ./speed -s 1 -c udiv_qrnnd udiv_qrnnd_preinv udiv_qrnnd_preinv2norm invert_limb udiv_qrnnd_c + +divn: + ./speed -s 1-30 -c mpn_divrem_1_div.-1 mpn_divrem_1_inv.-1 +divun: + ./speed -s 1-30 -c mpn_divrem_1_div.12345 mpn_divrem_1_inv.12345 +modn: + ./speed -s 1-30 -c mpn_mod_1_div.-1 mpn_mod_1_inv.-1 +modun: + ./speed -s 1-30 -c mpn_mod_1_div.12345 mpn_mod_1_inv.12345 + + +graph: + ./speed -s 1-5000 -f 1.02 -P graph mpn_mul_n mpn_sqr + gnuplot graph.gnuplot diff --git a/gmp/tune/Makefile.in b/gmp/tune/Makefile.in index 07abb022a3..c3698319d9 100644 --- a/gmp/tune/Makefile.in +++ b/gmp/tune/Makefile.in @@ -1,9 +1,8 @@ -# Makefile.in generated by automake 1.11.6 from Makefile.am. +# Makefile.in generated by automake 1.8.4 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software -# Foundation, Inc. 
+# 2003, 2004 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. @@ -15,85 +14,52 @@ @SET_MAKE@ -# Copyright 2000-2003, 2005-2011 Free Software Foundation, Inc. +# Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc. # -# This file is part of the GNU MP Library. +# This file is part of the GNU MP Library. # -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of either: +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. # -# * the GNU Lesser General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your -# option) any later version. +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. # -# or -# -# * the GNU General Public License as published by the Free Software -# Foundation; either version 2 of the License, or (at your option) any -# later version. -# -# or both in parallel, as here. -# -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# for more details. -# -# You should have received copies of the GNU General Public License and the -# GNU Lesser General Public License along with the GNU MP Library. If not, -# see https://www.gnu.org/licenses/. 
+# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. -# Copyright 1996, 1998-2002 Free Software Foundation, Inc. -# -# This file is part of the GNU MP Library. +# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, +# Inc. # -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of either: +# This file is part of the GNU MP Library. # -# * the GNU Lesser General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your -# option) any later version. +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. # -# or +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. # -# * the GNU General Public License as published by the Free Software -# Foundation; either version 2 of the License, or (at your option) any -# later version. -# -# or both in parallel, as here. -# -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# for more details. -# -# You should have received copies of the GNU General Public License and the -# GNU Lesser General Public License along with the GNU MP Library. If not, -# see https://www.gnu.org/licenses/. +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
+SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) $(speed_dynamic_SOURCES) $(speed_ext_SOURCES) $(tuneup_SOURCES) $(nodist_tuneup_SOURCES) + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ VPATH = @srcdir@ -am__make_dryrun = \ - { \ - am__dry=no; \ - case $$MAKEFLAGS in \ - *\\[\ \ ]*) \ - echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ - | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ - *) \ - for am__flg in $$MAKEFLAGS; do \ - case $$am__flg in \ - *=*|--*) ;; \ - *n*) am__dry=yes; break;; \ - esac; \ - done;; \ - esac; \ - test $$am__dry = yes; \ - } pkgdatadir = $(datadir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ -pkglibexecdir = $(libexecdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = .. am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c @@ -105,110 +71,74 @@ POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : -build_triplet = @build@ host_triplet = @host@ +ANSI2KNR = $(top_builddir)/ansi2knr EXTRA_PROGRAMS = speed$(EXEEXT) speed-dynamic$(EXEEXT) \ - speed-ext$(EXEEXT) tuneup$(EXEEXT) tune-gcd-p$(EXEEXT) + speed-ext$(EXEEXT) tuneup$(EXEEXT) DIST_COMMON = README $(noinst_HEADERS) $(srcdir)/../mpn/Makeasm.am \ $(srcdir)/Makefile.am $(srcdir)/Makefile.in subdir = tune ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ - $(top_srcdir)/configure.ac + $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) -mkinstalldirs = $(install_sh) -d +mkinstalldirs = $(mkdir_p) CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = -CONFIG_CLEAN_VPATH_FILES = am__DEPENDENCIES_1 = am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) \ $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la -am_libspeed_la_OBJECTS = common.lo divrem1div.lo 
divrem1inv.lo \ - divrem2div.lo divrem2inv.lo div_qr_1n_pi1_1.lo \ - div_qr_1n_pi1_2.lo div_qr_1_tune.lo freq.lo gcdext_single.lo \ - gcdext_double.lo gcdextod.lo gcdextos.lo hgcd_lehmer.lo \ - hgcd_appr_lehmer.lo hgcd_reduce_1.lo hgcd_reduce_2.lo \ - jacbase1.lo jacbase2.lo jacbase3.lo jacbase4.lo mod_1_div.lo \ - mod_1_inv.lo mod_1_1-1.lo mod_1_1-2.lo modlinv.lo noop.lo \ - powm_mod.lo powm_redc.lo pre_divrem_1.lo set_strb.lo \ - set_strs.lo set_strp.lo time.lo +am_libspeed_la_OBJECTS = common$U.lo divrem1div$U.lo divrem1inv$U.lo \ + divrem2div$U.lo divrem2inv$U.lo freq$U.lo gcdext_single$U.lo \ + gcdext_double$U.lo gcdextod$U.lo gcdextos$U.lo jacbase1$U.lo \ + jacbase2$U.lo jacbase3$U.lo mod_1_div$U.lo mod_1_inv$U.lo \ + modlinv$U.lo noop$U.lo powm_mod$U.lo powm_redc$U.lo \ + pre_divrem_1$U.lo set_strb$U.lo set_strs$U.lo set_strp$U.lo \ + time$U.lo sb_div$U.lo sb_inv$U.lo libspeed_la_OBJECTS = $(am_libspeed_la_OBJECTS) -libspeed_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ - $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ - $(libspeed_la_LDFLAGS) $(LDFLAGS) -o $@ -am_speed_OBJECTS = speed.$(OBJEXT) +am_speed_OBJECTS = speed$U.$(OBJEXT) speed_OBJECTS = $(am_speed_OBJECTS) speed_LDADD = $(LDADD) -speed_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1) -speed_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ - --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(speed_LDFLAGS) \ - $(LDFLAGS) -o $@ -am_speed_dynamic_OBJECTS = speed.$(OBJEXT) +am__DEPENDENCIES_3 = libspeed.la +speed_DEPENDENCIES = $(am__DEPENDENCIES_3) +am_speed_dynamic_OBJECTS = speed$U.$(OBJEXT) speed_dynamic_OBJECTS = $(am_speed_dynamic_OBJECTS) speed_dynamic_LDADD = $(LDADD) -speed_dynamic_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1) -am_speed_ext_OBJECTS = speed-ext.$(OBJEXT) +speed_dynamic_DEPENDENCIES = $(am__DEPENDENCIES_3) +am_speed_ext_OBJECTS = speed-ext$U.$(OBJEXT) speed_ext_OBJECTS = $(am_speed_ext_OBJECTS) speed_ext_LDADD = $(LDADD) 
-speed_ext_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1) -speed_ext_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ - $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ - $(speed_ext_LDFLAGS) $(LDFLAGS) -o $@ -am_tune_gcd_p_OBJECTS = tune-gcd-p.$(OBJEXT) -tune_gcd_p_OBJECTS = $(am_tune_gcd_p_OBJECTS) -tune_gcd_p_LDADD = $(LDADD) -tune_gcd_p_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ - $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ - $(tune_gcd_p_LDFLAGS) $(LDFLAGS) -o $@ -am_tuneup_OBJECTS = tuneup.$(OBJEXT) -am__objects_1 = div_qr_2.$(OBJEXT) bdiv_q.$(OBJEXT) bdiv_qr.$(OBJEXT) \ - dcpi1_div_qr.$(OBJEXT) dcpi1_divappr_q.$(OBJEXT) \ - dcpi1_bdiv_qr.$(OBJEXT) dcpi1_bdiv_q.$(OBJEXT) \ - invertappr.$(OBJEXT) invert.$(OBJEXT) binvert.$(OBJEXT) \ - divrem_2.$(OBJEXT) gcd.$(OBJEXT) gcdext.$(OBJEXT) \ - get_str.$(OBJEXT) set_str.$(OBJEXT) matrix22_mul.$(OBJEXT) \ - hgcd.$(OBJEXT) hgcd_appr.$(OBJEXT) hgcd_reduce.$(OBJEXT) \ - mul_n.$(OBJEXT) sqr.$(OBJEXT) sec_powm.$(OBJEXT) \ - mullo_n.$(OBJEXT) mul_fft.$(OBJEXT) mul.$(OBJEXT) \ - tdiv_qr.$(OBJEXT) mulmod_bnm1.$(OBJEXT) sqrmod_bnm1.$(OBJEXT) \ - mulmid.$(OBJEXT) mulmid_n.$(OBJEXT) toom42_mulmid.$(OBJEXT) \ - nussbaumer_mul.$(OBJEXT) toom6h_mul.$(OBJEXT) \ - toom8h_mul.$(OBJEXT) toom6_sqr.$(OBJEXT) toom8_sqr.$(OBJEXT) \ - toom22_mul.$(OBJEXT) toom2_sqr.$(OBJEXT) toom33_mul.$(OBJEXT) \ - toom3_sqr.$(OBJEXT) toom44_mul.$(OBJEXT) toom4_sqr.$(OBJEXT) -am__objects_2 = $(am__objects_1) divrem_1.$(OBJEXT) mod_1.$(OBJEXT) -nodist_tuneup_OBJECTS = sqr_basecase.$(OBJEXT) fac_ui.$(OBJEXT) \ - $(am__objects_2) +speed_ext_DEPENDENCIES = $(am__DEPENDENCIES_3) +am_tuneup_OBJECTS = tuneup$U.$(OBJEXT) +am__objects_1 = dc_divrem_n$U.$(OBJEXT) divrem_2$U.$(OBJEXT) \ + gcd$U.$(OBJEXT) gcdext$U.$(OBJEXT) get_str$U.$(OBJEXT) \ + set_str$U.$(OBJEXT) matrix22_mul$U.$(OBJEXT) hgcd$U.$(OBJEXT) \ + mul_n$U.$(OBJEXT) toom44_mul$U.$(OBJEXT) toom4_sqr$U.$(OBJEXT) \ + mullow_n$U.$(OBJEXT) mul_fft$U.$(OBJEXT) 
mul$U.$(OBJEXT) \ + sb_divrem_mn$U.$(OBJEXT) tdiv_qr$U.$(OBJEXT) +am__objects_2 = $(am__objects_1) divrem_1$U.$(OBJEXT) \ + mod_1$U.$(OBJEXT) +nodist_tuneup_OBJECTS = sqr_basecase$U.$(OBJEXT) $(am__objects_2) tuneup_OBJECTS = $(am_tuneup_OBJECTS) $(nodist_tuneup_OBJECTS) -am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) libspeed.la -tuneup_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ - --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(tuneup_LDFLAGS) \ - $(LDFLAGS) -o $@ -DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +am__DEPENDENCIES_4 = $(am__DEPENDENCIES_1) libspeed.la +DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir) depcomp = am__depfiles_maybe = COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ - --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) CCLD = $(CC) -LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ - --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ - $(LDFLAGS) -o $@ +LINK = $(LIBTOOL) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \ $(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \ - $(tune_gcd_p_SOURCES) $(tuneup_SOURCES) \ - $(nodist_tuneup_SOURCES) + $(tuneup_SOURCES) $(nodist_tuneup_SOURCES) DIST_SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \ $(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \ - $(tune_gcd_p_SOURCES) $(tuneup_SOURCES) -am__can_run_installinfo = \ - case $$AM_UPDATE_INFO_DIR in \ - n|no|NO) false;; \ - *) (install-info --version) >/dev/null 2>&1;; \ - esac + $(tuneup_SOURCES) HEADERS = $(noinst_HEADERS) ETAGS = etags CTAGS = ctags @@ -223,6 +153,7 @@ AUTOCONF = @AUTOCONF@ 
AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ +BITS_PER_MP_LIMB = @BITS_PER_MP_LIMB@ CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@ CC = @CC@ CCAS = @CCAS@ @@ -238,17 +169,16 @@ CYGPATH_W = @CYGPATH_W@ DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@ DEFS = @DEFS@ DLLTOOL = @DLLTOOL@ -DSYMUTIL = @DSYMUTIL@ -DUMPBIN = @DUMPBIN@ +ECHO = @ECHO@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ +ENABLE_STATIC_FALSE = @ENABLE_STATIC_FALSE@ +ENABLE_STATIC_TRUE = @ENABLE_STATIC_TRUE@ EXEEXT = @EXEEXT@ EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@ -FGREP = @FGREP@ GMP_LDFLAGS = @GMP_LDFLAGS@ -GMP_LIMB_BITS = @GMP_LIMB_BITS@ GMP_NAIL_BITS = @GMP_NAIL_BITS@ GREP = @GREP@ HAVE_CLOCK_01 = @HAVE_CLOCK_01@ @@ -262,12 +192,10 @@ HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@ HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@ HAVE_STACK_T_01 = @HAVE_STACK_T_01@ HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@ -INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ -LD = @LD@ LDFLAGS = @LDFLAGS@ LEX = @LEX@ LEXLIB = @LEXLIB@ @@ -282,26 +210,20 @@ LIBOBJS = @LIBOBJS@ LIBREADLINE = @LIBREADLINE@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ -LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ M4 = @M4@ MAINT = @MAINT@ +MAINTAINER_MODE_FALSE = @MAINTAINER_MODE_FALSE@ +MAINTAINER_MODE_TRUE = @MAINTAINER_MODE_TRUE@ MAKEINFO = @MAKEINFO@ -MANIFEST_TOOL = @MANIFEST_TOOL@ -MKDIR_P = @MKDIR_P@ -NM = @NM@ -NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ -OTOOL = @OTOOL@ -OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ -PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ RANLIB = @RANLIB@ @@ -311,31 +233,26 @@ SHELL = @SHELL@ SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@ STRIP = @STRIP@ 
TAL_OBJECT = @TAL_OBJECT@ -TUNE_LIBS = @TUNE_LIBS@ TUNE_SQR_OBJ = @TUNE_SQR_OBJ@ +U = @U@ U_FOR_BUILD = @U_FOR_BUILD@ VERSION = @VERSION@ +WANT_CXX_FALSE = @WANT_CXX_FALSE@ +WANT_CXX_TRUE = @WANT_CXX_TRUE@ +WANT_MPBSD_FALSE = @WANT_MPBSD_FALSE@ +WANT_MPBSD_TRUE = @WANT_MPBSD_TRUE@ WITH_READLINE_01 = @WITH_READLINE_01@ YACC = @YACC@ YFLAGS = @YFLAGS@ -abs_builddir = @abs_builddir@ -abs_srcdir = @abs_srcdir@ -abs_top_builddir = @abs_top_builddir@ -abs_top_srcdir = @abs_top_srcdir@ -ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ -ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__leading_dot = @am__leading_dot@ -am__tar = @am__tar@ -am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ -builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ @@ -359,6 +276,7 @@ mandir = @mandir@ mkdir_p = @mkdir_p@ mpn_objects = @mpn_objects@ mpn_objs_in_libgmp = @mpn_objs_in_libgmp@ +mpn_objs_in_libmp = @mpn_objs_in_libmp@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ @@ -366,12 +284,8 @@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ -srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ -top_build_prefix = @top_build_prefix@ -top_builddir = @top_builddir@ -top_srcdir = @top_srcdir@ INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests EXTRA_DIST = alpha.asm pentium.asm sparcv9.asm hppa.asm hppa2.asm hppa2w.asm \ ia64.asm powerpc.asm powerpc64.asm x86_64.asm many.pl @@ -389,14 +303,13 @@ noinst_HEADERS = speed.h EXTRA_LTLIBRARIES = libspeed.la libspeed_la_SOURCES = \ common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \ - div_qr_1n_pi1_1.c div_qr_1n_pi1_2.c div_qr_1_tune.c \ - freq.c \ + freq.c \ gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \ - hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \ - 
jacbase1.c jacbase2.c jacbase3.c jacbase4.c \ - mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \ + jacbase1.c jacbase2.c jacbase3.c \ + mod_1_div.c mod_1_inv.c modlinv.c \ noop.c powm_mod.c powm_redc.c pre_divrem_1.c \ - set_strb.c set_strs.c set_strp.c time.c + set_strb.c set_strs.c set_strp.c time.c \ + sb_div.c sb_inv.c libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \ $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la @@ -404,24 +317,21 @@ libspeed_la_DEPENDENCIES = $(SPEED_CYCLECOUNTER_OBJ) \ libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(LIBM) libspeed_la_LDFLAGS = $(STATIC) DEPENDENCIES = libspeed.la -LDADD = $(DEPENDENCIES) $(TUNE_LIBS) +LDADD = $(DEPENDENCIES) speed_SOURCES = speed.c speed_LDFLAGS = $(STATIC) speed_dynamic_SOURCES = speed.c speed_ext_SOURCES = speed-ext.c speed_ext_LDFLAGS = $(STATIC) tuneup_SOURCES = tuneup.c -nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS) +nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS) tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la -tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS) +tuneup_LDADD = $(tuneup_DEPENDENCIES) tuneup_LDFLAGS = $(STATIC) -tune_gcd_p_SOURCES = tune-gcd-p.c -tune_gcd_p_DEPENDENCIES = ../mpn/gcd.c -tune_gcd_p_LDFLAGS = $(STATIC) # $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \ - $(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \ + $(TUNE_MPN_SRCS) sqr_asm.asm \ stg.gnuplot stg.data \ mtg.gnuplot mtg.data \ fibg.gnuplot fibg.data \ @@ -446,16 +356,9 @@ DISTCLEANFILES = sqr_basecase.c $(MANY_DISTCLEAN) # FIXME: Would like say mul_n.c to depend on $(top_builddir)/mul_n.c so the # recompiled object will be rebuilt if that file changes. 
TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c -TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \ - dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \ - invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \ - get_str.c set_str.c matrix22_mul.c \ - hgcd.c hgcd_appr.c hgcd_reduce.c \ - mul_n.c sqr.c sec_powm.c \ - mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \ - mulmid.c mulmid_n.c toom42_mulmid.c \ - nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \ - toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c +TUNE_MPN_SRCS_BASIC = dc_divrem_n.c divrem_2.c gcd.c gcdext.c get_str.c \ + set_str.c matrix22_mul.c hgcd.c mul_n.c toom44_mul.c toom4_sqr.c \ + mullow_n.c mul_fft.c mul.c sb_divrem_mn.c tdiv_qr.c # COMPILE minus CC. @@ -486,6 +389,8 @@ SUFFIXES = .s .S .asm # can be overridden during development, eg. "make RM_TMP=: mul_1.lo" RM_TMP = rm -f +MTS = -s 50-150 -c +STS = -s 50-150 -c all: all-am .SUFFIXES: @@ -494,14 +399,14 @@ $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/.. @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ - ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ - && { if test -f $@; then exit 0; else break; fi; }; \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ exit 1;; \ esac; \ done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tune/Makefile'; \ - $(am__cd) $(top_srcdir) && \ - $(AUTOMAKE) --gnu --ignore-deps tune/Makefile + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tune/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu --ignore-deps tune/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ @@ -511,7 +416,6 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; -$(srcdir)/../mpn/Makeasm.am: $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh @@ -520,30 +424,31 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(am__aclocal_m4_deps): -libspeed.la: $(libspeed_la_OBJECTS) $(libspeed_la_DEPENDENCIES) $(EXTRA_libspeed_la_DEPENDENCIES) - $(libspeed_la_LINK) $(libspeed_la_OBJECTS) $(libspeed_la_LIBADD) $(LIBS) -speed$(EXEEXT): $(speed_OBJECTS) $(speed_DEPENDENCIES) $(EXTRA_speed_DEPENDENCIES) +libspeed.la: $(libspeed_la_OBJECTS) $(libspeed_la_DEPENDENCIES) + $(LINK) $(libspeed_la_LDFLAGS) $(libspeed_la_OBJECTS) $(libspeed_la_LIBADD) $(LIBS) +speed$(EXEEXT): $(speed_OBJECTS) $(speed_DEPENDENCIES) @rm -f speed$(EXEEXT) - $(speed_LINK) $(speed_OBJECTS) $(speed_LDADD) $(LIBS) -speed-dynamic$(EXEEXT): $(speed_dynamic_OBJECTS) $(speed_dynamic_DEPENDENCIES) $(EXTRA_speed_dynamic_DEPENDENCIES) + $(LINK) $(speed_LDFLAGS) $(speed_OBJECTS) $(speed_LDADD) $(LIBS) +speed-dynamic$(EXEEXT): $(speed_dynamic_OBJECTS) $(speed_dynamic_DEPENDENCIES) @rm -f speed-dynamic$(EXEEXT) - $(LINK) $(speed_dynamic_OBJECTS) $(speed_dynamic_LDADD) $(LIBS) -speed-ext$(EXEEXT): $(speed_ext_OBJECTS) $(speed_ext_DEPENDENCIES) $(EXTRA_speed_ext_DEPENDENCIES) + $(LINK) $(speed_dynamic_LDFLAGS) $(speed_dynamic_OBJECTS) $(speed_dynamic_LDADD) $(LIBS) +speed-ext$(EXEEXT): $(speed_ext_OBJECTS) $(speed_ext_DEPENDENCIES) @rm -f speed-ext$(EXEEXT) - $(speed_ext_LINK) $(speed_ext_OBJECTS) $(speed_ext_LDADD) $(LIBS) 
-tune-gcd-p$(EXEEXT): $(tune_gcd_p_OBJECTS) $(tune_gcd_p_DEPENDENCIES) $(EXTRA_tune_gcd_p_DEPENDENCIES) - @rm -f tune-gcd-p$(EXEEXT) - $(tune_gcd_p_LINK) $(tune_gcd_p_OBJECTS) $(tune_gcd_p_LDADD) $(LIBS) -tuneup$(EXEEXT): $(tuneup_OBJECTS) $(tuneup_DEPENDENCIES) $(EXTRA_tuneup_DEPENDENCIES) + $(LINK) $(speed_ext_LDFLAGS) $(speed_ext_OBJECTS) $(speed_ext_LDADD) $(LIBS) +tuneup$(EXEEXT): $(tuneup_OBJECTS) $(tuneup_DEPENDENCIES) @rm -f tuneup$(EXEEXT) - $(tuneup_LINK) $(tuneup_OBJECTS) $(tuneup_LDADD) $(LIBS) + $(LINK) $(tuneup_LDFLAGS) $(tuneup_OBJECTS) $(tuneup_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c +$(top_builddir)/ansi2knr: + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ansi2knr + +mostlyclean-kr: + -test "$U" = "" || rm -f *_.c .c.o: $(COMPILE) -c $< @@ -553,6 +458,127 @@ distclean-compile: .c.lo: $(LTCOMPILE) -c -o $@ $< +common_.c: common.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/common.c; then echo $(srcdir)/common.c; else echo common.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +dc_divrem_n_.c: dc_divrem_n.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dc_divrem_n.c; then echo $(srcdir)/dc_divrem_n.c; else echo dc_divrem_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +divrem1div_.c: divrem1div.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem1div.c; then echo $(srcdir)/divrem1div.c; else echo divrem1div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +divrem1inv_.c: divrem1inv.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem1inv.c; then echo $(srcdir)/divrem1inv.c; else echo divrem1inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ 
+divrem2div_.c: divrem2div.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem2div.c; then echo $(srcdir)/divrem2div.c; else echo divrem2div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +divrem2inv_.c: divrem2inv.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem2inv.c; then echo $(srcdir)/divrem2inv.c; else echo divrem2inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +divrem_1_.c: divrem_1.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_1.c; then echo $(srcdir)/divrem_1.c; else echo divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +divrem_2_.c: divrem_2.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_2.c; then echo $(srcdir)/divrem_2.c; else echo divrem_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +freq_.c: freq.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/freq.c; then echo $(srcdir)/freq.c; else echo freq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +gcd_.c: gcd.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd.c; then echo $(srcdir)/gcd.c; else echo gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +gcdext_.c: gcdext.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext.c; then echo $(srcdir)/gcdext.c; else echo gcdext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +gcdext_double_.c: gcdext_double.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f 
$(srcdir)/gcdext_double.c; then echo $(srcdir)/gcdext_double.c; else echo gcdext_double.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +gcdext_single_.c: gcdext_single.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext_single.c; then echo $(srcdir)/gcdext_single.c; else echo gcdext_single.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +gcdextod_.c: gcdextod.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdextod.c; then echo $(srcdir)/gcdextod.c; else echo gcdextod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +gcdextos_.c: gcdextos.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdextos.c; then echo $(srcdir)/gcdextos.c; else echo gcdextos.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +get_str_.c: get_str.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +hgcd_.c: hgcd.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd.c; then echo $(srcdir)/hgcd.c; else echo hgcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +jacbase1_.c: jacbase1.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase1.c; then echo $(srcdir)/jacbase1.c; else echo jacbase1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +jacbase2_.c: jacbase2.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase2.c; then echo $(srcdir)/jacbase2.c; else echo jacbase2.c; fi` | sed 's/^# 
\([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +jacbase3_.c: jacbase3.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase3.c; then echo $(srcdir)/jacbase3.c; else echo jacbase3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +matrix22_mul_.c: matrix22_mul.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/matrix22_mul.c; then echo $(srcdir)/matrix22_mul.c; else echo matrix22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +mod_1_.c: mod_1.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1.c; then echo $(srcdir)/mod_1.c; else echo mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +mod_1_div_.c: mod_1_div.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_div.c; then echo $(srcdir)/mod_1_div.c; else echo mod_1_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +mod_1_inv_.c: mod_1_inv.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_inv.c; then echo $(srcdir)/mod_1_inv.c; else echo mod_1_inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +modlinv_.c: modlinv.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/modlinv.c; then echo $(srcdir)/modlinv.c; else echo modlinv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +mul_.c: mul.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +mul_fft_.c: mul_fft.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_fft.c; then echo $(srcdir)/mul_fft.c; else echo mul_fft.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +mul_n_.c: mul_n.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_n.c; then echo $(srcdir)/mul_n.c; else echo mul_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +mullow_n_.c: mullow_n.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullow_n.c; then echo $(srcdir)/mullow_n.c; else echo mullow_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +noop_.c: noop.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/noop.c; then echo $(srcdir)/noop.c; else echo noop.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +powm_mod_.c: powm_mod.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_mod.c; then echo $(srcdir)/powm_mod.c; else echo powm_mod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +powm_redc_.c: powm_redc.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_redc.c; then echo $(srcdir)/powm_redc.c; else echo powm_redc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +pre_divrem_1_.c: pre_divrem_1.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_divrem_1.c; then echo $(srcdir)/pre_divrem_1.c; else echo pre_divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +sb_div_.c: sb_div.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sb_div.c; then echo $(srcdir)/sb_div.c; else echo sb_div.c; fi` | sed 
's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +sb_divrem_mn_.c: sb_divrem_mn.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sb_divrem_mn.c; then echo $(srcdir)/sb_divrem_mn.c; else echo sb_divrem_mn.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +sb_inv_.c: sb_inv.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sb_inv.c; then echo $(srcdir)/sb_inv.c; else echo sb_inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +set_str_.c: set_str.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +set_strb_.c: set_strb.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strb.c; then echo $(srcdir)/set_strb.c; else echo set_strb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +set_strp_.c: set_strp.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strp.c; then echo $(srcdir)/set_strp.c; else echo set_strp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +set_strs_.c: set_strs.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strs.c; then echo $(srcdir)/set_strs.c; else echo set_strs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +speed_.c: speed.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/speed.c; then echo $(srcdir)/speed.c; else echo speed.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +speed-ext_.c: speed-ext.c $(ANSI2KNR) + $(CPP) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/speed-ext.c; then echo $(srcdir)/speed-ext.c; else echo speed-ext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +sqr_basecase_.c: sqr_basecase.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_basecase.c; then echo $(srcdir)/sqr_basecase.c; else echo sqr_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +tdiv_qr_.c: tdiv_qr.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr.c; then echo $(srcdir)/tdiv_qr.c; else echo tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +time_.c: time.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/time.c; then echo $(srcdir)/time.c; else echo time.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +toom44_mul_.c: toom44_mul.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom44_mul.c; then echo $(srcdir)/toom44_mul.c; else echo toom44_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +toom4_sqr_.c: toom4_sqr.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom4_sqr.c; then echo $(srcdir)/toom4_sqr.c; else echo toom4_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +tuneup_.c: tuneup.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tuneup.c; then echo $(srcdir)/tuneup.c; else echo tuneup.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ +common_.$(OBJEXT) common_.lo dc_divrem_n_.$(OBJEXT) dc_divrem_n_.lo \ +divrem1div_.$(OBJEXT) divrem1div_.lo divrem1inv_.$(OBJEXT) \ +divrem1inv_.lo 
divrem2div_.$(OBJEXT) divrem2div_.lo \ +divrem2inv_.$(OBJEXT) divrem2inv_.lo divrem_1_.$(OBJEXT) divrem_1_.lo \ +divrem_2_.$(OBJEXT) divrem_2_.lo freq_.$(OBJEXT) freq_.lo \ +gcd_.$(OBJEXT) gcd_.lo gcdext_.$(OBJEXT) gcdext_.lo \ +gcdext_double_.$(OBJEXT) gcdext_double_.lo gcdext_single_.$(OBJEXT) \ +gcdext_single_.lo gcdextod_.$(OBJEXT) gcdextod_.lo gcdextos_.$(OBJEXT) \ +gcdextos_.lo get_str_.$(OBJEXT) get_str_.lo hgcd_.$(OBJEXT) hgcd_.lo \ +jacbase1_.$(OBJEXT) jacbase1_.lo jacbase2_.$(OBJEXT) jacbase2_.lo \ +jacbase3_.$(OBJEXT) jacbase3_.lo matrix22_mul_.$(OBJEXT) \ +matrix22_mul_.lo mod_1_.$(OBJEXT) mod_1_.lo mod_1_div_.$(OBJEXT) \ +mod_1_div_.lo mod_1_inv_.$(OBJEXT) mod_1_inv_.lo modlinv_.$(OBJEXT) \ +modlinv_.lo mul_.$(OBJEXT) mul_.lo mul_fft_.$(OBJEXT) mul_fft_.lo \ +mul_n_.$(OBJEXT) mul_n_.lo mullow_n_.$(OBJEXT) mullow_n_.lo \ +noop_.$(OBJEXT) noop_.lo powm_mod_.$(OBJEXT) powm_mod_.lo \ +powm_redc_.$(OBJEXT) powm_redc_.lo pre_divrem_1_.$(OBJEXT) \ +pre_divrem_1_.lo sb_div_.$(OBJEXT) sb_div_.lo sb_divrem_mn_.$(OBJEXT) \ +sb_divrem_mn_.lo sb_inv_.$(OBJEXT) sb_inv_.lo set_str_.$(OBJEXT) \ +set_str_.lo set_strb_.$(OBJEXT) set_strb_.lo set_strp_.$(OBJEXT) \ +set_strp_.lo set_strs_.$(OBJEXT) set_strs_.lo speed_.$(OBJEXT) \ +speed_.lo speed-ext_.$(OBJEXT) speed-ext_.lo sqr_basecase_.$(OBJEXT) \ +sqr_basecase_.lo tdiv_qr_.$(OBJEXT) tdiv_qr_.lo time_.$(OBJEXT) \ +time_.lo toom44_mul_.$(OBJEXT) toom44_mul_.lo toom4_sqr_.$(OBJEXT) \ +toom4_sqr_.lo tuneup_.$(OBJEXT) tuneup_.lo : $(ANSI2KNR) mostlyclean-libtool: -rm -f *.lo @@ -560,85 +586,83 @@ mostlyclean-libtool: clean-libtool: -rm -rf .libs _libs +distclean-libtool: + -rm -f libtool +uninstall-info-am: + ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ + $(AWK) 
' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ mkid -fID $$unique tags: TAGS TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) - set x; \ + tags=; \ here=`pwd`; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - shift; \ - if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ - test -n "$$unique" || unique=$$empty_fix; \ - if test $$# -gt 0; then \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - "$$@" $$unique; \ - else \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - $$unique; \ - fi; \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -z "$$unique" && unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ fi ctags: CTAGS CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - test -z "$(CTAGS_ARGS)$$unique" \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ - $$unique + $$tags $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ - && $(am__cd) $(top_srcdir) \ - && gtags -i $(GTAGS_ARGS) "$$here" + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) - @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - 
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - list='$(DISTFILES)'; \ - dist_files=`for file in $$list; do echo $$file; done | \ - sed -e "s|^$$srcdirstrip/||;t" \ - -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ - case $$dist_files in \ - */*) $(MKDIR_P) `echo "$$dist_files" | \ - sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ - sort -u` ;; \ - esac; \ - for file in $$dist_files; do \ + $(mkdir_p) $(distdir)/../mpn + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \ + list='$(DISTFILES)'; for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \ + esac; \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test "$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkdir_p) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ if test -d $$d/$$file; then \ - dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ - if test -d "$(distdir)/$$file"; then \ - find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ - fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ - cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ - find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ fi; \ - cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ else \ - test -f "$(distdir)/$$file" \ - || cp -p $$d/$$file "$(distdir)/$$file" \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ || exit 1; \ fi; \ done @@ -656,23 +680,17 @@ install-am: all-am installcheck: installcheck-am install-strip: - if test -z '$(STRIP)'; then \ - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - install; \ - else \ - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ - fi + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: - -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) - -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -rm -f $(CONFIG_CLEAN_FILES) -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) maintainer-clean-generic: @@ -685,7 +703,7 @@ clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ - distclean-tags + distclean-libtool distclean-tags dvi: dvi-am @@ -693,38 +711,18 @@ dvi-am: html: html-am -html-am: - info: info-am info-am: install-data-am: -install-dvi: install-dvi-am - -install-dvi-am: - install-exec-am: -install-html: install-html-am - -install-html-am: - install-info: install-info-am -install-info-am: - install-man: -install-pdf: install-pdf-am - -install-pdf-am: - -install-ps: install-ps-am - -install-ps-am: - installcheck-am: maintainer-clean: maintainer-clean-am @@ -733,7 +731,7 @@ maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am -mostlyclean-am: mostlyclean-compile mostlyclean-generic \ +mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \ mostlyclean-libtool pdf: pdf-am @@ -744,22 +742,18 @@ ps: ps-am ps-am: -uninstall-am: - -.MAKE: install-am install-strip +uninstall-am: uninstall-info-am .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ clean-libtool ctags distclean distclean-compile \ distclean-generic distclean-libtool distclean-tags distdir dvi \ dvi-am html html-am info info-am install install-am \ - install-data install-data-am install-dvi install-dvi-am \ - install-exec install-exec-am install-html install-html-am \ - install-info install-info-am install-man install-pdf \ - install-pdf-am install-ps install-ps-am install-strip \ + install-data install-data-am install-exec install-exec-am \ + install-info install-info-am install-man install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ - 
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ - tags uninstall uninstall-am + mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \ + pdf-am ps ps-am tags uninstall uninstall-am uninstall-info-am $(top_builddir)/tests/libtests.la: @@ -788,17 +782,9 @@ mod_1.c: echo "#include \"mpn/generic/mod_1.c\"" >>mod_1.c sqr_asm.asm: $(top_builddir)/mpn/sqr_basecase.asm - echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm + echo 'define(SQR_KARATSUBA_THRESHOLD_OVERRIDE,SQR_KARATSUBA_THRESHOLD_MAX)' >sqr_asm.asm echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm -# FIXME: Should it depend on $(top_builddir)/fac_ui.h too? -fac_ui.c: $(top_builddir)/mpz/fac_ui.c - echo "#define TUNE_PROGRAM_BUILD 1" >fac_ui.c - echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c - echo "#define __gmpz_oddfac_1 mpz_oddfac_1_tune" >>fac_ui.c - echo "#include \"mpz/oddfac_1.c\"" >>fac_ui.c - echo "#include \"mpz/fac_ui.c\"" >>fac_ui.c - # .s assembler, no preprocessing. 
# .s.o: @@ -855,6 +841,50 @@ fac_ui.c: $(top_builddir)/mpz/fac_ui.c .asm.lo: $(LIBTOOL) --mode=compile --tag=CC $(top_srcdir)/mpn/m4-ccas --m4="$(M4)" $(CCAS) $(COMPILE_FLAGS) `test -f '$<' || echo '$(srcdir)/'`$< +# "mk" is multiplication in the karatsuba range +# "st" is squaring in the toom-cook range, etc +# "g" forms produce graphs + +mk: + ./speed -s 5-40 -c mpn_mul_basecase mpn_kara_mul_n +mt: + ./speed $(MTS) mpn_kara_mul_n mpn_toom3_mul_n +mtg: + ./speed $(MTS) -P mtg mpn_kara_mul_n mpn_toom3_mul_n + +sk: + ./speed -s 5-40 -c mpn_sqr_basecase mpn_kara_sqr_n +st: + ./speed $(STS) mpn_kara_sqr_n mpn_toom3_sqr_n +stg: + ./speed $(STS) -P stg mpn_kara_sqr_n mpn_toom3_sqr_n + +dc: + ./speed -s 5-40 -c mpn_dc_divrem_sb mpn_dc_divrem_n mpn_dc_tdiv_qr + +fib: + ./speed -s 40-60 -c mpz_fib_ui +fibg: + ./speed -s 10-300 -P fibg mpz_fib_ui + +gcd: + ./speed -s 1-20 -c mpn_gcd + +udiv: + ./speed -s 1 -c udiv_qrnnd udiv_qrnnd_preinv udiv_qrnnd_preinv2norm invert_limb udiv_qrnnd_c + +divn: + ./speed -s 1-30 -c mpn_divrem_1_div.-1 mpn_divrem_1_inv.-1 +divun: + ./speed -s 1-30 -c mpn_divrem_1_div.12345 mpn_divrem_1_inv.12345 +modn: + ./speed -s 1-30 -c mpn_mod_1_div.-1 mpn_mod_1_inv.-1 +modun: + ./speed -s 1-30 -c mpn_mod_1_div.12345 mpn_mod_1_inv.12345 + +graph: + ./speed -s 1-5000 -f 1.02 -P graph mpn_mul_n mpn_sqr + gnuplot graph.gnuplot # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/gmp/tune/README b/gmp/tune/README index f76407f7ca..994231d3be 100644 --- a/gmp/tune/README +++ b/gmp/tune/README @@ -1,30 +1,19 @@ -Copyright 2000-2002, 2004 Free Software Foundation, Inc. +Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc. This file is part of the GNU MP Library. 
The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. @@ -84,27 +73,6 @@ SCO OpenUNIX 8 /etc/hw running the speed program repeatedly then set a GMP_CPU_FREQUENCY environment variable (see TIME BASE section below). -Timing on GNU/Linux - - On Linux, timing currently uses the cycle counter. This is unreliable, - since the counter is not saved and restored at context switches (unlike - FreeBSD and Solaris where the cycle counter is "virtualized"). - - Using the clock_gettime method with CLOCK_PROCESS_CPUTIME_ID (posix) or - CLOCK_VIRTUAL (BSD) should be more reliable. To get clock_gettime - with glibc, one has to link with -lrt (which also drags in the pthreads - threading library). 
configure.in must be hacked to detect this and - arrange proper linking. Something like - - old_LIBS="$LIBS" - AC_SEARCH_LIBS(clock_gettime, rt, [AC_DEFINE(HAVE_CLOCK_GETTIME)]) - TUNE_LIBS="$LIBS" - LIBS="$old_LIBS" - - AC_SUBST(TUNE_LIBS) - - might work. - Low resolution timebase Parameter tuning can be very time consuming if the only timebase @@ -117,7 +85,7 @@ Low resolution timebase PARAMETER TUNING The "tuneup" program runs some tests designed to find the best settings for -various thresholds, like MUL_TOOM22_THRESHOLD. Its output can be put +various thresholds, like MUL_KARATSUBA_THRESHOLD. Its output can be put into gmp-mparam.h. The program is built and run with make tune @@ -298,16 +266,10 @@ mpn_divrem_1, using division by 32 as an example. EXAMPLE COMPARISONS - MULTIPLICATION -mul_basecase takes a ".<r>" parameter. If positive, it gives the second -(smaller) operand size. For example to show speeds for 3x3 up to 20x3 in -cycles, - - ./speed -s 3-20 -c mpn_mul_basecase.3 - -A negative ".<-r>" parameter fixes the size of the product to the absolute -value r. For example to show speeds for 10x10 up to 19x1 in cycles, +mul_basecase takes a ".<r>" parameter which is the first (larger) size +parameter. For example to show speeds for 20x1 up to 20x15 in cycles, - ./speed -s 10-19 -c mpn_mul_basecase.-20 + ./speed -s 1-15 -c mpn_mul_basecase.20 mul_basecase with no parameter does an NxN multiply, so for example to show speeds in cycles for 1x1, 2x2, 3x3, etc, up to 20x20, in cycles, @@ -319,7 +281,7 @@ up to twice as fast as mul_basecase. In practice loop overheads and the products on the diagonal mean it falls short of this. Here's an example running the two and showing by what factor an NxN mul_basecase is slower than an NxN sqr_basecase. (Some versions of sqr_basecase only allow sizes -below SQR_TOOM2_THRESHOLD, so if it crashes at that point don't worry.) +below SQR_KARATSUBA_THRESHOLD, so if it crashes at that point don't worry.) 
./speed -s 1-20 -r mpn_sqr_basecase mpn_mul_basecase @@ -465,12 +427,12 @@ normal libgmp.la. Note further that the various routines may recurse into themselves on sizes far enough above applicable thresholds. For example, mpn_kara_mul_n will recurse into itself on sizes greater than twice the compiled-in -MUL_TOOM22_THRESHOLD. +MUL_KARATSUBA_THRESHOLD. When doing the above comparison between mul_basecase and kara_mul_n what's probably of interest is mul_basecase versus a kara_mul_n that does one level of Karatsuba then calls to mul_basecase, but this only happens on sizes less -than twice the compiled MUL_TOOM22_THRESHOLD. A larger value for that +than twice the compiled MUL_KARATSUBA_THRESHOLD. A larger value for that setting can be compiled-in to avoid the problem if necessary. The same applies to toom3 and DC, though in a trickier fashion. diff --git a/gmp/tune/alpha.asm b/gmp/tune/alpha.asm index 888c77fe9d..b447462083 100644 --- a/gmp/tune/alpha.asm +++ b/gmp/tune/alpha.asm @@ -1,32 +1,21 @@ dnl Alpha time stamp counter access routine. dnl Copyright 2000, 2005 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. 
dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/tune/common.c b/gmp/tune/common.c index 8d06b4dc02..85a1ddef3f 100644 --- a/gmp/tune/common.c +++ b/gmp/tune/common.c @@ -1,32 +1,22 @@ /* Shared speed subroutines. -Copyright 1999-2006, 2008-2012 Free Software Foundation, Inc. +Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software +Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. 
The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define __GMP_NO_ATTRIBUTE_CONST_PURE @@ -51,7 +41,6 @@ see https://www.gnu.org/licenses/. */ int speed_option_addrs = 0; int speed_option_verbose = 0; -int speed_option_cycles_broken = 0; /* Provide __clz_tab even if it's not required, for the benefit of new code @@ -72,9 +61,9 @@ pentium_wbinvd(void) if (fd == -2) { - fd = open ("/dev/wbinvd", O_RDWR); - if (fd == -1) - perror ("open /dev/wbinvd"); + fd = open ("/dev/wbinvd", O_RDWR); + if (fd == -1) + perror ("open /dev/wbinvd"); } if (fd != -1) @@ -132,9 +121,10 @@ double_cmp_ptr (const double *p, const double *q) s->r, -1.0 should be returned. See the various base routines below. 
*/ double -speed_measure (double (*fun) (struct speed_params *s), struct speed_params *s) +speed_measure (double (*fun) __GMP_PROTO ((struct speed_params *s)), + struct speed_params *s) { -#define TOLERANCE 1.01 /* 1% */ +#define TOLERANCE 1.005 /* 0.5% */ const int max_zeros = 10; struct speed_params s_dummy; @@ -157,77 +147,74 @@ speed_measure (double (*fun) (struct speed_params *s), struct speed_params *s) for (i = 0; i < numberof (t); i++) { for (;;) - { - s->src_num = 0; - s->dst_num = 0; - - t[i] = (*fun) (s); - - if (speed_option_verbose >= 3) - gmp_printf("size=%ld reps=%u r=%Md attempt=%d %.9f\n", - (long) s->size, s->reps, s->r, i, t[i]); - - if (t[i] == 0.0) - { - zeros++; - if (zeros > max_zeros) - { - fprintf (stderr, "Fatal error: too many (%d) failed measurements (0.0)\n", zeros); - abort (); - } - if (s->reps < 10000) - s->reps *= 2; - - continue; - } - - if (t[i] == -1.0) - return -1.0; - - if (t[i] >= speed_unittime * speed_precision) - break; - - /* go to a value of reps to make t[i] >= precision */ - reps_d = ceil (1.1 * s->reps - * speed_unittime * speed_precision - / MAX (t[i], speed_unittime)); - if (reps_d > 2e9 || reps_d < 1.0) - { - fprintf (stderr, "Fatal error: new reps bad: %.2f\n", reps_d); - fprintf (stderr, " (old reps %u, unittime %.4g, precision %d, t[i] %.4g)\n", - s->reps, speed_unittime, speed_precision, t[i]); - abort (); - } - s->reps = (unsigned) reps_d; - } + { + s->src_num = 0; + s->dst_num = 0; + + t[i] = (*fun) (s); + + if (speed_option_verbose >= 3) + gmp_printf("size=%ld reps=%u r=%Md attempt=%d %.9f\n", + (long) s->size, s->reps, s->r, i, t[i]); + + if (t[i] == 0.0) + { + zeros++; + if (zeros > max_zeros) + { + fprintf (stderr, "Fatal error: too many (%d) failed measurements (0.0)\n", zeros); + abort (); + } + continue; + } + + if (t[i] == -1.0) + return -1.0; + + if (t[i] >= speed_unittime * speed_precision) + break; + + /* go to a value of reps to make t[i] >= precision */ + reps_d = ceil (1.1 * s->reps + * 
speed_unittime * speed_precision + / MAX (t[i], speed_unittime)); + if (reps_d > 2e9 || reps_d < 1.0) + { + fprintf (stderr, "Fatal error: new reps bad: %.2f\n", reps_d); + fprintf (stderr, " (old reps %u, unittime %.4g, precision %d, t[i] %.4g)\n", + s->reps, speed_unittime, speed_precision, t[i]); + abort (); + } + s->reps = (unsigned) reps_d; + } t[i] /= s->reps; t_unsorted[i] = t[i]; if (speed_precision == 0) - return t[i]; + return t[i]; /* require 3 values within TOLERANCE when >= 2 secs, 4 when below */ if (t[0] >= 2.0) - e = 3; + e = 3; else - e = 4; + e = 4; /* Look for e many t[]'s within TOLERANCE of each other to consider a - valid measurement. Return smallest among them. */ + valid measurement. Return smallest among them. */ if (i >= e) - { - qsort (t, i+1, sizeof(t[0]), (qsort_function_t) double_cmp_ptr); - for (j = e-1; j < i; j++) - if (t[j] <= t[j-e+1] * TOLERANCE) - return t[j-e+1] / s->time_divisor; - } + { + qsort (t, i+1, sizeof(t[0]), (qsort_function_t) double_cmp_ptr); + for (j = e-1; j < i; j++) + if (t[j] <= t[j-e+1] * TOLERANCE) + return t[j-e+1] / s->time_divisor; + } } fprintf (stderr, "speed_measure() could not get %d results within %.1f%%\n", - e, (TOLERANCE-1.0)*100.0); + e, (TOLERANCE-1.0)*100.0); fprintf (stderr, " unsorted sorted\n"); fprintf (stderr, " %.12f %.12f is about 0.5%%\n", - t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0)); + t_unsorted[0]*(TOLERANCE-1.0), t[0]*(TOLERANCE-1.0)); for (i = 0; i < numberof (t); i++) fprintf (stderr, " %.09f %.09f\n", t_unsorted[i], t[i]); @@ -318,30 +305,30 @@ speed_cache_fill (struct speed_params *s) different = (s->dst_num != prev.dst_num || s->src_num != prev.src_num); for (i = 0; i < s->dst_num; i++) - different |= (s->dst[i].ptr != prev.dst[i].ptr); + different |= (s->dst[i].ptr != prev.dst[i].ptr); for (i = 0; i < s->src_num; i++) - different |= (s->src[i].ptr != prev.src[i].ptr); + different |= (s->src[i].ptr != prev.src[i].ptr); if (different) - { - if (s->dst_num != 0) - { - 
printf ("dst"); - for (i = 0; i < s->dst_num; i++) - printf (" %08lX", (unsigned long) s->dst[i].ptr); - printf (" "); - } - - if (s->src_num != 0) - { - printf ("src"); - for (i = 0; i < s->src_num; i++) - printf (" %08lX", (unsigned long) s->src[i].ptr); - printf (" "); - } - printf (" (cf sp approx %08lX)\n", (unsigned long) &different); - - } + { + if (s->dst_num != 0) + { + printf ("dst"); + for (i = 0; i < s->dst_num; i++) + printf (" %08lX", (unsigned long) s->dst[i].ptr); + printf (" "); + } + + if (s->src_num != 0) + { + printf ("src"); + for (i = 0; i < s->src_num; i++) + printf (" %08lX", (unsigned long) s->src[i].ptr); + printf (" "); + } + printf (" (cf sp approx %08lX)\n", (unsigned long) &different); + + } memcpy (&prev, s, sizeof(prev)); } @@ -360,7 +347,7 @@ speed_cache_fill (struct speed_params *s) } -/* Miscellaneous options accepted by tune and speed programs under -o. */ +/* Miscellanous options accepted by tune and speed programs under -o. */ void speed_option_set (const char *s) @@ -379,10 +366,6 @@ speed_option_set (const char *s) { speed_option_verbose = n; } - else if (strcmp (s, "cycles-broken") == 0) - { - speed_option_cycles_broken = 1; - } else { printf ("Unrecognised -o option: %s\n", s); @@ -419,10 +402,10 @@ speed_option_set (const char *s) code on most CPUs, thereby minimizing overhead in the measurement. It can always be assumed s->reps >= 1. - i = s->reps - do - foo(); - while (--i != 0); + i = s->reps + do + foo(); + while (--i != 0); Additional parameters might be added to "struct speed_params" in the future. Routines should ignore anything they don't use. 
@@ -469,14 +452,9 @@ speed_memcpy (struct speed_params *s) SPEED_ROUTINE_MPN_COPY_BYTES (memcpy); } double -speed_mpn_com (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_COPY (mpn_com); -} -double -speed_mpn_sec_tabselect (struct speed_params *s) +speed_mpn_com_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_TABSELECT (mpn_sec_tabselect); + SPEED_ROUTINE_MPN_COPY (mpn_com_n); } @@ -573,20 +551,6 @@ speed_mpn_mul_4 (struct speed_params *s) SPEED_ROUTINE_MPN_UNARY_4 (mpn_mul_4); } #endif -#if HAVE_NATIVE_mpn_mul_5 -double -speed_mpn_mul_5 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_UNARY_5 (mpn_mul_5); -} -#endif -#if HAVE_NATIVE_mpn_mul_6 -double -speed_mpn_mul_6 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_UNARY_6 (mpn_mul_6); -} -#endif double @@ -595,11 +559,6 @@ speed_mpn_lshift (struct speed_params *s) SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshift); } double -speed_mpn_lshiftc (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_UNARY_1 (mpn_lshiftc); -} -double speed_mpn_rshift (struct speed_params *s) { SPEED_ROUTINE_MPN_UNARY_1 (mpn_rshift); @@ -699,39 +658,6 @@ speed_mpn_divrem_2_inv (struct speed_params *s) } double -speed_mpn_div_qr_1n_pi1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1); -} -double -speed_mpn_div_qr_1n_pi1_1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_1); -} -double -speed_mpn_div_qr_1n_pi1_2 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_DIV_QR_1N_PI1 (mpn_div_qr_1n_pi1_2); -} - -double -speed_mpn_div_qr_1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_DIV_QR_1 (mpn_div_qr_1); -} - -double -speed_mpn_div_qr_2n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 1); -} -double -speed_mpn_div_qr_2u (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 0); -} - -double speed_mpn_mod_1 (struct speed_params *s) { SPEED_ROUTINE_MPN_MOD_1 (mpn_mod_1); @@ -748,36 +674,6 @@ speed_mpn_preinv_mod_1 (struct speed_params *s) { 
SPEED_ROUTINE_MPN_PREINV_MOD_1 (mpn_preinv_mod_1); } -double -speed_mpn_mod_1_1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p,mpn_mod_1_1p_cps); -} -double -speed_mpn_mod_1_1_1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_1,mpn_mod_1_1p_cps_1); -} -double -speed_mpn_mod_1_1_2 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_2,mpn_mod_1_1p_cps_2); -} -double -speed_mpn_mod_1_2 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_2p,mpn_mod_1s_2p_cps,2); -} -double -speed_mpn_mod_1_3 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_3p,mpn_mod_1s_3p_cps,3); -} -double -speed_mpn_mod_1_4 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_4p,mpn_mod_1s_4p_cps,4); -} double speed_mpn_divexact_1 (struct speed_params *s) @@ -797,18 +693,6 @@ speed_mpn_bdiv_dbm1c (struct speed_params *s) SPEED_ROUTINE_MPN_BDIV_DBM1C (mpn_bdiv_dbm1c); } -double -speed_mpn_bdiv_q_1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BDIV_Q_1 (mpn_bdiv_q_1); -} - -double -speed_mpn_pi1_bdiv_q_1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_PI1_BDIV_Q_1 (mpn_pi1_bdiv_q_1); -} - #if HAVE_NATIVE_mpn_modexact_1_odd double speed_mpn_modexact_1_odd (struct speed_params *s) @@ -823,145 +707,59 @@ speed_mpn_modexact_1c_odd (struct speed_params *s) SPEED_ROUTINE_MPN_MODEXACT_1C_ODD (mpn_modexact_1c_odd); } -double -speed_mpz_mod (struct speed_params *s) -{ - SPEED_ROUTINE_MPZ_MOD (mpz_mod); -} double -speed_mpn_sbpi1_div_qr (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_div_qr, inv.inv32, 2,0); -} -double -speed_mpn_dcpi1_div_qr (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_div_qr, &inv, 6,3); -} -double -speed_mpn_sbpi1_divappr_q (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_PI1_DIV (mpn_sbpi1_divappr_q, inv.inv32, 2,0); -} -double -speed_mpn_dcpi1_divappr_q (struct speed_params *s) +speed_mpn_dc_tdiv_qr (struct speed_params *s) { - 
SPEED_ROUTINE_MPN_PI1_DIV (mpn_dcpi1_divappr_q, &inv, 6,3); + SPEED_ROUTINE_MPN_DC_TDIV_QR (mpn_tdiv_qr); } double -speed_mpn_mu_div_qr (struct speed_params *s) +speed_mpn_dc_divrem_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_MU_DIV_QR (mpn_mu_div_qr, mpn_mu_div_qr_itch); + SPEED_ROUTINE_MPN_DC_DIVREM_N (mpn_dc_divrem_n); } double -speed_mpn_mu_divappr_q (struct speed_params *s) +speed_mpn_dc_divrem_sb (struct speed_params *s) { - SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_divappr_q, mpn_mu_divappr_q_itch); + SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn); } double -speed_mpn_mu_div_q (struct speed_params *s) +speed_mpn_dc_divrem_sb_div (struct speed_params *s) { - SPEED_ROUTINE_MPN_MU_DIV_Q (mpn_mu_div_q, mpn_mu_div_q_itch); + SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn_div); } double -speed_mpn_mupi_div_qr (struct speed_params *s) +speed_mpn_dc_divrem_sb_inv (struct speed_params *s) { - SPEED_ROUTINE_MPN_MUPI_DIV_QR (mpn_preinv_mu_div_qr, mpn_preinv_mu_div_qr_itch); + SPEED_ROUTINE_MPN_DC_DIVREM_SB (mpn_sb_divrem_mn_inv); } double -speed_mpn_sbpi1_bdiv_qr (struct speed_params *s) +speed_mpn_sb_divrem_m3 (struct speed_params *s) { - SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_sbpi1_bdiv_qr); + SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn); } double -speed_mpn_dcpi1_bdiv_qr (struct speed_params *s) +speed_mpn_sb_divrem_m3_div (struct speed_params *s) { - SPEED_ROUTINE_MPN_PI1_BDIV_QR (mpn_dcpi1_bdiv_qr); + SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn_div); } double -speed_mpn_sbpi1_bdiv_q (struct speed_params *s) +speed_mpn_sb_divrem_m3_inv (struct speed_params *s) { - SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_sbpi1_bdiv_q); -} -double -speed_mpn_dcpi1_bdiv_q (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_PI1_BDIV_Q (mpn_dcpi1_bdiv_q); -} -double -speed_mpn_mu_bdiv_q (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MU_BDIV_Q (mpn_mu_bdiv_q, mpn_mu_bdiv_q_itch); -} -double -speed_mpn_mu_bdiv_qr (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MU_BDIV_QR (mpn_mu_bdiv_qr, 
mpn_mu_bdiv_qr_itch); -} - -double -speed_mpn_broot (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BROOT (mpn_broot); -} -double -speed_mpn_broot_invm1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BROOT (mpn_broot_invm1); -} -double -speed_mpn_brootinv (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BROOTINV (mpn_brootinv, 5*s->size); + SPEED_ROUTINE_MPN_SB_DIVREM_M3 (mpn_sb_divrem_mn_inv); } double -speed_mpn_binvert (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINVERT (mpn_binvert, mpn_binvert_itch); -} - -double -speed_mpn_invert (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_INVERT (mpn_invert, mpn_invert_itch); -} - -double -speed_mpn_invertappr (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_INVERTAPPR (mpn_invertappr, mpn_invertappr_itch); -} - -double -speed_mpn_ni_invertappr (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_INVERTAPPR (mpn_ni_invertappr, mpn_invertappr_itch); -} - -double -speed_mpn_sec_invert (struct speed_params *s) +speed_mpz_mod (struct speed_params *s) { - SPEED_ROUTINE_MPN_SEC_INVERT (mpn_sec_invert, mpn_sec_invert_itch); + SPEED_ROUTINE_MPZ_MOD (mpz_mod); } - double speed_mpn_redc_1 (struct speed_params *s) { SPEED_ROUTINE_REDC_1 (mpn_redc_1); } -double -speed_mpn_redc_2 (struct speed_params *s) -{ - SPEED_ROUTINE_REDC_2 (mpn_redc_2); -} -double -speed_mpn_redc_n (struct speed_params *s) -{ - SPEED_ROUTINE_REDC_N (mpn_redc_n); -} double @@ -987,172 +785,28 @@ speed_mpn_sub_n (struct speed_params *s) SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n); } +#if HAVE_NATIVE_mpn_addsub_n double -speed_mpn_add_err1_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_add_err1_n); -} -double -speed_mpn_sub_err1_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_sub_err1_n); -} -double -speed_mpn_add_err2_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_add_err2_n); -} -double -speed_mpn_sub_err2_n (struct speed_params *s) +speed_mpn_addsub_n (struct speed_params *s) { - 
SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_sub_err2_n); -} -double -speed_mpn_add_err3_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_add_err3_n); -} -double -speed_mpn_sub_err3_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_sub_err3_n); -} - - -#if HAVE_NATIVE_mpn_add_n_sub_n -double -speed_mpn_add_n_sub_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_ADDSUB_N_CALL (mpn_add_n_sub_n (ap, sp, s->xp, s->yp, s->size)); + SPEED_ROUTINE_MPN_ADDSUB_N_CALL (mpn_addsub_n (ap, sp, s->xp, s->yp, s->size)); } #endif -#if HAVE_NATIVE_mpn_addlsh1_n == 1 +#if HAVE_NATIVE_mpn_addlsh1_n double speed_mpn_addlsh1_n (struct speed_params *s) { SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh1_n); } #endif -#if HAVE_NATIVE_mpn_sublsh1_n == 1 +#if HAVE_NATIVE_mpn_sublsh1_n double speed_mpn_sublsh1_n (struct speed_params *s) { SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh1_n); } #endif -#if HAVE_NATIVE_mpn_addlsh1_n_ip1 -double -speed_mpn_addlsh1_n_ip1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip1); -} -#endif -#if HAVE_NATIVE_mpn_addlsh1_n_ip2 -double -speed_mpn_addlsh1_n_ip2 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip2); -} -#endif -#if HAVE_NATIVE_mpn_sublsh1_n_ip1 -double -speed_mpn_sublsh1_n_ip1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_COPY (mpn_sublsh1_n_ip1); -} -#endif -#if HAVE_NATIVE_mpn_rsblsh1_n == 1 -double -speed_mpn_rsblsh1_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh1_n); -} -#endif -#if HAVE_NATIVE_mpn_addlsh2_n == 1 -double -speed_mpn_addlsh2_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_N (mpn_addlsh2_n); -} -#endif -#if HAVE_NATIVE_mpn_sublsh2_n == 1 -double -speed_mpn_sublsh2_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh2_n); -} -#endif -#if HAVE_NATIVE_mpn_addlsh2_n_ip1 -double -speed_mpn_addlsh2_n_ip1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip1); -} -#endif -#if HAVE_NATIVE_mpn_addlsh2_n_ip2 
-double -speed_mpn_addlsh2_n_ip2 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip2); -} -#endif -#if HAVE_NATIVE_mpn_sublsh2_n_ip1 -double -speed_mpn_sublsh2_n_ip1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_COPY (mpn_sublsh2_n_ip1); -} -#endif -#if HAVE_NATIVE_mpn_rsblsh2_n == 1 -double -speed_mpn_rsblsh2_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh2_n); -} -#endif -#if HAVE_NATIVE_mpn_addlsh_n -double -speed_mpn_addlsh_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addlsh_n (wp, xp, yp, s->size, 7)); -} -#endif -#if HAVE_NATIVE_mpn_sublsh_n -double -speed_mpn_sublsh_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_sublsh_n (wp, xp, yp, s->size, 7)); -} -#endif -#if HAVE_NATIVE_mpn_addlsh_n_ip1 -double -speed_mpn_addlsh_n_ip1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip1 (wp, s->xp, s->size, 7)); -} -#endif -#if HAVE_NATIVE_mpn_addlsh_n_ip2 -double -speed_mpn_addlsh_n_ip2 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip2 (wp, s->xp, s->size, 7)); -} -#endif -#if HAVE_NATIVE_mpn_sublsh_n_ip1 -double -speed_mpn_sublsh_n_ip1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_sublsh_n_ip1 (wp, s->xp, s->size, 7)); -} -#endif -#if HAVE_NATIVE_mpn_rsblsh_n -double -speed_mpn_rsblsh_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_rsblsh_n (wp, xp, yp, s->size, 7)); -} -#endif #if HAVE_NATIVE_mpn_rsh1add_n double speed_mpn_rsh1add_n (struct speed_params *s) @@ -1168,58 +822,47 @@ speed_mpn_rsh1sub_n (struct speed_params *s) } #endif -double -speed_mpn_cnd_add_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_cnd_add_n (1, wp, xp, yp, s->size)); -} -double -speed_mpn_cnd_sub_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_cnd_sub_n (1, wp, xp, yp, s->size)); -} - /* mpn_and_n etc can be macros and so have to be handled with 
SPEED_ROUTINE_MPN_BINARY_N_CALL forms */ double speed_mpn_and_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, xp, yp, s->size)); + SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, s->xp, s->yp, s->size)); } double speed_mpn_andn_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, xp, yp, s->size)); +SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, s->xp, s->yp, s->size)); } double speed_mpn_nand_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, xp, yp, s->size)); + SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, s->xp, s->yp, s->size)); } double speed_mpn_ior_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, xp, yp, s->size)); +SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, s->xp, s->yp, s->size)); } double speed_mpn_iorn_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, xp, yp, s->size)); + SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, s->xp, s->yp, s->size)); } double speed_mpn_nior_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, xp, yp, s->size)); + SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, s->xp, s->yp, s->size)); } double speed_mpn_xor_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, xp, yp, s->size)); + SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, s->xp, s->yp, s->size)); } double speed_mpn_xnor_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, xp, yp, s->size)); + SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, s->xp, s->yp, s->size)); } @@ -1229,9 +872,9 @@ speed_mpn_mul_n (struct speed_params *s) SPEED_ROUTINE_MPN_MUL_N (mpn_mul_n); } double -speed_mpn_sqr (struct speed_params *s) +speed_mpn_sqr_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_SQR (mpn_sqr); + SPEED_ROUTINE_MPN_SQR (mpn_sqr_n); } double speed_mpn_mul_n_sqr (struct speed_params *s) @@ -1264,141 +907,29 @@ speed_mpn_sqr_diagonal (struct 
speed_params *s) } #endif -#if HAVE_NATIVE_mpn_sqr_diag_addlsh1 -double -speed_mpn_sqr_diag_addlsh1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL (mpn_sqr_diag_addlsh1 (wp, tp, s->xp, s->size)); -} -#endif - -double -speed_mpn_toom2_sqr (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM2_SQR (mpn_toom2_sqr); -} -double -speed_mpn_toom3_sqr (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM3_SQR (mpn_toom3_sqr); -} -double -speed_mpn_toom4_sqr (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM4_SQR (mpn_toom4_sqr); -} -double -speed_mpn_toom6_sqr (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM6_SQR (mpn_toom6_sqr); -} -double -speed_mpn_toom8_sqr (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM8_SQR (mpn_toom8_sqr); -} -double -speed_mpn_toom22_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM22_MUL_N (mpn_toom22_mul); -} -double -speed_mpn_toom33_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM33_MUL_N (mpn_toom33_mul); -} double -speed_mpn_toom44_mul (struct speed_params *s) +speed_mpn_kara_mul_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM44_MUL_N (mpn_toom44_mul); + SPEED_ROUTINE_MPN_KARA_MUL_N (mpn_kara_mul_n); } double -speed_mpn_toom6h_mul (struct speed_params *s) +speed_mpn_kara_sqr_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM6H_MUL_N (mpn_toom6h_mul); -} -double -speed_mpn_toom8h_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM8H_MUL_N (mpn_toom8h_mul); + SPEED_ROUTINE_MPN_KARA_SQR_N (mpn_kara_sqr_n); } double -speed_mpn_toom32_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM32_MUL (mpn_toom32_mul); -} -double -speed_mpn_toom42_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM42_MUL (mpn_toom42_mul); -} -double -speed_mpn_toom43_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM43_MUL (mpn_toom43_mul); -} -double -speed_mpn_toom63_mul (struct speed_params *s) +speed_mpn_toom3_mul_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM63_MUL (mpn_toom63_mul); + 
SPEED_ROUTINE_MPN_TOOM3_MUL_N (mpn_toom3_mul_n); } double -speed_mpn_toom32_for_toom43_mul (struct speed_params *s) +speed_mpn_toom3_sqr_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL (mpn_toom32_mul); -} -double -speed_mpn_toom43_for_toom32_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL (mpn_toom43_mul); -} -double -speed_mpn_toom32_for_toom53_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL (mpn_toom32_mul); -} -double -speed_mpn_toom53_for_toom32_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL (mpn_toom53_mul); -} -double -speed_mpn_toom42_for_toom53_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL (mpn_toom42_mul); -} -double -speed_mpn_toom53_for_toom42_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL (mpn_toom53_mul); -} -double -speed_mpn_toom43_for_toom54_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL (mpn_toom43_mul); -} -double -speed_mpn_toom54_for_toom43_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL (mpn_toom54_mul); + SPEED_ROUTINE_MPN_TOOM3_SQR_N (mpn_toom3_sqr_n); } double -speed_mpn_nussbaumer_mul (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MUL_N_CALL - (mpn_nussbaumer_mul (wp, s->xp, s->size, s->yp, s->size)); -} -double -speed_mpn_nussbaumer_mul_sqr (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_SQR_CALL - (mpn_nussbaumer_mul (wp, s->xp, s->size, s->xp, s->size)); -} - -#if WANT_OLD_FFT_FULL -double speed_mpn_mul_fft_full (struct speed_params *s) { SPEED_ROUTINE_MPN_MUL_N_CALL @@ -1410,7 +941,7 @@ speed_mpn_mul_fft_full_sqr (struct speed_params *s) SPEED_ROUTINE_MPN_SQR_CALL (mpn_mul_fft_full (wp, s->xp, s->size, s->xp, s->size)); } -#endif + /* These are mod 2^N+1 multiplies and squares. If s->r is supplied it's used as k, otherwise the best k for the size is used. 
If s->size isn't a @@ -1424,31 +955,31 @@ speed_mpn_mul_fft_full_sqr (struct speed_params *s) unsigned i; \ double t; \ TMP_DECL; \ - \ + \ SPEED_RESTRICT_COND (s->size >= 1); \ - \ + \ if (s->r != 0) \ k = s->r; \ else \ k = mpn_fft_best_k (s->size, sqr); \ - \ + \ TMP_MARK; \ pl = mpn_fft_next_size (s->size, k); \ SPEED_TMP_ALLOC_LIMBS (wp, pl+1, s->align_wp); \ - \ + \ speed_operand_src (s, s->xp, s->size); \ if (!sqr) \ speed_operand_src (s, s->yp, s->size); \ speed_operand_dst (s, wp, pl+1); \ speed_cache_fill (s); \ - \ + \ speed_starttime (); \ i = s->reps; \ do \ call; \ while (--i != 0); \ t = speed_endtime (); \ - \ + \ TMP_FREE; \ return t; \ } @@ -1468,174 +999,179 @@ speed_mpn_mul_fft_sqr (struct speed_params *s) } double -speed_mpn_fft_mul (struct speed_params *s) +speed_mpn_mullow_n (struct speed_params *s) { - SPEED_ROUTINE_MPN_MUL_N_CALL (mpn_fft_mul (wp, s->xp, s->size, s->yp, s->size)); + SPEED_ROUTINE_MPN_MULLOW_N (mpn_mullow_n); } - double -speed_mpn_fft_sqr (struct speed_params *s) +speed_mpn_mullow_basecase (struct speed_params *s) { - SPEED_ROUTINE_MPN_SQR_CALL (mpn_fft_mul (wp, s->xp, s->size, s->xp, s->size)); + SPEED_ROUTINE_MPN_MULLOW_BASECASE (mpn_mullow_basecase); } double -speed_mpn_mullo_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MULLO_N (mpn_mullo_n); -} -double -speed_mpn_mullo_basecase (struct speed_params *s) +speed_mpn_matrix22_mul (struct speed_params *s) { - SPEED_ROUTINE_MPN_MULLO_BASECASE (mpn_mullo_basecase); -} + /* Speed params only includes 2 inputs, so we have to invent the + other 6. 
*/ -double -speed_mpn_mulmid_basecase (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MULMID (mpn_mulmid_basecase); -} + mp_ptr a1, a2, a3; + mp_ptr r0, r1, r2, r3; + mp_ptr b1, b2, b3; + mp_ptr tp; + mp_size_t scratch; + unsigned i; + double t; + TMP_DECL; -double -speed_mpn_mulmid (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MULMID (mpn_mulmid); -} + TMP_MARK; + SPEED_TMP_ALLOC_LIMBS (a1, s->size, s->align_xp); + SPEED_TMP_ALLOC_LIMBS (a2, s->size, s->align_xp); + SPEED_TMP_ALLOC_LIMBS (a3, s->size, s->align_xp); -double -speed_mpn_mulmid_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MULMID_N (mpn_mulmid_n); -} + SPEED_TMP_ALLOC_LIMBS (b1, s->size, s->align_yp); + SPEED_TMP_ALLOC_LIMBS (b2, s->size, s->align_yp); + SPEED_TMP_ALLOC_LIMBS (b3, s->size, s->align_yp); -double -speed_mpn_toom42_mulmid (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM42_MULMID (mpn_toom42_mulmid); -} + SPEED_TMP_ALLOC_LIMBS (r0, 2 * s->size +1, s->align_xp); + SPEED_TMP_ALLOC_LIMBS (r1, 2 * s->size +1, s->align_xp); + SPEED_TMP_ALLOC_LIMBS (r2, 2 * s->size +1, s->align_xp); + SPEED_TMP_ALLOC_LIMBS (r3, 2 * s->size +1, s->align_xp); -double -speed_mpn_mulmod_bnm1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_mulmod_bnm1 (wp, s->size, s->xp, s->size, s->yp, s->size, tp)); -} + mpn_random (a1, s->size); + mpn_random (a2, s->size); + mpn_random (a3, s->size); + mpn_random (b1, s->size); + mpn_random (b2, s->size); + mpn_random (b3, s->size); -double -speed_mpn_bc_mulmod_bnm1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_bc_mulmod_bnm1 (wp, s->xp, s->yp, s->size, tp)); -} + scratch = mpn_matrix22_mul_itch (s->size, s->size); + SPEED_TMP_ALLOC_LIMBS (tp, scratch, s->align_wp); -double -speed_mpn_mulmod_bnm1_rounded (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED (mpn_mulmod_bnm1); -} - -double -speed_mpn_sqrmod_bnm1 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL (mpn_sqrmod_bnm1 (wp, s->size, 
s->xp, s->size, tp)); + speed_starttime (); + i = s->reps; + do + { + MPN_COPY (r0, s->xp, s->size); + MPN_COPY (r1, a1, s->size); + MPN_COPY (r2, a2, s->size); + MPN_COPY (r3, a3, s->size); + mpn_matrix22_mul (r0, r1, r2, r3, s->size, s->yp, b1, b2, b3, s->size, tp); + } + while (--i != 0); + t = speed_endtime(); + TMP_FREE; + return t; } double -speed_mpn_matrix22_mul (struct speed_params *s) +speed_mpn_hgcd (struct speed_params *s) { - /* Speed params only includes 2 inputs, so we have to invent the - other 6. */ + mp_ptr wp; + mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); + mp_size_t hgcd_scratch = mpn_hgcd_itch (s->size); + mp_ptr ap; + mp_ptr bp; + mp_ptr tmp1, tmp2; - mp_ptr a; - mp_ptr r; - mp_ptr b; - mp_ptr tp; - mp_size_t itch; + struct hgcd_matrix hgcd; + int res; unsigned i; double t; TMP_DECL; + if (s->size < 2) + return -1; + TMP_MARK; - SPEED_TMP_ALLOC_LIMBS (a, 4 * s->size, s->align_xp); - SPEED_TMP_ALLOC_LIMBS (b, 4 * s->size, s->align_yp); - SPEED_TMP_ALLOC_LIMBS (r, 8 * s->size + 4, s->align_wp); - MPN_COPY (a, s->xp, s->size); - mpn_random (a + s->size, 3 * s->size); - MPN_COPY (b, s->yp, s->size); - mpn_random (b + s->size, 3 * s->size); + SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); + SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); - itch = mpn_matrix22_mul_itch (s->size, s->size); - SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); + s->xp[s->size - 1] |= 1; + s->yp[s->size - 1] |= 1; - speed_operand_src (s, a, 4 * s->size); - speed_operand_src (s, b, 4 * s->size); - speed_operand_dst (s, r, 8 * s->size + 4); - speed_operand_dst (s, tp, itch); - speed_cache_fill (s); + SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp); + SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp); speed_starttime (); i = s->reps; do { - mp_size_t sz = s->size; - MPN_COPY (r + 0 * sz + 0, a + 0 * sz, sz); - MPN_COPY (r + 2 * sz + 1, a + 1 * sz, sz); - MPN_COPY (r + 4 * sz + 2, a + 2 * sz, sz); - MPN_COPY (r + 6 * sz + 3, 
a + 3 * sz, sz); - mpn_matrix22_mul (r, r + 2 * sz + 1, r + 4 * sz + 2, r + 6 * sz + 3, sz, - b, b + 1 * sz, b + 2 * sz, b + 3 * sz, sz, - tp); + MPN_COPY (ap, s->xp, s->size); + MPN_COPY (bp, s->yp, s->size); + mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); + res = mpn_hgcd (ap, bp, s->size, &hgcd, wp); } while (--i != 0); - t = speed_endtime(); + t = speed_endtime (); TMP_FREE; return t; } double -speed_mpn_hgcd (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd, mpn_hgcd_itch); -} - -double speed_mpn_hgcd_lehmer (struct speed_params *s) { - SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, mpn_hgcd_lehmer_itch); -} + mp_ptr wp; + mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); + mp_size_t hgcd_scratch = MPN_HGCD_LEHMER_ITCH (s->size); + mp_ptr ap; + mp_ptr bp; + mp_ptr tmp1, tmp2; -double -speed_mpn_hgcd_appr (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr, mpn_hgcd_appr_itch); -} + struct hgcd_matrix hgcd; + int res; + unsigned i; + double t; + TMP_DECL; -double -speed_mpn_hgcd_appr_lehmer (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr_lehmer, mpn_hgcd_appr_lehmer_itch); + if (s->size < 2) + return -1; + + TMP_MARK; + + SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); + SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); + + s->xp[s->size - 1] |= 1; + s->yp[s->size - 1] |= 1; + + SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp); + SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp); + + speed_starttime (); + i = s->reps; + do + { + MPN_COPY (ap, s->xp, s->size); + MPN_COPY (bp, s->yp, s->size); + mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); + res = mpn_hgcd_lehmer (ap, bp, s->size, &hgcd, wp); + } + while (--i != 0); + t = speed_endtime (); + TMP_FREE; + return t; } double -speed_mpn_hgcd_reduce (struct speed_params *s) +speed_mpn_gcd (struct speed_params *s) { - SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch); + SPEED_ROUTINE_MPN_GCD 
(mpn_gcd); } +#if 0 double -speed_mpn_hgcd_reduce_1 (struct speed_params *s) +speed_mpn_gcd_binary (struct speed_params *s) { - SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch); + SPEED_ROUTINE_MPN_GCD (mpn_gcd_binary); } double -speed_mpn_hgcd_reduce_2 (struct speed_params *s) +speed_mpn_gcd_accel (struct speed_params *s) { - SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch); + SPEED_ROUTINE_MPN_GCD (mpn_gcd_accel); } +#endif -double -speed_mpn_gcd (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_GCD (mpn_gcd); -} double speed_mpn_gcdext (struct speed_params *s) @@ -1706,11 +1242,6 @@ speed_mpn_jacobi_base_3 (struct speed_params *s) { SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_3); } -double -speed_mpn_jacobi_base_4 (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_4); -} double @@ -1776,11 +1307,6 @@ speed_mpz_powm_redc (struct speed_params *s) SPEED_ROUTINE_MPZ_POWM (mpz_powm_redc); } double -speed_mpz_powm_sec (struct speed_params *s) -{ - SPEED_ROUTINE_MPZ_POWM (mpz_powm_sec); -} -double speed_mpz_powm_ui (struct speed_params *s) { SPEED_ROUTINE_MPZ_POWM_UI (mpz_powm_ui); @@ -1856,12 +1382,12 @@ speed_noop_wxys (struct speed_params *s) { \ unsigned i; \ variables; \ - \ + \ speed_starttime (); \ i = s->reps; \ do \ { \ - calls; \ + calls; \ } \ while (--i != 0); \ return speed_endtime (); \ @@ -1876,39 +1402,39 @@ speed_noop_wxys (struct speed_params *s) double speed_malloc_free (struct speed_params *s) { - size_t bytes = s->size * GMP_LIMB_BYTES; + size_t bytes = s->size * BYTES_PER_MP_LIMB; SPEED_ROUTINE_ALLOC_FREE (void *p, - p = malloc (bytes); - free (p)); + p = malloc (bytes); + free (p)); } double speed_malloc_realloc_free (struct speed_params *s) { - size_t bytes = s->size * GMP_LIMB_BYTES; + size_t bytes = s->size * BYTES_PER_MP_LIMB; SPEED_ROUTINE_ALLOC_FREE (void *p, - p = malloc (GMP_LIMB_BYTES); - p = realloc (p, bytes); - free (p)); + p = malloc (BYTES_PER_MP_LIMB); + 
p = realloc (p, bytes); + free (p)); } double speed_gmp_allocate_free (struct speed_params *s) { - size_t bytes = s->size * GMP_LIMB_BYTES; + size_t bytes = s->size * BYTES_PER_MP_LIMB; SPEED_ROUTINE_ALLOC_FREE (void *p, - p = (*__gmp_allocate_func) (bytes); - (*__gmp_free_func) (p, bytes)); + p = (*__gmp_allocate_func) (bytes); + (*__gmp_free_func) (p, bytes)); } double speed_gmp_allocate_reallocate_free (struct speed_params *s) { - size_t bytes = s->size * GMP_LIMB_BYTES; + size_t bytes = s->size * BYTES_PER_MP_LIMB; SPEED_ROUTINE_ALLOC_FREE (void *p, - p = (*__gmp_allocate_func) (GMP_LIMB_BYTES); - p = (*__gmp_reallocate_func) (p, bytes, GMP_LIMB_BYTES); + p = (*__gmp_allocate_func) (BYTES_PER_MP_LIMB); + p = (*__gmp_reallocate_func) (p, bytes, BYTES_PER_MP_LIMB); (*__gmp_free_func) (p, bytes)); } @@ -1916,38 +1442,38 @@ double speed_mpz_init_clear (struct speed_params *s) { SPEED_ROUTINE_ALLOC_FREE (mpz_t z, - mpz_init (z); - mpz_clear (z)); + mpz_init (z); + mpz_clear (z)); } double speed_mpz_init_realloc_clear (struct speed_params *s) { SPEED_ROUTINE_ALLOC_FREE (mpz_t z, - mpz_init (z); - _mpz_realloc (z, s->size); - mpz_clear (z)); + mpz_init (z); + _mpz_realloc (z, s->size); + mpz_clear (z)); } double speed_mpq_init_clear (struct speed_params *s) { SPEED_ROUTINE_ALLOC_FREE (mpq_t q, - mpq_init (q); - mpq_clear (q)); + mpq_init (q); + mpq_clear (q)); } double speed_mpf_init_clear (struct speed_params *s) { SPEED_ROUTINE_ALLOC_FREE (mpf_t f, - mpf_init (f); - mpf_clear (f)); + mpf_init (f); + mpf_clear (f)); } /* Compare this to mpn_add_n to see how much overhead mpz_add adds. Note - that repeatedly calling mpz_add with the same data gives branch prediction + that repeatedly calling mpz_add with the same data gives branch predition in it an advantage. */ double @@ -2011,40 +1537,6 @@ speed_mpz_bin_uiui (struct speed_params *s) return t; } -/* If r==0, calculate binomial(2^size,size), - otherwise calculate binomial(2^size,r). 
*/ - -double -speed_mpz_bin_ui (struct speed_params *s) -{ - mpz_t w, x; - unsigned long k; - unsigned i; - double t; - - mpz_init (w); - mpz_init_set_ui (x, 0); - - mpz_setbit (x, s->size); - - if (s->r != 0) - k = s->r; - else - k = s->size; - - speed_starttime (); - i = s->reps; - do - { - mpz_bin_ui (w, x, k); - } - while (--i != 0); - t = speed_endtime (); - - mpz_clear (w); - mpz_clear (x); - return t; -} /* The multiplies are successively dependent so the latency is measured, not the issue rate. There's only 10 per loop so the code doesn't get too big @@ -2072,41 +1564,41 @@ speed_mpz_bin_ui (struct speed_params *s) mp_limb_t h, l; \ unsigned i; \ double t; \ - \ + \ s->time_divisor = 10; \ - \ + \ h = s->xp[0]; \ l = s->yp[0]; \ - \ + \ if (s->r == 1) \ { \ - speed_starttime (); \ - i = s->reps; \ - do \ - { + speed_starttime (); \ + i = s->reps; \ + do \ + { #define SPEED_MACRO_UMUL_PPMM_B \ - } \ - while (--i != 0); \ - t = speed_endtime (); \ + } \ + while (--i != 0); \ + t = speed_endtime (); \ } \ else \ { \ - speed_starttime (); \ - i = s->reps; \ - do \ - { + speed_starttime (); \ + i = s->reps; \ + do \ + { #define SPEED_MACRO_UMUL_PPMM_C \ - } \ - while (--i != 0); \ - t = speed_endtime (); \ + } \ + while (--i != 0); \ + t = speed_endtime (); \ } \ - \ + \ /* stop the compiler optimizing away the whole calculation! 
*/ \ noop_1 (h); \ noop_1 (l); \ - \ + \ return t; \ } @@ -2242,25 +1734,25 @@ speed_mpn_umul_ppmm_r (struct speed_params *s) unsigned i; \ mp_limb_t q, r, d; \ mp_limb_t dinv; \ - \ + \ s->time_divisor = 10; \ - \ + \ /* divisor from "r" parameter, or a default */ \ d = s->r; \ if (d == 0) \ - d = mp_bases[10].big_base; \ - \ + d = __mp_bases[10].big_base; \ + \ if (normalize) \ { \ - unsigned norm; \ - count_leading_zeros (norm, d); \ - d <<= norm; \ - invert_limb (dinv, d); \ + unsigned norm; \ + count_leading_zeros (norm, d); \ + d <<= norm; \ + invert_limb (dinv, d); \ } \ - \ + \ q = s->xp[0]; \ r = s->yp[0] % d; \ - \ + \ speed_starttime (); \ i = s->reps; \ do \ @@ -2270,11 +1762,11 @@ speed_mpn_umul_ppmm_r (struct speed_params *s) } \ while (--i != 0); \ t = speed_endtime (); \ - \ + \ /* stop the compiler optimizing away the whole calculation! */ \ noop_1 (q); \ noop_1 (r); \ - \ + \ return t; \ } @@ -2298,6 +1790,44 @@ speed_udiv_qrnnd (struct speed_params *s) } double +speed_udiv_qrnnd_preinv1 (struct speed_params *s) +{ + SPEED_ROUTINE_UDIV_QRNND_A (1); + { + udiv_qrnnd_preinv1 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv1 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv1 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv1 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv1 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv1 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv1 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv1 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv1 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv1 (q, r, r, q, d, dinv); + } + SPEED_ROUTINE_UDIV_QRNND_B; +} + +double +speed_udiv_qrnnd_preinv2 (struct speed_params *s) +{ + SPEED_ROUTINE_UDIV_QRNND_A (1); + { + udiv_qrnnd_preinv2 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv2 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv2 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv2 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv2 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv2 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv2 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv2 (q, 
r, r, q, d, dinv); + udiv_qrnnd_preinv2 (q, r, r, q, d, dinv); + udiv_qrnnd_preinv2 (q, r, r, q, d, dinv); + } + SPEED_ROUTINE_UDIV_QRNND_B; +} + +double speed_udiv_qrnnd_c (struct speed_params *s) { SPEED_ROUTINE_UDIV_QRNND_A (1); @@ -2380,7 +1910,7 @@ speed_operator_div (struct speed_params *s) /* divisor from "r" parameter, or a default */ d = s->r; if (d == 0) - d = mp_bases[10].big_base; + d = __mp_bases[10].big_base; x = s->xp[0]; q = 0; @@ -2421,7 +1951,7 @@ speed_operator_mod (struct speed_params *s) /* divisor from "r" parameter, or a default */ d = s->r; if (d == 0) - d = mp_bases[10].big_base; + d = __mp_bases[10].big_base; x = s->xp[0]; r = 0; @@ -2455,12 +1985,12 @@ speed_operator_mod (struct speed_params *s) be typical for count_trailing_zeros in a GCD etc. r==1 measures on data with the resultant count uniformly distributed - between 0 and GMP_LIMB_BITS-1. This is probably sensible for + between 0 and BITS_PER_MP_LIMB-1. This is probably sensible for count_leading_zeros on the high limbs of divisors. */ int speed_routine_count_zeros_setup (struct speed_params *s, - mp_ptr xp, int leading, int zero) + mp_ptr xp, int leading, int zero) { int i, c; mp_limb_t n; @@ -2468,26 +1998,26 @@ speed_routine_count_zeros_setup (struct speed_params *s, if (s->r == 0) { /* Make uniformly distributed data. If zero isn't allowed then change - it to 1 for leading, or 0x800..00 for trailing. */ + it to 1 for leading, or 0x800..00 for trailing. */ MPN_COPY (xp, s->xp_block, SPEED_BLOCK_SIZE); if (! zero) - for (i = 0; i < SPEED_BLOCK_SIZE; i++) - if (xp[i] == 0) - xp[i] = leading ? 1 : GMP_LIMB_HIGHBIT; + for (i = 0; i < SPEED_BLOCK_SIZE; i++) + if (xp[i] == 0) + xp[i] = leading ? 1 : GMP_LIMB_HIGHBIT; } else if (s->r == 1) { /* Make counts uniformly distributed. A randomly chosen bit is set, and - for leading the rest above it are cleared, or for trailing then the - rest below. */ + for leading the rest above it are cleared, or for trailing then the + rest below. 
*/ for (i = 0; i < SPEED_BLOCK_SIZE; i++) - { - mp_limb_t set = CNST_LIMB(1) << (s->yp_block[i] % GMP_LIMB_BITS); - mp_limb_t keep_below = set-1; - mp_limb_t keep_above = MP_LIMB_T_MAX ^ keep_below; - mp_limb_t keep = (leading ? keep_below : keep_above); - xp[i] = (s->xp_block[i] & keep) | set; - } + { + mp_limb_t set = CNST_LIMB(1) << (s->yp_block[i] % BITS_PER_MP_LIMB); + mp_limb_t keep_below = set-1; + mp_limb_t keep_above = MP_LIMB_T_MAX ^ keep_below; + mp_limb_t keep = (leading ? keep_below : keep_above); + xp[i] = (s->xp_block[i] & keep) | set; + } } else { @@ -2502,9 +2032,9 @@ speed_routine_count_zeros_setup (struct speed_params *s, xp[i] ^= c; if (leading) - count_leading_zeros (c, n); + count_leading_zeros (c, n); else - count_trailing_zeros (c, n); + count_trailing_zeros (c, n); } return 1; @@ -2620,7 +2150,7 @@ speed_gmp_randseed_ui (struct speed_params *s) gmp_randseed_ui (rstate, (unsigned long) s->xp_block[j]); j++; if (j >= SPEED_BLOCK_SIZE) - j = 0; + j = 0; } while (--i != 0); t = speed_endtime (); diff --git a/gmp/tune/div_qr_1_tune.c b/gmp/tune/div_qr_1_tune.c deleted file mode 100644 index 7e928dcce9..0000000000 --- a/gmp/tune/div_qr_1_tune.c +++ /dev/null @@ -1,47 +0,0 @@ -/* mpn/generic/div_qr_1, using tuned threshold and method. - -Copyright 2013 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. 
- -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#define TUNE_PROGRAM_BUILD 1 - -#include "gmp.h" -#include "gmp-impl.h" - -mp_limb_t mpn_div_qr_1n_pi1_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t); -mp_limb_t mpn_div_qr_1n_pi1_2 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t); - -#if !HAVE_NATIVE_mpn_div_qr_1n_pi1 -#define __gmpn_div_qr_1n_pi1 \ - (div_qr_1n_pi1_method == 1 ? mpn_div_qr_1n_pi1_1 : mpn_div_qr_1n_pi1_2) -#endif - -#undef mpn_div_qr_1 -#define mpn_div_qr_1 mpn_div_qr_1_tune - -#include "mpn/generic/div_qr_1.c" diff --git a/gmp/tune/div_qr_1n_pi1_1.c b/gmp/tune/div_qr_1n_pi1_1.c deleted file mode 100644 index 6dd8ceb438..0000000000 --- a/gmp/tune/div_qr_1n_pi1_1.c +++ /dev/null @@ -1,39 +0,0 @@ -/* mpn/generic/div_qr_1n_pi1.c method 1. - -Copyright 2013 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. 
- -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#include "gmp.h" -#include "gmp-impl.h" - -#undef DIV_QR_1N_METHOD -#define DIV_QR_1N_METHOD 1 -#undef mpn_div_qr_1n_pi1 -#define mpn_div_qr_1n_pi1 mpn_div_qr_1n_pi1_1 - -#include "mpn/generic/div_qr_1n_pi1.c" diff --git a/gmp/tune/div_qr_1n_pi1_2.c b/gmp/tune/div_qr_1n_pi1_2.c deleted file mode 100644 index acc80d4695..0000000000 --- a/gmp/tune/div_qr_1n_pi1_2.c +++ /dev/null @@ -1,39 +0,0 @@ -/* mpn/generic/div_qr_1n_pi1.c method 2. - -Copyright 2013 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -#include "gmp.h" -#include "gmp-impl.h" - -#undef DIV_QR_1N_METHOD -#define DIV_QR_1N_METHOD 2 -#undef mpn_div_qr_1n_pi1 -#define mpn_div_qr_1n_pi1 mpn_div_qr_1n_pi1_2 - -#include "mpn/generic/div_qr_1n_pi1.c" diff --git a/gmp/tune/divrem1div.c b/gmp/tune/divrem1div.c index b680f9d222..5580f80578 100644 --- a/gmp/tune/divrem1div.c +++ b/gmp/tune/divrem1div.c @@ -5,28 +5,17 @@ Copyright 2000, 2003 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
*/ #define OPERATION_divrem_1 diff --git a/gmp/tune/divrem1inv.c b/gmp/tune/divrem1inv.c index 598c03c739..73ed57f411 100644 --- a/gmp/tune/divrem1inv.c +++ b/gmp/tune/divrem1inv.c @@ -5,28 +5,17 @@ Copyright 2000, 2003 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define OPERATION_divrem_1 diff --git a/gmp/tune/divrem2div.c b/gmp/tune/divrem2div.c index cd7f3f5a88..10b50e2f83 100644 --- a/gmp/tune/divrem2div.c +++ b/gmp/tune/divrem2div.c @@ -6,28 +6,17 @@ Copyright 2001 Free Software Foundation, Inc. This file is part of the GNU MP Library. 
The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/tune/divrem2inv.c b/gmp/tune/divrem2inv.c index bd7c4268f7..05644b2560 100644 --- a/gmp/tune/divrem2inv.c +++ b/gmp/tune/divrem2inv.c @@ -6,28 +6,17 @@ Copyright 2001 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. 
- -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/tune/freq.c b/gmp/tune/freq.c index 210f42564e..f1092e2640 100644 --- a/gmp/tune/freq.c +++ b/gmp/tune/freq.c @@ -1,32 +1,21 @@ /* CPU frequency determination. -Copyright 1999-2004 Free Software Foundation, Inc. +Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. 
+it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ /* Currently we don't get a CPU frequency on the following systems, @@ -470,7 +459,7 @@ freq_sunos_sysinfo (int help) /* "/etc/hw -r cpu" for SCO OpenUnix 8, printing a line like - The speed of the CPU is approximately 450MHz + The speed of the CPU is approximately 450Mhz */ static int freq_sco_etchw (int help) @@ -491,7 +480,7 @@ freq_sco_etchw (int help) while (fgets (buf, sizeof (buf), fp) != NULL) { end = 0; - if (sscanf (buf, " The speed of the CPU is approximately %lfMHz%n", + if (sscanf (buf, " The speed of the CPU is approximately %lfMhz%n", &val, &end) == 1 && end != 0) { speed_cycletime = 1e-6 / val; diff --git a/gmp/tune/gcdext_double.c b/gmp/tune/gcdext_double.c index c72f07ea9f..5470f1aff5 100644 --- a/gmp/tune/gcdext_double.c +++ b/gmp/tune/gcdext_double.c @@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc. This file is part of the GNU MP Library. 
The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/tune/gcdext_single.c b/gmp/tune/gcdext_single.c index 292e9e87e0..1bc47e786e 100644 --- a/gmp/tune/gcdext_single.c +++ b/gmp/tune/gcdext_single.c @@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. 
- -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/tune/gcdextod.c b/gmp/tune/gcdextod.c index c08087d480..957864c5e9 100644 --- a/gmp/tune/gcdextod.c +++ b/gmp/tune/gcdextod.c @@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. 
The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/tune/gcdextos.c b/gmp/tune/gcdextos.c index fb8af29279..afde776f7d 100644 --- a/gmp/tune/gcdextos.c +++ b/gmp/tune/gcdextos.c @@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
-You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/tune/hgcd_appr_lehmer.c b/gmp/tune/hgcd_appr_lehmer.c deleted file mode 100644 index 790e61e3cb..0000000000 --- a/gmp/tune/hgcd_appr_lehmer.c +++ /dev/null @@ -1,40 +0,0 @@ -/* mpn/generic/hgcd_appr.c forced to use Lehmer's quadratic algorithm. */ - -/* -Copyright 2010, 2011 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -#include "gmp.h" -#include "gmp-impl.h" - -#undef HGCD_APPR_THRESHOLD -#define HGCD_APPR_THRESHOLD MP_SIZE_T_MAX -#define __gmpn_hgcd_appr mpn_hgcd_appr_lehmer -#define __gmpn_hgcd_appr_itch mpn_hgcd_appr_lehmer_itch - -#include "../mpn/generic/hgcd_appr.c" diff --git a/gmp/tune/hgcd_lehmer.c b/gmp/tune/hgcd_lehmer.c deleted file mode 100644 index 11d0ef8821..0000000000 --- a/gmp/tune/hgcd_lehmer.c +++ /dev/null @@ -1,40 +0,0 @@ -/* mpn/generic/hgcd.c forced to use Lehmer's quadratic algorithm. */ - -/* -Copyright 2010 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#include "gmp.h" -#include "gmp-impl.h" - -#undef HGCD_THRESHOLD -#define HGCD_THRESHOLD MP_SIZE_T_MAX -#define __gmpn_hgcd mpn_hgcd_lehmer -#define __gmpn_hgcd_itch mpn_hgcd_lehmer_itch - -#include "../mpn/generic/hgcd.c" diff --git a/gmp/tune/hgcd_reduce_1.c b/gmp/tune/hgcd_reduce_1.c deleted file mode 100644 index 383c2d7009..0000000000 --- a/gmp/tune/hgcd_reduce_1.c +++ /dev/null @@ -1,41 +0,0 @@ -/* mpn/generic/hgcd_reduce.c forced to use hgcd. 
*/ - -/* -Copyright 2010 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#include "gmp.h" -#include "gmp-impl.h" - -#undef HGCD_REDUCE_THRESHOLD -#define HGCD_REDUCE_THRESHOLD MP_SIZE_T_MAX -#define __gmpn_hgcd_reduce mpn_hgcd_reduce_1 -#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_1_itch - - -#include "../mpn/generic/hgcd_reduce.c" diff --git a/gmp/tune/hgcd_reduce_2.c b/gmp/tune/hgcd_reduce_2.c deleted file mode 100644 index ac18b6033a..0000000000 --- a/gmp/tune/hgcd_reduce_2.c +++ /dev/null @@ -1,40 +0,0 @@ -/* mpn/generic/hgcd_reduce.c forced to use hgcd_appr. */ - -/* -Copyright 2010 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. 
- -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#include "gmp.h" -#include "gmp-impl.h" - -#undef HGCD_REDUCE_THRESHOLD -#define HGCD_REDUCE_THRESHOLD 0 -#define __gmpn_hgcd_reduce mpn_hgcd_reduce_2 -#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_2_itch - -#include "../mpn/generic/hgcd_reduce.c" diff --git a/gmp/tune/hppa.asm b/gmp/tune/hppa.asm index fc9d62e3b2..e99a399e4a 100644 --- a/gmp/tune/hppa.asm +++ b/gmp/tune/hppa.asm @@ -1,32 +1,21 @@ dnl HPPA 32-bit time stamp counter access routine. dnl Copyright 2000, 2002, 2005 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/tune/hppa2.asm b/gmp/tune/hppa2.asm index 57ef4c4683..9755c907d2 100644 --- a/gmp/tune/hppa2.asm +++ b/gmp/tune/hppa2.asm @@ -1,32 +1,21 @@ dnl HPPA 64-bit time stamp counter access routine. dnl Copyright 2000, 2002, 2005 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. 
+dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/tune/hppa2w.asm b/gmp/tune/hppa2w.asm index 215a0cc5c2..ddf0ea9b0a 100644 --- a/gmp/tune/hppa2w.asm +++ b/gmp/tune/hppa2w.asm @@ -1,32 +1,21 @@ dnl HPPA 64-bit time stamp counter access routine. dnl Copyright 2000, 2002, 2005 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/tune/ia64.asm b/gmp/tune/ia64.asm index 0651111031..ef487db8f6 100644 --- a/gmp/tune/ia64.asm +++ b/gmp/tune/ia64.asm @@ -1,32 +1,21 @@ dnl IA-64 time stamp counter access routine. dnl Copyright 2000, 2005 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/tune/jacbase1.c b/gmp/tune/jacbase1.c index a73df8b723..2a0b859c2c 100644 --- a/gmp/tune/jacbase1.c +++ b/gmp/tune/jacbase1.c @@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc. This file is part of the GNU MP Library. 
The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/tune/jacbase2.c b/gmp/tune/jacbase2.c index b99ebe9061..6bbe7e9e93 100644 --- a/gmp/tune/jacbase2.c +++ b/gmp/tune/jacbase2.c @@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. 
- -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/tune/jacbase3.c b/gmp/tune/jacbase3.c index 408b0fed6b..f8f89d49be 100644 --- a/gmp/tune/jacbase3.c +++ b/gmp/tune/jacbase3.c @@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. 
The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/tune/jacbase4.c b/gmp/tune/jacbase4.c deleted file mode 100644 index 70d535240b..0000000000 --- a/gmp/tune/jacbase4.c +++ /dev/null @@ -1,38 +0,0 @@ -/* mpn/generic/jacbase.c method 4. - -Copyright 2002, 2010 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -#include "gmp.h" -#include "gmp-impl.h" - -#undef JACOBI_BASE_METHOD -#define JACOBI_BASE_METHOD 4 -#define __gmpn_jacobi_base mpn_jacobi_base_4 - -#include "mpn/generic/jacbase.c" diff --git a/gmp/tune/many.pl b/gmp/tune/many.pl index 524a67dd1e..11b5cf4521 100644..100755 --- a/gmp/tune/many.pl +++ b/gmp/tune/many.pl @@ -1,32 +1,21 @@ #! /usr/bin/perl -w -# Copyright 2000-2002 Free Software Foundation, Inc. +# Copyright 2000, 2001, 2002 Free Software Foundation, Inc. # -# This file is part of the GNU MP Library. +# This file is part of the GNU MP Library. # -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of either: +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. # -# * the GNU Lesser General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your -# option) any later version. +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. # -# or -# -# * the GNU General Public License as published by the Free Software -# Foundation; either version 2 of the License, or (at your option) any -# later version. -# -# or both in parallel, as here. -# -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# for more details. -# -# You should have received copies of the GNU General Public License and the -# GNU Lesser General Public License along with the GNU MP Library. 
If not, -# see https://www.gnu.org/licenses/. +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. # Usage: cd $builddir/tune @@ -380,14 +369,23 @@ my @table = }, { - 'regexp'=> 'add_n_sub_n', + 'regexp'=> 'addsub_n', 'ret' => 'mp_limb_t', 'args' => 'mp_ptr sum, mp_ptr diff, mp_srcptr xp, mp_srcptr yp, mp_size_t size', 'speed_flags'=> 'FLAG_R_OPTIONAL', }, { - 'regexp'=> 'com|copyi|copyd', + 'regexp'=> 'bdivmod', + 'ret' => 'mp_limb_t', + 'args' => 'mp_ptr qp, mp_ptr up, mp_size_t usize, mp_srcptr vp, mp_size_t vsize, unsigned long int d', + 'carrys'=> [''], + 'try' => 'none', + 'speed' => 'none', + }, + + { + 'regexp'=> 'com_n|copyi|copyd', 'ret' => 'void', 'args' => 'mp_ptr wp, mp_srcptr xp, mp_size_t size', 'speed' => 'SPEED_ROUTINE_MPN_COPY', diff --git a/gmp/tune/mod_1_1-1.c b/gmp/tune/mod_1_1-1.c deleted file mode 100644 index 7eb7fcdf79..0000000000 --- a/gmp/tune/mod_1_1-1.c +++ /dev/null @@ -1,41 +0,0 @@ -/* mpn/generic/mod_1_1.c method 1. - -Copyright 2011 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. 
- -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#include "gmp.h" -#include "gmp-impl.h" - -#undef MOD_1_1P_METHOD -#define MOD_1_1P_METHOD 1 -#undef mpn_mod_1_1p -#undef mpn_mod_1_1p_cps -#define mpn_mod_1_1p mpn_mod_1_1p_1 -#define mpn_mod_1_1p_cps mpn_mod_1_1p_cps_1 - -#include "mpn/generic/mod_1_1.c" diff --git a/gmp/tune/mod_1_1-2.c b/gmp/tune/mod_1_1-2.c deleted file mode 100644 index 52ca57749b..0000000000 --- a/gmp/tune/mod_1_1-2.c +++ /dev/null @@ -1,41 +0,0 @@ -/* mpn/generic/mod_1_1.c method 2. - -Copyright 2011 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ - -#include "gmp.h" -#include "gmp-impl.h" - -#undef MOD_1_1P_METHOD -#define MOD_1_1P_METHOD 2 -#undef mpn_mod_1_1p -#undef mpn_mod_1_1p_cps -#define mpn_mod_1_1p mpn_mod_1_1p_2 -#define mpn_mod_1_1p_cps mpn_mod_1_1p_cps_2 - -#include "mpn/generic/mod_1_1.c" diff --git a/gmp/tune/mod_1_div.c b/gmp/tune/mod_1_div.c index a0663be055..1c01e8c692 100644 --- a/gmp/tune/mod_1_div.c +++ b/gmp/tune/mod_1_div.c @@ -5,28 +5,17 @@ Copyright 2000, 2003 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define OPERATION_mod_1 @@ -35,12 +24,8 @@ see https://www.gnu.org/licenses/. 
*/ #undef MOD_1_NORM_THRESHOLD #undef MOD_1_UNNORM_THRESHOLD -#undef MOD_1N_TO_MOD_1_1_THRESHOLD -#undef MOD_1U_TO_MOD_1_1_THRESHOLD #define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX #define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX -#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX -#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX #define __gmpn_mod_1 mpn_mod_1_div #include "mpn/generic/mod_1.c" diff --git a/gmp/tune/mod_1_inv.c b/gmp/tune/mod_1_inv.c index 92c936ddcf..448d093f31 100644 --- a/gmp/tune/mod_1_inv.c +++ b/gmp/tune/mod_1_inv.c @@ -5,28 +5,17 @@ Copyright 2000 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
*/ #define OPERATION_mod_1 @@ -35,12 +24,8 @@ see https://www.gnu.org/licenses/. */ #undef MOD_1_NORM_THRESHOLD #undef MOD_1_UNNORM_THRESHOLD -#undef MOD_1N_TO_MOD_1_1_THRESHOLD -#undef MOD_1U_TO_MOD_1_1_THRESHOLD #define MOD_1_NORM_THRESHOLD 0 #define MOD_1_UNNORM_THRESHOLD 0 -#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX -#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX #define __gmpn_mod_1 mpn_mod_1_inv #include "mpn/generic/mod_1.c" diff --git a/gmp/tune/modlinv.c b/gmp/tune/modlinv.c index e3f2063e07..3ed0c8a03f 100644 --- a/gmp/tune/modlinv.c +++ b/gmp/tune/modlinv.c @@ -6,28 +6,17 @@ Copyright 2000, 2002 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. 
If not, see http://www.gnu.org/licenses/. */ #include <stdio.h> #include "gmp.h" @@ -41,7 +30,7 @@ see https://www.gnu.org/licenses/. */ dependent chain, whereas the "2*" in the standard version isn't. Depending on the CPU this should be the same or a touch slower. */ -#if GMP_LIMB_BITS <= 32 +#if BITS_PER_MP_LIMB <= 32 #define binvert_limb_mul1(inv,n) \ do { \ mp_limb_t __n = (n); \ @@ -55,7 +44,7 @@ see https://www.gnu.org/licenses/. */ } while (0) #endif -#if GMP_LIMB_BITS > 32 && GMP_LIMB_BITS <= 64 +#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64 #define binvert_limb_mul1(inv,n) \ do { \ mp_limb_t __n = (n); \ @@ -111,7 +100,7 @@ see https://www.gnu.org/licenses/. */ \ ASSERT ((__n & 1) == 1); \ \ - __count = GMP_LIMB_BITS-1; \ + __count = BITS_PER_MP_LIMB-1; \ do \ { \ __inv >>= 1; \ @@ -142,11 +131,11 @@ see https://www.gnu.org/licenses/. */ \ ASSERT ((__n & 1) == 1); \ \ - __count = GMP_LIMB_BITS-1; \ + __count = BITS_PER_MP_LIMB-1; \ do \ { \ __lowbit = __rem & 1; \ - __inv = (__inv >> 1) | (__lowbit << (GMP_LIMB_BITS-1)); \ + __inv = (__inv >> 1) | (__lowbit << (BITS_PER_MP_LIMB-1)); \ __rem = (__rem - (__n & -__lowbit)) >> 1; \ } \ while (-- __count); \ diff --git a/gmp/tune/noop.c b/gmp/tune/noop.c index 5c13c96ee3..7c7f1b5fe6 100644 --- a/gmp/tune/noop.c +++ b/gmp/tune/noop.c @@ -9,28 +9,17 @@ Copyright 1999, 2000 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. 
+it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/tune/pentium.asm b/gmp/tune/pentium.asm index fb1e8332c8..369a8ea76f 100644 --- a/gmp/tune/pentium.asm +++ b/gmp/tune/pentium.asm @@ -1,32 +1,21 @@ dnl x86 pentium time stamp counter access routine. dnl Copyright 1999, 2000, 2005 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/tune/powerpc.asm b/gmp/tune/powerpc.asm index 2f4ac27bea..19ab901386 100644 --- a/gmp/tune/powerpc.asm +++ b/gmp/tune/powerpc.asm @@ -1,32 +1,21 @@ dnl PowerPC mftb_function -- read time base registers. dnl Copyright 2002 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. 
+dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -43,7 +32,7 @@ L(again): mftbu r4 mftb r5 mftbu r6 - cmpw cr0, r4, r6 + cmp cr0, r4, r6 bne L(again) stw r5, 0(r3) diff --git a/gmp/tune/powerpc64.asm b/gmp/tune/powerpc64.asm index 1ade99638a..eb705466da 100644 --- a/gmp/tune/powerpc64.asm +++ b/gmp/tune/powerpc64.asm @@ -1,32 +1,21 @@ dnl PowerPC mftb_function -- read time base registers, 64-bit integer. -dnl Copyright 2002-2004 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/tune/powm_mod.c b/gmp/tune/powm_mod.c index 7c20f53e70..e65f512e85 100644 --- a/gmp/tune/powm_mod.c +++ b/gmp/tune/powm_mod.c @@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc. This file is part of the GNU MP Library. 
The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/tune/powm_redc.c b/gmp/tune/powm_redc.c index c34bb2e0a5..a9e4bb502a 100644 --- a/gmp/tune/powm_redc.c +++ b/gmp/tune/powm_redc.c @@ -6,28 +6,17 @@ Copyright 2000 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. 
- -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/tune/pre_divrem_1.c b/gmp/tune/pre_divrem_1.c index 388ca4150a..2b3fb79e22 100644 --- a/gmp/tune/pre_divrem_1.c +++ b/gmp/tune/pre_divrem_1.c @@ -5,28 +5,17 @@ Copyright 2001 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. 
+it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/tune/sb_div.c b/gmp/tune/sb_div.c new file mode 100644 index 0000000000..4d52a24ce3 --- /dev/null +++ b/gmp/tune/sb_div.c @@ -0,0 +1,30 @@ +/* mpn/generic/sb_divrem_mn.c forced to use plain udiv_qrnnd. */ + +/* +Copyright 2001, 2002 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
*/ + +#include "gmp.h" +#include "gmp-impl.h" + +#ifdef DIV_SB_PREINV_THRESHOLD +#undef DIV_SB_PREINV_THRESHOLD +#endif +#define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX +#define __gmpn_sb_divrem_mn mpn_sb_divrem_mn_div + +#include "mpn/generic/sb_divrem_mn.c" diff --git a/gmp/tune/sb_inv.c b/gmp/tune/sb_inv.c new file mode 100644 index 0000000000..f8d798b43b --- /dev/null +++ b/gmp/tune/sb_inv.c @@ -0,0 +1,30 @@ +/* mpn/generic/sb_divrem_mn.c forced to use udiv_qrnnd_preinv. */ + +/* +Copyright 2001, 2002 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + +#include "gmp.h" +#include "gmp-impl.h" + +#ifdef DIV_SB_PREINV_THRESHOLD +#undef DIV_SB_PREINV_THRESHOLD +#endif +#define DIV_SB_PREINV_THRESHOLD 0 +#define __gmpn_sb_divrem_mn mpn_sb_divrem_mn_inv + +#include "mpn/generic/sb_divrem_mn.c" diff --git a/gmp/tune/set_strb.c b/gmp/tune/set_strb.c index 842ec4cd44..c67b09ccaa 100644 --- a/gmp/tune/set_strb.c +++ b/gmp/tune/set_strb.c @@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc. This file is part of the GNU MP Library. 
The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define __gmpn_set_str mpn_set_str_basecase #define __gmpn_bc_set_str mpn_bc_set_str_basecase diff --git a/gmp/tune/set_strp.c b/gmp/tune/set_strp.c index 5520f28696..701ab2bf2b 100644 --- a/gmp/tune/set_strp.c +++ b/gmp/tune/set_strp.c @@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. 
- -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define TUNE_PROGRAM_BUILD 1 /* for gmp-impl.h */ diff --git a/gmp/tune/set_strs.c b/gmp/tune/set_strs.c index 75b6f39b4d..d8edc7dfde 100644 --- a/gmp/tune/set_strs.c +++ b/gmp/tune/set_strs.c @@ -5,28 +5,17 @@ Copyright 2002 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. 
+it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #define __gmpn_set_str mpn_set_str_subquad #define __gmpn_bc_set_str mpn_bc_set_str_subquad diff --git a/gmp/tune/sparcv9.asm b/gmp/tune/sparcv9.asm index f0981c70fe..b951ff3de2 100644 --- a/gmp/tune/sparcv9.asm +++ b/gmp/tune/sparcv9.asm @@ -1,32 +1,21 @@ dnl Sparc v9 32-bit time stamp counter access routine. dnl Copyright 2000, 2005 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/tune/speed-ext.c b/gmp/tune/speed-ext.c index e7fb8b9f60..04760a5eab 100644 --- a/gmp/tune/speed-ext.c +++ b/gmp/tune/speed-ext.c @@ -5,28 +5,17 @@ Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. 
+it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ /* The extension here is three versions of an mpn arithmetic mean. These @@ -68,9 +57,9 @@ see https://www.gnu.org/licenses/. */ #define SPEED_EXTRA_PROTOS \ - double speed_mean_calls (struct speed_params *s); \ - double speed_mean_open (struct speed_params *s); \ - double speed_mean_open2 (struct speed_params *s); + double speed_mean_calls __GMP_PROTO ((struct speed_params *s)); \ + double speed_mean_open __GMP_PROTO ((struct speed_params *s)); \ + double speed_mean_open2 __GMP_PROTO ((struct speed_params *s)); #define SPEED_EXTRA_ROUTINES \ { "mean_calls", speed_mean_calls }, \ @@ -93,8 +82,8 @@ mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size) ASSERT (size >= 1); c = mpn_add_n (wp, xp, yp, size); - ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1); - wp[size-1] |= (c << (GMP_LIMB_BITS-1)); + ret = mpn_rshift (wp, wp, size, 1) >> (BITS_PER_MP_LIMB-1); + wp[size-1] |= (c << (BITS_PER_MP_LIMB-1)); return ret; } @@ -118,7 +107,7 @@ mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size) c = (wprev < x); ret = (wprev & 1); -#define RSHIFT(hi,lo) (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1))) 
+#define RSHIFT(hi,lo) (((lo) >> 1) | ((hi) << (BITS_PER_MP_LIMB-1))) for (i = 1; i < size; i++) { diff --git a/gmp/tune/speed.c b/gmp/tune/speed.c index 12d53bcaa3..2bb7f7d933 100644 --- a/gmp/tune/speed.c +++ b/gmp/tune/speed.c @@ -1,32 +1,21 @@ /* Speed measuring program. -Copyright 1999-2003, 2005, 2006, 2008-2012 Free Software Foundation, Inc. +Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ /* Usage message is in the code below, run with no arguments to print it. See README for interesting applications. 
@@ -100,10 +89,10 @@ SPEED_EXTRA_PROTOS2 } while (0) -#if GMP_LIMB_BITS == 32 +#if BITS_PER_MP_LIMB == 32 #define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK) #endif -#if GMP_LIMB_BITS == 64 +#if BITS_PER_MP_LIMB == 64 #define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK) #endif @@ -163,15 +152,8 @@ const struct routine_t { { "mpn_add_n", speed_mpn_add_n, FLAG_R_OPTIONAL }, { "mpn_sub_n", speed_mpn_sub_n, FLAG_R_OPTIONAL }, - { "mpn_add_err1_n", speed_mpn_add_err1_n }, - { "mpn_add_err2_n", speed_mpn_add_err2_n }, - { "mpn_add_err3_n", speed_mpn_add_err3_n }, - { "mpn_sub_err1_n", speed_mpn_sub_err1_n }, - { "mpn_sub_err2_n", speed_mpn_sub_err2_n }, - { "mpn_sub_err3_n", speed_mpn_sub_err3_n }, - -#if HAVE_NATIVE_mpn_add_n_sub_n - { "mpn_add_n_sub_n", speed_mpn_add_n_sub_n, FLAG_R_OPTIONAL }, +#if HAVE_NATIVE_mpn_addsub_n + { "mpn_addsub_n", speed_mpn_addsub_n, FLAG_R_OPTIONAL }, #endif { "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R }, @@ -208,12 +190,6 @@ const struct routine_t { #if HAVE_NATIVE_mpn_mul_4 { "mpn_mul_4", speed_mpn_mul_4, FLAG_R_OPTIONAL }, #endif -#if HAVE_NATIVE_mpn_mul_5 - { "mpn_mul_5", speed_mpn_mul_5, FLAG_R_OPTIONAL }, -#endif -#if HAVE_NATIVE_mpn_mul_6 - { "mpn_mul_6", speed_mpn_mul_6, FLAG_R_OPTIONAL }, -#endif { "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R }, { "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R }, @@ -229,13 +205,6 @@ const struct routine_t { { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R }, { "mpn_preinv_mod_1", speed_mpn_preinv_mod_1, FLAG_R }, - { "mpn_mod_1_1", speed_mpn_mod_1_1, FLAG_R }, - { "mpn_mod_1_1_1", speed_mpn_mod_1_1_1, FLAG_R }, - { "mpn_mod_1_1_2", speed_mpn_mod_1_1_2, FLAG_R }, - { "mpn_mod_1s_2", speed_mpn_mod_1_2, FLAG_R }, - { "mpn_mod_1s_3", speed_mpn_mod_1_3, FLAG_R }, - { "mpn_mod_1s_4", speed_mpn_mod_1_4, FLAG_R }, - { "mpn_divrem_1_div", speed_mpn_divrem_1_div, FLAG_R }, { "mpn_divrem_1_inv", speed_mpn_divrem_1_inv, FLAG_R }, { "mpn_divrem_1f_div", 
speed_mpn_divrem_1f_div, FLAG_R }, @@ -247,19 +216,9 @@ const struct routine_t { { "mpn_divrem_2_div", speed_mpn_divrem_2_div, }, { "mpn_divrem_2_inv", speed_mpn_divrem_2_inv, }, - { "mpn_div_qr_1n_pi1", speed_mpn_div_qr_1n_pi1, FLAG_R }, - { "mpn_div_qr_1n_pi1_1",speed_mpn_div_qr_1n_pi1_1, FLAG_R }, - { "mpn_div_qr_1n_pi1_2",speed_mpn_div_qr_1n_pi1_2, FLAG_R }, - { "mpn_div_qr_1", speed_mpn_div_qr_1, FLAG_R }, - - { "mpn_div_qr_2n", speed_mpn_div_qr_2n, }, - { "mpn_div_qr_2u", speed_mpn_div_qr_2u, }, - { "mpn_divexact_1", speed_mpn_divexact_1, FLAG_R }, { "mpn_divexact_by3", speed_mpn_divexact_by3 }, - { "mpn_bdiv_q_1", speed_mpn_bdiv_q_1, FLAG_R }, - { "mpn_pi1_bdiv_q_1", speed_mpn_pi1_bdiv_q_1, FLAG_R_OPTIONAL }, { "mpn_bdiv_dbm1c", speed_mpn_bdiv_dbm1c, FLAG_R_OPTIONAL }, #if HAVE_NATIVE_mpn_modexact_1_odd @@ -271,8 +230,17 @@ const struct routine_t { { "mpn_mod_34lsub1", speed_mpn_mod_34lsub1 }, #endif + { "mpn_dc_tdiv_qr", speed_mpn_dc_tdiv_qr }, + { "mpn_dc_divrem_n", speed_mpn_dc_divrem_n }, + { "mpn_dc_divrem_sb", speed_mpn_dc_divrem_sb }, + { "mpn_dc_divrem_sb_div", speed_mpn_dc_divrem_sb_div }, + { "mpn_dc_divrem_sb_inv", speed_mpn_dc_divrem_sb_inv }, + + { "mpn_sb_divrem_m3", speed_mpn_sb_divrem_m3 }, + { "mpn_sb_divrem_m3_div", speed_mpn_sb_divrem_m3_div }, + { "mpn_sb_divrem_m3_inv", speed_mpn_sb_divrem_m3_inv }, + { "mpn_lshift", speed_mpn_lshift, FLAG_R }, - { "mpn_lshiftc", speed_mpn_lshiftc, FLAG_R }, { "mpn_rshift", speed_mpn_rshift, FLAG_R }, { "mpn_and_n", speed_mpn_and_n, FLAG_R_OPTIONAL }, @@ -283,7 +251,7 @@ const struct routine_t { { "mpn_nior_n", speed_mpn_nior_n, FLAG_R_OPTIONAL }, { "mpn_xor_n", speed_mpn_xor_n, FLAG_R_OPTIONAL }, { "mpn_xnor_n", speed_mpn_xnor_n, FLAG_R_OPTIONAL }, - { "mpn_com", speed_mpn_com }, + { "mpn_com_n", speed_mpn_com_n }, { "mpn_popcount", speed_mpn_popcount }, { "mpn_hamdist", speed_mpn_hamdist }, @@ -292,17 +260,16 @@ const struct routine_t { { "mpn_hgcd", speed_mpn_hgcd }, { "mpn_hgcd_lehmer", 
speed_mpn_hgcd_lehmer }, - { "mpn_hgcd_appr", speed_mpn_hgcd_appr }, - { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer }, - - { "mpn_hgcd_reduce", speed_mpn_hgcd_reduce }, - { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1 }, - { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2 }, { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL }, { "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL }, { "mpn_gcd", speed_mpn_gcd }, +#if 0 + { "mpn_gcd_binary", speed_mpn_gcd_binary }, + { "mpn_gcd_accel", speed_mpn_gcd_accel }, + { "find_a", speed_find_a, FLAG_NODATA }, +#endif { "mpn_gcdext", speed_mpn_gcdext }, { "mpn_gcdext_single", speed_mpn_gcdext_single }, @@ -317,7 +284,6 @@ const struct routine_t { { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1 }, { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2 }, { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 }, - { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 }, { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL }, { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL }, @@ -325,74 +291,27 @@ const struct routine_t { #if HAVE_NATIVE_mpn_sqr_diagonal { "mpn_sqr_diagonal", speed_mpn_sqr_diagonal }, #endif -#if HAVE_NATIVE_mpn_sqr_diag_addlsh1 - { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 }, -#endif { "mpn_mul_n", speed_mpn_mul_n }, - { "mpn_sqr", speed_mpn_sqr }, - - { "mpn_toom2_sqr", speed_mpn_toom2_sqr }, - { "mpn_toom3_sqr", speed_mpn_toom3_sqr }, - { "mpn_toom4_sqr", speed_mpn_toom4_sqr }, - { "mpn_toom6_sqr", speed_mpn_toom6_sqr }, - { "mpn_toom8_sqr", speed_mpn_toom8_sqr }, - { "mpn_toom22_mul", speed_mpn_toom22_mul }, - { "mpn_toom33_mul", speed_mpn_toom33_mul }, - { "mpn_toom44_mul", speed_mpn_toom44_mul }, - { "mpn_toom6h_mul", speed_mpn_toom6h_mul }, - { "mpn_toom8h_mul", speed_mpn_toom8h_mul }, - { "mpn_toom32_mul", speed_mpn_toom32_mul }, - { "mpn_toom42_mul", speed_mpn_toom42_mul }, - { "mpn_toom43_mul", speed_mpn_toom43_mul }, - { "mpn_toom63_mul", speed_mpn_toom63_mul }, - { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul 
}, - { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr}, -#if WANT_OLD_FFT_FULL + { "mpn_sqr_n", speed_mpn_sqr_n }, + + { "mpn_kara_mul_n", speed_mpn_kara_mul_n }, + { "mpn_kara_sqr_n", speed_mpn_kara_sqr_n }, + { "mpn_toom3_mul_n", speed_mpn_toom3_mul_n }, + { "mpn_toom3_sqr_n", speed_mpn_toom3_sqr_n }, { "mpn_mul_fft_full", speed_mpn_mul_fft_full }, { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr }, -#endif + { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL }, { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL }, - { "mpn_mullo_n", speed_mpn_mullo_n }, - { "mpn_mullo_basecase", speed_mpn_mullo_basecase }, - - { "mpn_mulmid_basecase", speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL }, - { "mpn_toom42_mulmid", speed_mpn_toom42_mulmid }, - { "mpn_mulmid_n", speed_mpn_mulmid_n }, - { "mpn_mulmid", speed_mpn_mulmid, FLAG_R_OPTIONAL }, - - { "mpn_bc_mulmod_bnm1", speed_mpn_bc_mulmod_bnm1 }, - { "mpn_mulmod_bnm1", speed_mpn_mulmod_bnm1 }, - { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded }, - { "mpn_sqrmod_bnm1", speed_mpn_sqrmod_bnm1 }, - - { "mpn_invert", speed_mpn_invert }, - { "mpn_invertappr", speed_mpn_invertappr }, - { "mpn_ni_invertappr", speed_mpn_ni_invertappr }, - { "mpn_binvert", speed_mpn_binvert }, - { "mpn_sec_invert", speed_mpn_sec_invert }, - - { "mpn_sbpi1_div_qr", speed_mpn_sbpi1_div_qr, FLAG_R_OPTIONAL}, - { "mpn_dcpi1_div_qr", speed_mpn_dcpi1_div_qr, FLAG_R_OPTIONAL}, - { "mpn_mu_div_qr", speed_mpn_mu_div_qr, FLAG_R_OPTIONAL}, - { "mpn_mupi_div_qr", speed_mpn_mupi_div_qr, FLAG_R_OPTIONAL}, - { "mpn_sbpi1_divappr_q", speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL}, - { "mpn_dcpi1_divappr_q", speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL}, - - { "mpn_sbpi1_bdiv_qr", speed_mpn_sbpi1_bdiv_qr }, - { "mpn_dcpi1_bdiv_qr", speed_mpn_dcpi1_bdiv_qr }, - { "mpn_sbpi1_bdiv_q", speed_mpn_sbpi1_bdiv_q }, - { "mpn_dcpi1_bdiv_q", speed_mpn_dcpi1_bdiv_q }, - - { "mpn_broot", speed_mpn_broot, FLAG_R }, - { "mpn_broot_invm1", speed_mpn_broot_invm1, 
FLAG_R }, - { "mpn_brootinv", speed_mpn_brootinv, FLAG_R }, - - { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL }, - { "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL }, - { "mpn_set_str_basecase", speed_mpn_bc_set_str, FLAG_R_OPTIONAL }, + { "mpn_mullow_n", speed_mpn_mullow_n }, + { "mpn_mullow_basecase", speed_mpn_mullow_basecase}, + + { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL }, + + { "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL }, + { "mpn_set_str_basecase",speed_mpn_bc_set_str, FLAG_R_OPTIONAL }, { "mpn_sqrtrem", speed_mpn_sqrtrem }, { "mpn_rootrem", speed_mpn_rootrem, FLAG_R }, @@ -405,18 +324,14 @@ const struct routine_t { { "mpz_add", speed_mpz_add }, { "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL }, - { "mpz_bin_ui", speed_mpz_bin_ui, FLAG_NODATA | FLAG_R_OPTIONAL }, { "mpz_fac_ui", speed_mpz_fac_ui, FLAG_NODATA }, { "mpz_powm", speed_mpz_powm }, { "mpz_powm_mod", speed_mpz_powm_mod }, { "mpz_powm_redc", speed_mpz_powm_redc }, - { "mpz_powm_sec", speed_mpz_powm_sec }, { "mpz_powm_ui", speed_mpz_powm_ui, FLAG_R_OPTIONAL }, { "mpz_mod", speed_mpz_mod }, { "mpn_redc_1", speed_mpn_redc_1 }, - { "mpn_redc_2", speed_mpn_redc_2 }, - { "mpn_redc_n", speed_mpn_redc_n }, { "MPN_COPY", speed_MPN_COPY }, { "MPN_COPY_INCR", speed_MPN_COPY_INCR }, @@ -428,71 +343,19 @@ const struct routine_t { #if HAVE_NATIVE_mpn_copyd { "mpn_copyd", speed_mpn_copyd }, #endif - { "mpn_sec_tabselect", speed_mpn_sec_tabselect, FLAG_R_OPTIONAL }, -#if HAVE_NATIVE_mpn_addlsh1_n == 1 - { "mpn_addlsh1_n", speed_mpn_addlsh1_n, FLAG_R_OPTIONAL }, -#endif -#if HAVE_NATIVE_mpn_sublsh1_n == 1 - { "mpn_sublsh1_n", speed_mpn_sublsh1_n, FLAG_R_OPTIONAL }, -#endif -#if HAVE_NATIVE_mpn_addlsh1_n_ip1 - { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1 }, -#endif -#if HAVE_NATIVE_mpn_addlsh1_n_ip2 - { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2 }, -#endif -#if HAVE_NATIVE_mpn_sublsh1_n_ip1 - { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1 }, -#endif -#if 
HAVE_NATIVE_mpn_rsblsh1_n == 1 - { "mpn_rsblsh1_n", speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL }, -#endif -#if HAVE_NATIVE_mpn_addlsh2_n == 1 - { "mpn_addlsh2_n", speed_mpn_addlsh2_n, FLAG_R_OPTIONAL }, -#endif -#if HAVE_NATIVE_mpn_sublsh2_n == 1 - { "mpn_sublsh2_n", speed_mpn_sublsh2_n, FLAG_R_OPTIONAL }, -#endif -#if HAVE_NATIVE_mpn_addlsh2_n_ip1 - { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1 }, +#if HAVE_NATIVE_mpn_addlsh1_n + { "mpn_addlsh1_n", speed_mpn_addlsh1_n }, #endif -#if HAVE_NATIVE_mpn_addlsh2_n_ip2 - { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2 }, -#endif -#if HAVE_NATIVE_mpn_sublsh2_n_ip1 - { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1 }, -#endif -#if HAVE_NATIVE_mpn_rsblsh2_n == 1 - { "mpn_rsblsh2_n", speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL }, -#endif -#if HAVE_NATIVE_mpn_addlsh_n - { "mpn_addlsh_n", speed_mpn_addlsh_n, FLAG_R_OPTIONAL }, -#endif -#if HAVE_NATIVE_mpn_sublsh_n - { "mpn_sublsh_n", speed_mpn_sublsh_n, FLAG_R_OPTIONAL }, -#endif -#if HAVE_NATIVE_mpn_addlsh_n_ip1 - { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1 }, -#endif -#if HAVE_NATIVE_mpn_addlsh_n_ip2 - { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2 }, -#endif -#if HAVE_NATIVE_mpn_sublsh_n_ip1 - { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1 }, -#endif -#if HAVE_NATIVE_mpn_rsblsh_n - { "mpn_rsblsh_n", speed_mpn_rsblsh_n, FLAG_R_OPTIONAL }, +#if HAVE_NATIVE_mpn_sublsh1_n + { "mpn_sublsh1_n", speed_mpn_sublsh1_n }, #endif #if HAVE_NATIVE_mpn_rsh1add_n - { "mpn_rsh1add_n", speed_mpn_rsh1add_n, FLAG_R_OPTIONAL }, + { "mpn_rsh1add_n", speed_mpn_rsh1add_n }, #endif #if HAVE_NATIVE_mpn_rsh1sub_n - { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL }, + { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n }, #endif - { "mpn_cnd_add_n", speed_mpn_cnd_add_n, FLAG_R_OPTIONAL }, - { "mpn_cnd_sub_n", speed_mpn_cnd_sub_n, FLAG_R_OPTIONAL }, - { "MPN_ZERO", speed_MPN_ZERO }, { "binvert_limb", speed_binvert_limb, FLAG_NODATA }, @@ -522,6 +385,8 @@ const struct routine_t { { "count_trailing_zeros", 
speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, { "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL }, + { "udiv_qrnnd_preinv1", speed_udiv_qrnnd_preinv1, FLAG_R_OPTIONAL }, + { "udiv_qrnnd_preinv2", speed_udiv_qrnnd_preinv2, FLAG_R_OPTIONAL }, { "udiv_qrnnd_c", speed_udiv_qrnnd_c, FLAG_R_OPTIONAL }, #if HAVE_NATIVE_mpn_udiv_qrnnd { "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL }, @@ -884,7 +749,7 @@ run_gnuplot (int argc, char *argv[]) fprintf (fp, "set key left\n"); /* designed to make it possible to see crossovers easily */ - fprintf (fp, "set style data lines\n"); + fprintf (fp, "set data style lines\n"); fprintf (fp, "plot "); for (i = 0; i < num_choices; i++) @@ -914,7 +779,7 @@ run_gnuplot (int argc, char *argv[]) /* Return a limb with n many one bits (starting from the least significant) */ #define LIMB_ONES(n) \ - ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX \ + ((n) == BITS_PER_MP_LIMB ? MP_LIMB_T_MAX \ : (n) == 0 ? CNST_LIMB(0) \ : (CNST_LIMB(1) << (n)) - 1) @@ -941,7 +806,7 @@ r_string (const char *s) { if (siz > 1 || siz < -1) printf ("Warning, r parameter %s truncated to %d bits\n", - s_orig, GMP_LIMB_BITS); + s_orig, BITS_PER_MP_LIMB); return l; } } @@ -954,10 +819,10 @@ r_string (const char *s) if (strcmp (s, "bits") == 0) { mp_limb_t l; - if (n > GMP_LIMB_BITS) + if (n > BITS_PER_MP_LIMB) { fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", - n, GMP_LIMB_BITS); + n, BITS_PER_MP_LIMB); exit (1); } mpn_random (&l, 1); @@ -965,10 +830,10 @@ r_string (const char *s) } else if (strcmp (s, "ones") == 0) { - if (n > GMP_LIMB_BITS) + if (n > BITS_PER_MP_LIMB) { fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", - n, GMP_LIMB_BITS); + n, BITS_PER_MP_LIMB); exit (1); } return LIMB_ONES (n); diff --git a/gmp/tune/speed.h b/gmp/tune/speed.h index d9474adb35..ca021409ef 100644 --- a/gmp/tune/speed.h +++ b/gmp/tune/speed.h @@ -1,32 +1,22 @@ /* Header for speed and threshold things. 
-Copyright 1999-2003, 2005, 2006, 2008-2013 Free Software Foundation, Inc. +Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006 Free Software Foundation, +Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #ifndef __SPEED_H__ #define __SPEED_H__ @@ -41,9 +31,9 @@ see https://www.gnu.org/licenses/. */ } while (0) /* A mask of the least significant n bits. Note 1<<32 doesn't give zero on - x86 family CPUs, hence the separate case for GMP_LIMB_BITS. */ + x86 family CPUs, hence the separate case for BITS_PER_MP_LIMB. */ #define MP_LIMB_T_LOWBITMASK(n) \ - ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX : ((mp_limb_t) 1 << (n)) - 1) + ((n) == BITS_PER_MP_LIMB ? 
MP_LIMB_T_MAX : ((mp_limb_t) 1 << (n)) - 1) /* align must be a power of 2 here, usually CACHE_LINE_SIZE is a good choice */ @@ -66,7 +56,7 @@ see https://www.gnu.org/licenses/. */ */ #define CACHE_LINE_SIZE 64 /* bytes */ -#define SPEED_TMP_ALLOC_ADJUST_MASK (CACHE_LINE_SIZE/GMP_LIMB_BYTES - 1) +#define SPEED_TMP_ALLOC_ADJUST_MASK (CACHE_LINE_SIZE/BYTES_PER_MP_LIMB - 1) /* Set ptr to a TMP_ALLOC block of the given limbs, with the given limb alignment. */ @@ -75,7 +65,7 @@ see https://www.gnu.org/licenses/. */ mp_ptr __ptr; \ mp_size_t __ptr_align, __ptr_add; \ \ - ASSERT ((CACHE_LINE_SIZE % GMP_LIMB_BYTES) == 0); \ + ASSERT ((CACHE_LINE_SIZE % BYTES_PER_MP_LIMB) == 0); \ __ptr = TMP_ALLOC_LIMBS ((limbs) + SPEED_TMP_ALLOC_ADJUST_MASK); \ __ptr_align = (__ptr - (mp_ptr) NULL); \ __ptr_add = ((align) - __ptr_align) & SPEED_TMP_ALLOC_ADJUST_MASK; \ @@ -97,13 +87,13 @@ extern double speed_unittime; extern double speed_cycletime; extern int speed_precision; extern char speed_time_string[]; -void speed_time_init (void); -void speed_cycletime_fail (const char *str); -void speed_cycletime_init (void); -void speed_cycletime_need_cycles (void); -void speed_cycletime_need_seconds (void); -void speed_starttime (void); -double speed_endtime (void); +void speed_time_init __GMP_PROTO ((void)); +void speed_cycletime_fail __GMP_PROTO ((const char *str)); +void speed_cycletime_init __GMP_PROTO ((void)); +void speed_cycletime_need_cycles __GMP_PROTO ((void)); +void speed_cycletime_need_seconds __GMP_PROTO ((void)); +void speed_starttime __GMP_PROTO ((void)); +double speed_endtime __GMP_PROTO ((void)); struct speed_params { @@ -127,304 +117,203 @@ struct speed_params { struct { mp_ptr ptr; mp_size_t size; - } src[5], dst[4]; + } src[2], dst[3]; }; -typedef double (*speed_function_t) (struct speed_params *); +typedef double (*speed_function_t) __GMP_PROTO ((struct speed_params *s)); -double speed_measure (speed_function_t fun, struct speed_params *); +double speed_measure __GMP_PROTO 
((speed_function_t fun, struct speed_params *s)); /* Prototypes for speed measuring routines */ -double speed_back_to_back (struct speed_params *); -double speed_count_leading_zeros (struct speed_params *); -double speed_count_trailing_zeros (struct speed_params *); -double speed_find_a (struct speed_params *); -double speed_gmp_allocate_free (struct speed_params *); -double speed_gmp_allocate_reallocate_free (struct speed_params *); -double speed_invert_limb (struct speed_params *); -double speed_malloc_free (struct speed_params *); -double speed_malloc_realloc_free (struct speed_params *); -double speed_memcpy (struct speed_params *); -double speed_binvert_limb (struct speed_params *); -double speed_binvert_limb_mul1 (struct speed_params *); -double speed_binvert_limb_loop (struct speed_params *); -double speed_binvert_limb_cond (struct speed_params *); -double speed_binvert_limb_arith (struct speed_params *); - -double speed_mpf_init_clear (struct speed_params *); - -double speed_mpn_add_n (struct speed_params *); -double speed_mpn_add_err1_n (struct speed_params *); -double speed_mpn_add_err2_n (struct speed_params *); -double speed_mpn_add_err3_n (struct speed_params *); -double speed_mpn_addlsh_n (struct speed_params *); -double speed_mpn_addlsh1_n (struct speed_params *); -double speed_mpn_addlsh2_n (struct speed_params *); -double speed_mpn_addlsh_n_ip1 (struct speed_params *); -double speed_mpn_addlsh1_n_ip1 (struct speed_params *); -double speed_mpn_addlsh2_n_ip1 (struct speed_params *); -double speed_mpn_addlsh_n_ip2 (struct speed_params *); -double speed_mpn_addlsh1_n_ip2 (struct speed_params *); -double speed_mpn_addlsh2_n_ip2 (struct speed_params *); -double speed_mpn_add_n_sub_n (struct speed_params *); -double speed_mpn_and_n (struct speed_params *); -double speed_mpn_andn_n (struct speed_params *); -double speed_mpn_addmul_1 (struct speed_params *); -double speed_mpn_addmul_2 (struct speed_params *); -double speed_mpn_addmul_3 (struct speed_params 
*); -double speed_mpn_addmul_4 (struct speed_params *); -double speed_mpn_addmul_5 (struct speed_params *); -double speed_mpn_addmul_6 (struct speed_params *); -double speed_mpn_addmul_7 (struct speed_params *); -double speed_mpn_addmul_8 (struct speed_params *); -double speed_mpn_cnd_add_n (struct speed_params *); -double speed_mpn_cnd_sub_n (struct speed_params *); -double speed_mpn_com (struct speed_params *); -double speed_mpn_copyd (struct speed_params *); -double speed_mpn_copyi (struct speed_params *); -double speed_MPN_COPY (struct speed_params *); -double speed_MPN_COPY_DECR (struct speed_params *); -double speed_MPN_COPY_INCR (struct speed_params *); -double speed_mpn_sec_tabselect (struct speed_params *); -double speed_mpn_divexact_1 (struct speed_params *); -double speed_mpn_divexact_by3 (struct speed_params *); -double speed_mpn_bdiv_q_1 (struct speed_params *); -double speed_mpn_pi1_bdiv_q_1 (struct speed_params *); -double speed_mpn_bdiv_dbm1c (struct speed_params *); -double speed_mpn_divrem_1 (struct speed_params *); -double speed_mpn_divrem_1f (struct speed_params *); -double speed_mpn_divrem_1c (struct speed_params *); -double speed_mpn_divrem_1cf (struct speed_params *); -double speed_mpn_divrem_1_div (struct speed_params *); -double speed_mpn_divrem_1f_div (struct speed_params *); -double speed_mpn_divrem_1_inv (struct speed_params *); -double speed_mpn_divrem_1f_inv (struct speed_params *); -double speed_mpn_divrem_2 (struct speed_params *); -double speed_mpn_divrem_2_div (struct speed_params *); -double speed_mpn_divrem_2_inv (struct speed_params *); -double speed_mpn_div_qr_1n_pi1 (struct speed_params *); -double speed_mpn_div_qr_1n_pi1_1 (struct speed_params *); -double speed_mpn_div_qr_1n_pi1_2 (struct speed_params *); -double speed_mpn_div_qr_1 (struct speed_params *); -double speed_mpn_div_qr_2n (struct speed_params *); -double speed_mpn_div_qr_2u (struct speed_params *); -double speed_mpn_fib2_ui (struct speed_params *); -double 
speed_mpn_matrix22_mul (struct speed_params *); -double speed_mpn_hgcd (struct speed_params *); -double speed_mpn_hgcd_lehmer (struct speed_params *); -double speed_mpn_hgcd_appr (struct speed_params *); -double speed_mpn_hgcd_appr_lehmer (struct speed_params *); -double speed_mpn_hgcd_reduce (struct speed_params *); -double speed_mpn_hgcd_reduce_1 (struct speed_params *); -double speed_mpn_hgcd_reduce_2 (struct speed_params *); -double speed_mpn_gcd (struct speed_params *); -double speed_mpn_gcd_1 (struct speed_params *); -double speed_mpn_gcd_1N (struct speed_params *); -double speed_mpn_gcdext (struct speed_params *); -double speed_mpn_gcdext_double (struct speed_params *); -double speed_mpn_gcdext_one_double (struct speed_params *); -double speed_mpn_gcdext_one_single (struct speed_params *); -double speed_mpn_gcdext_single (struct speed_params *); -double speed_mpn_get_str (struct speed_params *); -double speed_mpn_hamdist (struct speed_params *); -double speed_mpn_ior_n (struct speed_params *); -double speed_mpn_iorn_n (struct speed_params *); -double speed_mpn_jacobi_base (struct speed_params *); -double speed_mpn_jacobi_base_1 (struct speed_params *); -double speed_mpn_jacobi_base_2 (struct speed_params *); -double speed_mpn_jacobi_base_3 (struct speed_params *); -double speed_mpn_jacobi_base_4 (struct speed_params *); -double speed_mpn_lshift (struct speed_params *); -double speed_mpn_lshiftc (struct speed_params *); -double speed_mpn_mod_1 (struct speed_params *); -double speed_mpn_mod_1c (struct speed_params *); -double speed_mpn_mod_1_div (struct speed_params *); -double speed_mpn_mod_1_inv (struct speed_params *); -double speed_mpn_mod_1_1 (struct speed_params *); -double speed_mpn_mod_1_1_1 (struct speed_params *); -double speed_mpn_mod_1_1_2 (struct speed_params *); -double speed_mpn_mod_1_2 (struct speed_params *); -double speed_mpn_mod_1_3 (struct speed_params *); -double speed_mpn_mod_1_4 (struct speed_params *); -double speed_mpn_mod_34lsub1 
(struct speed_params *); -double speed_mpn_modexact_1_odd (struct speed_params *); -double speed_mpn_modexact_1c_odd (struct speed_params *); -double speed_mpn_mul_1 (struct speed_params *); -double speed_mpn_mul_1_inplace (struct speed_params *); -double speed_mpn_mul_2 (struct speed_params *); -double speed_mpn_mul_3 (struct speed_params *); -double speed_mpn_mul_4 (struct speed_params *); -double speed_mpn_mul_5 (struct speed_params *); -double speed_mpn_mul_6 (struct speed_params *); -double speed_mpn_mul (struct speed_params *); -double speed_mpn_mul_basecase (struct speed_params *); -double speed_mpn_mulmid (struct speed_params *); -double speed_mpn_mulmid_basecase (struct speed_params *); -double speed_mpn_mul_fft (struct speed_params *); -double speed_mpn_mul_fft_sqr (struct speed_params *); -double speed_mpn_fft_mul (struct speed_params *); -double speed_mpn_fft_sqr (struct speed_params *); -#if WANT_OLD_FFT_FULL -double speed_mpn_mul_fft_full (struct speed_params *); -double speed_mpn_mul_fft_full_sqr (struct speed_params *); -#endif -double speed_mpn_nussbaumer_mul (struct speed_params *); -double speed_mpn_nussbaumer_mul_sqr (struct speed_params *); -double speed_mpn_mul_n (struct speed_params *); -double speed_mpn_mul_n_sqr (struct speed_params *); -double speed_mpn_mulmid_n (struct speed_params *); -double speed_mpn_mullo_n (struct speed_params *); -double speed_mpn_mullo_basecase (struct speed_params *); -double speed_mpn_nand_n (struct speed_params *); -double speed_mpn_nior_n (struct speed_params *); -double speed_mpn_popcount (struct speed_params *); -double speed_mpn_preinv_divrem_1 (struct speed_params *); -double speed_mpn_preinv_divrem_1f (struct speed_params *); -double speed_mpn_preinv_mod_1 (struct speed_params *); -double speed_mpn_sbpi1_div_qr (struct speed_params *); -double speed_mpn_dcpi1_div_qr (struct speed_params *); -double speed_mpn_sbpi1_divappr_q (struct speed_params *); -double speed_mpn_dcpi1_divappr_q (struct speed_params *); 
-double speed_mpn_mu_div_qr (struct speed_params *); -double speed_mpn_mu_divappr_q (struct speed_params *); -double speed_mpn_mupi_div_qr (struct speed_params *); -double speed_mpn_mu_div_q (struct speed_params *); -double speed_mpn_sbpi1_bdiv_qr (struct speed_params *); -double speed_mpn_dcpi1_bdiv_qr (struct speed_params *); -double speed_mpn_sbpi1_bdiv_q (struct speed_params *); -double speed_mpn_dcpi1_bdiv_q (struct speed_params *); -double speed_mpn_mu_bdiv_q (struct speed_params *); -double speed_mpn_mu_bdiv_qr (struct speed_params *); -double speed_mpn_broot (struct speed_params *); -double speed_mpn_broot_invm1 (struct speed_params *); -double speed_mpn_brootinv (struct speed_params *); -double speed_mpn_invert (struct speed_params *); -double speed_mpn_invertappr (struct speed_params *); -double speed_mpn_ni_invertappr (struct speed_params *); -double speed_mpn_sec_invert (struct speed_params *s); -double speed_mpn_binvert (struct speed_params *); -double speed_mpn_redc_1 (struct speed_params *); -double speed_mpn_redc_2 (struct speed_params *); -double speed_mpn_redc_n (struct speed_params *); -double speed_mpn_rsblsh_n (struct speed_params *); -double speed_mpn_rsblsh1_n (struct speed_params *); -double speed_mpn_rsblsh2_n (struct speed_params *); -double speed_mpn_rsh1add_n (struct speed_params *); -double speed_mpn_rsh1sub_n (struct speed_params *); -double speed_mpn_rshift (struct speed_params *); -double speed_mpn_sb_divrem_m3 (struct speed_params *); -double speed_mpn_sb_divrem_m3_div (struct speed_params *); -double speed_mpn_sb_divrem_m3_inv (struct speed_params *); -double speed_mpn_set_str (struct speed_params *); -double speed_mpn_bc_set_str (struct speed_params *); -double speed_mpn_dc_set_str (struct speed_params *); -double speed_mpn_set_str_pre (struct speed_params *); -double speed_mpn_sqr_basecase (struct speed_params *); -double speed_mpn_sqr_diag_addlsh1 (struct speed_params *); -double speed_mpn_sqr_diagonal (struct speed_params *); 
-double speed_mpn_sqr (struct speed_params *); -double speed_mpn_sqrtrem (struct speed_params *); -double speed_mpn_rootrem (struct speed_params *); -double speed_mpn_sub_n (struct speed_params *); -double speed_mpn_sub_err1_n (struct speed_params *); -double speed_mpn_sub_err2_n (struct speed_params *); -double speed_mpn_sub_err3_n (struct speed_params *); -double speed_mpn_sublsh_n (struct speed_params *); -double speed_mpn_sublsh1_n (struct speed_params *); -double speed_mpn_sublsh2_n (struct speed_params *); -double speed_mpn_sublsh_n_ip1 (struct speed_params *); -double speed_mpn_sublsh1_n_ip1 (struct speed_params *); -double speed_mpn_sublsh2_n_ip1 (struct speed_params *); -double speed_mpn_submul_1 (struct speed_params *); -double speed_mpn_toom2_sqr (struct speed_params *); -double speed_mpn_toom3_sqr (struct speed_params *); -double speed_mpn_toom4_sqr (struct speed_params *); -double speed_mpn_toom6_sqr (struct speed_params *); -double speed_mpn_toom8_sqr (struct speed_params *); -double speed_mpn_toom22_mul (struct speed_params *); -double speed_mpn_toom33_mul (struct speed_params *); -double speed_mpn_toom44_mul (struct speed_params *); -double speed_mpn_toom6h_mul (struct speed_params *); -double speed_mpn_toom8h_mul (struct speed_params *); -double speed_mpn_toom32_mul (struct speed_params *); -double speed_mpn_toom42_mul (struct speed_params *); -double speed_mpn_toom43_mul (struct speed_params *); -double speed_mpn_toom63_mul (struct speed_params *); -double speed_mpn_toom32_for_toom43_mul (struct speed_params *); -double speed_mpn_toom43_for_toom32_mul (struct speed_params *); -double speed_mpn_toom32_for_toom53_mul (struct speed_params *); -double speed_mpn_toom53_for_toom32_mul (struct speed_params *); -double speed_mpn_toom42_for_toom53_mul (struct speed_params *); -double speed_mpn_toom53_for_toom42_mul (struct speed_params *); -double speed_mpn_toom43_for_toom54_mul (struct speed_params *); -double speed_mpn_toom54_for_toom43_mul (struct 
speed_params *); -double speed_mpn_toom42_mulmid (struct speed_params *); -double speed_mpn_mulmod_bnm1 (struct speed_params *); -double speed_mpn_bc_mulmod_bnm1 (struct speed_params *); -double speed_mpn_mulmod_bnm1_rounded (struct speed_params *); -double speed_mpn_sqrmod_bnm1 (struct speed_params *); -double speed_mpn_udiv_qrnnd (struct speed_params *); -double speed_mpn_udiv_qrnnd_r (struct speed_params *); -double speed_mpn_umul_ppmm (struct speed_params *); -double speed_mpn_umul_ppmm_r (struct speed_params *); -double speed_mpn_xnor_n (struct speed_params *); -double speed_mpn_xor_n (struct speed_params *); -double speed_MPN_ZERO (struct speed_params *); - -double speed_mpq_init_clear (struct speed_params *); - -double speed_mpz_add (struct speed_params *); -double speed_mpz_bin_uiui (struct speed_params *); -double speed_mpz_bin_ui (struct speed_params *); -double speed_mpz_fac_ui (struct speed_params *); -double speed_mpz_fib_ui (struct speed_params *); -double speed_mpz_fib2_ui (struct speed_params *); -double speed_mpz_init_clear (struct speed_params *); -double speed_mpz_init_realloc_clear (struct speed_params *); -double speed_mpz_jacobi (struct speed_params *); -double speed_mpz_lucnum_ui (struct speed_params *); -double speed_mpz_lucnum2_ui (struct speed_params *); -double speed_mpz_mod (struct speed_params *); -double speed_mpz_powm (struct speed_params *); -double speed_mpz_powm_mod (struct speed_params *); -double speed_mpz_powm_redc (struct speed_params *); -double speed_mpz_powm_sec (struct speed_params *); -double speed_mpz_powm_ui (struct speed_params *); -double speed_mpz_urandomb (struct speed_params *); - -double speed_gmp_randseed (struct speed_params *); -double speed_gmp_randseed_ui (struct speed_params *); - -double speed_noop (struct speed_params *); -double speed_noop_wxs (struct speed_params *); -double speed_noop_wxys (struct speed_params *); - -double speed_operator_div (struct speed_params *); -double speed_operator_mod (struct 
speed_params *); - -double speed_udiv_qrnnd (struct speed_params *); -double speed_udiv_qrnnd_preinv1 (struct speed_params *); -double speed_udiv_qrnnd_preinv2 (struct speed_params *); -double speed_udiv_qrnnd_preinv3 (struct speed_params *); -double speed_udiv_qrnnd_c (struct speed_params *); -double speed_umul_ppmm (struct speed_params *); +double speed_back_to_back __GMP_PROTO ((struct speed_params *s)); +double speed_count_leading_zeros __GMP_PROTO ((struct speed_params *s)); +double speed_count_trailing_zeros __GMP_PROTO ((struct speed_params *s)); +double speed_find_a __GMP_PROTO ((struct speed_params *s)); +double speed_gmp_allocate_free __GMP_PROTO ((struct speed_params *s)); +double speed_gmp_allocate_reallocate_free __GMP_PROTO ((struct speed_params *s)); +double speed_invert_limb __GMP_PROTO ((struct speed_params *s)); +double speed_malloc_free __GMP_PROTO ((struct speed_params *s)); +double speed_malloc_realloc_free __GMP_PROTO ((struct speed_params *s)); +double speed_memcpy __GMP_PROTO ((struct speed_params *s)); +double speed_binvert_limb __GMP_PROTO ((struct speed_params *s)); +double speed_binvert_limb_mul1 __GMP_PROTO ((struct speed_params *s)); +double speed_binvert_limb_loop __GMP_PROTO ((struct speed_params *s)); +double speed_binvert_limb_cond __GMP_PROTO ((struct speed_params *s)); +double speed_binvert_limb_arith __GMP_PROTO ((struct speed_params *s)); + +double speed_mpf_init_clear __GMP_PROTO ((struct speed_params *s)); + +double speed_mpn_add_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_addlsh1_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_addsub_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_and_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_andn_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_addmul_1 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_addmul_2 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_addmul_3 __GMP_PROTO ((struct speed_params 
*s)); +double speed_mpn_addmul_4 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_addmul_5 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_addmul_6 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_addmul_7 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_addmul_8 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_com_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_copyd __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_copyi __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_dc_divrem_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_dc_divrem_sb __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_dc_divrem_sb_div __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_dc_divrem_sb_inv __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_dc_tdiv_qr __GMP_PROTO ((struct speed_params *s)); +double speed_MPN_COPY __GMP_PROTO ((struct speed_params *s)); +double speed_MPN_COPY_DECR __GMP_PROTO ((struct speed_params *s)); +double speed_MPN_COPY_INCR __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_divexact_1 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_divexact_by3 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_bdiv_dbm1c __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_divrem_1 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_divrem_1f __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_divrem_1c __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_divrem_1cf __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_divrem_1_div __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_divrem_1f_div __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_divrem_1_inv __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_divrem_1f_inv __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_divrem_2 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_divrem_2_div __GMP_PROTO ((struct speed_params *s)); 
+double speed_mpn_divrem_2_inv __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_fib2_ui __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_matrix22_mul __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_gcd __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_gcd_1 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_gcd_1N __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_gcd_binary __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_gcd_accel __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_gcdext __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_gcdext_double __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_gcdext_one_double __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_gcdext_one_single __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_gcdext_single __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_get_str __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_hamdist __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_ior_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_iorn_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_jacobi_base __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_jacobi_base_1 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_jacobi_base_2 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_jacobi_base_3 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_kara_mul_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_kara_sqr_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_lshift __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mod_1 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mod_1c __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mod_1_div __GMP_PROTO ((struct speed_params *s)); +double 
speed_mpn_mod_1_inv __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mod_34lsub1 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_modexact_1_odd __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_modexact_1c_odd __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mul_1 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mul_1_inplace __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mul_2 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mul_3 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mul_4 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mul __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mul_basecase __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mul_fft __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mul_fft_sqr __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mul_fft_full __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mul_fft_full_sqr __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mul_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mul_n_sqr __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mullow_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_mullow_basecase __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_nand_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_nior_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_popcount __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_preinv_divrem_1 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_preinv_divrem_1f __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_preinv_mod_1 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_redc_1 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_rsh1add_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_rsh1sub_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_rshift __GMP_PROTO ((struct speed_params *s)); +double 
speed_mpn_sb_divrem_m3 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_sb_divrem_m3_div __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_sb_divrem_m3_inv __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_set_str __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_bc_set_str __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_dc_set_str __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_set_str_pre __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_sqr_basecase __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_sqr_diagonal __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_sqr_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_sqrtrem __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_rootrem __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_sub_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_sublsh1_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_submul_1 __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_toom3_mul_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_toom3_sqr_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_udiv_qrnnd __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_udiv_qrnnd_r __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_umul_ppmm __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_umul_ppmm_r __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_xnor_n __GMP_PROTO ((struct speed_params *s)); +double speed_mpn_xor_n __GMP_PROTO ((struct speed_params *s)); +double speed_MPN_ZERO __GMP_PROTO ((struct speed_params *s)); + +double speed_mpq_init_clear __GMP_PROTO ((struct speed_params *s)); + +double speed_mpz_add __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_bin_uiui __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_fac_ui __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_fib_ui __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_fib2_ui 
__GMP_PROTO ((struct speed_params *s)); +double speed_mpz_init_clear __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_init_realloc_clear __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_jacobi __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_lucnum_ui __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_lucnum2_ui __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_mod __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_powm __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_powm_mod __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_powm_redc __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_powm_ui __GMP_PROTO ((struct speed_params *s)); +double speed_mpz_urandomb __GMP_PROTO ((struct speed_params *s)); + +double speed_gmp_randseed __GMP_PROTO ((struct speed_params *s)); +double speed_gmp_randseed_ui __GMP_PROTO ((struct speed_params *s)); + +double speed_noop __GMP_PROTO ((struct speed_params *s)); +double speed_noop_wxs __GMP_PROTO ((struct speed_params *s)); +double speed_noop_wxys __GMP_PROTO ((struct speed_params *s)); + +double speed_operator_div __GMP_PROTO ((struct speed_params *s)); +double speed_operator_mod __GMP_PROTO ((struct speed_params *s)); + +double speed_udiv_qrnnd __GMP_PROTO ((struct speed_params *s)); +double speed_udiv_qrnnd_preinv1 __GMP_PROTO ((struct speed_params *s)); +double speed_udiv_qrnnd_preinv2 __GMP_PROTO ((struct speed_params *s)); +double speed_udiv_qrnnd_c __GMP_PROTO ((struct speed_params *s)); +double speed_umul_ppmm __GMP_PROTO ((struct speed_params *s)); /* Prototypes for other routines */ /* low 32-bits in p[0], high 32-bits in p[1] */ -void speed_cyclecounter (unsigned p[2]); +void speed_cyclecounter __GMP_PROTO ((unsigned p[2])); -void mftb_function (unsigned p[2]); +void mftb_function __GMP_PROTO ((unsigned p[2])); /* In i386 gcc -fPIC, ebx is a fixed register and can't be declared a dummy output or a clobber for the cpuid, hence an explicit save and 
restore. A clobber as such doesn't provoke an error unfortunately (gcc 3.0), so use the dummy output style in non-PIC, so there's an error if somehow -fPIC - is used without a -DPIC to tell us about it. */ + is used without a -DPIC to tell us about it. */ #if defined(__GNUC__) && ! defined (NO_ASM) \ && (defined (__i386__) || defined (__i486__)) -#if defined (PIC) || defined (__APPLE_CC__) +#ifdef PIC #define speed_cyclecounter(p) \ do { \ int __speed_cyclecounter__save_ebx; \ @@ -453,80 +342,71 @@ void mftb_function (unsigned p[2]); #endif #endif -double speed_cyclecounter_diff (const unsigned [2], const unsigned [2]); -int gettimeofday_microseconds_p (void); -int getrusage_microseconds_p (void); -int cycles_works_p (void); -long clk_tck (void); -double freq_measure (const char *, double (*)(void)); - -int double_cmp_ptr (const double *, const double *); -void pentium_wbinvd (void); -typedef int (*qsort_function_t) (const void *, const void *); - -void noop (void); -void noop_1 (mp_limb_t); -void noop_wxs (mp_ptr, mp_srcptr, mp_size_t); -void noop_wxys (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t); -void mpn_cache_fill (mp_srcptr, mp_size_t); -void mpn_cache_fill_dummy (mp_limb_t); -void speed_cache_fill (struct speed_params *); -void speed_operand_src (struct speed_params *, mp_ptr, mp_size_t); -void speed_operand_dst (struct speed_params *, mp_ptr, mp_size_t); +double speed_cyclecounter_diff __GMP_PROTO ((const unsigned [2], const unsigned [2])); +int gettimeofday_microseconds_p __GMP_PROTO ((void)); +int getrusage_microseconds_p __GMP_PROTO ((void)); +int cycles_works_p __GMP_PROTO ((void)); +long clk_tck __GMP_PROTO ((void)); +double freq_measure __GMP_PROTO ((const char *, double (*)(void))); + +int double_cmp_ptr __GMP_PROTO ((const double *, const double *)); +void pentium_wbinvd __GMP_PROTO ((void)); +typedef int (*qsort_function_t) __GMP_PROTO ((const void *, const void *)); + +void noop __GMP_PROTO ((void)); +void noop_1 __GMP_PROTO ((mp_limb_t)); +void 
noop_wxs __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t)); +void noop_wxys __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); +void mpn_cache_fill __GMP_PROTO ((mp_srcptr, mp_size_t)); +void mpn_cache_fill_dummy __GMP_PROTO ((mp_limb_t)); +void speed_cache_fill __GMP_PROTO ((struct speed_params *)); +void speed_operand_src __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t)); +void speed_operand_dst __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t)); extern int speed_option_addrs; extern int speed_option_verbose; -extern int speed_option_cycles_broken; -void speed_option_set (const char *); - -mp_limb_t mpn_div_qr_1n_pi1_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t); -mp_limb_t mpn_div_qr_1n_pi1_2 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t); - -mp_limb_t mpn_divrem_1_div (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); -mp_limb_t mpn_divrem_1_inv (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); -mp_limb_t mpn_divrem_2_div (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr); -mp_limb_t mpn_divrem_2_inv (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr); +void speed_option_set __GMP_PROTO((const char *)); -int mpn_jacobi_base_1 (mp_limb_t, mp_limb_t, int); -int mpn_jacobi_base_2 (mp_limb_t, mp_limb_t, int); -int mpn_jacobi_base_3 (mp_limb_t, mp_limb_t, int); -int mpn_jacobi_base_4 (mp_limb_t, mp_limb_t, int); +mp_limb_t mpn_divrem_1_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)); +mp_limb_t mpn_divrem_1_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)); +mp_limb_t mpn_divrem_2_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr)); +mp_limb_t mpn_divrem_2_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr)); -mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t); -mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t); +int mpn_jacobi_base_1 __GMP_PROTO ((mp_limb_t, mp_limb_t, int)); +int mpn_jacobi_base_2 __GMP_PROTO ((mp_limb_t, 
mp_limb_t, int)); +int mpn_jacobi_base_3 __GMP_PROTO ((mp_limb_t, mp_limb_t, int)); -mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]); -mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]); +mp_limb_t mpn_mod_1_div __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)); +mp_limb_t mpn_mod_1_inv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)); -void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t); -void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t); +mp_size_t mpn_gcd_binary + __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)); +mp_size_t mpn_gcd_accel + __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t)); +mp_size_t mpn_gcdext_one_double + __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t)); +mp_size_t mpn_gcdext_one_single + __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t)); +mp_size_t mpn_gcdext_single + __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t)); +mp_size_t mpn_gcdext_double + __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t)); -mp_size_t mpn_gcdext_one_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t); -mp_size_t mpn_gcdext_one_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t); -mp_size_t mpn_gcdext_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t); -mp_size_t mpn_gcdext_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t); -mp_size_t mpn_hgcd_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr); -mp_size_t mpn_hgcd_lehmer_itch (mp_size_t); +mp_limb_t mpn_sb_divrem_mn_div __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t)); +mp_limb_t mpn_sb_divrem_mn_inv __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t)); -mp_size_t mpn_hgcd_appr_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr); -mp_size_t mpn_hgcd_appr_lehmer_itch (mp_size_t); 
+mp_size_t mpn_set_str_basecase __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int)); -mp_size_t mpn_hgcd_reduce_1 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr); -mp_size_t mpn_hgcd_reduce_1_itch (mp_size_t, mp_size_t); +void mpn_toom3_mul_n_open __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr)); +void mpn_toom3_sqr_n_open __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); +void mpn_toom3_mul_n_mpn __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr)); +void mpn_toom3_sqr_n_mpn __GMP_PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); -mp_size_t mpn_hgcd_reduce_2 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr); -mp_size_t mpn_hgcd_reduce_2_itch (mp_size_t, mp_size_t); +void mpz_powm_mod __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr)); +void mpz_powm_redc __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr)); -mp_limb_t mpn_sb_divrem_mn_div (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t); -mp_limb_t mpn_sb_divrem_mn_inv (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t); - -mp_size_t mpn_set_str_basecase (mp_ptr, const unsigned char *, size_t, int); -void mpn_pre_set_str (mp_ptr, unsigned char *, size_t, powers_t *, mp_ptr); - -void mpz_powm_mod (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr); -void mpz_powm_redc (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr); - -int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); +int speed_routine_count_zeros_setup + __GMP_PROTO ((struct speed_params *, mp_ptr, int, int)); /* "get" is called repeatedly until it ticks over, just in case on a fast @@ -623,7 +503,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); #define SPEED_RESTRICT_COND(cond) if (!(cond)) return -1.0; /* For mpn_copy or similar. 
*/ -#define SPEED_ROUTINE_MPN_COPY_CALL(call) \ +#define SPEED_ROUTINE_MPN_COPY(function) \ { \ mp_ptr wp; \ unsigned i; \ @@ -642,47 +522,13 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); speed_starttime (); \ i = s->reps; \ do \ - call; \ + function (wp, s->xp, s->size); \ while (--i != 0); \ t = speed_endtime (); \ \ TMP_FREE; \ return t; \ } -#define SPEED_ROUTINE_MPN_COPY(function) \ - SPEED_ROUTINE_MPN_COPY_CALL (function (wp, s->xp, s->size)) - -#define SPEED_ROUTINE_MPN_TABSELECT(function) \ - { \ - mp_ptr xp, wp; \ - unsigned i; \ - double t; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 0); \ - \ - if (s->r == 0) \ - s->r = s->size; /* default to a quadratic shape */ \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (xp, s->size * s->r, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \ - \ - speed_operand_src (s, xp, s->size * s->r); \ - speed_operand_dst (s, wp, s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - function (wp, xp, s->size, s->r, (s->r) / 2); \ - while (--i != 0); \ - t = speed_endtime () / s->r; \ - \ - TMP_FREE; \ - return t; \ - } - #define SPEED_ROUTINE_MPN_COPYC(function) \ { \ @@ -712,7 +558,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); } /* s->size is still in limbs, and it's limbs which are copied, but - "function" takes a size in bytes not limbs. */ + "function" takes a size in bytes not limbs. 
*/ #define SPEED_ROUTINE_MPN_COPY_BYTES(function) \ { \ mp_ptr wp; \ @@ -732,7 +578,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); speed_starttime (); \ i = s->reps; \ do \ - function (wp, s->xp, s->size * GMP_LIMB_BYTES); \ + function (wp, s->xp, s->size * BYTES_PER_MP_LIMB); \ while (--i != 0); \ t = speed_endtime (); \ \ @@ -788,72 +634,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); return t; \ } - -/* For mpn_aors_errK_n, where 1 <= K <= 3. */ -#define SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL(call, K) \ - { \ - mp_ptr wp; \ - mp_ptr xp, yp; \ - mp_ptr zp[K]; \ - mp_limb_t ep[2*K]; \ - unsigned i; \ - double t; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \ - \ - /* (don't have a mechanism to specify zp alignments) */ \ - for (i = 0; i < K; i++) \ - SPEED_TMP_ALLOC_LIMBS (zp[i], s->size, 0); \ - \ - xp = s->xp; \ - yp = s->yp; \ - \ - if (s->r == 0) ; \ - else if (s->r == 1) { xp = wp; } \ - else if (s->r == 2) { yp = wp; } \ - else if (s->r == 3) { xp = wp; yp = wp; } \ - else if (s->r == 4) { yp = xp; } \ - else { \ - TMP_FREE; \ - return -1.0; \ - } \ - \ - /* initialize wp if operand overlap */ \ - if (xp == wp || yp == wp) \ - MPN_COPY (wp, s->xp, s->size); \ - \ - speed_operand_src (s, xp, s->size); \ - speed_operand_src (s, yp, s->size); \ - for (i = 0; i < K; i++) \ - speed_operand_src (s, zp[i], s->size); \ - speed_operand_dst (s, wp, s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - call; \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } - -#define SPEED_ROUTINE_MPN_BINARY_ERR1_N(function) \ - SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], s->size, 0), 1) - -#define SPEED_ROUTINE_MPN_BINARY_ERR2_N(function) \ - SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], s->size, 
0), 2) - -#define SPEED_ROUTINE_MPN_BINARY_ERR3_N(function) \ - SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], zp[2], s->size, 0), 3) - - /* For mpn_add_n, mpn_sub_n, or similar. */ #define SPEED_ROUTINE_MPN_ADDSUB_N_CALL(call) \ { \ @@ -954,26 +734,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); #define SPEED_ROUTINE_MPN_DIVEXACT_1(function) \ SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r)) -#define SPEED_ROUTINE_MPN_BDIV_Q_1(function) \ - SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r)) - -#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL(call) \ - { \ - unsigned shift; \ - mp_limb_t dinv; \ - \ - SPEED_RESTRICT_COND (s->size > 0); \ - SPEED_RESTRICT_COND (s->r != 0); \ - \ - count_trailing_zeros (shift, s->r); \ - binvert_limb (dinv, s->r >> shift); \ - \ - SPEED_ROUTINE_MPN_UNARY_1_CALL (call); \ - } -#define SPEED_ROUTINE_MPN_PI1_BDIV_Q_1(function) \ - SPEED_ROUTINE_MPN_PI1_BDIV_Q_1_CALL \ - ((*function) (wp, s->xp, s->size, s->r, dinv, shift)) - #define SPEED_ROUTINE_MPN_BDIV_DBM1C(function) \ SPEED_ROUTINE_MPN_UNARY_1_CALL ((*function) (wp, s->xp, s->size, s->r, 0)) @@ -1069,30 +829,30 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); /* For mpn_mul, mpn_mul_basecase, xsize=r, ysize=s->size. */ #define SPEED_ROUTINE_MPN_MUL(function) \ { \ - mp_ptr wp; \ + mp_ptr wp, xp; \ mp_size_t size1; \ unsigned i; \ double t; \ TMP_DECL; \ \ size1 = (s->r == 0 ? 
s->size : s->r); \ - if (size1 < 0) size1 = -size1 - s->size; \ \ - SPEED_RESTRICT_COND (size1 >= 1); \ - SPEED_RESTRICT_COND (s->size >= size1); \ + SPEED_RESTRICT_COND (s->size >= 1); \ + SPEED_RESTRICT_COND (size1 >= s->size); \ \ TMP_MARK; \ SPEED_TMP_ALLOC_LIMBS (wp, size1 + s->size, s->align_wp); \ + SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \ \ - speed_operand_src (s, s->xp, s->size); \ - speed_operand_src (s, s->yp, size1); \ + speed_operand_src (s, xp, size1); \ + speed_operand_src (s, s->yp, s->size); \ speed_operand_dst (s, wp, size1 + s->size); \ speed_cache_fill (s); \ \ speed_starttime (); \ i = s->reps; \ do \ - function (wp, s->xp, s->size, s->yp, size1); \ + function (wp, xp, size1, s->yp, s->size); \ while (--i != 0); \ t = speed_endtime (); \ \ @@ -1132,7 +892,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); #define SPEED_ROUTINE_MPN_MUL_N(function) \ SPEED_ROUTINE_MPN_MUL_N_CALL (function (wp, s->xp, s->yp, s->size)); -#define SPEED_ROUTINE_MPN_MULLO_N_CALL(call) \ +#define SPEED_ROUTINE_MPN_MULLOW_N_CALL(call) \ { \ mp_ptr wp; \ unsigned i; \ @@ -1160,11 +920,11 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); return t; \ } -#define SPEED_ROUTINE_MPN_MULLO_N(function) \ - SPEED_ROUTINE_MPN_MULLO_N_CALL (function (wp, s->xp, s->yp, s->size)); +#define SPEED_ROUTINE_MPN_MULLOW_N(function) \ + SPEED_ROUTINE_MPN_MULLOW_N_CALL (function (wp, s->xp, s->yp, s->size)); /* For mpn_mul_basecase, xsize=r, ysize=s->size. */ -#define SPEED_ROUTINE_MPN_MULLO_BASECASE(function) \ +#define SPEED_ROUTINE_MPN_MULLOW_BASECASE(function) \ { \ mp_ptr wp; \ unsigned i; \ @@ -1192,172 +952,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); return t; \ } -/* For mpn_mulmid, mpn_mulmid_basecase, xsize=r, ysize=s->size. 
*/ -#define SPEED_ROUTINE_MPN_MULMID(function) \ - { \ - mp_ptr wp, xp; \ - mp_size_t size1; \ - unsigned i; \ - double t; \ - TMP_DECL; \ - \ - size1 = (s->r == 0 ? (2 * s->size - 1) : s->r); \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - SPEED_RESTRICT_COND (size1 >= s->size); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \ - \ - speed_operand_src (s, xp, size1); \ - speed_operand_src (s, s->yp, s->size); \ - speed_operand_dst (s, wp, size1 - s->size + 3); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - function (wp, xp, size1, s->yp, s->size); \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } - -#define SPEED_ROUTINE_MPN_MULMID_N(function) \ - { \ - mp_ptr wp, xp; \ - mp_size_t size1; \ - unsigned i; \ - double t; \ - TMP_DECL; \ - \ - size1 = 2 * s->size - 1; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \ - \ - speed_operand_src (s, xp, size1); \ - speed_operand_src (s, s->yp, s->size); \ - speed_operand_dst (s, wp, size1 - s->size + 3); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - function (wp, xp, s->yp, s->size); \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } - -#define SPEED_ROUTINE_MPN_TOOM42_MULMID(function) \ - { \ - mp_ptr wp, xp, scratch; \ - mp_size_t size1, scratch_size; \ - unsigned i; \ - double t; \ - TMP_DECL; \ - \ - size1 = 2 * s->size - 1; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \ - scratch_size = mpn_toom42_mulmid_itch (s->size); \ - SPEED_TMP_ALLOC_LIMBS (scratch, scratch_size, 0); \ - \ - speed_operand_src (s, xp, size1); \ - 
speed_operand_src (s, s->yp, s->size); \ - speed_operand_dst (s, wp, size1 - s->size + 3); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - function (wp, xp, s->yp, s->size, scratch); \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } - -#define SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL(call) \ - { \ - mp_ptr wp, tp; \ - unsigned i; \ - double t; \ - mp_size_t itch; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - itch = mpn_mulmod_bnm1_itch (s->size, s->size, s->size); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); \ - \ - speed_operand_src (s, s->xp, s->size); \ - speed_operand_src (s, s->yp, s->size); \ - speed_operand_dst (s, wp, 2 * s->size); \ - speed_operand_dst (s, tp, itch); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - call; \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } -#define SPEED_ROUTINE_MPN_MULMOD_BNM1_ROUNDED(function) \ - { \ - mp_ptr wp, tp; \ - unsigned i; \ - double t; \ - mp_size_t size, itch; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - size = mpn_mulmod_bnm1_next_size (s->size); \ - itch = mpn_mulmod_bnm1_itch (size, size, size); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (wp, size, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (tp, itch, s->align_wp2); \ - \ - speed_operand_src (s, s->xp, s->size); \ - speed_operand_src (s, s->yp, s->size); \ - speed_operand_dst (s, wp, size); \ - speed_operand_dst (s, tp, itch); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - function (wp, size, s->xp, s->size, s->yp, s->size, tp); \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } - #define SPEED_ROUTINE_MPN_MUL_N_TSPACE(call, tsize, minsize) \ { \ mp_ptr wp, tspace; \ @@ -1388,104 +982,17 @@ int speed_routine_count_zeros_setup (struct 
speed_params *, mp_ptr, int, int); return t; \ } -#define SPEED_ROUTINE_MPN_TOOM22_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom22_mul_itch (s->size, s->size), \ - MPN_TOOM22_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM33_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom33_mul_itch (s->size, s->size), \ - MPN_TOOM33_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM44_MUL_N(function) \ +#define SPEED_ROUTINE_MPN_KARA_MUL_N(function) \ SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom44_mul_itch (s->size, s->size), \ - MPN_TOOM44_MUL_MINSIZE) + (function (wp, s->xp, s->xp, s->size, tspace), \ + MPN_KARA_MUL_N_TSIZE (s->size), \ + MPN_KARA_MUL_N_MINSIZE) -#define SPEED_ROUTINE_MPN_TOOM6H_MUL_N(function) \ +#define SPEED_ROUTINE_MPN_TOOM3_MUL_N(function) \ SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom6h_mul_itch (s->size, s->size), \ - MPN_TOOM6H_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM8H_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size, tspace), \ - mpn_toom8h_mul_itch (s->size, s->size), \ - MPN_TOOM8H_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM32_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, 2*s->size/3, tspace), \ - mpn_toom32_mul_itch (s->size, 2*s->size/3), \ - MPN_TOOM32_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM42_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \ - mpn_toom42_mul_itch (s->size, s->size/2), \ - MPN_TOOM42_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM43_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size*3/4, tspace), \ - mpn_toom43_mul_itch (s->size, s->size*3/4), \ - 
MPN_TOOM43_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM63_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, s->size/2, tspace), \ - mpn_toom63_mul_itch (s->size, s->size/2), \ - MPN_TOOM63_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM43_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace), \ - mpn_toom32_mul_itch (s->size, 17*s->size/24), \ - MPN_TOOM32_MUL_MINSIZE) -#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM32_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, 17*s->size/24, tspace), \ - mpn_toom43_mul_itch (s->size, 17*s->size/24), \ - MPN_TOOM43_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM32_FOR_TOOM53_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace), \ - mpn_toom32_mul_itch (s->size, 19*s->size/30), \ - MPN_TOOM32_MUL_MINSIZE) -#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM32_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, 19*s->size/30, tspace), \ - mpn_toom53_mul_itch (s->size, 19*s->size/30), \ - MPN_TOOM53_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM42_FOR_TOOM53_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace), \ - mpn_toom42_mul_itch (s->size, 11*s->size/20), \ - MPN_TOOM42_MUL_MINSIZE) -#define SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, 11*s->size/20, tspace), \ - mpn_toom53_mul_itch (s->size, 11*s->size/20), \ - MPN_TOOM53_MUL_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL(function) \ - SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace), \ - mpn_toom42_mul_itch (s->size, 5*s->size/6), \ - MPN_TOOM54_MUL_MINSIZE) -#define SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL(function) \ - 
SPEED_ROUTINE_MPN_MUL_N_TSPACE \ - (function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace), \ - mpn_toom54_mul_itch (s->size, 5*s->size/6), \ - MPN_TOOM54_MUL_MINSIZE) - + (function (wp, s->xp, s->yp, s->size, tspace), \ + MPN_TOOM3_MUL_N_TSIZE (s->size), \ + MPN_TOOM3_MUL_N_MINSIZE) #define SPEED_ROUTINE_MPN_SQR_CALL(call) \ @@ -1518,34 +1025,9 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); #define SPEED_ROUTINE_MPN_SQR(function) \ SPEED_ROUTINE_MPN_SQR_CALL (function (wp, s->xp, s->size)) -#define SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL(call) \ - { \ - mp_ptr wp, tp; \ - unsigned i; \ - double t; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 2); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \ - \ - speed_operand_src (s, s->xp, s->size); \ - speed_operand_src (s, tp, 2 * s->size); \ - speed_operand_dst (s, wp, 2 * s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - call; \ - while (--i != 0); \ - t = speed_endtime () / 2; \ - \ - TMP_FREE; \ - return t; \ - } +#define SPEED_ROUTINE_MPN_SQR_DIAGONAL(function) \ + SPEED_ROUTINE_MPN_SQR (function) + #define SPEED_ROUTINE_MPN_SQR_TSPACE(call, tsize, minsize) \ { \ @@ -1576,32 +1058,17 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); return t; \ } -#define SPEED_ROUTINE_MPN_TOOM2_SQR(function) \ +#define SPEED_ROUTINE_MPN_KARA_SQR_N(function) \ SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \ - mpn_toom2_sqr_itch (s->size), \ - MPN_TOOM2_SQR_MINSIZE) + MPN_KARA_SQR_N_TSIZE (s->size), \ + MPN_KARA_SQR_N_MINSIZE) -#define SPEED_ROUTINE_MPN_TOOM3_SQR(function) \ +#define SPEED_ROUTINE_MPN_TOOM3_SQR_N(function) \ SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \ - mpn_toom3_sqr_itch (s->size), \ - MPN_TOOM3_SQR_MINSIZE) + MPN_TOOM3_SQR_N_TSIZE (s->size), \ + 
MPN_TOOM3_SQR_N_MINSIZE) -#define SPEED_ROUTINE_MPN_TOOM4_SQR(function) \ - SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \ - mpn_toom4_sqr_itch (s->size), \ - MPN_TOOM4_SQR_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM6_SQR(function) \ - SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \ - mpn_toom6_sqr_itch (s->size), \ - MPN_TOOM6_SQR_MINSIZE) - -#define SPEED_ROUTINE_MPN_TOOM8_SQR(function) \ - SPEED_ROUTINE_MPN_SQR_TSPACE (function (wp, s->xp, s->size, tspace), \ - mpn_toom8_sqr_itch (s->size), \ - MPN_TOOM8_SQR_MINSIZE) - #define SPEED_ROUTINE_MPN_MOD_CALL(call) \ { \ unsigned i; \ @@ -1656,47 +1123,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); return speed_endtime (); \ } -#define SPEED_ROUTINE_MPN_MOD_1_1(function,pfunc) \ - { \ - unsigned i; \ - mp_limb_t inv[4]; \ - \ - SPEED_RESTRICT_COND (s->size >= 2); \ - \ - mpn_mod_1_1p_cps (inv, s->r); \ - speed_operand_src (s, s->xp, s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do { \ - pfunc (inv, s->r); \ - function (s->xp, s->size, s->r << inv[1], inv); \ - } while (--i != 0); \ - \ - return speed_endtime (); \ - } -#define SPEED_ROUTINE_MPN_MOD_1_N(function,pfunc,N) \ - { \ - unsigned i; \ - mp_limb_t inv[N+3]; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - SPEED_RESTRICT_COND (s->r <= ~(mp_limb_t)0 / N); \ - \ - speed_operand_src (s, s->xp, s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do { \ - pfunc (inv, s->r); \ - function (s->xp, s->size, s->r, inv); \ - } while (--i != 0); \ - \ - return speed_endtime (); \ - } - /* A division of 2*s->size by s->size limbs */ @@ -1705,7 +1131,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); unsigned i; \ mp_ptr a, d, q, r; \ double t; \ - gmp_pi1_t dinv; \ TMP_DECL; \ \ SPEED_RESTRICT_COND (s->size >= 1); \ @@ -1725,8 +1150,6 @@ int speed_routine_count_zeros_setup (struct 
speed_params *, mp_ptr, int, int); d[s->size-1] |= GMP_NUMB_HIGHBIT; \ a[2*s->size-1] = d[s->size-1] - 1; \ \ - invert_pi1 (dinv, d[s->size-1], d[s->size-2]); \ - \ speed_operand_src (s, a, 2*s->size); \ speed_operand_src (s, d, s->size); \ speed_operand_dst (s, q, s->size+1); \ @@ -1744,522 +1167,52 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); return t; \ } +#define SPEED_ROUTINE_MPN_DC_DIVREM_N(function) \ + SPEED_ROUTINE_MPN_DC_DIVREM_CALL((*function) (q, a, d, s->size)) -/* A remainder 2*s->size by s->size limbs */ +#define SPEED_ROUTINE_MPN_DC_DIVREM_SB(function) \ + SPEED_ROUTINE_MPN_DC_DIVREM_CALL \ + ((*function) (q, a, 2*s->size, d, s->size)) -#define SPEED_ROUTINE_MPZ_MOD(function) \ - { \ - unsigned i; \ - mpz_t a, d, r; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - mpz_init_set_n (d, s->yp, s->size); \ - \ - /* high part less than d, low part a duplicate copied in */ \ - mpz_init_set_n (a, s->xp, s->size); \ - mpz_mod (a, a, d); \ - mpz_mul_2exp (a, a, GMP_LIMB_BITS * s->size); \ - MPN_COPY (PTR(a), s->xp, s->size); \ - \ - mpz_init (r); \ - \ - speed_operand_src (s, PTR(a), SIZ(a)); \ - speed_operand_src (s, PTR(d), SIZ(d)); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - function (r, a, d); \ - while (--i != 0); \ - return speed_endtime (); \ - } +#define SPEED_ROUTINE_MPN_DC_TDIV_QR(function) \ + SPEED_ROUTINE_MPN_DC_DIVREM_CALL \ + ((*function) (q, r, 0, a, 2*s->size, d, s->size)) -#define SPEED_ROUTINE_MPN_PI1_DIV(function, INV, DMIN, QMIN) \ - { \ - unsigned i; \ - mp_ptr dp, tp, ap, qp; \ - gmp_pi1_t inv; \ - double t; \ - mp_size_t size1; \ - TMP_DECL; \ - \ - size1 = (s->r == 0 ? 
2 * s->size : s->r); \ - \ - SPEED_RESTRICT_COND (s->size >= DMIN); \ - SPEED_RESTRICT_COND (size1 - s->size >= QMIN); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (ap, size1, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_wp2); \ - \ - /* we don't fill in dividend completely when size1 > s->size */ \ - MPN_COPY (ap, s->xp, s->size); \ - MPN_COPY (ap + size1 - s->size, s->xp, s->size); \ - \ - MPN_COPY (dp, s->yp, s->size); \ - \ - /* normalize the data */ \ - dp[s->size-1] |= GMP_NUMB_HIGHBIT; \ - ap[size1 - 1] = dp[s->size - 1] - 1; \ - \ - invert_pi1 (inv, dp[s->size-1], dp[s->size-2]); \ - \ - speed_operand_src (s, ap, size1); \ - speed_operand_dst (s, tp, size1); \ - speed_operand_src (s, dp, s->size); \ - speed_operand_dst (s, qp, size1 - s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do { \ - MPN_COPY (tp, ap, size1); \ - function (qp, tp, size1, dp, s->size, INV); \ - } while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } -#define SPEED_ROUTINE_MPN_MU_DIV_Q(function,itchfn) \ - { \ - unsigned i; \ - mp_ptr dp, tp, qp, scratch; \ - double t; \ - mp_size_t itch; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 2); \ - \ - itch = itchfn (2 * s->size, s->size, 0); \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \ - \ - MPN_COPY (tp, s->xp, s->size); \ - MPN_COPY (tp+s->size, s->xp, s->size); \ - \ - /* normalize the data */ \ - dp[s->size-1] |= GMP_NUMB_HIGHBIT; \ - tp[2*s->size-1] = dp[s->size-1] - 1; \ - \ - speed_operand_dst (s, qp, s->size); \ - speed_operand_src (s, tp, 2 * s->size); \ - speed_operand_src (s, dp, s->size); \ - speed_operand_dst (s, scratch, itch); \ - 
speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do { \ - function (qp, tp, 2 * s->size, dp, s->size, scratch); \ - } while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } -#define SPEED_ROUTINE_MPN_MU_DIV_QR(function,itchfn) \ - { \ - unsigned i; \ - mp_ptr dp, tp, qp, rp, scratch; \ - double t; \ - mp_size_t size1, itch; \ - TMP_DECL; \ - \ - size1 = (s->r == 0 ? 2 * s->size : s->r); \ - \ - SPEED_RESTRICT_COND (s->size >= 2); \ - SPEED_RESTRICT_COND (size1 >= s->size); \ - \ - itch = itchfn (size1, s->size, 0); \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \ - SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \ - \ - /* we don't fill in dividend completely when size1 > s->size */ \ - MPN_COPY (tp, s->xp, s->size); \ - MPN_COPY (tp + size1 - s->size, s->xp, s->size); \ - \ - MPN_COPY (dp, s->yp, s->size); \ - \ - /* normalize the data */ \ - dp[s->size-1] |= GMP_NUMB_HIGHBIT; \ - tp[size1 - 1] = dp[s->size - 1] - 1; \ - \ - speed_operand_dst (s, qp, size1 - s->size); \ - speed_operand_dst (s, rp, s->size); \ - speed_operand_src (s, tp, size1); \ - speed_operand_src (s, dp, s->size); \ - speed_operand_dst (s, scratch, itch); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do { \ - function (qp, rp, tp, size1, dp, s->size, scratch); \ - } while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } -#define SPEED_ROUTINE_MPN_MUPI_DIV_QR(function,itchfn) \ - { \ - unsigned i; \ - mp_ptr dp, tp, qp, rp, ip, scratch, tmp; \ - double t; \ - mp_size_t size1, itch; \ - TMP_DECL; \ - \ - size1 = (s->r == 0 ? 
2 * s->size : s->r); \ - \ - SPEED_RESTRICT_COND (s->size >= 2); \ - SPEED_RESTRICT_COND (size1 >= s->size); \ - \ - itch = itchfn (size1, s->size, s->size); \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (qp, size1 - s->size, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (tp, size1, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \ - SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? */ \ - SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_wp2); /* alignment? */ \ - \ - /* we don't fill in dividend completely when size1 > s->size */ \ - MPN_COPY (tp, s->xp, s->size); \ - MPN_COPY (tp + size1 - s->size, s->xp, s->size); \ - \ - MPN_COPY (dp, s->yp, s->size); \ - \ - /* normalize the data */ \ - dp[s->size-1] |= GMP_NUMB_HIGHBIT; \ - tp[size1 - 1] = dp[s->size-1] - 1; \ - \ - tmp = TMP_ALLOC_LIMBS (mpn_invert_itch (s->size)); \ - mpn_invert (ip, dp, s->size, tmp); \ - \ - speed_operand_dst (s, qp, size1 - s->size); \ - speed_operand_dst (s, rp, s->size); \ - speed_operand_src (s, tp, size1); \ - speed_operand_src (s, dp, s->size); \ - speed_operand_src (s, ip, s->size); \ - speed_operand_dst (s, scratch, itch); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do { \ - function (qp, rp, tp, size1, dp, s->size, ip, s->size, scratch); \ - } while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } -#define SPEED_ROUTINE_MPN_PI1_BDIV_QR(function) \ - { \ - unsigned i; \ - mp_ptr dp, tp, ap, qp; \ - mp_limb_t inv; \ - double t; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size, s->align_wp2); \ - \ - MPN_COPY (ap, s->xp, s->size); \ - MPN_COPY (ap+s->size, s->xp, s->size); \ - \ - /* divisor must be odd */ \ - MPN_COPY (dp, 
s->yp, s->size); \ - dp[0] |= 1; \ - binvert_limb (inv, dp[0]); \ - inv = -inv; \ - \ - speed_operand_src (s, ap, 2*s->size); \ - speed_operand_dst (s, tp, 2*s->size); \ - speed_operand_src (s, dp, s->size); \ - speed_operand_dst (s, qp, s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do { \ - MPN_COPY (tp, ap, 2*s->size); \ - function (qp, tp, 2*s->size, dp, s->size, inv); \ - } while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } -#define SPEED_ROUTINE_MPN_PI1_BDIV_Q(function) \ - { \ - unsigned i; \ - mp_ptr dp, tp, qp; \ - mp_limb_t inv; \ - double t; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (tp, s->size, s->align_wp2); \ - \ - /* divisor must be odd */ \ - MPN_COPY (dp, s->yp, s->size); \ - dp[0] |= 1; \ - binvert_limb (inv, dp[0]); \ - inv = -inv; \ - \ - speed_operand_src (s, s->xp, s->size); \ - speed_operand_dst (s, tp, s->size); \ - speed_operand_src (s, dp, s->size); \ - speed_operand_dst (s, qp, s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do { \ - MPN_COPY (tp, s->xp, s->size); \ - function (qp, tp, s->size, dp, s->size, inv); \ - } while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } -#define SPEED_ROUTINE_MPN_MU_BDIV_Q(function,itchfn) \ - { \ - unsigned i; \ - mp_ptr dp, qp, scratch; \ - double t; \ - mp_size_t itch; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 2); \ - \ - itch = itchfn (s->size, s->size); \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \ - \ - /* divisor must be odd */ \ - MPN_COPY (dp, s->yp, s->size); \ - dp[0] |= 1; \ - \ - speed_operand_dst (s, qp, s->size); \ - speed_operand_src (s, s->xp, 
s->size); \ - speed_operand_src (s, dp, s->size); \ - speed_operand_dst (s, scratch, itch); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do { \ - function (qp, s->xp, s->size, dp, s->size, scratch); \ - } while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } -#define SPEED_ROUTINE_MPN_MU_BDIV_QR(function,itchfn) \ +/* A division of s->size by 3 limbs */ + +#define SPEED_ROUTINE_MPN_SB_DIVREM_M3(function) \ { \ unsigned i; \ - mp_ptr dp, tp, qp, rp, scratch; \ + mp_ptr a, d, q; \ + mp_size_t qsize; \ double t; \ - mp_size_t itch; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 2); \ - \ - itch = itchfn (2 * s->size, s->size); \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (dp, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (qp, s->size, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (scratch, itch, s->align_wp2); \ - SPEED_TMP_ALLOC_LIMBS (rp, s->size, s->align_wp2); /* alignment? 
*/ \ - \ - MPN_COPY (tp, s->xp, s->size); \ - MPN_COPY (tp+s->size, s->xp, s->size); \ - \ - /* divisor must be odd */ \ - MPN_COPY (dp, s->yp, s->size); \ - dp[0] |= 1; \ - \ - speed_operand_dst (s, qp, s->size); \ - speed_operand_dst (s, rp, s->size); \ - speed_operand_src (s, tp, 2 * s->size); \ - speed_operand_src (s, dp, s->size); \ - speed_operand_dst (s, scratch, itch); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do { \ - function (qp, rp, tp, 2 * s->size, dp, s->size, scratch); \ - } while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } - -#define SPEED_ROUTINE_MPN_BROOT(function) \ - { \ - SPEED_RESTRICT_COND (s->r & 1); \ - s->xp[0] |= 1; \ - SPEED_ROUTINE_MPN_UNARY_1_CALL \ - ((*function) (wp, s->xp, s->size, s->r)); \ - } - -#define SPEED_ROUTINE_MPN_BROOTINV(function, itch) \ - { \ - mp_ptr wp, tp; \ - unsigned i; \ - double t; \ - TMP_DECL; \ - TMP_MARK; \ - SPEED_RESTRICT_COND (s->size >= 1); \ - SPEED_RESTRICT_COND (s->r & 1); \ - wp = TMP_ALLOC_LIMBS (s->size); \ - tp = TMP_ALLOC_LIMBS ( (itch)); \ - s->xp[0] |= 1; \ - \ - speed_operand_src (s, s->xp, s->size); \ - speed_operand_dst (s, wp, s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - (*function) (wp, s->xp, s->size, s->r, tp); \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } - -#define SPEED_ROUTINE_MPN_INVERT(function,itchfn) \ - { \ - long i; \ - mp_ptr up, tp, ip; \ - double t; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \ - \ - MPN_COPY (up, s->xp, s->size); \ - \ - /* normalize the data */ \ - up[s->size-1] |= GMP_NUMB_HIGHBIT; \ - \ - speed_operand_src (s, up, s->size); \ - speed_operand_dst (s, tp, s->size); \ - speed_operand_dst (s, ip, s->size); \ - 
speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - function (ip, up, s->size, tp); \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } - -#define SPEED_ROUTINE_MPN_INVERTAPPR(function,itchfn) \ - { \ - long i; \ - mp_ptr up, tp, ip; \ - double t; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \ - \ - MPN_COPY (up, s->xp, s->size); \ - \ - /* normalize the data */ \ - up[s->size-1] |= GMP_NUMB_HIGHBIT; \ - \ - speed_operand_src (s, up, s->size); \ - speed_operand_dst (s, tp, s->size); \ - speed_operand_dst (s, ip, s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - function (ip, up, s->size, tp); \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } - -#define SPEED_ROUTINE_MPN_NI_INVERTAPPR(function,itchfn) \ - { \ - long i; \ - mp_ptr up, tp, ip; \ - double t; \ TMP_DECL; \ \ SPEED_RESTRICT_COND (s->size >= 3); \ \ TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \ + SPEED_TMP_ALLOC_LIMBS (a, s->size, s->align_xp); \ \ - MPN_COPY (up, s->xp, s->size); \ + SPEED_TMP_ALLOC_LIMBS (d, 3, s->align_yp); \ + MPN_COPY (d, s->yp, 3); \ + d[2] |= GMP_NUMB_HIGHBIT; \ \ - /* normalize the data */ \ - up[s->size-1] |= GMP_NUMB_HIGHBIT; \ + qsize = s->size - 3; \ + SPEED_TMP_ALLOC_LIMBS (q, qsize, s->align_wp); \ \ - speed_operand_src (s, up, s->size); \ - speed_operand_dst (s, tp, s->size); \ - speed_operand_dst (s, ip, s->size); \ + speed_operand_dst (s, a, s->size); \ + speed_operand_src (s, d, 3); \ + speed_operand_dst (s, q, qsize); \ speed_cache_fill (s); \ \ speed_starttime (); \ i = s->reps; \ do \ - function (ip, 
up, s->size, tp); \ + { \ + MPN_COPY (a, s->xp, s->size); \ + function (q, a, s->size, d, 3); \ + } \ while (--i != 0); \ t = speed_endtime (); \ \ @@ -2267,83 +1220,43 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); return t; \ } -#define SPEED_ROUTINE_MPN_BINVERT(function,itchfn) \ +/* A remainder 2*s->size by s->size limbs */ + +#define SPEED_ROUTINE_MPZ_MOD(function) \ { \ - long i; \ - mp_ptr up, tp, ip; \ - double t; \ - TMP_DECL; \ + unsigned i; \ + mpz_t a, d, r; \ \ SPEED_RESTRICT_COND (s->size >= 1); \ \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \ + mpz_init_set_n (d, s->yp, s->size); \ \ - MPN_COPY (up, s->xp, s->size); \ + /* high part less than d, low part a duplicate copied in */ \ + mpz_init_set_n (a, s->xp, s->size); \ + mpz_mod (a, a, d); \ + mpz_mul_2exp (a, a, BITS_PER_MP_LIMB * s->size); \ + MPN_COPY (PTR(a), s->xp, s->size); \ \ - /* normalize the data */ \ - up[0] |= 1; \ + mpz_init (r); \ \ - speed_operand_src (s, up, s->size); \ - speed_operand_dst (s, tp, s->size); \ - speed_operand_dst (s, ip, s->size); \ + speed_operand_src (s, PTR(a), SIZ(a)); \ + speed_operand_src (s, PTR(d), SIZ(d)); \ speed_cache_fill (s); \ \ speed_starttime (); \ i = s->reps; \ do \ - function (ip, up, s->size, tp); \ + function (r, a, d); \ while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ + return speed_endtime (); \ } -#define SPEED_ROUTINE_MPN_SEC_INVERT(function,itchfn) \ - { \ - long i; \ - mp_ptr up, mp, tp, ip; \ - double t; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (ip, s->size, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (up, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (tp, itchfn (s->size), s->align_wp); \ - \ - speed_operand_src (s, up, 
s->size); \ - speed_operand_dst (s, tp, s->size); \ - speed_operand_dst (s, ip, s->size); \ - speed_cache_fill (s); \ - \ - MPN_COPY (mp, s->yp, s->size); \ - /* Must be odd */ \ - mp[0] |= 1; \ - speed_starttime (); \ - i = s->reps; \ - do \ - { \ - MPN_COPY (up, s->xp, s->size); \ - function (ip, up, mp, s->size, 2*s->size*GMP_NUMB_BITS, tp); \ - } \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } #define SPEED_ROUTINE_REDC_1(function) \ { \ unsigned i; \ mp_ptr cp, mp, tp, ap; \ - mp_limb_t inv; \ + mp_limb_t Nprim; \ double t; \ TMP_DECL; \ \ @@ -2361,8 +1274,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); /* modulus must be odd */ \ MPN_COPY (mp, s->yp, s->size); \ mp[0] |= 1; \ - binvert_limb (inv, mp[0]); \ - inv = -inv; \ + binvert_limb (Nprim, mp[0]); \ \ speed_operand_src (s, ap, 2*s->size+1); \ speed_operand_dst (s, tp, 2*s->size+1); \ @@ -2374,90 +1286,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); i = s->reps; \ do { \ MPN_COPY (tp, ap, 2*s->size); \ - function (cp, tp, mp, s->size, inv); \ - } while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } -#define SPEED_ROUTINE_REDC_2(function) \ - { \ - unsigned i; \ - mp_ptr cp, mp, tp, ap; \ - mp_limb_t invp[2]; \ - double t; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2); \ - \ - MPN_COPY (ap, s->xp, s->size); \ - MPN_COPY (ap+s->size, s->xp, s->size); \ - \ - /* modulus must be odd */ \ - MPN_COPY (mp, s->yp, s->size); \ - mp[0] |= 1; \ - mpn_binvert (invp, mp, 2, tp); \ - invp[0] = -invp[0]; invp[1] = ~invp[1]; \ - \ - speed_operand_src (s, ap, 2*s->size+1); \ - speed_operand_dst (s, tp, 2*s->size+1); \ - 
speed_operand_src (s, mp, s->size); \ - speed_operand_dst (s, cp, s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do { \ - MPN_COPY (tp, ap, 2*s->size); \ - function (cp, tp, mp, s->size, invp); \ - } while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } -#define SPEED_ROUTINE_REDC_N(function) \ - { \ - unsigned i; \ - mp_ptr cp, mp, tp, ap, invp; \ - double t; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size > 8); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (ap, 2*s->size+1, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (mp, s->size, s->align_yp); \ - SPEED_TMP_ALLOC_LIMBS (cp, s->size, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (tp, 2*s->size+1, s->align_wp2); \ - SPEED_TMP_ALLOC_LIMBS (invp, s->size, s->align_wp2); /* align? */ \ - \ - MPN_COPY (ap, s->xp, s->size); \ - MPN_COPY (ap+s->size, s->xp, s->size); \ - \ - /* modulus must be odd */ \ - MPN_COPY (mp, s->yp, s->size); \ - mp[0] |= 1; \ - mpn_binvert (invp, mp, s->size, tp); \ - \ - speed_operand_src (s, ap, 2*s->size+1); \ - speed_operand_dst (s, tp, 2*s->size+1); \ - speed_operand_src (s, mp, s->size); \ - speed_operand_dst (s, cp, s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do { \ - MPN_COPY (tp, ap, 2*s->size); \ - function (cp, tp, mp, s->size, invp); \ + function (cp, tp, mp, s->size, Nprim); \ } while (--i != 0); \ t = speed_endtime (); \ \ @@ -2805,107 +1634,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); function (px[j-1], py[j-1], 0)) -#define SPEED_ROUTINE_MPN_HGCD_CALL(func, itchfunc) \ - { \ - mp_size_t hgcd_init_itch, hgcd_itch; \ - mp_ptr ap, bp, wp, tmp1; \ - struct hgcd_matrix hgcd; \ - int res; \ - unsigned i; \ - double t; \ - TMP_DECL; \ - \ - if (s->size < 2) \ - return -1; \ - \ - TMP_MARK; \ - \ - SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); \ - \ - s->xp[s->size - 1] |= 1; \ - 
s->yp[s->size - 1] |= 1; \ - \ - hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \ - hgcd_itch = itchfunc (s->size); \ - \ - SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (wp, hgcd_itch, s->align_wp); \ - \ - speed_operand_src (s, s->xp, s->size); \ - speed_operand_src (s, s->yp, s->size); \ - speed_operand_dst (s, ap, s->size + 1); \ - speed_operand_dst (s, bp, s->size + 1); \ - speed_operand_dst (s, wp, hgcd_itch); \ - speed_operand_dst (s, tmp1, hgcd_init_itch); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - { \ - MPN_COPY (ap, s->xp, s->size); \ - MPN_COPY (bp, s->yp, s->size); \ - mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \ - res = func (ap, bp, s->size, &hgcd, wp); \ - } \ - while (--i != 0); \ - t = speed_endtime (); \ - TMP_FREE; \ - return t; \ - } - -#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc) \ - { \ - mp_size_t hgcd_init_itch, hgcd_step_itch; \ - mp_ptr ap, bp, wp, tmp1; \ - struct hgcd_matrix hgcd; \ - mp_size_t p = s->size/2; \ - int res; \ - unsigned i; \ - double t; \ - TMP_DECL; \ - \ - if (s->size < 2) \ - return -1; \ - \ - TMP_MARK; \ - \ - SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); \ - SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); \ - \ - s->xp[s->size - 1] |= 1; \ - s->yp[s->size - 1] |= 1; \ - \ - hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \ - hgcd_step_itch = itchfunc (s->size, p); \ - \ - SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \ - SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp); \ - \ - speed_operand_src (s, s->xp, s->size); \ - speed_operand_src (s, s->yp, s->size); \ - speed_operand_dst (s, ap, s->size + 1); \ - speed_operand_dst (s, bp, s->size + 1); \ - speed_operand_dst (s, wp, hgcd_step_itch); \ - speed_operand_dst (s, tmp1, hgcd_init_itch); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - { \ - MPN_COPY (ap, s->xp, s->size); \ - MPN_COPY (bp, 
s->yp, s->size); \ - mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \ - res = func (&hgcd, ap, bp, s->size, p, wp); \ - } \ - while (--i != 0); \ - t = speed_endtime (); \ - TMP_FREE; \ - return t; \ - } - /* Run some GCDs of s->size limbs each. The number of different data values is decreased as s->size**2, since GCD is a quadratic algorithm. SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT @@ -3169,111 +1897,6 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); return t; \ } -#define SPEED_ROUTINE_MPN_DIV_QR_1(function) \ - { \ - mp_ptr wp, xp; \ - mp_limb_t d; \ - mp_limb_t r; \ - unsigned i; \ - double t; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \ - \ - d = s->r; \ - if (d == 0) \ - d = 1; \ - speed_operand_src (s, s->xp, s->size); \ - speed_operand_dst (s, wp, s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - r = function (wp, wp+s->size-1, s->xp, s->size, d); \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } - -#define SPEED_ROUTINE_MPN_DIV_QR_1N_PI1(function) \ - { \ - mp_ptr wp, xp; \ - mp_limb_t d, dinv; \ - mp_limb_t r; \ - unsigned i; \ - double t; \ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 1); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \ - \ - d = s->r; \ - /* divisor must be normalized */ \ - SPEED_RESTRICT_COND (d & GMP_NUMB_HIGHBIT); \ - invert_limb (dinv, d); \ - speed_operand_src (s, s->xp, s->size); \ - speed_operand_dst (s, wp, s->size); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - r = function (wp, s->xp, s->size, 0, d, dinv); \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } - -#define SPEED_ROUTINE_MPN_DIV_QR_2(function, norm) \ - { \ - mp_ptr wp, xp; \ - mp_limb_t yp[2]; \ - mp_limb_t rp[2]; \ - unsigned i; \ - double t; 
\ - TMP_DECL; \ - \ - SPEED_RESTRICT_COND (s->size >= 2); \ - \ - TMP_MARK; \ - SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \ - \ - /* divisor must be normalized */ \ - MPN_COPY (yp, s->yp_block, 2); \ - if (norm) \ - yp[1] |= GMP_NUMB_HIGHBIT; \ - else \ - { \ - yp[1] &= ~GMP_NUMB_HIGHBIT; \ - if (yp[1] == 0) \ - yp[1] = 1; \ - } \ - speed_operand_src (s, s->xp, s->size); \ - speed_operand_src (s, yp, 2); \ - speed_operand_dst (s, wp, s->size); \ - speed_operand_dst (s, rp, 2); \ - speed_cache_fill (s); \ - \ - speed_starttime (); \ - i = s->reps; \ - do \ - function (wp, rp, s->xp, s->size, yp); \ - while (--i != 0); \ - t = speed_endtime (); \ - \ - TMP_FREE; \ - return t; \ - } #define SPEED_ROUTINE_MODLIMB_INVERT(function) \ { \ @@ -3402,7 +2025,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); \ speed_operand_src (s, s->xp, s->size); \ speed_operand_dst (s, xp, s->size); \ - speed_operand_dst (s, (mp_ptr) wp, wn/GMP_LIMB_BYTES); \ + speed_operand_dst (s, (mp_ptr) wp, wn/BYTES_PER_MP_LIMB); \ speed_cache_fill (s); \ \ speed_starttime (); \ @@ -3442,7 +2065,8 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); for (i = 0; i < s->size; i++) \ xp[i] = s->xp[i] % base; \ \ - LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base); \ + wn = ((mp_size_t) (s->size / __mp_bases[base].chars_per_bit_exactly)) \ + / BITS_PER_MP_LIMB + 2; \ SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp); \ \ /* use this during development to check wn is big enough */ \ @@ -3450,7 +2074,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); ASSERT_ALWAYS (mpn_set_str (wp, xp, s->size, base) <= wn); \ */ \ \ - speed_operand_src (s, (mp_ptr) xp, s->size/GMP_LIMB_BYTES); \ + speed_operand_src (s, (mp_ptr) xp, s->size/BYTES_PER_MP_LIMB); \ speed_operand_dst (s, wp, wn); \ speed_cache_fill (s); \ \ @@ -3466,7 +2090,7 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); } -/* Run an 
accel gcd find_a() function over various data values. A set of +/* Run an accel gcd find_a() function over various data values. A set of values is used in case some run particularly fast or slow. The size parameter is ignored, the amount of data tested is fixed. */ @@ -3602,6 +2226,9 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); } +#endif + + #define SPEED_ROUTINE_MPN_BACK_TO_BACK(function) \ { \ unsigned i; \ @@ -3641,6 +2268,3 @@ int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int); #define SPEED_ROUTINE_MPN_ZERO(function) \ SPEED_ROUTINE_MPN_ZERO_CALL (function (wp, s->size)) - - -#endif diff --git a/gmp/tune/time.c b/gmp/tune/time.c index 0178b345af..865a92c758 100644 --- a/gmp/tune/time.c +++ b/gmp/tune/time.c @@ -1,32 +1,21 @@ -/* Time routines for speed measurements. +/* Time routines for speed measurments. -Copyright 1999-2004, 2010-2012 Free Software Foundation, Inc. +Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ /* Usage: @@ -266,7 +255,7 @@ static const int use_stck = 1; /* always use when available */ typedef uint64_t stck_t; /* gcc for s390 is quite new, always has uint64_t */ #define STCK(timestamp) \ do { \ - asm ("stck %0" : "=Q" (timestamp)); \ + asm ("stck %0" : "=m" (timestamp)); \ } while (0) #else static const int have_stck = 0; @@ -467,22 +456,9 @@ cycles_works_p (void) if (result != -1) goto done; - /* FIXME: On linux, the cycle counter is not saved and restored over - * context switches, making it almost useless for precise cputime - * measurements. When available, it's better to use clock_gettime, - * which seems to have reasonable accuracy (tested on x86_32, - * linux-2.6.26, glibc-2.7). However, there are also some linux - * systems where clock_gettime is broken in one way or the other, - * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or - * kind-of implemented but broken (needs code to detect that), and - * on those systems a wall-clock cycle counter is the least bad - * fallback. - * - * So we need some code to disable the cycle counter on some but not - * all linux systems. 
*/ #ifdef SIGILL { - RETSIGTYPE (*old_handler) (int); + RETSIGTYPE (*old_handler) __GMP_PROTO ((int)); unsigned cycles[2]; old_handler = signal (SIGILL, cycles_works_handler); @@ -695,8 +671,8 @@ getrusage_backwards_p (void) if (speed_option_verbose) printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n", i, - (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec, - (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec); + prev.ru_utime.tv_sec, prev.ru_utime.tv_usec, + next.ru_utime.tv_sec, next.ru_utime.tv_usec); result = 1; break; } @@ -733,8 +709,6 @@ const int have_cgt_id = 0; # define CGT_ID (ASSERT_FAIL (CGT_ID not determined), -1) #endif -#define CGT_DELAY_COUNT 1000 - int cgt_works_p (void) { @@ -776,44 +750,6 @@ cgt_works_p (void) cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9; printf ("clock_gettime is %s accurate\n", unittime_string (cgt_unittime)); - - if (cgt_unittime < 10e-9) - { - /* Do we believe this? */ - struct timespec start, end; - static volatile int counter; - double duration; - if (clock_gettime (CGT_ID, &start)) - { - if (speed_option_verbose) - printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno)); - result = 0; - return result; - } - /* Loop of at least 1000 memory accesses, ought to take at - least 100 ns*/ - for (counter = 0; counter < CGT_DELAY_COUNT; counter++) - ; - if (clock_gettime (CGT_ID, &end)) - { - if (speed_option_verbose) - printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno)); - result = 0; - return result; - } - duration = (end.tv_sec + end.tv_nsec * 1e-9 - - start.tv_sec - start.tv_nsec * 1e-9); - if (speed_option_verbose) - printf ("delay loop of %d rounds took %s (according to clock_gettime)\n", - CGT_DELAY_COUNT, unittime_string (duration)); - if (duration < 100e-9) - { - if (speed_option_verbose) - printf ("clock_gettime id=%d not believable\n", CGT_ID); - result = 0; - return result; - } - } result = 1; return result; } @@ -843,7 +779,7 @@ int mftb_works_p 
(void) { unsigned a[2]; - RETSIGTYPE (*old_handler) (int); + RETSIGTYPE (*old_handler) __GMP_PROTO ((int)); double cycletime; /* suppress a warning about a[] unused */ @@ -1005,7 +941,7 @@ speed_time_init (void) speed_cycletime_init (); - if (!speed_option_cycles_broken && have_cycles && cycles_works_p ()) + if (have_cycles && cycles_works_p ()) { use_cycles = 1; DEFAULT (speed_cycletime, 1.0); @@ -1136,7 +1072,7 @@ speed_time_init (void) use_cgt = 1; speed_unittime = cgt_unittime; DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000)); - strcpy (speed_time_string, "microsecond accurate clock_gettime()"); + strcpy (speed_time_string, "microsecond accurate getrusage()"); } else if (have_times && clk_tck() > 1000000) { @@ -1353,7 +1289,7 @@ speed_mftb_diff (const unsigned end[2], const unsigned start[2]) psecs might overflow. 2^32 microseconds is only a bit over an hour, or 2^32 nanoseconds only about 4 seconds. - The casts to "long" are for the benefit of timebasestruct_t, where the + The casts to "long" are for the beneifit of timebasestruct_t, where the fields are only "unsigned int", but we want a signed difference. */ #define DIFF_SECS_ROUTINE(sec, psec, punit) \ diff --git a/gmp/tune/tune-gcd-p.c b/gmp/tune/tune-gcd-p.c deleted file mode 100644 index 4d52f5610c..0000000000 --- a/gmp/tune/tune-gcd-p.c +++ /dev/null @@ -1,225 +0,0 @@ -/* tune-gcd-p - - Tune the choice for splitting p in divide-and-conquer gcd. - -Copyright 2008, 2010, 2011 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. 
- -or both in parallel, as here. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ - -#define TUNE_GCD_P 1 - -#include "../mpn/gcd.c" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <time.h> - -#include "speed.h" - -/* Search for minimum over a range. FIXME: Implement golden-section / - fibonacci search*/ -static int -search (double *minp, double (*f)(void *, int), void *ctx, int start, int end) -{ - int x[4]; - double y[4]; - - int best_i; - - x[0] = start; - x[3] = end; - - y[0] = f(ctx, x[0]); - y[3] = f(ctx, x[3]); - - for (;;) - { - int i; - int length = x[3] - x[0]; - - x[1] = x[0] + length/3; - x[2] = x[0] + 2*length/3; - - y[1] = f(ctx, x[1]); - y[2] = f(ctx, x[2]); - -#if 0 - printf("%d: %f, %d: %f, %d:, %f %d: %f\n", - x[0], y[0], x[1], y[1], x[2], y[2], x[3], y[3]); -#endif - for (best_i = 0, i = 1; i < 4; i++) - if (y[i] < y[best_i]) - best_i = i; - - if (length <= 4) - break; - - if (best_i >= 2) - { - x[0] = x[1]; - y[0] = y[1]; - } - else - { - x[3] = x[2]; - y[3] = y[2]; - } - } - *minp = y[best_i]; - return x[best_i]; -} - -static int -compare_double(const void *ap, const void *bp) -{ - double a = * (const double *) ap; - double b = * (const double *) bp; - - if (a < b) - return -1; - else if (a > b) - return 1; - else - return 0; -} - -static double -median (double *v, size_t n) -{ - qsort(v, n, sizeof(*v), compare_double); - - return v[n/2]; -} - -#define TIME(res, code) do { \ - double time_measurement[5]; \ - unsigned time_i; \ - \ - for (time_i = 0; time_i < 5; time_i++) \ - { \ - speed_starttime(); \ - code; \ - time_measurement[time_i] = 
speed_endtime(); \ - } \ - res = median(time_measurement, 5); \ -} while (0) - -struct bench_data -{ - mp_size_t n; - mp_ptr ap; - mp_ptr bp; - mp_ptr up; - mp_ptr vp; - mp_ptr gp; -}; - -static double -bench_gcd (void *ctx, int p) -{ - struct bench_data *data = ctx; - double t; - - p_table[data->n] = p; - TIME(t, { - MPN_COPY (data->up, data->ap, data->n); - MPN_COPY (data->vp, data->bp, data->n); - mpn_gcd (data->gp, data->up, data->n, data->vp, data->n); - }); - - return t; -} - -int -main(int argc, char **argv) -{ - gmp_randstate_t rands; struct bench_data data; - mp_size_t n; - - TMP_DECL; - - /* Unbuffered so if output is redirected to a file it isn't lost if the - program is killed part way through. */ - setbuf (stdout, NULL); - setbuf (stderr, NULL); - - gmp_randinit_default (rands); - - TMP_MARK; - - data.ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE); - data.bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE); - data.up = TMP_ALLOC_LIMBS (P_TABLE_SIZE); - data.vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE); - data.gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE); - - mpn_random (data.ap, P_TABLE_SIZE); - mpn_random (data.bp, P_TABLE_SIZE); - - memset (p_table, 0, sizeof(p_table)); - - for (n = 100; n < P_TABLE_SIZE; n++) - { - mp_size_t p; - mp_size_t best_p; - double best_time; - double lehmer_time; - - if (data.ap[n-1] == 0) - data.ap[n-1] = 1; - - if (data.bp[n-1] == 0) - data.bp[n-1] = 1; - - data.n = n; - - lehmer_time = bench_gcd (&data, 0); - - best_p = search (&best_time, bench_gcd, &data, n/5, 4*n/5); - if (best_time > lehmer_time) - best_p = 0; - - printf("%6d %6d %5.3g", n, best_p, (double) best_p / n); - if (best_p > 0) - { - double speedup = 100 * (lehmer_time - best_time) / lehmer_time; - printf(" %5.3g%%", speedup); - if (speedup < 1.0) - { - printf(" (ignored)"); - best_p = 0; - } - } - printf("\n"); - - p_table[n] = best_p; - } - TMP_FREE; - gmp_randclear(rands); - return 0; -} diff --git a/gmp/tune/tuneup.c b/gmp/tune/tuneup.c index 2fba6b2955..06cb03c583 100644 --- a/gmp/tune/tuneup.c 
+++ b/gmp/tune/tuneup.c @@ -1,32 +1,21 @@ /* Create tuned thresholds for various algorithms. -Copyright 1999-2003, 2005, 2006, 2008-2012 Free Software Foundation, Inc. +Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ /* Usage: tuneup [-t] [-t] [-p precision] @@ -75,11 +64,11 @@ see https://www.gnu.org/licenses/. */ instead. #define TUNE_PROGRAM_BUILD does this, with help from code at the end of gmp-impl.h, and rules in tune/Makefile.am. - MUL_TOOM22_THRESHOLD for example uses a recompiled mpn_mul_n. The + MUL_KARATSUBA_THRESHOLD for example uses a recompiled mpn_mul_n. 
The threshold is set to "size+1" to avoid karatsuba, or to "size" to use one level, but recurse into the basecase. - MUL_TOOM33_THRESHOLD makes use of the tuned MUL_TOOM22_THRESHOLD value. + MUL_TOOM3_THRESHOLD makes use of the tuned MUL_KARATSUBA_THRESHOLD value. Other routines in turn will make use of both of those. Naturally the dependants must be tuned first. @@ -98,7 +87,7 @@ see https://www.gnu.org/licenses/. */ DIVREM_1_NORM_THRESHOLD. An assembler mpn_divrem_1 is expected to be written and tuned all by hand. Assembler routines that might have hard limits are recompiled though, to make them accept a bigger range of sizes - than normal, eg. mpn_sqr_basecase to compare against mpn_toom2_sqr. + than normal, eg. mpn_sqr_basecase to compare against mpn_kara_sqr_n. Limitations: @@ -153,81 +142,46 @@ int allocdat = 0; /* This is not defined if mpn_sqr_basecase doesn't declare a limit. In that case use zero here, which for params.max_size means no limit. */ -#ifndef TUNE_SQR_TOOM2_MAX -#define TUNE_SQR_TOOM2_MAX 0 +#ifndef TUNE_SQR_KARATSUBA_MAX +#define TUNE_SQR_KARATSUBA_MAX 0 #endif -mp_size_t mul_toom22_threshold = MP_SIZE_T_MAX; -mp_size_t mul_toom33_threshold = MUL_TOOM33_THRESHOLD_LIMIT; +mp_size_t mul_karatsuba_threshold = MP_SIZE_T_MAX; +mp_size_t mul_toom3_threshold = MUL_TOOM3_THRESHOLD_LIMIT; mp_size_t mul_toom44_threshold = MUL_TOOM44_THRESHOLD_LIMIT; -mp_size_t mul_toom6h_threshold = MUL_TOOM6H_THRESHOLD_LIMIT; -mp_size_t mul_toom8h_threshold = MUL_TOOM8H_THRESHOLD_LIMIT; -mp_size_t mul_toom32_to_toom43_threshold = MP_SIZE_T_MAX; -mp_size_t mul_toom32_to_toom53_threshold = MP_SIZE_T_MAX; -mp_size_t mul_toom42_to_toom53_threshold = MP_SIZE_T_MAX; -mp_size_t mul_toom42_to_toom63_threshold = MP_SIZE_T_MAX; -mp_size_t mul_toom43_to_toom54_threshold = MP_SIZE_T_MAX; mp_size_t mul_fft_threshold = MP_SIZE_T_MAX; mp_size_t mul_fft_modf_threshold = MP_SIZE_T_MAX; mp_size_t sqr_basecase_threshold = MP_SIZE_T_MAX; -mp_size_t sqr_toom2_threshold - = 
(TUNE_SQR_TOOM2_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_TOOM2_MAX); +mp_size_t sqr_karatsuba_threshold + = (TUNE_SQR_KARATSUBA_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_KARATSUBA_MAX); mp_size_t sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT; mp_size_t sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT; -mp_size_t sqr_toom6_threshold = SQR_TOOM6_THRESHOLD_LIMIT; -mp_size_t sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT; mp_size_t sqr_fft_threshold = MP_SIZE_T_MAX; mp_size_t sqr_fft_modf_threshold = MP_SIZE_T_MAX; -mp_size_t mullo_basecase_threshold = MP_SIZE_T_MAX; -mp_size_t mullo_dc_threshold = MP_SIZE_T_MAX; -mp_size_t mullo_mul_n_threshold = MP_SIZE_T_MAX; -mp_size_t mulmid_toom42_threshold = MP_SIZE_T_MAX; -mp_size_t mulmod_bnm1_threshold = MP_SIZE_T_MAX; -mp_size_t sqrmod_bnm1_threshold = MP_SIZE_T_MAX; -mp_size_t div_qr_2_pi2_threshold = MP_SIZE_T_MAX; -mp_size_t dc_div_qr_threshold = MP_SIZE_T_MAX; -mp_size_t dc_divappr_q_threshold = MP_SIZE_T_MAX; -mp_size_t mu_div_qr_threshold = MP_SIZE_T_MAX; -mp_size_t mu_divappr_q_threshold = MP_SIZE_T_MAX; -mp_size_t mupi_div_qr_threshold = MP_SIZE_T_MAX; -mp_size_t mu_div_q_threshold = MP_SIZE_T_MAX; -mp_size_t dc_bdiv_qr_threshold = MP_SIZE_T_MAX; -mp_size_t dc_bdiv_q_threshold = MP_SIZE_T_MAX; -mp_size_t mu_bdiv_qr_threshold = MP_SIZE_T_MAX; -mp_size_t mu_bdiv_q_threshold = MP_SIZE_T_MAX; -mp_size_t inv_mulmod_bnm1_threshold = MP_SIZE_T_MAX; -mp_size_t inv_newton_threshold = MP_SIZE_T_MAX; -mp_size_t inv_appr_threshold = MP_SIZE_T_MAX; -mp_size_t binv_newton_threshold = MP_SIZE_T_MAX; -mp_size_t redc_1_to_redc_2_threshold = MP_SIZE_T_MAX; -mp_size_t redc_1_to_redc_n_threshold = MP_SIZE_T_MAX; -mp_size_t redc_2_to_redc_n_threshold = MP_SIZE_T_MAX; +mp_size_t mullow_basecase_threshold = MP_SIZE_T_MAX; +mp_size_t mullow_dc_threshold = MP_SIZE_T_MAX; +mp_size_t mullow_mul_n_threshold = MP_SIZE_T_MAX; +mp_size_t div_sb_preinv_threshold = MP_SIZE_T_MAX; +mp_size_t div_dc_threshold = MP_SIZE_T_MAX; +mp_size_t powm_threshold = 
MP_SIZE_T_MAX; mp_size_t matrix22_strassen_threshold = MP_SIZE_T_MAX; mp_size_t hgcd_threshold = MP_SIZE_T_MAX; -mp_size_t hgcd_appr_threshold = MP_SIZE_T_MAX; -mp_size_t hgcd_reduce_threshold = MP_SIZE_T_MAX; +mp_size_t gcd_accel_threshold = MP_SIZE_T_MAX; mp_size_t gcd_dc_threshold = MP_SIZE_T_MAX; mp_size_t gcdext_dc_threshold = MP_SIZE_T_MAX; -int div_qr_1n_pi1_method = 0; -mp_size_t div_qr_1_norm_threshold = MP_SIZE_T_MAX; -mp_size_t div_qr_1_unnorm_threshold = MP_SIZE_T_MAX; mp_size_t divrem_1_norm_threshold = MP_SIZE_T_MAX; mp_size_t divrem_1_unnorm_threshold = MP_SIZE_T_MAX; mp_size_t mod_1_norm_threshold = MP_SIZE_T_MAX; mp_size_t mod_1_unnorm_threshold = MP_SIZE_T_MAX; -int mod_1_1p_method = 0; -mp_size_t mod_1n_to_mod_1_1_threshold = MP_SIZE_T_MAX; -mp_size_t mod_1u_to_mod_1_1_threshold = MP_SIZE_T_MAX; -mp_size_t mod_1_1_to_mod_1_2_threshold = MP_SIZE_T_MAX; -mp_size_t mod_1_2_to_mod_1_4_threshold = MP_SIZE_T_MAX; -mp_size_t preinv_mod_1_to_mod_1_threshold = MP_SIZE_T_MAX; +mp_size_t mod_1_1_threshold = MP_SIZE_T_MAX; +mp_size_t mod_1_2_threshold = MP_SIZE_T_MAX; +mp_size_t mod_1_3_threshold = MP_SIZE_T_MAX; +mp_size_t mod_1_4_threshold = MP_SIZE_T_MAX; mp_size_t divrem_2_threshold = MP_SIZE_T_MAX; mp_size_t get_str_dc_threshold = MP_SIZE_T_MAX; mp_size_t get_str_precompute_threshold = MP_SIZE_T_MAX; mp_size_t set_str_dc_threshold = MP_SIZE_T_MAX; mp_size_t set_str_precompute_threshold = MP_SIZE_T_MAX; -mp_size_t fac_odd_threshold = 0; -mp_size_t fac_dsc_threshold = FAC_DSC_THRESHOLD_LIMIT; mp_size_t fft_modf_sqr_threshold = MP_SIZE_T_MAX; mp_size_t fft_modf_mul_threshold = MP_SIZE_T_MAX; @@ -236,8 +190,7 @@ struct param_t { const char *name; speed_function_t function; speed_function_t function2; - double step_factor; /* how much to step relatively */ - int step; /* how much to step absolutely */ + double step_factor; /* how much to step sizes (rounded down) */ double function_fudge; /* multiplier for "function" speeds */ int stop_since_change; double 
stop_factor; @@ -263,9 +216,6 @@ struct param_t { #ifndef HAVE_NATIVE_mpn_divexact_1 #define HAVE_NATIVE_mpn_divexact_1 0 #endif -#ifndef HAVE_NATIVE_mpn_div_qr_1n_pi1 -#define HAVE_NATIVE_mpn_div_qr_1n_pi1 0 -#endif #ifndef HAVE_NATIVE_mpn_divrem_1 #define HAVE_NATIVE_mpn_divrem_1 0 #endif @@ -275,9 +225,6 @@ struct param_t { #ifndef HAVE_NATIVE_mpn_mod_1 #define HAVE_NATIVE_mpn_mod_1 0 #endif -#ifndef HAVE_NATIVE_mpn_mod_1_1p -#define HAVE_NATIVE_mpn_mod_1_1p 0 -#endif #ifndef HAVE_NATIVE_mpn_modexact_1_odd #define HAVE_NATIVE_mpn_modexact_1_odd 0 #endif @@ -383,13 +330,12 @@ analyze_dat (int final) } -/* Measuring for recompiled mpn/generic/div_qr_1.c, - * mpn/generic/divrem_1.c, mpn/generic/mod_1.c and mpz/fac_ui.c */ +/* Measuring for recompiled mpn/generic/divrem_1.c and mpn/generic/mod_1.c */ -mp_limb_t mpn_div_qr_1_tune (mp_ptr, mp_limb_t *, mp_srcptr, mp_size_t, mp_limb_t); -mp_limb_t mpn_divrem_1_tune (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); -mp_limb_t mpn_mod_1_tune (mp_srcptr, mp_size_t, mp_limb_t); -void mpz_fac_ui_tune (mpz_ptr, unsigned long); +mp_limb_t mpn_divrem_1_tune + __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)); +mp_limb_t mpn_mod_1_tune + __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)); double speed_mpn_mod_1_tune (struct speed_params *s) @@ -401,16 +347,7 @@ speed_mpn_divrem_1_tune (struct speed_params *s) { SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_tune); } -double -speed_mpz_fac_ui_tune (struct speed_params *s) -{ - SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui_tune); -} -double -speed_mpn_div_qr_1_tune (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_DIV_QR_1 (mpn_div_qr_1_tune); -} + double tuneup_measure (speed_function_t fun, @@ -453,7 +390,7 @@ tuneup_measure (speed_function_t fun, } -#define PRINT_WIDTH 31 +#define PRINT_WIDTH 28 void print_define_start (const char *name) @@ -477,7 +414,6 @@ print_define_end_remark (const char *name, mp_size_t value, const char *remark) if (remark != NULL) printf (" /* %s */", 
remark); printf ("\n"); - fflush (stdout); } void @@ -519,7 +455,6 @@ one (mp_size_t *threshold, struct param_t *param) DEFAULT (param->function_fudge, 1.0); DEFAULT (param->function2, param->function); DEFAULT (param->step_factor, 0.01); /* small steps by default */ - DEFAULT (param->step, 1); /* small steps by default */ DEFAULT (param->stop_since_change, 80); DEFAULT (param->stop_factor, 1.2); DEFAULT (param->min_size, 10); @@ -575,10 +510,21 @@ one (mp_size_t *threshold, struct param_t *param) for (s.size = param->min_size; s.size < param->max_size; - s.size += MAX ((mp_size_t) floor (s.size * param->step_factor), param->step)) + s.size += MAX ((mp_size_t) floor (s.size * param->step_factor), 1)) { double ti, tiplus1, d; + /* If there's a size limit and it's reached then it should still + be sensible to analyze the data since we want the threshold put + either at or near the limit. */ + if (s.size >= param->max_size) + { + if (option_trace) + printf ("Reached maximum size (%ld) without otherwise stopping\n", + (long) param->max_size); + break; + } + /* FIXME: check minimum size requirements are met, possibly by just checking for the -1 returns from the speed functions. @@ -638,7 +584,7 @@ one (mp_size_t *threshold, struct param_t *param) } /* Stop if the threshold implied hasn't changed in a certain - number of measurements. (It's this condition that usually + number of measurements. (It's this condition that ususally stops the loop.) 
*/ if (thresh_idx != new_thresh_idx) since_thresh_change = 0, thresh_idx = new_thresh_idx; @@ -713,7 +659,6 @@ struct fft_param_t { mp_size_t first_size; mp_size_t max_size; speed_function_t function; - speed_function_t mul_modf_function; speed_function_t mul_function; mp_size_t sqr; }; @@ -728,7 +673,7 @@ fft_step_size (int k) { mp_size_t step; - step = MAX ((mp_size_t) 1 << (k-1), GMP_LIMB_BITS) / GMP_LIMB_BITS; + step = MAX ((mp_size_t) 1 << (k-1), BITS_PER_MP_LIMB) / BITS_PER_MP_LIMB; step *= (mp_size_t) 1 << k; if (step <= 0) @@ -754,435 +699,126 @@ fft_next_size (mp_size_t pl, int k) return pl; } -#define NMAX_DEFAULT 1000000 -#define MAX_REPS 25 -#define MIN_REPS 5 - -static inline size_t -mpn_mul_fft_lcm (size_t a, unsigned int k) -{ - unsigned int l = k; - - while (a % 2 == 0 && k > 0) - { - a >>= 1; - k--; - } - return a << l; -} - -mp_size_t -fftfill (mp_size_t pl, int k, int sqr) -{ - mp_size_t maxLK; - mp_bitcnt_t N, Nprime, nprime, M; - - N = pl * GMP_NUMB_BITS; - M = N >> k; - - maxLK = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k); - - Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK; - nprime = Nprime / GMP_NUMB_BITS; - if (nprime >= (sqr ? 
SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD)) - { - size_t K2; - for (;;) - { - K2 = 1L << mpn_fft_best_k (nprime, sqr); - if ((nprime & (K2 - 1)) == 0) - break; - nprime = (nprime + K2 - 1) & -K2; - Nprime = nprime * GMP_LIMB_BITS; - } - } - ASSERT_ALWAYS (nprime < pl); - - return Nprime; -} - -static int -compare_double (const void *ap, const void *bp) -{ - double a = * (const double *) ap; - double b = * (const double *) bp; - - if (a < b) - return -1; - else if (a > b) - return 1; - else - return 0; -} - -double -median (double *times, int n) -{ - qsort (times, n, sizeof (double), compare_double); - return times[n/2]; -} - -#define FFT_CACHE_SIZE 25 -typedef struct fft_cache -{ - mp_size_t n; - double time; -} fft_cache_t; - -fft_cache_t fft_cache[FFT_CACHE_SIZE]; - -double -cached_measure (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n, int k, - int n_measurements) -{ - int i; - double t, ttab[MAX_REPS]; - - if (fft_cache[k].n == n) - return fft_cache[k].time; - - for (i = 0; i < n_measurements; i++) - { - speed_starttime (); - mpn_mul_fft (rp, n, ap, n, bp, n, k); - ttab[i] = speed_endtime (); - } - - t = median (ttab, n_measurements); - fft_cache[k].n = n; - fft_cache[k].time = t; - return t; -} - -#define INSERT_FFTTAB(idx, nval, kval) \ - do { \ - fft_tab[idx].n = nval; \ - fft_tab[idx].k = kval; \ - fft_tab[idx+1].n = -1; /* sentinel */ \ - fft_tab[idx+1].k = -1; \ - } while (0) - -int -fftmes (mp_size_t nmin, mp_size_t nmax, int initial_k, struct fft_param_t *p, int idx, int print) -{ - mp_size_t n, n1, prev_n1; - int k, best_k, last_best_k, kmax; - int eff, prev_eff; - double t0, t1; - int n_measurements; - mp_limb_t *ap, *bp, *rp; - mp_size_t alloc; - char *linepref; - struct fft_table_nk *fft_tab; - - fft_tab = mpn_fft_table3[p->sqr]; - - for (k = 0; k < FFT_CACHE_SIZE; k++) - fft_cache[k].n = 0; - - if (nmin < (p->sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD)) - { - nmin = (p->sqr ? 
SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD); - } - - if (print) - printf ("#define %s%*s", p->table_name, 38, ""); - - if (idx == 0) - { - INSERT_FFTTAB (0, nmin, initial_k); - - if (print) - { - printf ("\\\n { "); - printf ("{%7u,%2u}", fft_tab[0].n, fft_tab[0].k); - linepref = " "; - } - - idx = 1; - } - - ap = malloc (sizeof (mp_limb_t)); - if (p->sqr) - bp = ap; - else - bp = malloc (sizeof (mp_limb_t)); - rp = malloc (sizeof (mp_limb_t)); - alloc = 1; - - /* Round n to comply to initial k value */ - n = (nmin + ((1ul << initial_k) - 1)) & (MP_SIZE_T_MAX << initial_k); - - n_measurements = (18 - initial_k) | 1; - n_measurements = MAX (n_measurements, MIN_REPS); - n_measurements = MIN (n_measurements, MAX_REPS); - - last_best_k = initial_k; - best_k = initial_k; - - while (n < nmax) - { - int start_k, end_k; - - /* Assume the current best k is best until we hit its next FFT step. */ - t0 = 99999; - - prev_n1 = n + 1; - - start_k = MAX (4, best_k - 4); - end_k = MIN (24, best_k + 4); - for (k = start_k; k <= end_k; k++) - { - n1 = mpn_fft_next_size (prev_n1, k); - - eff = 200 * (n1 * GMP_NUMB_BITS >> k) / fftfill (n1, k, p->sqr); - - if (eff < 70) /* avoid measuring too slow fft:s */ - continue; - - if (n1 > alloc) - { - alloc = n1; - if (p->sqr) - { - ap = realloc (ap, sizeof (mp_limb_t)); - rp = realloc (rp, sizeof (mp_limb_t)); - ap = bp = realloc (ap, alloc * sizeof (mp_limb_t)); - mpn_random (ap, alloc); - rp = realloc (rp, alloc * sizeof (mp_limb_t)); - } - else - { - ap = realloc (ap, sizeof (mp_limb_t)); - bp = realloc (bp, sizeof (mp_limb_t)); - rp = realloc (rp, sizeof (mp_limb_t)); - ap = realloc (ap, alloc * sizeof (mp_limb_t)); - mpn_random (ap, alloc); - bp = realloc (bp, alloc * sizeof (mp_limb_t)); - mpn_random (bp, alloc); - rp = realloc (rp, alloc * sizeof (mp_limb_t)); - } - } - - t1 = cached_measure (rp, ap, bp, n1, k, n_measurements); - - if (t1 * n_measurements > 0.3) - n_measurements -= 2; - n_measurements = MAX (n_measurements, 
MIN_REPS); - - if (t1 < t0) - { - best_k = k; - t0 = t1; - } - } - - n1 = mpn_fft_next_size (prev_n1, best_k); - - if (last_best_k != best_k) - { - ASSERT_ALWAYS ((prev_n1 & ((1ul << last_best_k) - 1)) == 1); - - if (idx >= FFT_TABLE3_SIZE) - { - printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n"); - abort (); - } - INSERT_FFTTAB (idx, prev_n1 >> last_best_k, best_k); - - if (print) - { - printf (", "); - if (idx % 4 == 0) - printf ("\\\n "); - printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k); - } - - if (option_trace >= 2) - { - printf ("{%lu,%u}\n", prev_n1, best_k); - fflush (stdout); - } - - last_best_k = best_k; - idx++; - } - - for (;;) - { - prev_n1 = n1; - prev_eff = fftfill (prev_n1, best_k, p->sqr); - n1 = mpn_fft_next_size (prev_n1 + 1, best_k); - eff = fftfill (n1, best_k, p->sqr); - - if (eff != prev_eff) - break; - } - - n = prev_n1; - } - - kmax = sizeof (mp_size_t) * 4; /* GMP_MP_SIZE_T_BITS / 2 */ - kmax = MIN (kmax, 25-1); - for (k = last_best_k + 1; k <= kmax; k++) - { - if (idx >= FFT_TABLE3_SIZE) - { - printf ("FFT table exhausted, increase FFT_TABLE3_SIZE in gmp-impl.h\n"); - abort (); - } - INSERT_FFTTAB (idx, ((1ul << (2*k-2)) + 1) >> (k-1), k); - - if (print) - { - printf (", "); - if (idx % 4 == 0) - printf ("\\\n "); - printf ("{%7u,%2u}", fft_tab[idx].n, fft_tab[idx].k); - } - - idx++; - } - - if (print) - printf (" }\n"); - - free (ap); - if (! p->sqr) - free (bp); - free (rp); - - return idx; -} - void fft (struct fft_param_t *p) { mp_size_t size; - int k, idx, initial_k; - - /*** Generate MUL_FFT_MODF_THRESHOLD / SQR_FFT_MODF_THRESHOLD ***/ + int i, k; -#if 1 - { - /* Use plain one() mechanism, for some reasonable initial values of k. The - advantage is that we don't depend on mpn_fft_table3, which can therefore - leave it completely uninitialized. 
*/ + for (i = 0; i < numberof (mpn_fft_table[p->sqr]); i++) + mpn_fft_table[p->sqr][i] = MP_SIZE_T_MAX; - static struct param_t param; - mp_size_t thres, best_thres; - int best_k; - char buf[20]; + *p->p_threshold = MP_SIZE_T_MAX; + *p->p_modf_threshold = MP_SIZE_T_MAX; - best_thres = MP_SIZE_T_MAX; - best_k = -1; + option_trace = MAX (option_trace, option_fft_trace); - for (k = 5; k <= 7; k++) - { - param.name = p->modf_threshold_name; - param.min_size = 100; - param.max_size = 2000; - param.function = p->mul_function; - param.step_factor = 0.0; - param.step = 4; - param.function2 = p->mul_modf_function; - param.noprint = 1; - s.r = k; - one (&thres, ¶m); - if (thres < best_thres) - { - best_thres = thres; - best_k = k; - } - } + printf ("#define %s {", p->table_name); + if (option_trace >= 2) + printf ("\n"); - *(p->p_modf_threshold) = best_thres; - sprintf (buf, "k = %d", best_k); - print_define_remark (p->modf_threshold_name, best_thres, buf); - initial_k = best_k; - } -#else + k = FFT_FIRST_K; size = p->first_size; for (;;) { - double tk, tm; + double tk, tk1; - size = mpn_fft_next_size (size+1, mpn_fft_best_k (size+1, p->sqr)); - k = mpn_fft_best_k (size, p->sqr); + size = fft_next_size (size+1, k+1); if (size >= p->max_size) break; + if (k >= FFT_FIRST_K + numberof (mpn_fft_table[p->sqr])) + break; - s.size = size + fft_step_size (k) / 2; + /* compare k to k+1 in the middle of the current k+1 step */ + s.size = size + fft_step_size (k+1) / 2; s.r = k; - tk = tuneup_measure (p->mul_modf_function, NULL, &s); + tk = tuneup_measure (p->function, NULL, &s); if (tk == -1.0) abort (); - tm = tuneup_measure (p->mul_function, NULL, &s); - if (tm == -1.0) + s.r = k+1; + tk1 = tuneup_measure (p->function, NULL, &s); + if (tk1 == -1.0) abort (); if (option_trace >= 2) - printf ("at %ld size=%ld k=%d %.9f size=%ld modf %.9f\n", - (long) size, - (long) size + fft_step_size (k) / 2, k, tk, - (long) s.size, tm); + printf ("at %ld size=%ld k=%d %.9f k=%d %.9f\n", + (long) 
size, (long) s.size, k, tk, k+1, tk1); - if (tk < tm) + /* declare the k+1 threshold as soon as it's faster at its midpoint */ + if (tk1 < tk) { - *p->p_modf_threshold = s.size; - print_define (p->modf_threshold_name, *p->p_modf_threshold); - break; + mpn_fft_table[p->sqr][k-FFT_FIRST_K] = s.size; + printf (" %ld,", (long) s.size); + if (option_trace >= 2) printf ("\n"); + k++; } } - initial_k = ?; -#endif - /*** Generate MUL_FFT_TABLE3 / SQR_FFT_TABLE3 ***/ + mpn_fft_table[p->sqr][k-FFT_FIRST_K] = 0; + printf (" 0 }\n"); - idx = fftmes (*p->p_modf_threshold, p->max_size, initial_k, p, 0, 1); - printf ("#define %s_SIZE %d\n", p->table_name, idx); - /*** Generate MUL_FFT_THRESHOLD / SQR_FFT_THRESHOLD ***/ + size = p->first_size; - size = 2 * *p->p_modf_threshold; /* OK? */ + /* Declare an FFT faster than a plain toom4 etc multiplication found as + soon as one faster measurement obtained. A multiplication in the + middle of the FFT step is tested. */ for (;;) { + int modf = (*p->p_modf_threshold == MP_SIZE_T_MAX); double tk, tm; - mp_size_t mulmod_size, mul_size;; + + /* k=7 should be the first FFT which can beat toom4 on a full + multiply, so jump to that threshold and save some probing after the + modf threshold is found. 
*/ + if (!modf && size < mpn_fft_table[p->sqr][2]) + { + size = mpn_fft_table[p->sqr][2]; + if (option_trace >= 2) + printf ("jump to size=%ld\n", (long) size); + } + + size = fft_next_size (size+1, mpn_fft_best_k (size, p->sqr)); + k = mpn_fft_best_k (size, p->sqr); if (size >= p->max_size) break; - mulmod_size = mpn_mulmod_bnm1_next_size (2 * (size + 1)) / 2; - mul_size = (size + mulmod_size) / 2; /* middle of step */ - - s.size = mulmod_size; + s.size = size + fft_step_size (k) / 2; + s.r = k; tk = tuneup_measure (p->function, NULL, &s); if (tk == -1.0) abort (); - s.size = mul_size; + if (!modf) s.size /= 2; tm = tuneup_measure (p->mul_function, NULL, &s); if (tm == -1.0) abort (); if (option_trace >= 2) - printf ("at %ld size=%ld %.9f size=%ld mul %.9f\n", + printf ("at %ld size=%ld k=%d %.9f size=%ld %s mul %.9f\n", (long) size, - (long) mulmod_size, tk, - (long) mul_size, tm); - - size = mulmod_size; + (long) size + fft_step_size (k) / 2, k, tk, + (long) s.size, modf ? "modf" : "full", tm); if (tk < tm) { - *p->p_threshold = s.size; - print_define (p->threshold_name, *p->p_threshold); - break; + if (modf) + { + *p->p_modf_threshold = s.size; + print_define (p->modf_threshold_name, *p->p_modf_threshold); + } + else + { + *p->p_threshold = s.size; + print_define (p->threshold_name, *p->p_threshold); + break; + } } } + } @@ -1190,232 +826,58 @@ fft (struct fft_param_t *p) /* Start karatsuba from 4, since the Cray t90 ieee code is much faster at 2, giving wrong results. 
*/ void -tune_mul_n (void) +tune_mul (void) { static struct param_t param; - mp_size_t next_toom_start; - int something_changed; param.function = speed_mpn_mul_n; - param.name = "MUL_TOOM22_THRESHOLD"; - param.min_size = MAX (4, MPN_TOOM22_MUL_MINSIZE); - param.max_size = MUL_TOOM22_THRESHOLD_LIMIT-1; - one (&mul_toom22_threshold, ¶m); + param.name = "MUL_KARATSUBA_THRESHOLD"; + param.min_size = MAX (4, MPN_KARA_MUL_N_MINSIZE); + param.max_size = MUL_KARATSUBA_THRESHOLD_LIMIT-1; + one (&mul_karatsuba_threshold, ¶m); - param.noprint = 1; - - /* Threshold sequence loop. Disable functions that would be used in a very - narrow range, re-measuring things when that happens. */ - something_changed = 1; - while (something_changed) - { - something_changed = 0; - - next_toom_start = mul_toom22_threshold; - - if (mul_toom33_threshold != 0) - { - param.name = "MUL_TOOM33_THRESHOLD"; - param.min_size = MAX (next_toom_start, MPN_TOOM33_MUL_MINSIZE); - param.max_size = MUL_TOOM33_THRESHOLD_LIMIT-1; - one (&mul_toom33_threshold, ¶m); - - if (next_toom_start * 1.05 >= mul_toom33_threshold) - { - mul_toom33_threshold = 0; - something_changed = 1; - } - } - - next_toom_start = MAX (next_toom_start, mul_toom33_threshold); - - if (mul_toom44_threshold != 0) - { - param.name = "MUL_TOOM44_THRESHOLD"; - param.min_size = MAX (next_toom_start, MPN_TOOM44_MUL_MINSIZE); - param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1; - one (&mul_toom44_threshold, ¶m); - - if (next_toom_start * 1.05 >= mul_toom44_threshold) - { - mul_toom44_threshold = 0; - something_changed = 1; - } - } - - next_toom_start = MAX (next_toom_start, mul_toom44_threshold); - - if (mul_toom6h_threshold != 0) - { - param.name = "MUL_TOOM6H_THRESHOLD"; - param.min_size = MAX (next_toom_start, MPN_TOOM6H_MUL_MINSIZE); - param.max_size = MUL_TOOM6H_THRESHOLD_LIMIT-1; - one (&mul_toom6h_threshold, ¶m); - - if (next_toom_start * 1.05 >= mul_toom6h_threshold) - { - mul_toom6h_threshold = 0; - something_changed = 1; - } - } - - 
next_toom_start = MAX (next_toom_start, mul_toom6h_threshold); - - if (mul_toom8h_threshold != 0) - { - param.name = "MUL_TOOM8H_THRESHOLD"; - param.min_size = MAX (next_toom_start, MPN_TOOM8H_MUL_MINSIZE); - param.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1; - one (&mul_toom8h_threshold, ¶m); - - if (next_toom_start * 1.05 >= mul_toom8h_threshold) - { - mul_toom8h_threshold = 0; - something_changed = 1; - } - } - } + param.name = "MUL_TOOM3_THRESHOLD"; + param.min_size = MAX (mul_karatsuba_threshold, MPN_TOOM3_MUL_N_MINSIZE); + param.max_size = MUL_TOOM3_THRESHOLD_LIMIT-1; + one (&mul_toom3_threshold, ¶m); - print_define ("MUL_TOOM33_THRESHOLD", MUL_TOOM33_THRESHOLD); - print_define ("MUL_TOOM44_THRESHOLD", MUL_TOOM44_THRESHOLD); - print_define ("MUL_TOOM6H_THRESHOLD", MUL_TOOM6H_THRESHOLD); - print_define ("MUL_TOOM8H_THRESHOLD", MUL_TOOM8H_THRESHOLD); + param.name = "MUL_TOOM44_THRESHOLD"; + param.min_size = MAX (mul_toom3_threshold, MPN_TOOM44_MUL_N_MINSIZE); + param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1; + one (&mul_toom44_threshold, ¶m); /* disabled until tuned */ MUL_FFT_THRESHOLD = MP_SIZE_T_MAX; } -void -tune_mul (void) -{ - static struct param_t param; - mp_size_t thres; - - param.noprint = 1; - - param.function = speed_mpn_toom32_for_toom43_mul; - param.function2 = speed_mpn_toom43_for_toom32_mul; - param.name = "MUL_TOOM32_TO_TOOM43_THRESHOLD"; - param.min_size = MPN_TOOM43_MUL_MINSIZE * 24 / 17; - one (&thres, ¶m); - mul_toom32_to_toom43_threshold = thres * 17 / 24; - print_define ("MUL_TOOM32_TO_TOOM43_THRESHOLD", mul_toom32_to_toom43_threshold); - - param.function = speed_mpn_toom32_for_toom53_mul; - param.function2 = speed_mpn_toom53_for_toom32_mul; - param.name = "MUL_TOOM32_TO_TOOM53_THRESHOLD"; - param.min_size = MPN_TOOM53_MUL_MINSIZE * 30 / 19; - one (&thres, ¶m); - mul_toom32_to_toom53_threshold = thres * 19 / 30; - print_define ("MUL_TOOM32_TO_TOOM53_THRESHOLD", mul_toom32_to_toom53_threshold); - - param.function = 
speed_mpn_toom42_for_toom53_mul; - param.function2 = speed_mpn_toom53_for_toom42_mul; - param.name = "MUL_TOOM42_TO_TOOM53_THRESHOLD"; - param.min_size = MPN_TOOM53_MUL_MINSIZE * 20 / 11; - one (&thres, ¶m); - mul_toom42_to_toom53_threshold = thres * 11 / 20; - print_define ("MUL_TOOM42_TO_TOOM53_THRESHOLD", mul_toom42_to_toom53_threshold); - - param.function = speed_mpn_toom42_mul; - param.function2 = speed_mpn_toom63_mul; - param.name = "MUL_TOOM42_TO_TOOM63_THRESHOLD"; - param.min_size = MPN_TOOM63_MUL_MINSIZE * 2; - one (&thres, ¶m); - mul_toom42_to_toom63_threshold = thres / 2; - print_define ("MUL_TOOM42_TO_TOOM63_THRESHOLD", mul_toom42_to_toom63_threshold); - - /* Use ratio 5/6 when measuring, the middle of the range 2/3 to 1. */ - param.function = speed_mpn_toom43_for_toom54_mul; - param.function2 = speed_mpn_toom54_for_toom43_mul; - param.name = "MUL_TOOM43_TO_TOOM54_THRESHOLD"; - param.min_size = MPN_TOOM54_MUL_MINSIZE * 6 / 5; - one (&thres, ¶m); - mul_toom43_to_toom54_threshold = thres * 5 / 6; - print_define ("MUL_TOOM43_TO_TOOM54_THRESHOLD", mul_toom43_to_toom54_threshold); -} - +/* This was written by the tuneup challenged tege. Kevin, please delete + this comment when you've reviewed/rewritten this. 
:-) */ void -tune_mullo (void) +tune_mullow (void) { static struct param_t param; - param.function = speed_mpn_mullo_n; + param.function = speed_mpn_mullow_n; - param.name = "MULLO_BASECASE_THRESHOLD"; - param.min_size = 1; + param.name = "MULLOW_BASECASE_THRESHOLD"; + param.min_size = 3; param.min_is_always = 1; - param.max_size = MULLO_BASECASE_THRESHOLD_LIMIT-1; - param.stop_factor = 1.5; - param.noprint = 1; - one (&mullo_basecase_threshold, ¶m); - - param.name = "MULLO_DC_THRESHOLD"; - param.min_size = 8; - param.min_is_always = 0; - param.max_size = 1000; - one (&mullo_dc_threshold, ¶m); + param.max_size = MULLOW_BASECASE_THRESHOLD_LIMIT-1; + one (&mullow_basecase_threshold, ¶m); - if (mullo_basecase_threshold >= mullo_dc_threshold) - { - print_define ("MULLO_BASECASE_THRESHOLD", mullo_dc_threshold); - print_define_remark ("MULLO_DC_THRESHOLD", 0, "never mpn_mullo_basecase"); - } - else - { - print_define ("MULLO_BASECASE_THRESHOLD", mullo_basecase_threshold); - print_define ("MULLO_DC_THRESHOLD", mullo_dc_threshold); - } - -#if WANT_FFT - param.name = "MULLO_MUL_N_THRESHOLD"; - param.min_size = mullo_dc_threshold; - param.max_size = 2 * mul_fft_threshold; - param.noprint = 0; - param.step_factor = 0.03; - one (&mullo_mul_n_threshold, ¶m); -#else - print_define_remark ("MULLO_MUL_N_THRESHOLD", MP_SIZE_T_MAX, - "without FFT use mullo forever"); -#endif -} - -void -tune_mulmid (void) -{ - static struct param_t param; + param.min_is_always = 0; /* ??? 
*/ - param.name = "MULMID_TOOM42_THRESHOLD"; - param.function = speed_mpn_mulmid_n; - param.min_size = 4; - param.max_size = 100; - one (&mulmid_toom42_threshold, ¶m); -} - -void -tune_mulmod_bnm1 (void) -{ - static struct param_t param; - - param.name = "MULMOD_BNM1_THRESHOLD"; - param.function = speed_mpn_mulmod_bnm1; - param.min_size = 4; - param.max_size = 100; - one (&mulmod_bnm1_threshold, ¶m); -} - -void -tune_sqrmod_bnm1 (void) -{ - static struct param_t param; + param.name = "MULLOW_DC_THRESHOLD"; + param.min_size = mul_karatsuba_threshold; + param.max_size = 1000; + one (&mullow_dc_threshold, ¶m); - param.name = "SQRMOD_BNM1_THRESHOLD"; - param.function = speed_mpn_sqrmod_bnm1; - param.min_size = 4; - param.max_size = 100; - one (&sqrmod_bnm1_threshold, ¶m); + param.name = "MULLOW_MUL_N_THRESHOLD"; + param.min_size = mullow_dc_threshold; + param.max_size = 2000; + one (&mullow_mul_n_threshold, ¶m); } @@ -1438,345 +900,130 @@ tune_sqr (void) { static struct param_t param; param.name = "SQR_BASECASE_THRESHOLD"; - param.function = speed_mpn_sqr; + param.function = speed_mpn_sqr_n; param.min_size = 3; param.min_is_always = 1; - param.max_size = TUNE_SQR_TOOM2_MAX; + param.max_size = TUNE_SQR_KARATSUBA_MAX; param.noprint = 1; one (&sqr_basecase_threshold, ¶m); } { static struct param_t param; - param.name = "SQR_TOOM2_THRESHOLD"; - param.function = speed_mpn_sqr; - param.min_size = MAX (4, MPN_TOOM2_SQR_MINSIZE); - param.max_size = TUNE_SQR_TOOM2_MAX; + param.name = "SQR_KARATSUBA_THRESHOLD"; + param.function = speed_mpn_sqr_n; + param.min_size = MAX (4, MPN_KARA_SQR_N_MINSIZE); + param.max_size = TUNE_SQR_KARATSUBA_MAX; param.noprint = 1; - one (&sqr_toom2_threshold, ¶m); + one (&sqr_karatsuba_threshold, ¶m); if (! HAVE_NATIVE_mpn_sqr_basecase - && sqr_toom2_threshold < sqr_basecase_threshold) + && sqr_karatsuba_threshold < sqr_basecase_threshold) { /* Karatsuba becomes faster than mul_basecase before sqr_basecase does. 
Arrange for the expression - "BELOW_THRESHOLD (un, SQR_TOOM2_THRESHOLD))" which - selects mpn_sqr_basecase in mpn_sqr to be false, by setting - SQR_TOOM2_THRESHOLD to zero, making - SQR_BASECASE_THRESHOLD the toom2 threshold. */ + "BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD))" which + selects mpn_sqr_basecase in mpn_sqr_n to be false, by setting + SQR_KARATSUBA_THRESHOLD to zero, making + SQR_BASECASE_THRESHOLD the karatsuba threshold. */ - sqr_basecase_threshold = SQR_TOOM2_THRESHOLD; - SQR_TOOM2_THRESHOLD = 0; + sqr_basecase_threshold = SQR_KARATSUBA_THRESHOLD; + SQR_KARATSUBA_THRESHOLD = 0; print_define_remark ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold, - "toom2"); - print_define_remark ("SQR_TOOM2_THRESHOLD",SQR_TOOM2_THRESHOLD, + "karatsuba"); + print_define_remark ("SQR_KARATSUBA_THRESHOLD",SQR_KARATSUBA_THRESHOLD, "never sqr_basecase"); } else { if (! HAVE_NATIVE_mpn_sqr_basecase) print_define ("SQR_BASECASE_THRESHOLD", sqr_basecase_threshold); - print_define ("SQR_TOOM2_THRESHOLD", SQR_TOOM2_THRESHOLD); + print_define ("SQR_KARATSUBA_THRESHOLD", SQR_KARATSUBA_THRESHOLD); } } { static struct param_t param; - mp_size_t next_toom_start; - int something_changed; + mp_size_t toom3_start = MAX (sqr_karatsuba_threshold, sqr_basecase_threshold); - param.function = speed_mpn_sqr; - param.noprint = 1; + param.function = speed_mpn_sqr_n; - /* Threshold sequence loop. Disable functions that would be used in a very - narrow range, re-measuring things when that happens. 
*/ - something_changed = 1; - while (something_changed) - { - something_changed = 0; - - next_toom_start = MAX (sqr_toom2_threshold, sqr_basecase_threshold); - - sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT; - param.name = "SQR_TOOM3_THRESHOLD"; - param.min_size = MAX (next_toom_start, MPN_TOOM3_SQR_MINSIZE); - param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1; - one (&sqr_toom3_threshold, ¶m); - - next_toom_start = MAX (next_toom_start, sqr_toom3_threshold); - - if (sqr_toom4_threshold != 0) - { - param.name = "SQR_TOOM4_THRESHOLD"; - sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT; - param.min_size = MAX (next_toom_start, MPN_TOOM4_SQR_MINSIZE); - param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1; - one (&sqr_toom4_threshold, ¶m); - - if (next_toom_start * 1.05 >= sqr_toom4_threshold) - { - sqr_toom4_threshold = 0; - something_changed = 1; - } - } - - next_toom_start = MAX (next_toom_start, sqr_toom4_threshold); - - if (sqr_toom6_threshold != 0) - { - param.name = "SQR_TOOM6_THRESHOLD"; - sqr_toom6_threshold = SQR_TOOM6_THRESHOLD_LIMIT; - param.min_size = MAX (next_toom_start, MPN_TOOM6_SQR_MINSIZE); - param.max_size = SQR_TOOM6_THRESHOLD_LIMIT-1; - one (&sqr_toom6_threshold, ¶m); - - if (next_toom_start * 1.05 >= sqr_toom6_threshold) - { - sqr_toom6_threshold = 0; - something_changed = 1; - } - } - - next_toom_start = MAX (next_toom_start, sqr_toom6_threshold); - - if (sqr_toom8_threshold != 0) - { - param.name = "SQR_TOOM8_THRESHOLD"; - sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT; - param.min_size = MAX (next_toom_start, MPN_TOOM8_SQR_MINSIZE); - param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1; - one (&sqr_toom8_threshold, ¶m); - - if (next_toom_start * 1.05 >= sqr_toom8_threshold) - { - sqr_toom8_threshold = 0; - something_changed = 1; - } - } - } + param.name = "SQR_TOOM3_THRESHOLD"; + param.min_size = MAX (toom3_start, MPN_TOOM3_SQR_N_MINSIZE); + param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1; + one (&sqr_toom3_threshold, ¶m); - print_define ("SQR_TOOM3_THRESHOLD", 
SQR_TOOM3_THRESHOLD); - print_define ("SQR_TOOM4_THRESHOLD", SQR_TOOM4_THRESHOLD); - print_define ("SQR_TOOM6_THRESHOLD", SQR_TOOM6_THRESHOLD); - print_define ("SQR_TOOM8_THRESHOLD", SQR_TOOM8_THRESHOLD); + param.name = "SQR_TOOM4_THRESHOLD"; + param.min_size = MAX (sqr_toom3_threshold, MPN_TOOM4_SQR_N_MINSIZE); + param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1; + one (&sqr_toom4_threshold, ¶m); } } void -tune_dc_div (void) +tune_sb_preinv (void) { - s.r = 0; /* clear to make speed function do 2n/n */ - { - static struct param_t param; - param.name = "DC_DIV_QR_THRESHOLD"; - param.function = speed_mpn_sbpi1_div_qr; - param.function2 = speed_mpn_dcpi1_div_qr; - param.min_size = 6; - one (&dc_div_qr_threshold, ¶m); - } - { - static struct param_t param; - param.name = "DC_DIVAPPR_Q_THRESHOLD"; - param.function = speed_mpn_sbpi1_divappr_q; - param.function2 = speed_mpn_dcpi1_divappr_q; - param.min_size = 6; - one (&dc_divappr_q_threshold, ¶m); - } -} + static struct param_t param; -static double -speed_mpn_sbordcpi1_div_qr (struct speed_params *s) -{ - if (s->size < DC_DIV_QR_THRESHOLD) - return speed_mpn_sbpi1_div_qr (s); - else - return speed_mpn_dcpi1_div_qr (s); -} + if (GMP_NAIL_BITS != 0) + { + DIV_SB_PREINV_THRESHOLD = MP_SIZE_T_MAX; + print_define_remark ("DIV_SB_PREINV_THRESHOLD", MP_SIZE_T_MAX, + "no preinv with nails"); + return; + } -void -tune_mu_div (void) -{ - s.r = 0; /* clear to make speed function do 2n/n */ - { - static struct param_t param; - param.name = "MU_DIV_QR_THRESHOLD"; - param.function = speed_mpn_dcpi1_div_qr; - param.function2 = speed_mpn_mu_div_qr; - param.min_size = mul_toom22_threshold; - param.max_size = 5000; - param.step_factor = 0.02; - one (&mu_div_qr_threshold, ¶m); - } - { - static struct param_t param; - param.name = "MU_DIVAPPR_Q_THRESHOLD"; - param.function = speed_mpn_dcpi1_divappr_q; - param.function2 = speed_mpn_mu_divappr_q; - param.min_size = mul_toom22_threshold; - param.max_size = 5000; - param.step_factor = 0.02; - one 
(&mu_divappr_q_threshold, ¶m); - } - { - static struct param_t param; - param.name = "MUPI_DIV_QR_THRESHOLD"; - param.function = speed_mpn_sbordcpi1_div_qr; - param.function2 = speed_mpn_mupi_div_qr; - param.min_size = 6; - param.min_is_always = 1; - param.max_size = 1000; - param.step_factor = 0.02; - one (&mupi_div_qr_threshold, ¶m); - } -} + if (UDIV_PREINV_ALWAYS) + { + print_define_remark ("DIV_SB_PREINV_THRESHOLD", 0L, "preinv always"); + return; + } -void -tune_dc_bdiv (void) -{ - s.r = 0; /* clear to make speed function do 2n/n*/ - { - static struct param_t param; - param.name = "DC_BDIV_QR_THRESHOLD"; - param.function = speed_mpn_sbpi1_bdiv_qr; - param.function2 = speed_mpn_dcpi1_bdiv_qr; - param.min_size = 4; - one (&dc_bdiv_qr_threshold, ¶m); - } - { - static struct param_t param; - param.name = "DC_BDIV_Q_THRESHOLD"; - param.function = speed_mpn_sbpi1_bdiv_q; - param.function2 = speed_mpn_dcpi1_bdiv_q; - param.min_size = 4; - one (&dc_bdiv_q_threshold, ¶m); - } + param.check_size = 256; + param.min_size = 3; + param.min_is_always = 1; + param.size_extra = 3; + param.stop_factor = 2.0; + param.name = "DIV_SB_PREINV_THRESHOLD"; + param.function = speed_mpn_sb_divrem_m3; + one (&div_sb_preinv_threshold, ¶m); } -void -tune_mu_bdiv (void) -{ - s.r = 0; /* clear to make speed function do 2n/n*/ - { - static struct param_t param; - param.name = "MU_BDIV_QR_THRESHOLD"; - param.function = speed_mpn_dcpi1_bdiv_qr; - param.function2 = speed_mpn_mu_bdiv_qr; - param.min_size = mul_toom22_threshold; - param.max_size = 5000; - param.step_factor = 0.02; - one (&mu_bdiv_qr_threshold, ¶m); - } - { - static struct param_t param; - param.name = "MU_BDIV_Q_THRESHOLD"; - param.function = speed_mpn_dcpi1_bdiv_q; - param.function2 = speed_mpn_mu_bdiv_q; - param.min_size = mul_toom22_threshold; - param.max_size = 5000; - param.step_factor = 0.02; - one (&mu_bdiv_q_threshold, ¶m); - } -} void -tune_invertappr (void) +tune_dc (void) { static struct param_t param; - - 
param.function = speed_mpn_ni_invertappr; - param.name = "INV_MULMOD_BNM1_THRESHOLD"; - param.min_size = 4; - one (&inv_mulmod_bnm1_threshold, ¶m); - - param.function = speed_mpn_invertappr; - param.name = "INV_NEWTON_THRESHOLD"; - param.min_size = 3; - one (&inv_newton_threshold, ¶m); + param.name = "DIV_DC_THRESHOLD"; + param.function = speed_mpn_dc_tdiv_qr; + param.step_factor = 0.02; + one (&div_dc_threshold, ¶m); } -void -tune_invert (void) -{ - static struct param_t param; - param.function = speed_mpn_invert; - param.name = "INV_APPR_THRESHOLD"; - param.min_size = 3; - one (&inv_appr_threshold, ¶m); -} +/* This is an indirect determination, based on a comparison between redc and + mpz_mod. A fudge factor of 1.04 is applied to redc, to represent + additional overheads it gets in mpz_powm. + + stop_factor is 1.1 to hopefully help cray vector systems, where otherwise + currently it hits the 1000 limb limit with only a factor of about 1.18 + (threshold should be around 650). */ void -tune_binvert (void) +tune_powm (void) { static struct param_t param; - - param.function = speed_mpn_binvert; - param.name = "BINV_NEWTON_THRESHOLD"; - param.min_size = 8; /* pointless with smaller operands */ - one (&binv_newton_threshold, ¶m); + param.name = "POWM_THRESHOLD"; + param.function = speed_mpn_redc_1; + param.function2 = speed_mpz_mod; + param.step_factor = 0.03; + param.stop_factor = 1.1; + param.function_fudge = 1.04; + one (&powm_threshold, ¶m); } -void -tune_redc (void) -{ -#define TUNE_REDC_2_MAX 100 -#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 -#define WANT_REDC_2 1 -#endif - -#if WANT_REDC_2 - { - static struct param_t param; - param.name = "REDC_1_TO_REDC_2_THRESHOLD"; - param.function = speed_mpn_redc_1; - param.function2 = speed_mpn_redc_2; - param.min_size = 1; - param.min_is_always = 1; - param.max_size = TUNE_REDC_2_MAX; - param.noprint = 1; - param.stop_factor = 1.5; - one (&redc_1_to_redc_2_threshold, ¶m); - } - { - static struct param_t param; - 
param.name = "REDC_2_TO_REDC_N_THRESHOLD"; - param.function = speed_mpn_redc_2; - param.function2 = speed_mpn_redc_n; - param.min_size = 16; - param.noprint = 1; - one (&redc_2_to_redc_n_threshold, ¶m); - } - if (redc_1_to_redc_2_threshold >= redc_2_to_redc_n_threshold) - { - redc_2_to_redc_n_threshold = 0; /* disable redc_2 */ - - /* Never use redc2, measure redc_1 -> redc_n cutoff, store result as - REDC_1_TO_REDC_2_THRESHOLD. */ - { - static struct param_t param; - param.name = "REDC_1_TO_REDC_2_THRESHOLD"; - param.function = speed_mpn_redc_1; - param.function2 = speed_mpn_redc_n; - param.min_size = 16; - param.noprint = 1; - one (&redc_1_to_redc_2_threshold, ¶m); - } - } - print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_1_TO_REDC_2_THRESHOLD); - print_define ("REDC_2_TO_REDC_N_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD); -#else - { - static struct param_t param; - param.name = "REDC_1_TO_REDC_N_THRESHOLD"; - param.function = speed_mpn_redc_1; - param.function2 = speed_mpn_redc_n; - param.min_size = 16; - one (&redc_1_to_redc_n_threshold, ¶m); - } -#endif -} void tune_matrix22_mul (void) @@ -1799,30 +1046,17 @@ tune_hgcd (void) one (&hgcd_threshold, ¶m); } +#if 0 void -tune_hgcd_appr (void) -{ - static struct param_t param; - param.name = "HGCD_APPR_THRESHOLD"; - param.function = speed_mpn_hgcd_appr; - /* We seem to get strange results for small sizes */ - param.min_size = 50; - param.stop_since_change = 150; - one (&hgcd_appr_threshold, ¶m); -} - -void -tune_hgcd_reduce (void) +tune_gcd_accel (void) { static struct param_t param; - param.name = "HGCD_REDUCE_THRESHOLD"; - param.function = speed_mpn_hgcd_reduce; - param.min_size = 30; - param.max_size = 7000; - param.step_factor = 0.04; - one (&hgcd_reduce_threshold, ¶m); + param.name = "GCD_ACCEL_THRESHOLD"; + param.function = speed_mpn_gcd; + param.min_size = 1; + one (&gcd_accel_threshold, ¶m); } - +#endif void tune_gcd_dc (void) { @@ -1847,139 +1081,6 @@ tune_gcdext_dc (void) one (&gcdext_dc_threshold, ¶m); } 
-/* In tune_powm_sec we compute the table used by the win_size function. The - cutoff points are in exponent bits, disregarding other operand sizes. It is - not possible to use the one framework since it currently uses a granularity - of full limbs. -*/ - -/* This win_size replaces the variant in the powm code, allowing us to - control k in the k-ary algorithms. */ -int winsize; -int -win_size (mp_bitcnt_t eb) -{ - return winsize; -} - -void -tune_powm_sec (void) -{ - mp_size_t n; - int k, i; - mp_size_t itch; - mp_bitcnt_t nbits, nbits_next, possible_nbits_cutoff; - const int n_max = 3000 / GMP_NUMB_BITS; - const int n_measurements = 5; - mp_ptr rp, bp, ep, mp, tp; - double ttab[n_measurements], tk, tkp1; - TMP_DECL; - TMP_MARK; - - possible_nbits_cutoff = 0; - - k = 1; - - winsize = 10; /* the itch function needs this */ - itch = mpn_sec_powm_itch (n_max, n_max * GMP_NUMB_BITS, n_max); - - rp = TMP_ALLOC_LIMBS (n_max); - bp = TMP_ALLOC_LIMBS (n_max); - ep = TMP_ALLOC_LIMBS (n_max); - mp = TMP_ALLOC_LIMBS (n_max); - tp = TMP_ALLOC_LIMBS (itch); - - mpn_random (bp, n_max); - mpn_random (mp, n_max); - mp[0] |= 1; - -/* How about taking the M operand size into account? - - An operation R=powm(B,E,N) will take time O(log(E)*M(log(N))) (assuming - B = O(M)). - - Using k-ary and no sliding window, the precomputation will need time - O(2^(k-1)*M(log(N))) and the main computation will need O(log(E)*S(N)) + - O(log(E)/k*M(N)), for the squarings, multiplications, respectively. - - An operation R=powm_sec(B,E,N) will take time like powm. - - Using k-ary, the precomputation will need time O(2^k*M(log(N))) and the - main computation will need O(log(E)*S(N)) + O(log(E)/k*M(N)) + - O(log(E)/k*2^k*log(N)), for the squarings, multiplications, and full - table reads, respectively. */ - - printf ("#define POWM_SEC_TABLE "); - - /* For nbits == 1, we should always use k == 1, so no need to tune - that. 
Starting with nbits == 2 also ensure that nbits always is - larger than the windowsize k+1. */ - for (nbits = 2; nbits <= n_max * GMP_NUMB_BITS; ) - { - n = (nbits - 1) / GMP_NUMB_BITS + 1; - - /* Generate E such that sliding-window for k and k+1 works equally - well/poorly (but sliding is not used in powm_sec, of course). */ - for (i = 0; i < n; i++) - ep[i] = ~CNST_LIMB(0); - - winsize = k; - for (i = 0; i < n_measurements; i++) - { - speed_starttime (); - mpn_sec_powm (rp, bp, n, ep, nbits, mp, n, tp); - ttab[i] = speed_endtime (); - } - tk = median (ttab, n_measurements); - - winsize = k + 1; - speed_starttime (); - for (i = 0; i < n_measurements; i++) - { - speed_starttime (); - mpn_sec_powm (rp, bp, n, ep, nbits, mp, n, tp); - ttab[i] = speed_endtime (); - } - tkp1 = median (ttab, n_measurements); -/* - printf ("testing: %ld, %d", nbits, k, ep[n-1]); - printf (" %10.5f %10.5f\n", tk, tkp1); -*/ - if (tkp1 < tk) - { - if (possible_nbits_cutoff) - { - /* Two consecutive sizes indicate k increase, obey. */ - - /* Must always have x[k] >= k */ - ASSERT_ALWAYS (possible_nbits_cutoff >= k); - - if (k > 1) - printf (","); - printf ("%ld", (long) possible_nbits_cutoff); - k++; - possible_nbits_cutoff = 0; - } - else - { - /* One measurement indicate k increase, save nbits for further - consideration. */ - /* The new larger k gets used for sizes > the cutoff - value, hence the cutoff should be one less than the - smallest size where it gives a speedup. */ - possible_nbits_cutoff = nbits - 1; - } - } - else - possible_nbits_cutoff = 0; - - nbits_next = nbits * 65 / 64; - nbits = nbits_next + (nbits_next == nbits); - } - printf ("\n"); - TMP_FREE; -} - /* size_extra==1 reflects the fact that with high<divisor one division is always skipped. 
Forcing high<divisor while testing ensures consistency @@ -1999,7 +1100,7 @@ tune_powm_sec (void) param.stop_factor = 2.0; -double (*tuned_speed_mpn_divrem_1) (struct speed_params *); +double (*tuned_speed_mpn_divrem_1) __GMP_PROTO ((struct speed_params *)); void tune_divrem_1 (void) @@ -2052,59 +1153,15 @@ tune_divrem_1 (void) } } -void -tune_div_qr_1 (void) -{ - static struct param_t param; - double t1, t2; - - if (!HAVE_NATIVE_mpn_div_qr_1n_pi1) - { - static struct param_t param; - double t1, t2; - - s.size = 10; - s.r = randlimb_norm (); - - t1 = tuneup_measure (speed_mpn_div_qr_1n_pi1_1, ¶m, &s); - t2 = tuneup_measure (speed_mpn_div_qr_1n_pi1_2, ¶m, &s); - - if (t1 == -1.0 || t2 == -1.0) - { - printf ("Oops, can't measure all mpn_div_qr_1n_pi1 methods at %ld\n", - (long) s.size); - abort (); - } - div_qr_1n_pi1_method = (t1 < t2) ? 1 : 2; - print_define ("DIV_QR_1N_PI1_METHOD", div_qr_1n_pi1_method); - } - - { - static struct param_t param; - param.name = "DIV_QR_1_NORM_THRESHOLD"; - DIV_1_PARAMS; - param.min_size = 1; - param.min_is_always = 0; - s.r = randlimb_norm (); - param.function = speed_mpn_div_qr_1_tune; - one (&div_qr_1_norm_threshold, ¶m); - } - { - static struct param_t param; - param.name = "DIV_QR_1_UNNORM_THRESHOLD"; - DIV_1_PARAMS; - param.min_size = 1; - param.min_is_always = 0; - s.r = randlimb_half(); - param.function = speed_mpn_div_qr_1_tune; - one (&div_qr_1_unnorm_threshold, ¶m); - } -} +double (*tuned_speed_mpn_mod_1) __GMP_PROTO ((struct speed_params *)); void tune_mod_1 (void) { + /* plain version by default */ + tuned_speed_mpn_mod_1 = speed_mpn_mod_1; + /* No support for tuning native assembler code, do that by hand and put the results in the .asm file, there's no need for such thresholds to appear in gmp-mparam.h. 
*/ @@ -2120,27 +1177,6 @@ tune_mod_1 (void) return; } - if (!HAVE_NATIVE_mpn_mod_1_1p) - { - static struct param_t param; - double t1, t2; - - s.size = 10; - s.r = randlimb_half (); - - t1 = tuneup_measure (speed_mpn_mod_1_1_1, ¶m, &s); - t2 = tuneup_measure (speed_mpn_mod_1_1_2, ¶m, &s); - - if (t1 == -1.0 || t2 == -1.0) - { - printf ("Oops, can't measure all mpn_mod_1_1 methods at %ld\n", - (long) s.size); - abort (); - } - mod_1_1p_method = (t1 < t2) ? 1 : 2; - print_define ("MOD_1_1P_METHOD", mod_1_1p_method); - } - if (UDIV_PREINV_ALWAYS) { print_define ("MOD_1_NORM_THRESHOLD", 0L); @@ -2148,6 +1184,8 @@ tune_mod_1 (void) } else { + tuned_speed_mpn_mod_1 = speed_mpn_mod_1_tune; + { static struct param_t param; param.name = "MOD_1_NORM_THRESHOLD"; @@ -2168,80 +1206,26 @@ tune_mod_1 (void) { static struct param_t param; - param.check_size = 256; - - s.r = randlimb_norm (); + s.r = GMP_NUMB_MASK / 5; param.function = speed_mpn_mod_1_tune; - - param.name = "MOD_1N_TO_MOD_1_1_THRESHOLD"; - param.min_size = 2; - one (&mod_1n_to_mod_1_1_threshold, ¶m); - } - - { - static struct param_t param; - - param.check_size = 256; - s.r = randlimb_half (); - param.noprint = 1; - - param.function = speed_mpn_mod_1_1; - param.function2 = speed_mpn_mod_1_2; - param.min_is_always = 1; - param.name = "MOD_1_1_TO_MOD_1_2_THRESHOLD"; - param.min_size = 2; - one (&mod_1_1_to_mod_1_2_threshold, ¶m); - - param.function = speed_mpn_mod_1_2; - param.function2 = speed_mpn_mod_1_4; - param.min_is_always = 1; - param.name = "MOD_1_2_TO_MOD_1_4_THRESHOLD"; param.min_size = 1; - one (&mod_1_2_to_mod_1_4_threshold, ¶m); - if (mod_1_1_to_mod_1_2_threshold >= mod_1_2_to_mod_1_4_threshold) - { - /* Never use mod_1_2, measure mod_1_1 -> mod_1_4 */ - mod_1_2_to_mod_1_4_threshold = 0; - - param.function = speed_mpn_mod_1_1; - param.function2 = speed_mpn_mod_1_4; - param.min_is_always = 1; - param.name = "MOD_1_1_TO_MOD_1_4_THRESHOLD fake"; - param.min_size = 2; - one (&mod_1_1_to_mod_1_2_threshold, 
¶m); - } + param.name = "MOD_1_1_THRESHOLD"; + one (&mod_1_1_threshold, ¶m); - param.function = speed_mpn_mod_1_tune; - param.function2 = NULL; - param.name = "MOD_1U_TO_MOD_1_1_THRESHOLD"; - param.min_size = 2; - param.min_is_always = 0; - one (&mod_1u_to_mod_1_1_threshold, ¶m); - - if (mod_1u_to_mod_1_1_threshold >= mod_1_1_to_mod_1_2_threshold) - mod_1_1_to_mod_1_2_threshold = 0; - if (mod_1u_to_mod_1_1_threshold >= mod_1_2_to_mod_1_4_threshold) - mod_1_2_to_mod_1_4_threshold = 0; - - print_define_remark ("MOD_1U_TO_MOD_1_1_THRESHOLD", mod_1u_to_mod_1_1_threshold, NULL); - print_define_remark ("MOD_1_1_TO_MOD_1_2_THRESHOLD", mod_1_1_to_mod_1_2_threshold, - mod_1_1_to_mod_1_2_threshold == 0 ? "never mpn_mod_1_1p" : NULL); - print_define_remark ("MOD_1_2_TO_MOD_1_4_THRESHOLD", mod_1_2_to_mod_1_4_threshold, - mod_1_2_to_mod_1_4_threshold == 0 ? "never mpn_mod_1s_2p" : NULL); - } + param.name = "MOD_1_2_THRESHOLD"; + param.min_size = mod_1_1_threshold + 1; + one (&mod_1_2_threshold, ¶m); - { - static struct param_t param; - - param.check_size = 256; +#if 0 + param.name = "MOD_1_3_THRESHOLD"; + param.min_size = mod_1_2_threshold + 1; + one (&mod_1_3_threshold, ¶m); +#endif - param.name = "PREINV_MOD_1_TO_MOD_1_THRESHOLD"; - s.r = randlimb_norm (); - param.function = speed_mpn_preinv_mod_1; - param.function2 = speed_mpn_mod_1_tune; - param.min_size = 1; - one (&preinv_mod_1_to_mod_1_threshold, ¶m); + param.name = "MOD_1_4_THRESHOLD"; + param.min_size = mod_1_2_threshold + 1; + one (&mod_1_4_threshold, ¶m); } } @@ -2318,6 +1302,72 @@ tune_preinv_divrem_1 (void) } +/* A non-zero MOD_1_UNNORM_THRESHOLD (or MOD_1_NORM_THRESHOLD) would imply + that udiv_qrnnd_preinv is worth using, but it seems most straightforward + to compare mpn_preinv_mod_1 and mpn_mod_1_div directly. 
*/ + +void +tune_preinv_mod_1 (void) +{ + static struct param_t param; + speed_function_t mod_1; + const char *mod_1_name; + double t1, t2; + + /* Any native version of mpn_preinv_mod_1 is assumed to exist because it's + faster than mpn_mod_1. */ + if (HAVE_NATIVE_mpn_preinv_mod_1) + { + print_define_remark ("USE_PREINV_MOD_1", 1, "native"); + return; + } + + if (GMP_NAIL_BITS != 0) + { + print_define_remark ("USE_PREINV_MOD_1", 0, "no preinv with nails"); + return; + } + + /* If udiv_qrnnd_preinv is the only division method then of course + mpn_preinv_mod_1 should be used. */ + if (UDIV_PREINV_ALWAYS) + { + print_define_remark ("USE_PREINV_MOD_1", 1, "preinv always"); + return; + } + + /* If we've got an assembler version of mpn_mod_1, then compare against + that, not the mpn_mod_1_div generic C. */ + if (HAVE_NATIVE_mpn_mod_1) + { + mod_1 = speed_mpn_mod_1; + mod_1_name = "mpn_mod_1"; + } + else + { + mod_1 = speed_mpn_mod_1_div; + mod_1_name = "mpn_mod_1_div"; + } + + param.data_high = DATA_HIGH_LT_R; /* let mpn_mod_1 skip one division */ + s.size = 200; /* generous but not too big */ + s.r = randlimb_norm(); /* divisor */ + + t1 = tuneup_measure (speed_mpn_preinv_mod_1, ¶m, &s); + t2 = tuneup_measure (mod_1, ¶m, &s); + if (t1 == -1.0 || t2 == -1.0) + { + printf ("Oops, can't measure mpn_preinv_mod_1 and %s at %ld\n", + mod_1_name, (long) s.size); + abort (); + } + if (option_trace >= 1) + printf ("size=%ld, mpn_preinv_mod_1 %.9f, %s %.9f\n", + (long) s.size, t1, mod_1_name, t2); + + print_define_remark ("USE_PREINV_MOD_1", (mp_size_t) (t1 < t2), NULL); +} + void tune_divrem_2 (void) @@ -2361,16 +1411,6 @@ tune_divrem_2 (void) one (&divrem_2_threshold, ¶m); } -void -tune_div_qr_2 (void) -{ - static struct param_t param; - param.name = "DIV_QR_2_PI2_THRESHOLD"; - param.function = speed_mpn_div_qr_2n; - param.check_size = 500; - param.min_size = 4; - one (&div_qr_2_pi2_threshold, ¶m); -} /* mpn_divexact_1 is vaguely expected to be used on smallish divisors, so 
tune for that. Its speed can differ on odd or even divisor, so take an @@ -2464,22 +1504,22 @@ tune_modexact_1_odd (void) static struct param_t param; mp_size_t thresh_lt, thresh_ge, average; -#if 0 /* Any native mpn_modexact_1_odd is assumed to incorporate all the speed of a full mpn_mod_1. */ if (HAVE_NATIVE_mpn_modexact_1_odd) { - print_define_remark ("BMOD_1_TO_MOD_1_THRESHOLD", MP_SIZE_T_MAX, "always bmod_1"); + print_define_remark ("MODEXACT_1_ODD_THRESHOLD", 0, "always (native)"); return; } -#endif - param.name = "BMOD_1_TO_MOD_1_THRESHOLD"; + ASSERT_ALWAYS (tuned_speed_mpn_mod_1 != NULL); + + param.name = "MODEXACT_1_ODD_THRESHOLD"; param.check_size = 256; param.min_size = 2; param.stop_factor = 1.5; - param.function = speed_mpn_modexact_1c_odd; - param.function2 = speed_mpn_mod_1_tune; + param.function = tuned_speed_mpn_mod_1; + param.function2 = speed_mpn_modexact_1c_odd; param.noprint = 1; s.r = randlimb_half () | 1; @@ -2514,10 +1554,10 @@ void tune_jacobi_base (void) { static struct param_t param; - double t1, t2, t3, t4; + double t1, t2, t3; int method; - s.size = GMP_LIMB_BITS * 3 / 4; + s.size = BITS_PER_MP_LIMB * 3 / 4; t1 = tuneup_measure (speed_mpn_jacobi_base_1, ¶m, &s); if (option_trace >= 1) @@ -2531,25 +1571,19 @@ tune_jacobi_base (void) if (option_trace >= 1) printf ("size=%ld, mpn_jacobi_base_3 %.9f\n", (long) s.size, t3); - t4 = tuneup_measure (speed_mpn_jacobi_base_4, ¶m, &s); - if (option_trace >= 1) - printf ("size=%ld, mpn_jacobi_base_4 %.9f\n", (long) s.size, t4); - - if (t1 == -1.0 || t2 == -1.0 || t3 == -1.0 || t4 == -1.0) + if (t1 == -1.0 || t2 == -1.0 || t3 == -1.0) { printf ("Oops, can't measure all mpn_jacobi_base methods at %ld\n", (long) s.size); abort (); } - if (t1 < t2 && t1 < t3 && t1 < t4) + if (t1 < t2 && t1 < t3) method = 1; - else if (t2 < t3 && t2 < t4) + else if (t2 < t3) method = 2; - else if (t3 < t4) - method = 3; else - method = 4; + method = 3; print_define ("JACOBI_BASE_METHOD", method); } @@ -2607,7 +1641,8 @@ 
speed_mpn_pre_set_str (struct speed_params *s) for (i = 0; i < s->size; i++) str[i] = s->xp[i] % base; - LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base); + wn = ((mp_size_t) (s->size / __mp_bases[base].chars_per_bit_exactly)) + / BITS_PER_MP_LIMB + 2; SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp); /* use this during development to check wn is big enough */ @@ -2615,11 +1650,11 @@ speed_mpn_pre_set_str (struct speed_params *s) ASSERT_ALWAYS (mpn_set_str (wp, str, s->size, base) <= wn); */ - speed_operand_src (s, (mp_ptr) str, s->size/GMP_LIMB_BYTES); + speed_operand_src (s, (mp_ptr) str, s->size/BYTES_PER_MP_LIMB); speed_operand_dst (s, wp, wn); speed_cache_fill (s); - chars_per_limb = mp_bases[base].chars_per_limb; + chars_per_limb = __mp_bases[base].chars_per_limb; un = s->size / chars_per_limb + 1; powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un)); mpn_set_str_compute_powtab (powtab, powtab_mem, un, base); @@ -2641,6 +1676,8 @@ speed_mpn_pre_set_str (struct speed_params *s) void tune_set_str (void) { + static struct param_t param; + s.r = 10; /* decimal */ { static struct param_t param; @@ -2672,15 +1709,14 @@ tune_fft_mul (void) if (option_fft_max_size == 0) return; - param.table_name = "MUL_FFT_TABLE3"; + param.table_name = "MUL_FFT_TABLE"; param.threshold_name = "MUL_FFT_THRESHOLD"; param.p_threshold = &mul_fft_threshold; param.modf_threshold_name = "MUL_FFT_MODF_THRESHOLD"; param.p_modf_threshold = &mul_fft_modf_threshold; - param.first_size = MUL_TOOM33_THRESHOLD / 2; + param.first_size = MUL_TOOM3_THRESHOLD / 2; param.max_size = option_fft_max_size; - param.function = speed_mpn_fft_mul; - param.mul_modf_function = speed_mpn_mul_fft; + param.function = speed_mpn_mul_fft; param.mul_function = speed_mpn_mul_n; param.sqr = 0; fft (¶m); @@ -2695,38 +1731,19 @@ tune_fft_sqr (void) if (option_fft_max_size == 0) return; - param.table_name = "SQR_FFT_TABLE3"; + param.table_name = "SQR_FFT_TABLE"; param.threshold_name = "SQR_FFT_THRESHOLD"; param.p_threshold 
= &sqr_fft_threshold; param.modf_threshold_name = "SQR_FFT_MODF_THRESHOLD"; param.p_modf_threshold = &sqr_fft_modf_threshold; param.first_size = SQR_TOOM3_THRESHOLD / 2; param.max_size = option_fft_max_size; - param.function = speed_mpn_fft_sqr; - param.mul_modf_function = speed_mpn_mul_fft_sqr; - param.mul_function = speed_mpn_sqr; - param.sqr = 1; + param.function = speed_mpn_mul_fft_sqr; + param.mul_function = speed_mpn_sqr_n; + param.sqr = 0; fft (¶m); } -void -tune_fac_ui (void) -{ - static struct param_t param; - - param.function = speed_mpz_fac_ui_tune; - - param.name = "FAC_DSC_THRESHOLD"; - param.min_size = 70; - param.max_size = FAC_DSC_THRESHOLD_LIMIT; - one (&fac_dsc_threshold, ¶m); - - param.name = "FAC_ODD_THRESHOLD"; - param.min_size = 22; - param.stop_factor = 1.7; - param.min_is_always = 1; - one (&fac_odd_threshold, ¶m); -} void all (void) @@ -2794,81 +1811,52 @@ all (void) } printf ("\n"); - tune_divrem_1 (); - tune_mod_1 (); - tune_preinv_divrem_1 (); - tune_div_qr_1 (); -#if 0 - tune_divrem_2 (); -#endif - tune_div_qr_2 (); - tune_divexact_1 (); - tune_modexact_1_odd (); - printf("\n"); - - tune_mul_n (); - printf("\n"); - tune_mul (); printf("\n"); tune_sqr (); printf("\n"); - tune_mulmid (); + tune_mullow (); printf("\n"); - tune_mulmod_bnm1 (); - tune_sqrmod_bnm1 (); - printf("\n"); - - tune_fft_mul (); - printf("\n"); - - tune_fft_sqr (); - printf ("\n"); - - tune_mullo (); - printf("\n"); - - tune_dc_div (); - tune_dc_bdiv (); - - printf("\n"); - tune_invertappr (); - tune_invert (); - printf("\n"); - - tune_binvert (); - tune_redc (); - printf("\n"); - - tune_mu_div (); - tune_mu_bdiv (); - printf("\n"); - - tune_powm_sec (); + tune_sb_preinv (); + tune_dc (); + tune_powm (); printf("\n"); tune_matrix22_mul (); tune_hgcd (); - tune_hgcd_appr (); - tune_hgcd_reduce(); tune_gcd_dc (); tune_gcdext_dc (); +#if 0 + tune_gcd_accel (); +#endif tune_jacobi_base (); printf("\n"); + tune_divrem_1 (); + tune_mod_1 (); + tune_preinv_divrem_1 (); + 
tune_preinv_mod_1 (); + tune_divrem_2 (); + tune_divexact_1 (); + tune_modexact_1_odd (); + printf("\n"); + tune_get_str (); tune_set_str (); printf("\n"); - tune_fac_ui (); + tune_fft_mul (); printf("\n"); + tune_fft_sqr (); + printf ("\n"); + time (&end_time); printf ("/* Tuneup completed successfully, took %ld seconds */\n", - (long) (end_time - start_time)); + end_time - start_time); TMP_FREE; } diff --git a/gmp/tune/x86_64.asm b/gmp/tune/x86_64.asm index b7ec44c544..509909002a 100644 --- a/gmp/tune/x86_64.asm +++ b/gmp/tune/x86_64.asm @@ -1,32 +1,21 @@ dnl x86 pentium time stamp counter access routine. -dnl Copyright 1999, 2000, 2003-2005 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl Copyright 1999, 2000, 2003, 2004, 2005 Free Software Foundation, Inc. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') |