summaryrefslogtreecommitdiff
path: root/gmp/mpn/x86
diff options
context:
space:
mode:
Diffstat (limited to 'gmp/mpn/x86')
-rw-r--r--gmp/mpn/x86/README27
-rw-r--r--gmp/mpn/x86/aors_n.asm50
-rw-r--r--gmp/mpn/x86/aorsmul_1.asm61
-rw-r--r--gmp/mpn/x86/atom/aorrlsh1_n.asm53
-rw-r--r--gmp/mpn/x86/atom/aorrlsh2_n.asm53
-rw-r--r--gmp/mpn/x86/atom/aorrlshC_n.asm156
-rw-r--r--gmp/mpn/x86/atom/aors_n.asm159
-rw-r--r--gmp/mpn/x86/atom/aorslshC_n.asm247
-rw-r--r--gmp/mpn/x86/atom/bdiv_q_1.asm35
-rw-r--r--gmp/mpn/x86/atom/cnd_add_n.asm113
-rw-r--r--gmp/mpn/x86/atom/cnd_sub_n.asm124
-rw-r--r--gmp/mpn/x86/atom/dive_1.asm34
-rw-r--r--gmp/mpn/x86/atom/gmp-mparam.h201
-rw-r--r--gmp/mpn/x86/atom/logops_n.asm151
-rw-r--r--gmp/mpn/x86/atom/lshift.asm218
-rw-r--r--gmp/mpn/x86/atom/lshiftc.asm159
-rw-r--r--gmp/mpn/x86/atom/mmx/copyd.asm34
-rw-r--r--gmp/mpn/x86/atom/mmx/copyi.asm34
-rw-r--r--gmp/mpn/x86/atom/mmx/hamdist.asm34
-rw-r--r--gmp/mpn/x86/atom/mod_34lsub1.asm34
-rw-r--r--gmp/mpn/x86/atom/mode1o.asm34
-rw-r--r--gmp/mpn/x86/atom/rshift.asm152
-rw-r--r--gmp/mpn/x86/atom/sse2/aorsmul_1.asm174
-rw-r--r--gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm34
-rw-r--r--gmp/mpn/x86/atom/sse2/divrem_1.asm34
-rw-r--r--gmp/mpn/x86/atom/sse2/mod_1_1.asm34
-rw-r--r--gmp/mpn/x86/atom/sse2/mod_1_4.asm34
-rw-r--r--gmp/mpn/x86/atom/sse2/mul_1.asm124
-rw-r--r--gmp/mpn/x86/atom/sse2/mul_basecase.asm501
-rw-r--r--gmp/mpn/x86/atom/sse2/popcount.asm35
-rw-r--r--gmp/mpn/x86/atom/sse2/sqr_basecase.asm634
-rw-r--r--gmp/mpn/x86/atom/sublsh1_n.asm34
-rw-r--r--gmp/mpn/x86/atom/sublsh2_n.asm57
-rw-r--r--gmp/mpn/x86/bd1/gmp-mparam.h208
-rw-r--r--gmp/mpn/x86/bd2/gmp-mparam.h209
-rw-r--r--gmp/mpn/x86/bdiv_dbm1c.asm71
-rw-r--r--gmp/mpn/x86/bdiv_q_1.asm208
-rw-r--r--gmp/mpn/x86/bobcat/gmp-mparam.h197
-rw-r--r--gmp/mpn/x86/cnd_aors_n.asm124
-rw-r--r--gmp/mpn/x86/copyd.asm45
-rw-r--r--gmp/mpn/x86/copyi.asm45
-rw-r--r--gmp/mpn/x86/core2/gmp-mparam.h200
-rw-r--r--gmp/mpn/x86/coreihwl/gmp-mparam.h210
-rw-r--r--gmp/mpn/x86/coreinhm/gmp-mparam.h224
-rw-r--r--gmp/mpn/x86/coreisbr/gmp-mparam.h203
-rw-r--r--gmp/mpn/x86/darwin.m482
-rw-r--r--gmp/mpn/x86/dive_1.asm35
-rw-r--r--gmp/mpn/x86/divrem_1.asm36
-rw-r--r--gmp/mpn/x86/divrem_2.asm35
-rw-r--r--gmp/mpn/x86/fat/com.c32
-rw-r--r--gmp/mpn/x86/fat/diveby3.c21
-rw-r--r--gmp/mpn/x86/fat/fat.c228
-rw-r--r--gmp/mpn/x86/fat/fat_entry.asm37
-rw-r--r--gmp/mpn/x86/fat/gcd_1.c25
-rw-r--r--gmp/mpn/x86/fat/gmp-mparam.h42
-rw-r--r--gmp/mpn/x86/fat/lshiftc.c32
-rw-r--r--gmp/mpn/x86/fat/mod_1.c32
-rw-r--r--gmp/mpn/x86/fat/mod_1_1.c36
-rw-r--r--gmp/mpn/x86/fat/mod_1_2.c36
-rw-r--r--gmp/mpn/x86/fat/mod_1_4.c36
-rw-r--r--gmp/mpn/x86/fat/mode1o.c25
-rw-r--r--gmp/mpn/x86/fat/mullo_basecase.c32
-rw-r--r--gmp/mpn/x86/fat/redc_1.c32
-rw-r--r--gmp/mpn/x86/fat/redc_2.c32
-rw-r--r--gmp/mpn/x86/geode/gmp-mparam.h141
-rw-r--r--gmp/mpn/x86/gmp-mparam.h31
-rw-r--r--gmp/mpn/x86/i486/gmp-mparam.h37
-rw-r--r--gmp/mpn/x86/k10/gmp-mparam.h211
-rw-r--r--gmp/mpn/x86/k6/README25
-rw-r--r--gmp/mpn/x86/k6/aors_n.asm35
-rw-r--r--gmp/mpn/x86/k6/aorsmul_1.asm56
-rwxr-xr-xgmp/mpn/x86/k6/cross.pl33
-rw-r--r--gmp/mpn/x86/k6/divrem_1.asm36
-rw-r--r--gmp/mpn/x86/k6/gcd_1.asm35
-rw-r--r--gmp/mpn/x86/k6/gmp-mparam.h202
-rw-r--r--gmp/mpn/x86/k6/k62mmx/copyd.asm33
-rw-r--r--gmp/mpn/x86/k6/k62mmx/lshift.asm33
-rw-r--r--gmp/mpn/x86/k6/k62mmx/rshift.asm33
-rw-r--r--gmp/mpn/x86/k6/mmx/com_n.asm (renamed from gmp/mpn/x86/k6/mmx/com.asm)41
-rw-r--r--gmp/mpn/x86/k6/mmx/dive_1.asm37
-rw-r--r--gmp/mpn/x86/k6/mmx/logops_n.asm35
-rw-r--r--gmp/mpn/x86/k6/mmx/lshift.asm33
-rw-r--r--gmp/mpn/x86/k6/mmx/popham.asm35
-rw-r--r--gmp/mpn/x86/k6/mmx/rshift.asm33
-rw-r--r--gmp/mpn/x86/k6/mod_34lsub1.asm35
-rw-r--r--gmp/mpn/x86/k6/mode1o.asm37
-rw-r--r--gmp/mpn/x86/k6/mul_1.asm47
-rw-r--r--gmp/mpn/x86/k6/mul_basecase.asm35
-rw-r--r--gmp/mpn/x86/k6/pre_mod_1.asm33
-rw-r--r--gmp/mpn/x86/k6/sqr_basecase.asm57
-rw-r--r--gmp/mpn/x86/k7/README25
-rw-r--r--gmp/mpn/x86/k7/addlsh1_n.asm196
-rw-r--r--gmp/mpn/x86/k7/aors_n.asm35
-rw-r--r--gmp/mpn/x86/k7/aorsmul_1.asm50
-rw-r--r--gmp/mpn/x86/k7/bdiv_q_1.asm244
-rw-r--r--gmp/mpn/x86/k7/dive_1.asm35
-rw-r--r--gmp/mpn/x86/k7/gcd_1.asm481
-rw-r--r--gmp/mpn/x86/k7/gmp-mparam.h292
-rw-r--r--gmp/mpn/x86/k7/invert_limb.asm193
-rw-r--r--gmp/mpn/x86/k7/mmx/com_n.asm (renamed from gmp/mpn/x86/k7/mmx/com.asm)39
-rw-r--r--gmp/mpn/x86/k7/mmx/copyd.asm33
-rw-r--r--gmp/mpn/x86/k7/mmx/copyi.asm33
-rw-r--r--gmp/mpn/x86/k7/mmx/divrem_1.asm49
-rw-r--r--gmp/mpn/x86/k7/mmx/lshift.asm35
-rw-r--r--gmp/mpn/x86/k7/mmx/mod_1.asm509
-rw-r--r--gmp/mpn/x86/k7/mmx/popham.asm37
-rw-r--r--gmp/mpn/x86/k7/mmx/rshift.asm35
-rw-r--r--gmp/mpn/x86/k7/mod_1_1.asm221
-rw-r--r--gmp/mpn/x86/k7/mod_1_4.asm260
-rw-r--r--gmp/mpn/x86/k7/mod_34lsub1.asm36
-rw-r--r--gmp/mpn/x86/k7/mode1o.asm37
-rw-r--r--gmp/mpn/x86/k7/mul_1.asm46
-rw-r--r--gmp/mpn/x86/k7/mul_basecase.asm35
-rw-r--r--gmp/mpn/x86/k7/sqr_basecase.asm47
-rw-r--r--gmp/mpn/x86/k7/sublsh1_n.asm173
-rw-r--r--gmp/mpn/x86/k8/gmp-mparam.h198
-rw-r--r--gmp/mpn/x86/lshift.asm48
-rw-r--r--gmp/mpn/x86/mmx/sec_tabselect.asm163
-rw-r--r--gmp/mpn/x86/mod_1.asm163
-rw-r--r--gmp/mpn/x86/mod_34lsub1.asm45
-rw-r--r--gmp/mpn/x86/mul_1.asm56
-rw-r--r--gmp/mpn/x86/mul_basecase.asm46
-rw-r--r--gmp/mpn/x86/nano/gmp-mparam.h162
-rw-r--r--gmp/mpn/x86/p6/README27
-rw-r--r--gmp/mpn/x86/p6/aors_n.asm37
-rw-r--r--gmp/mpn/x86/p6/aorsmul_1.asm53
-rw-r--r--gmp/mpn/x86/p6/bdiv_q_1.asm286
-rw-r--r--gmp/mpn/x86/p6/copyd.asm33
-rw-r--r--gmp/mpn/x86/p6/dive_1.asm37
-rw-r--r--gmp/mpn/x86/p6/gcd_1.asm156
-rw-r--r--gmp/mpn/x86/p6/gmp-mparam.h240
-rw-r--r--gmp/mpn/x86/p6/lshsub_n.asm29
-rw-r--r--gmp/mpn/x86/p6/mmx/divrem_1.asm35
-rw-r--r--gmp/mpn/x86/p6/mmx/gmp-mparam.h255
-rw-r--r--gmp/mpn/x86/p6/mmx/lshift.asm33
-rw-r--r--gmp/mpn/x86/p6/mmx/popham.asm33
-rw-r--r--gmp/mpn/x86/p6/mmx/rshift.asm33
-rw-r--r--gmp/mpn/x86/p6/mod_1.asm472
-rw-r--r--gmp/mpn/x86/p6/mod_34lsub1.asm35
-rw-r--r--gmp/mpn/x86/p6/mode1o.asm39
-rw-r--r--gmp/mpn/x86/p6/mul_basecase.asm35
-rw-r--r--gmp/mpn/x86/p6/p3mmx/popham.asm33
-rw-r--r--gmp/mpn/x86/p6/sqr_basecase.asm45
-rw-r--r--gmp/mpn/x86/p6/sse2/addmul_1.asm33
-rw-r--r--gmp/mpn/x86/p6/sse2/gmp-mparam.h233
-rw-r--r--gmp/mpn/x86/p6/sse2/mod_1_1.asm34
-rw-r--r--gmp/mpn/x86/p6/sse2/mod_1_4.asm34
-rw-r--r--gmp/mpn/x86/p6/sse2/mul_1.asm33
-rw-r--r--gmp/mpn/x86/p6/sse2/mul_basecase.asm33
-rw-r--r--gmp/mpn/x86/p6/sse2/popcount.asm33
-rw-r--r--gmp/mpn/x86/p6/sse2/sqr_basecase.asm33
-rw-r--r--gmp/mpn/x86/p6/sse2/submul_1.asm33
-rw-r--r--gmp/mpn/x86/pentium/README27
-rw-r--r--gmp/mpn/x86/pentium/aors_n.asm40
-rw-r--r--gmp/mpn/x86/pentium/aorsmul_1.asm33
-rw-r--r--gmp/mpn/x86/pentium/bdiv_q_1.asm260
-rw-r--r--gmp/mpn/x86/pentium/com_n.asm (renamed from gmp/mpn/x86/pentium/com.asm)39
-rw-r--r--gmp/mpn/x86/pentium/copyd.asm33
-rw-r--r--gmp/mpn/x86/pentium/copyi.asm33
-rw-r--r--gmp/mpn/x86/pentium/dive_1.asm35
-rw-r--r--gmp/mpn/x86/pentium/gmp-mparam.h38
-rw-r--r--gmp/mpn/x86/pentium/hamdist.asm33
-rw-r--r--gmp/mpn/x86/pentium/logops_n.asm33
-rw-r--r--gmp/mpn/x86/pentium/lshift.asm36
-rw-r--r--gmp/mpn/x86/pentium/mmx/gmp-mparam.h194
-rw-r--r--gmp/mpn/x86/pentium/mmx/hamdist.asm33
-rw-r--r--gmp/mpn/x86/pentium/mmx/lshift.asm35
-rw-r--r--gmp/mpn/x86/pentium/mmx/mul_1.asm35
-rw-r--r--gmp/mpn/x86/pentium/mmx/rshift.asm33
-rw-r--r--gmp/mpn/x86/pentium/mod_1.asm454
-rw-r--r--gmp/mpn/x86/pentium/mod_34lsub1.asm35
-rw-r--r--gmp/mpn/x86/pentium/mode1o.asm37
-rw-r--r--gmp/mpn/x86/pentium/mul_1.asm33
-rw-r--r--gmp/mpn/x86/pentium/mul_2.asm33
-rw-r--r--gmp/mpn/x86/pentium/mul_basecase.asm35
-rw-r--r--gmp/mpn/x86/pentium/popcount.asm33
-rw-r--r--gmp/mpn/x86/pentium/rshift.asm36
-rw-r--r--gmp/mpn/x86/pentium/sqr_basecase.asm35
-rw-r--r--gmp/mpn/x86/pentium4/README25
-rw-r--r--gmp/mpn/x86/pentium4/copyd.asm36
-rw-r--r--gmp/mpn/x86/pentium4/copyi.asm36
-rw-r--r--gmp/mpn/x86/pentium4/mmx/lshift.asm33
-rw-r--r--gmp/mpn/x86/pentium4/mmx/popham.asm35
-rw-r--r--gmp/mpn/x86/pentium4/mmx/rshift.asm33
-rw-r--r--gmp/mpn/x86/pentium4/sse2/add_n.asm79
-rw-r--r--gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm76
-rw-r--r--gmp/mpn/x86/pentium4/sse2/addmul_1.asm64
-rw-r--r--gmp/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm141
-rw-r--r--gmp/mpn/x86/pentium4/sse2/bdiv_q_1.asm233
-rw-r--r--gmp/mpn/x86/pentium4/sse2/cnd_add_n.asm95
-rw-r--r--gmp/mpn/x86/pentium4/sse2/cnd_sub_n.asm114
-rw-r--r--gmp/mpn/x86/pentium4/sse2/dive_1.asm49
-rw-r--r--gmp/mpn/x86/pentium4/sse2/divrem_1.asm36
-rw-r--r--gmp/mpn/x86/pentium4/sse2/gmp-mparam.h252
-rw-r--r--gmp/mpn/x86/pentium4/sse2/mod_1.asm391
-rw-r--r--gmp/mpn/x86/pentium4/sse2/mod_1_1.asm166
-rw-r--r--gmp/mpn/x86/pentium4/sse2/mod_1_4.asm269
-rw-r--r--gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm35
-rw-r--r--gmp/mpn/x86/pentium4/sse2/mode1o.asm49
-rw-r--r--gmp/mpn/x86/pentium4/sse2/mul_1.asm64
-rw-r--r--gmp/mpn/x86/pentium4/sse2/mul_basecase.asm27
-rw-r--r--gmp/mpn/x86/pentium4/sse2/popcount.asm76
-rw-r--r--gmp/mpn/x86/pentium4/sse2/rsh1add_n.asm35
-rw-r--r--gmp/mpn/x86/pentium4/sse2/sqr_basecase.asm29
-rw-r--r--gmp/mpn/x86/pentium4/sse2/sub_n.asm82
-rw-r--r--gmp/mpn/x86/pentium4/sse2/submul_1.asm176
-rw-r--r--gmp/mpn/x86/rshift.asm48
-rw-r--r--gmp/mpn/x86/sec_tabselect.asm115
-rw-r--r--gmp/mpn/x86/sqr_basecase.asm37
-rwxr-xr-xgmp/mpn/x86/t-zdisp.sh33
-rwxr-xr-xgmp/mpn/x86/t-zdisp2.pl41
-rw-r--r--gmp/mpn/x86/udiv.asm33
-rw-r--r--gmp/mpn/x86/umul.asm33
-rw-r--r--gmp/mpn/x86/x86-defs.m487
214 files changed, 4630 insertions, 14803 deletions
diff --git a/gmp/mpn/x86/README b/gmp/mpn/x86/README
index 8d7ac9080d..883db227d2 100644
--- a/gmp/mpn/x86/README
+++ b/gmp/mpn/x86/README
@@ -1,30 +1,19 @@
-Copyright 1999-2002 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
diff --git a/gmp/mpn/x86/aors_n.asm b/gmp/mpn/x86/aors_n.asm
index 5d359f59b6..c8969995c8 100644
--- a/gmp/mpn/x86/aors_n.asm
+++ b/gmp/mpn/x86/aors_n.asm
@@ -1,42 +1,32 @@
dnl x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
-dnl Copyright 1992, 1994-1996, 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C P5 3.375
-C P6 3.125
-C K6 3.5
-C K7 2.25
-C P4 8.75
+C P5: 3.375
+C P6: 3.125
+C K6: 3.5
+C K7: 2.25
+C P4: 8.75
ifdef(`OPERATION_add_n',`
@@ -109,7 +99,7 @@ L(0a): leal (%eax,%eax,8),%eax
C possible to simplify.
pushl %ebp FRAME_pushl()
movl PARAM_CARRY,%ebp
- shrl %ebp C shift bit 0 into carry
+ shrl $1,%ebp C shift bit 0 into carry
popl %ebp FRAME_popl()
jmp *%eax C jump into loop
@@ -158,7 +148,7 @@ L(0b): leal (%eax,%eax,8),%eax
L(oopgo):
pushl %ebp FRAME_pushl()
movl PARAM_CARRY,%ebp
- shrl %ebp C shift bit 0 into carry
+ shrl $1,%ebp C shift bit 0 into carry
popl %ebp FRAME_popl()
ALIGN(16)
diff --git a/gmp/mpn/x86/aorsmul_1.asm b/gmp/mpn/x86/aorsmul_1.asm
index 54a8905441..b4db427657 100644
--- a/gmp/mpn/x86/aorsmul_1.asm
+++ b/gmp/mpn/x86/aorsmul_1.asm
@@ -1,51 +1,40 @@
dnl x86 __gmpn_addmul_1 (for 386 and 486) -- Multiply a limb vector with a
dnl limb and add the result to a second limb vector.
-dnl Copyright 1992, 1994, 1997, 1999-2002, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
+dnl Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002, 2005 Free Software
+dnl Foundation, Inc.
dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C P5 14.75
-C P6 model 0-8,10-12 7.5
-C P6 model 9 (Banias) 6.7
-C P6 model 13 (Dothan) 6.75
-C P4 model 0 (Willamette) 24.0
-C P4 model 1 (?) 24.0
-C P4 model 2 (Northwood) 24.0
+
+C cycles/limb
+C P5: 14.75
+C P6 model 0-8,10-12) 7.5
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan) 6.75
+C P4 model 0 (Willamette) 24.0
+C P4 model 1 (?) 24.0
+C P4 model 2 (Northwood) 24.0
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C Intel Atom
-C AMD K6 12.5
-C AMD K7 5.25
-C AMD K8
-C AMD K10
+C K6: 12.5
+C K7: 5.25
+C K8:
ifdef(`OPERATION_addmul_1',`
diff --git a/gmp/mpn/x86/atom/aorrlsh1_n.asm b/gmp/mpn/x86/atom/aorrlsh1_n.asm
deleted file mode 100644
index cd1a650022..0000000000
--- a/gmp/mpn/x86/atom/aorrlsh1_n.asm
+++ /dev/null
@@ -1,53 +0,0 @@
-dnl Intel Atom mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
-
-dnl Contributed to the GNU project by Marco Bodrato.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 1)
-define(RSH, 31)
-
-ifdef(`OPERATION_addlsh1_n', `
- define(M4_inst, adc)
- define(M4_opp, sub)
- define(M4_function, mpn_addlsh1_n)
- define(M4_function_c, mpn_addlsh1_nc)
-',`ifdef(`OPERATION_rsblsh1_n', `
- define(M4_inst, sbb)
- define(M4_opp, add)
- define(M4_function, mpn_rsblsh1_n)
- define(M4_function_c, mpn_rsblsh1_nc)
-',`m4_error(`Need OPERATION_addlsh1_n or OPERATION_rsblsh1_n
-')')')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
-
-include_mpn(`x86/atom/aorrlshC_n.asm')
diff --git a/gmp/mpn/x86/atom/aorrlsh2_n.asm b/gmp/mpn/x86/atom/aorrlsh2_n.asm
deleted file mode 100644
index 10f4419de9..0000000000
--- a/gmp/mpn/x86/atom/aorrlsh2_n.asm
+++ /dev/null
@@ -1,53 +0,0 @@
-dnl Intel Atom mpn_addlsh2_n/mpn_rsblsh2_n -- rp[] = (vp[] << 2) +- up[]
-
-dnl Contributed to the GNU project by Marco Bodrato.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 2)
-define(RSH, 30)
-
-ifdef(`OPERATION_addlsh2_n', `
- define(M4_inst, adcl)
- define(M4_opp, subl)
- define(M4_function, mpn_addlsh2_n)
- define(M4_function_c, mpn_addlsh2_nc)
-',`ifdef(`OPERATION_rsblsh2_n', `
- define(M4_inst, sbbl)
- define(M4_opp, addl)
- define(M4_function, mpn_rsblsh2_n)
- define(M4_function_c, mpn_rsblsh2_nc)
-',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_rsblsh2_n
-')')')
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc)
-
-include_mpn(`x86/atom/aorrlshC_n.asm')
diff --git a/gmp/mpn/x86/atom/aorrlshC_n.asm b/gmp/mpn/x86/atom/aorrlshC_n.asm
deleted file mode 100644
index 71cfe490d6..0000000000
--- a/gmp/mpn/x86/atom/aorrlshC_n.asm
+++ /dev/null
@@ -1,156 +0,0 @@
-dnl Intel Atom mpn_addlshC_n/mpn_rsblshC_n -- rp[] = (vp[] << C) +- up[]
-
-dnl Contributed to the GNU project by Marco Bodrato.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size);
-C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size, mp_limb_t carry);
-C mp_limb_t mpn_rsblshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size);
-C mp_limb_t mpn_rsblshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size, mp_signed_limb_t carry);
-
-C cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood)
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom 6
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-defframe(PARAM_CORB, 20)
-defframe(PARAM_SIZE, 16)
-defframe(PARAM_DBLD, 12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-dnl re-use parameter space
-define(VAR_COUNT,`PARAM_SIZE')
-define(SAVE_EBP,`PARAM_DBLD')
-define(SAVE_VP,`PARAM_SRC')
-define(SAVE_UP,`PARAM_DST')
-
-define(M, eval(m4_lshift(1,LSH)))
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`vp', `%ebx')
-
-ASM_START()
- TEXT
- ALIGN(8)
-
-PROLOGUE(M4_function_c)
-deflit(`FRAME',0)
- movl PARAM_CORB, %eax
- movl %eax, %edx
- shr $LSH, %edx
- andl $1, %edx
- M4_opp %edx, %eax
- jmp L(start_nc)
-EPILOGUE()
-
-PROLOGUE(M4_function)
-deflit(`FRAME',0)
-
- xor %eax, %eax
- xor %edx, %edx
-L(start_nc):
- push rp FRAME_pushl()
-
- mov PARAM_SIZE, %ecx C size
- mov PARAM_DST, rp
- mov up, SAVE_UP
- incl %ecx C size + 1
- mov PARAM_SRC, up
- mov vp, SAVE_VP
- shr %ecx C (size+1)\2
- mov PARAM_DBLD, vp
- mov %ebp, SAVE_EBP
- mov %ecx, VAR_COUNT
- jnc L(entry) C size odd
-
- shr %edx C size even
- mov (vp), %ecx
- lea 4(vp), vp
- lea (%eax,%ecx,M), %edx
- mov %ecx, %eax
- lea -4(up), up
- lea -4(rp), rp
- jmp L(enteven)
-
- ALIGN(16)
-L(oop):
- lea (%eax,%ecx,M), %ebp
- shr $RSH, %ecx
- mov 4(vp), %eax
- shr %edx
- lea 8(vp), vp
- M4_inst (up), %ebp
- lea (%ecx,%eax,M), %edx
- mov %ebp, (rp)
-L(enteven):
- M4_inst 4(up), %edx
- lea 8(up), up
- mov %edx, 4(rp)
- adc %edx, %edx
- shr $RSH, %eax
- lea 8(rp), rp
-L(entry):
- mov (vp), %ecx
- decl VAR_COUNT
- jnz L(oop)
-
- lea (%eax,%ecx,M), %ebp
- shr $RSH, %ecx
- shr %edx
- mov SAVE_VP, vp
- M4_inst (up), %ebp
- mov %ecx, %eax
- mov SAVE_UP, up
- M4_inst $0, %eax
- mov %ebp, (rp)
- mov SAVE_EBP, %ebp
- pop rp FRAME_popl()
- ret
-EPILOGUE()
-
-ASM_END()
diff --git a/gmp/mpn/x86/atom/aors_n.asm b/gmp/mpn/x86/atom/aors_n.asm
deleted file mode 100644
index 45ec287c3a..0000000000
--- a/gmp/mpn/x86/atom/aors_n.asm
+++ /dev/null
@@ -1,159 +0,0 @@
-dnl Intel Atom mpn_add_n/mpn_sub_n -- rp[] = up[] +- vp[].
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl Contributed to the GNU project by Marco Bodrato.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood)
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom 3
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-ifdef(`OPERATION_add_n', `
- define(M4_inst, adcl)
- define(M4_function_n, mpn_add_n)
- define(M4_function_nc, mpn_add_nc)
- define(M4_description, add)
-',`ifdef(`OPERATION_sub_n', `
- define(M4_inst, sbbl)
- define(M4_function_n, mpn_sub_n)
- define(M4_function_nc, mpn_sub_nc)
- define(M4_description, subtract)
-',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
-')')')
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size);
-C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size, mp_limb_t carry);
-C
-C Calculate src1,size M4_description src2,size, and store the result in
-C dst,size. The return value is the carry bit from the top of the result (1
-C or 0).
-C
-C The _nc version accepts 1 or 0 for an initial carry into the low limb of
-C the calculation. Note values other than 1 or 0 here will lead to garbage
-C results.
-
-defframe(PARAM_CARRY,20)
-defframe(PARAM_SIZE, 16)
-defframe(PARAM_SRC2, 12)
-defframe(PARAM_SRC1, 8)
-defframe(PARAM_DST, 4)
-
-dnl re-use parameter space
-define(SAVE_RP,`PARAM_SIZE')
-define(SAVE_VP,`PARAM_SRC1')
-define(SAVE_UP,`PARAM_DST')
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`vp', `%ebx')
-define(`cy', `%ecx')
-define(`r1', `%ecx')
-define(`r2', `%edx')
-
-ASM_START()
- TEXT
- ALIGN(16)
-deflit(`FRAME',0)
-
-PROLOGUE(M4_function_n)
- xor cy, cy C carry
-L(start):
- mov PARAM_SIZE, %eax C size
- mov rp, SAVE_RP
- mov PARAM_DST, rp
- mov up, SAVE_UP
- mov PARAM_SRC1, up
- shr %eax C size >> 1
- mov vp, SAVE_VP
- mov PARAM_SRC2, vp
- jz L(one) C size == 1
- jc L(three) C size % 2 == 1
-
- shr cy
- mov (up), r2
- lea 4(up), up
- lea 4(vp), vp
- lea -4(rp), rp
- jmp L(entry)
-L(one):
- shr cy
- mov (up), r1
- jmp L(end)
-L(three):
- shr cy
- mov (up), r1
-
- ALIGN(16)
-L(oop):
- M4_inst (vp), r1
- lea 8(up), up
- mov -4(up), r2
- lea 8(vp), vp
- mov r1, (rp)
-L(entry):
- M4_inst -4(vp), r2
- lea 8(rp), rp
- dec %eax
- mov (up), r1
- mov r2, -4(rp)
- jnz L(oop)
-
-L(end): C %eax is zero here
- mov SAVE_UP, up
- M4_inst (vp), r1
- mov SAVE_VP, vp
- mov r1, (rp)
- adc %eax, %eax
- mov SAVE_RP, rp
- ret
-EPILOGUE()
-
-PROLOGUE(M4_function_nc)
- mov PARAM_CARRY, cy C carry
- jmp L(start)
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/aorslshC_n.asm b/gmp/mpn/x86/atom/aorslshC_n.asm
deleted file mode 100644
index 75ace65e51..0000000000
--- a/gmp/mpn/x86/atom/aorslshC_n.asm
+++ /dev/null
@@ -1,247 +0,0 @@
-dnl Intel Atom mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C)
-
-dnl Contributed to the GNU project by Marco Bodrato.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C mp_limb_t mpn_addlshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
-C mp_limb_t mpn_addlshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C mp_limb_t carry);
-C mp_limb_t mpn_sublshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
-C mp_limb_t mpn_sublshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C mp_signed_limb_t borrow);
-
-defframe(PARAM_CORB, 16)
-defframe(PARAM_SIZE, 12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size);
-C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size, mp_limb_t carry);
-C mp_limb_t mpn_sublshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size);
-C mp_limb_t mpn_sublshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size, mp_limb_t borrow);
-
-C if src1 == dst, _ip1 is used
-
-C cycles/limb
-C dst!=src1,src2 dst==src1
-C P5
-C P6 model 0-8,10-12
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood)
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom 7 6
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-defframe(GPARAM_CORB, 20)
-defframe(GPARAM_SIZE, 16)
-defframe(GPARAM_SRC2, 12)
-
-dnl re-use parameter space
-define(SAVE_EBP,`PARAM_SIZE')
-define(SAVE_EBX,`PARAM_SRC')
-define(SAVE_UP,`PARAM_DST')
-
-define(M, eval(m4_lshift(1,LSH)))
-define(`rp', `%edi')
-define(`up', `%esi')
-
-ASM_START()
- TEXT
- ALIGN(8)
-
-PROLOGUE(M4_ip_function_c)
-deflit(`FRAME',0)
- movl PARAM_CORB, %ecx C ecx = carry-or-borrow argument
- movl %ecx, %edx
- shr $LSH, %edx
- andl $1, %edx C edx = bit LSH of the argument
- M4_opp %edx, %ecx C M4_opp is not defined in this file; presumably takes that bit out of ecx - TODO confirm
- jmp L(start_nc) C continue in the shared in-place body
-EPILOGUE()
-
-PROLOGUE(M4_ip_function)
-deflit(`FRAME',0)
-
- xor %ecx, %ecx C no carry-in: nothing is added below the first shifted limb
- xor %edx, %edx C no saved carry flag
-L(start_nc):
- push rp FRAME_pushl()
- mov PARAM_DST, rp
- mov up, SAVE_UP C dst slot already read, reuse it to save up
- mov PARAM_SRC, up
- mov %ebx, SAVE_EBX C src slot already read, reuse it to save ebx
- mov PARAM_SIZE, %ebx C size
-L(inplace): C also entered from L(generic_nc) when dst == src1
- incl %ebx C size + 1
- shr %ebx C (size+1)\2
- mov %ebp, SAVE_EBP
- jnc L(entry) C size odd
-
- add %edx, %edx C size even; CF = top bit of edx. NOTE(review): edx = 1 from the _c entry gives CF = 0 here but CF = 1 at the final shr - confirm intended
- mov %ecx, %ebp
- mov (up), %ecx
- lea -4(rp), rp C bias rp so that 4(rp) at L(enteven) is limb 0
- lea (%ebp,%ecx,M), %eax C eax = limb * M + carry-in, M = 1 << LSH
- lea 4(up), up
- jmp L(enteven)
-
- ALIGN(16)
-L(oop): C two limbs per pass; ecx = bits carried over from the previous limb
- lea (%ecx,%eax,M), %ebp C ebp = limb * M + bits from the previous limb
- shr $RSH, %eax C bits of this limb that move into the next one
- mov 4(up), %ecx
- add %edx, %edx C CF = saved carry (edx is 0 or -1 after the sbb below)
- lea 8(up), up
- M4_inst %ebp, (rp)
- lea (%eax,%ecx,M), %eax
-
-L(enteven):
- M4_inst %eax, 4(rp)
- lea 8(rp), rp
-
- sbb %edx, %edx C save CF as 0 or -1; the shr below clobbers the flags
- shr $RSH, %ecx
-
-L(entry):
- mov (up), %eax
- decl %ebx
- jnz L(oop)
-
- lea (%ecx,%eax,M), %ebp C last limb
- shr $RSH, %eax C bits shifted out of the last limb
- shr %edx C CF = bit 0 of the saved carry
- M4_inst %ebp, (rp)
- mov SAVE_UP, up
- adc $0, %eax C return value = shifted-out bits + final CF
- mov SAVE_EBP, %ebp
- mov SAVE_EBX, %ebx
- pop rp FRAME_popl()
- ret
-EPILOGUE()
-
-PROLOGUE(M4_function_c)
-deflit(`FRAME',0)
- movl GPARAM_CORB, %ecx C ecx = carry-or-borrow argument of the three-operand form
- movl %ecx, %edx
- shr $LSH, %edx
- andl $1, %edx C edx = bit LSH of the argument
- M4_opp %edx, %ecx C M4_opp is not defined in this file; presumably takes that bit out of ecx - TODO confirm
- jmp L(generic_nc) C continue in the shared three-operand body
-EPILOGUE()
-
-PROLOGUE(M4_function)
-deflit(`FRAME',0)
-
- xor %ecx, %ecx C no carry-in: nothing is added below the first shifted limb
- xor %edx, %edx C no saved carry flag
-L(generic_nc):
- push rp FRAME_pushl()
- mov PARAM_DST, rp
- mov up, SAVE_UP C dst slot already read, reuse it to save up
- mov PARAM_SRC, up C offset 8 holds src1 in the three-operand form
- cmp rp, up C dst == src1 ?
- mov %ebx, SAVE_EBX C mov keeps the cmp flags for the jne
- jne L(general)
- mov GPARAM_SIZE, %ebx C size
- mov GPARAM_SRC2, up C the operand to shift becomes the only source
- jmp L(inplace) C dst == src1: reuse the in-place loop
-
-L(general):
- mov GPARAM_SIZE, %eax C size
- mov %ebx, SAVE_EBX C repeats the store already made before the jne
- incl %eax C size + 1
- mov up, %ebx C ebx = src1, the operand used unshifted
- mov GPARAM_SRC2, up C up = src2, the operand that gets shifted
- shr %eax C (size+1)\2
- mov %ebp, SAVE_EBP C offset 12 slot (src2) already read, reuse it to save ebp
- mov %eax, GPARAM_SIZE C loop counter lives in the size slot
- jnc L(entry2) C size odd
-
- add %edx, %edx C size even; CF = top bit of edx
- mov %ecx, %ebp
- mov (up), %ecx
- lea -4(rp), rp C bias rp and ebx so the offsets at L(enteven2) hit limb 0
- lea -4(%ebx), %ebx
- lea (%ebp,%ecx,M), %eax C eax = limb * M + carry-in, M = 1 << LSH
- lea 4(up), up
- jmp L(enteven2)
-
- ALIGN(16)
-L(oop2): C two limbs per pass; ecx = bits carried over from the previous limb
- lea (%ecx,%eax,M), %ebp C ebp = limb * M + bits from the previous limb
- shr $RSH, %eax C bits of this limb that move into the next one
- mov 4(up), %ecx
- add %edx, %edx C CF = saved carry (edx is 0 or -1 after the sbb below)
- lea 8(up), up
- mov (%ebx), %edx C edx is free again: load the unshifted operand
- M4_inst %ebp, %edx
- lea (%eax,%ecx,M), %eax
- mov %edx, (rp)
-L(enteven2):
- mov 4(%ebx), %edx
- lea 8(%ebx), %ebx
- M4_inst %eax, %edx
- mov %edx, 4(rp)
- sbb %edx, %edx C save CF as 0 or -1; the shr below clobbers the flags
- shr $RSH, %ecx
- lea 8(rp), rp
-L(entry2):
- mov (up), %eax
- decl GPARAM_SIZE
- jnz L(oop2)
-
- lea (%ecx,%eax,M), %ebp C last limb
- shr $RSH, %eax C bits shifted out of the last limb
- shr %edx C CF = bit 0 of the saved carry
- mov (%ebx), %edx
- M4_inst %ebp, %edx
- mov %edx, (rp)
- mov SAVE_UP, up
- adc $0, %eax C return value = shifted-out bits + final CF
- mov SAVE_EBP, %ebp
- mov SAVE_EBX, %ebx
- pop rp FRAME_popl()
- ret
-EPILOGUE()
-
-ASM_END()
diff --git a/gmp/mpn/x86/atom/bdiv_q_1.asm b/gmp/mpn/x86/atom/bdiv_q_1.asm
deleted file mode 100644
index 31e908ec44..0000000000
--- a/gmp/mpn/x86/atom/bdiv_q_1.asm
+++ /dev/null
@@ -1,35 +0,0 @@
-dnl Intel Atom mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel
-dnl division by 1-limb divisor, returning quotient only.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
-include_mpn(`x86/pentium/bdiv_q_1.asm')
diff --git a/gmp/mpn/x86/atom/cnd_add_n.asm b/gmp/mpn/x86/atom/cnd_add_n.asm
deleted file mode 100644
index 50bf2ad64b..0000000000
--- a/gmp/mpn/x86/atom/cnd_add_n.asm
+++ /dev/null
@@ -1,113 +0,0 @@
-dnl X86 mpn_cnd_add_n optimised for Intel Atom.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C P5 ?
-C P6 model 0-8,10-12 ?
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) ?
-C P4 model 0-1 (Willamette) ?
-C P4 model 2 (Northwood) ?
-C P4 model 3-4 (Prescott) ?
-C Intel atom 4.67
-C AMD K6 ?
-C AMD K7 ?
-C AMD K8 ?
-
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`vp', `%ebp')
-define(`n', `%ecx')
-define(`cnd', `20(%esp)')
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_cnd_add_n)
- push %edi
- push %esi
- push %ebx
- push %ebp
-
- mov cnd, %eax C make cnd into a mask: load the argument
- mov 24(%esp), rp
- neg %eax C make cnd into a mask: CF = cnd != 0
- mov 28(%esp), up
- sbb %eax, %eax C make cnd into a mask: eax = 0 or all ones
- mov 32(%esp), vp
- mov %eax, cnd C make cnd into a mask: store it over the argument
- mov 36(%esp), n
-
- xor %edx, %edx
-
- shr $1, n C n = size / 2, CF = size & 1
- jnc L(top) C even size; CF = 0 is also the initial carry
-
- mov 0(vp), %eax C odd size: handle one limb up front
- and cnd, %eax C vp limb, or 0 when the mask is 0
- lea 4(vp), vp
- add 0(up), %eax
- lea 4(rp), rp
- lea 4(up), up
- sbb %edx, %edx C edx = -CF; CF itself is unchanged
- mov %eax, -4(rp)
- inc n
- dec n C the inc/dec pair sets ZF from n and leaves CF alone
- je L(end) C size == 1
-
-L(top): sbb %edx, %edx C save the carry as 0 or -1 before and clobbers CF
- mov 0(vp), %eax
- and cnd, %eax
- lea 8(vp), vp
- lea 8(rp), rp
- mov -4(vp), %ebx
- and cnd, %ebx
- add %edx, %edx C restore the carry into CF
- adc 0(up), %eax
- lea 8(up), up
- mov %eax, -8(rp)
- adc -4(up), %ebx
- dec n C dec leaves CF alone
- mov %ebx, -4(rp)
- jne L(top)
-
-L(end): mov $0, %eax C mov rather than xor so that CF survives
- adc %eax, %eax C return the final carry
-
- pop %ebp
- pop %ebx
- pop %esi
- pop %edi
- ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/cnd_sub_n.asm b/gmp/mpn/x86/atom/cnd_sub_n.asm
deleted file mode 100644
index 221bedca37..0000000000
--- a/gmp/mpn/x86/atom/cnd_sub_n.asm
+++ /dev/null
@@ -1,124 +0,0 @@
-dnl X86 mpn_cnd_sub_n optimised for Intel Atom.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C P5 ?
-C P6 model 0-8,10-12 ?
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) ?
-C P4 model 0-1 (Willamette) ?
-C P4 model 2 (Northwood) ?
-C P4 model 3-4 (Prescott) ?
-C Intel atom 5.67
-C AMD K6 ?
-C AMD K7 ?
-C AMD K8 ?
-
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`vp', `%ebp')
-define(`n', `%ecx')
-define(`cnd', `20(%esp)')
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_cnd_sub_n)
- push %edi
- push %esi
- push %ebx
- push %ebp
-
- mov cnd, %eax C make cnd into a mask: load the argument
- mov 24(%esp), rp
- neg %eax C make cnd into a mask: CF = cnd != 0
- mov 28(%esp), up
- sbb %eax, %eax C make cnd into a mask: eax = 0 or all ones
- mov 32(%esp), vp
- mov %eax, cnd C make cnd into a mask: store it over the argument
- mov 36(%esp), n
-
- xor %edx, %edx C no borrow saved yet
-
- inc n
- shr n C n = (size + 1) / 2, CF = (size + 1) & 1
- jnc L(ent) C size odd
-
- mov 0(vp), %eax C even size: handle one limb up front
- and cnd, %eax C vp limb, or 0 when the mask is 0
- lea 4(vp), vp
- mov 0(up), %edx
- sub %eax, %edx
- lea 4(rp), rp
- lea 4(up), up
- mov %edx, -4(rp)
- sbb %edx, %edx C save cy
-
-L(ent): mov 0(vp), %ebx C preload one masked vp limb and one up limb
- and cnd, %ebx
- add %edx, %edx C restore cy
- mov 0(up), %edx
- dec n C dec leaves CF alone
- je L(end) C only the preloaded limb remains
-
-L(top): sbb %ebx, %edx C finish the preloaded limb
- mov 4(vp), %eax
- mov %edx, 0(rp)
- sbb %edx, %edx C save cy
- mov 8(vp), %ebx C masked vp limb for the next pass or for L(end)
- lea 8(up), up
- and cnd, %ebx
- and cnd, %eax
- add %edx, %edx C restore cy
- mov -4(up), %edx
- lea 8(rp), rp
- sbb %eax, %edx
- mov %edx, -4(rp)
- dec n C dec leaves CF alone
- mov 0(up), %edx C up limb for the next pass or for L(end)
- lea 8(vp), vp
- jne L(top)
-
-L(end): sbb %ebx, %edx C last limb
- mov %edx, 0(rp)
-
- mov $0, %eax C mov rather than xor so that CF survives
- adc %eax, %eax C return the final borrow
-
- pop %ebp
- pop %ebx
- pop %esi
- pop %edi
- ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/dive_1.asm b/gmp/mpn/x86/atom/dive_1.asm
deleted file mode 100644
index 71036a15a4..0000000000
--- a/gmp/mpn/x86/atom/dive_1.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl Intel Atom mpn_divexact_1 -- mpn by limb exact division.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_divexact_1)
-include_mpn(`x86/pentium/dive_1.asm')
diff --git a/gmp/mpn/x86/atom/gmp-mparam.h b/gmp/mpn/x86/atom/gmp-mparam.h
deleted file mode 100644
index 45df12806c..0000000000
--- a/gmp/mpn/x86/atom/gmp-mparam.h
+++ /dev/null
@@ -1,201 +0,0 @@
-/* Intel Atom/32 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 1667 MHz Pineview (Atom D510) */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-14, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD 3
-#define MOD_1_UNNORM_THRESHOLD 5
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 11
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 13
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD 4
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 31
-
-#define MUL_TOOM22_THRESHOLD 20
-#define MUL_TOOM33_THRESHOLD 74
-#define MUL_TOOM44_THRESHOLD 178
-#define MUL_TOOM6H_THRESHOLD 270
-#define MUL_TOOM8H_THRESHOLD 399
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 122
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 115
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 127
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 106
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 30
-#define SQR_TOOM3_THRESHOLD 105
-#define SQR_TOOM4_THRESHOLD 178
-#define SQR_TOOM6_THRESHOLD 303
-#define SQR_TOOM8_THRESHOLD 527
-
-#define MULMID_TOOM42_THRESHOLD 54
-
-#define MULMOD_BNM1_THRESHOLD 13
-#define SQRMOD_BNM1_THRESHOLD 18
-
-#define MUL_FFT_MODF_THRESHOLD 380 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 380, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
- { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \
- { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
- { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \
- { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
- { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \
- { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \
- { 47, 8}, { 95,10}, { 31, 9}, { 79,10}, \
- { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
- { 127, 8}, { 255, 9}, { 135,10}, { 79, 9}, \
- { 159,10}, { 95, 9}, { 191,11}, { 63,10}, \
- { 127, 9}, { 255, 8}, { 511, 9}, { 271,10}, \
- { 143, 9}, { 287, 8}, { 575,10}, { 159,11}, \
- { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \
- { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
- { 543,10}, { 287, 9}, { 575,10}, { 303,11}, \
- { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \
- { 671,10}, { 351, 9}, { 703,11}, { 191,10}, \
- { 383, 9}, { 767,10}, { 415, 9}, { 831,11}, \
- { 223,10}, { 447,12}, { 127,11}, { 255,10}, \
- { 543,11}, { 287,10}, { 607, 9}, { 1215,11}, \
- { 319,10}, { 671,11}, { 351,10}, { 703,12}, \
- { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
- { 831,11}, { 447,13}, { 127,12}, { 255,11}, \
- { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \
- { 319,11}, { 735,12}, { 383,11}, { 831,12}, \
- { 447,11}, { 959,13}, { 255,12}, { 511,11}, \
- { 1087,12}, { 575,11}, { 1151,12}, { 703,11}, \
- { 1471,13}, { 383,12}, { 831,11}, { 1663,12}, \
- { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \
- { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \
- { 1663,13}, { 895,12}, { 1919,14}, { 511,13}, \
- { 1023,12}, { 2111,13}, { 1151,12}, { 2431,13}, \
- { 1407,12}, { 2943,14}, { 767,13}, { 1663,12}, \
- { 3455,13}, { 1919,15}, { 511,14}, { 1023,13}, \
- { 2431,14}, { 1279,13}, { 2943,12}, { 5887,14}, \
- { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 150
-#define MUL_FFT_THRESHOLD 4544
-
-#define SQR_FFT_MODF_THRESHOLD 340 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 340, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
- { 12, 5}, { 25, 6}, { 21, 7}, { 11, 6}, \
- { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
- { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
- { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \
- { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \
- { 47,10}, { 31, 9}, { 79,10}, { 47, 9}, \
- { 95,11}, { 31,10}, { 63, 9}, { 127, 8}, \
- { 255,10}, { 79, 9}, { 159, 8}, { 319,10}, \
- { 95, 9}, { 191,11}, { 63,10}, { 127, 9}, \
- { 255, 8}, { 511, 9}, { 271,10}, { 143, 9}, \
- { 287, 8}, { 575, 9}, { 303, 8}, { 607,10}, \
- { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \
- { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \
- { 511,10}, { 271, 9}, { 543,10}, { 287, 9}, \
- { 575,10}, { 303, 9}, { 607,10}, { 319, 9}, \
- { 639,10}, { 335, 9}, { 671,10}, { 351, 9}, \
- { 703,11}, { 191,10}, { 383, 9}, { 767,10}, \
- { 415,11}, { 223,10}, { 447,12}, { 127,11}, \
- { 255,10}, { 543,11}, { 287,10}, { 607,11}, \
- { 319,10}, { 671,11}, { 351,10}, { 703,12}, \
- { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
- { 831,11}, { 479,13}, { 127,12}, { 255,11}, \
- { 543,10}, { 1087,11}, { 607,12}, { 319,11}, \
- { 671,10}, { 1343,11}, { 735,12}, { 383,11}, \
- { 831,12}, { 447,11}, { 959,13}, { 255,12}, \
- { 511,11}, { 1087,12}, { 575,11}, { 1215,12}, \
- { 639,11}, { 1343,12}, { 703,11}, { 1407,13}, \
- { 383,12}, { 831,11}, { 1663,12}, { 959,14}, \
- { 255,13}, { 511,12}, { 1215,13}, { 639,12}, \
- { 1471,13}, { 767,12}, { 1663,13}, { 895,12}, \
- { 1791,14}, { 511,13}, { 1023,12}, { 2111,13}, \
- { 1151,12}, { 2431,13}, { 1407,14}, { 767,13}, \
- { 1663,12}, { 3455,13}, { 1791,15}, { 511,14}, \
- { 1023,13}, { 2431,14}, { 1279,13}, { 2943,12}, \
- { 5887,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 151
-#define SQR_FFT_THRESHOLD 2880
-
-#define MULLO_BASECASE_THRESHOLD 6
-#define MULLO_DC_THRESHOLD 48
-#define MULLO_MUL_N_THRESHOLD 8907
-
-#define DC_DIV_QR_THRESHOLD 59
-#define DC_DIVAPPR_Q_THRESHOLD 250
-#define DC_BDIV_QR_THRESHOLD 59
-#define DC_BDIV_Q_THRESHOLD 169
-
-#define INV_MULMOD_BNM1_THRESHOLD 38
-#define INV_NEWTON_THRESHOLD 246
-#define INV_APPR_THRESHOLD 246
-
-#define BINV_NEWTON_THRESHOLD 276
-#define REDC_1_TO_REDC_N_THRESHOLD 67
-
-#define MU_DIV_QR_THRESHOLD 1334
-#define MU_DIVAPPR_Q_THRESHOLD 1442
-#define MUPI_DIV_QR_THRESHOLD 114
-#define MU_BDIV_QR_THRESHOLD 1142
-#define MU_BDIV_Q_THRESHOLD 1334
-
-#define POWM_SEC_TABLE 1,22,98,416,1378
-
-#define MATRIX22_STRASSEN_THRESHOLD 13
-#define HGCD_THRESHOLD 133
-#define HGCD_APPR_THRESHOLD 169
-#define HGCD_REDUCE_THRESHOLD 2479
-#define GCD_DC_THRESHOLD 460
-#define GCDEXT_DC_THRESHOLD 342
-#define JACOBI_BASE_METHOD 3
-
-#define GET_STR_DC_THRESHOLD 12
-#define GET_STR_PRECOMPUTE_THRESHOLD 23
-#define SET_STR_DC_THRESHOLD 321
-#define SET_STR_PRECOMPUTE_THRESHOLD 1099
-
-#define FAC_DSC_THRESHOLD 198
-#define FAC_ODD_THRESHOLD 34
diff --git a/gmp/mpn/x86/atom/logops_n.asm b/gmp/mpn/x86/atom/logops_n.asm
deleted file mode 100644
index 3cb6d7310c..0000000000
--- a/gmp/mpn/x86/atom/logops_n.asm
+++ /dev/null
@@ -1,151 +0,0 @@
-dnl Intel Atom mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl Contributed to the GNU project by Marco Bodrato.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C op nop opn
-C P5
-C P6 model 0-8,10-12
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood)
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom 3 3.5 3.5
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-define(M4_choose_op,
-`ifdef(`OPERATION_$1',`
-define(`M4_function', `mpn_$1')
-define(`M4_want_pre', `$4')
-define(`M4_inst', `$3')
-define(`M4_want_post',`$2')
-')')
-define(M4pre, `ifelse(M4_want_pre, yes,`$1')')
-define(M4post,`ifelse(M4_want_post,yes,`$1')')
-
-M4_choose_op( and_n, , andl, )
-M4_choose_op( andn_n, , andl, yes)
-M4_choose_op( nand_n, yes, andl, )
-M4_choose_op( ior_n, , orl, )
-M4_choose_op( iorn_n, , orl, yes)
-M4_choose_op( nior_n, yes, orl, )
-M4_choose_op( xor_n, , xorl, )
-M4_choose_op( xnor_n, yes, xorl, )
-
-ifdef(`M4_function',,
-`m4_error(`Unrecognised or undefined OPERATION symbol
-')')
-
-MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
-
-C void M4_function (mp_ptr dst, mp_srcptr src2, mp_srcptr src1, mp_size_t size);
-C
-
-defframe(PARAM_SIZE, 16)
-defframe(PARAM_SRC1, 12)
-defframe(PARAM_SRC2, 8)
-defframe(PARAM_DST, 4)
-
-dnl re-use parameter space
-define(SAVE_RP,`PARAM_SIZE')
-define(SAVE_VP,`PARAM_SRC1')
-define(SAVE_UP,`PARAM_DST')
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`vp', `%ebx')
-define(`cnt', `%eax')
-define(`r1', `%ecx')
-define(`r2', `%edx')
-
-ASM_START()
- TEXT
- ALIGN(16)
-deflit(`FRAME',0)
-
-PROLOGUE(M4_function)
- mov PARAM_SIZE, cnt C size
- mov rp, SAVE_RP C size slot already read, reuse it to save rp
- mov PARAM_DST, rp
- mov up, SAVE_UP C dst slot already read, reuse it to save up
- mov PARAM_SRC1, up
- shr cnt C size >> 1
- mov vp, SAVE_VP C src1 slot already read, reuse it to save vp
- mov PARAM_SRC2, vp
- mov (up), r1 C first limb; mov keeps the shr flags for the jumps below
- jz L(end) C size == 1
- jnc L(even) C size % 2 == 0
-
- ALIGN(16)
-L(oop): C two limbs per pass; M4pre and M4post emit the complement only when the selected operation asks for it
-M4pre(` notl_or_xorl_GMP_NUMB_MASK(r1)')
- M4_inst (vp), r1
- lea 8(up), up
- mov -4(up), r2
-M4post(` notl_or_xorl_GMP_NUMB_MASK(r1)')
- lea 8(vp), vp
- mov r1, (rp)
-L(entry):
-M4pre(` notl_or_xorl_GMP_NUMB_MASK(r2)')
- M4_inst -4(vp), r2
- lea 8(rp), rp
-M4post(` notl_or_xorl_GMP_NUMB_MASK(r2)')
- dec cnt
- mov (up), r1 C preload the limb for the next pass or for L(end)
- mov r2, -4(rp)
- jnz L(oop) C mov keeps the ZF set by dec
-
-L(end): C last limb is in r1
-M4pre(` notl_or_xorl_GMP_NUMB_MASK(r1)')
- mov SAVE_UP, up
- M4_inst (vp), r1
-M4post(`notl_or_xorl_GMP_NUMB_MASK(r1)')
- mov SAVE_VP, vp
- mov r1, (rp)
- mov SAVE_RP, rp
- ret
-
-L(even):
- mov r1, r2 C first limb is handled by the second half of the loop
- lea 4(up), up C bias the pointers so the offsets at L(entry) hit limb 0
- lea 4(vp), vp
- lea -4(rp), rp
- jmp L(entry)
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/lshift.asm b/gmp/mpn/x86/atom/lshift.asm
deleted file mode 100644
index f2c70dd3e8..0000000000
--- a/gmp/mpn/x86/atom/lshift.asm
+++ /dev/null
@@ -1,218 +0,0 @@
-dnl Intel Atom mpn_lshift -- mpn left shift.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C unsigned cnt);
-
-C cycles/limb
-C cnt!=1 cnt==1
-C P5
-C P6 model 0-8,10-12
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood)
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom 5 2.5
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-defframe(PARAM_CNT, 16)
-defframe(PARAM_SIZE,12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-dnl re-use parameter space
-define(SAVE_UP,`PARAM_CNT')
-define(VAR_COUNT,`PARAM_SIZE')
-define(SAVE_EBX,`PARAM_SRC')
-define(SAVE_EBP,`PARAM_DST')
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`cnt', `%ecx')
-
-ASM_START()
- TEXT
- ALIGN(8)
-deflit(`FRAME',0)
-PROLOGUE(mpn_lshift)
- mov PARAM_CNT, cnt
- mov PARAM_SIZE, %edx
- mov up, SAVE_UP C cnt slot already read, reuse it to save up
- mov PARAM_SRC, up
- push rp FRAME_pushl()
- mov PARAM_DST, rp
-
-C We can use faster code for shift-by-1 under certain conditions.
- cmp $1,cnt
- jne L(normal)
- cmpl rp, up
- jnc L(special) C jump if s_ptr >= res_ptr
- leal (up,%edx,4),%eax
- cmpl %eax,rp
- jnc L(special) C jump if res_ptr >= s_ptr + size
-
-L(normal): C general case: work from the top limb downwards
- lea -4(up,%edx,4), up C up = address of the top source limb
- mov %ebx, SAVE_EBX C src slot already read, reuse it to save ebx
- lea -4(rp,%edx,4), rp C rp = address of the top result limb
-
- shr %edx C size >> 1, CF = size & 1
- mov (up), %eax C top limb
- mov %edx, VAR_COUNT C pair counter lives in the size slot
- jnc L(evn) C size even
-
- mov %eax, %ebx
- shl %cl, %ebx C low part of the top result limb
- neg cnt C shifts use cl mod 32, so cl now shifts by 32-cnt
- shr %cl, %eax C bits shifted out of the top limb = return value
- test %edx, %edx
- jnz L(gt1)
- mov %ebx, (rp) C size == 1
- jmp L(quit)
-
-L(gt1): mov %ebp, SAVE_EBP C dst slot already read, reuse it to save ebp
- push %eax C park the return value; popped again before L(end)
- mov -4(up), %eax
- mov %eax, %ebp
- shr %cl, %eax
- jmp L(lo1)
-
-L(evn): mov %ebp, SAVE_EBP C dst slot already read, reuse it to save ebp
- neg cnt C cl now shifts by 32-cnt
- mov %eax, %ebp
- mov -4(up), %edx
- shr %cl, %eax C bits shifted out of the top limb = return value
- mov %edx, %ebx
- shr %cl, %edx
- neg cnt C cl shifts by cnt again
- decl VAR_COUNT
- lea 4(rp), rp
- lea -4(up), up
- jz L(end) C lea keeps the decl flags: size == 2
- push %eax FRAME_pushl()
-
- ALIGN(8)
-L(top): shl %cl, %ebp C two limbs per pass; neg flips cl between cnt and 32-cnt
- or %ebp, %edx
- shl %cl, %ebx
- neg cnt
- mov -4(up), %eax
- mov %eax, %ebp
- mov %edx, -4(rp)
- shr %cl, %eax
- lea -8(rp), rp
-L(lo1): mov -8(up), %edx
- or %ebx, %eax
- mov %edx, %ebx
- shr %cl, %edx
- lea -8(up), up
- neg cnt
- mov %eax, (rp)
- decl VAR_COUNT
- jg L(top)
-
- pop %eax FRAME_popl()
-L(end):
- shl %cl, %ebp
- shl %cl, %ebx
- or %ebp, %edx
- mov SAVE_EBP, %ebp
- mov %edx, -4(rp)
- mov %ebx, -8(rp)
-
-L(quit):
- mov SAVE_UP, up
- mov SAVE_EBX, %ebx
- pop rp FRAME_popl()
- ret
-
-L(special): C shift by 1 as an add/adc chain, lowest limb first
-deflit(`FRAME',4)
- lea 3(%edx), %eax C size + 3
- dec %edx C size - 1
- mov (up), %ecx
- shr $2, %eax C (size + 3) / 4
- and $3, %edx C (size - 1) % 4
- jz L(goloop) C jmp if size == 1 (mod 4)
- shr %edx
- jnc L(odd) C jump if size == 3 (mod 4)
-
- add %ecx, %ecx
- lea 4(up), up
- mov %ecx, (rp)
- mov (up), %ecx
- lea 4(rp), rp
-
- dec %edx
- jnz L(goloop) C jump if size == 2 (mod 4)
-L(odd): lea -8(up), up
- lea -8(rp), rp
- jmp L(sentry) C reached if size == 0 or 3 (mod 4)
-
-L(sloop):
- adc %ecx, %ecx
- mov 4(up), %edx
- mov %ecx, (rp)
- adc %edx, %edx
- mov 8(up), %ecx
- mov %edx, 4(rp)
-L(sentry):
- adc %ecx, %ecx
- mov 12(up), %edx
- mov %ecx, 8(rp)
- adc %edx, %edx
- lea 16(up), up
- mov %edx, 12(rp)
- lea 16(rp), rp
- mov (up), %ecx
-L(goloop):
- decl %eax C dec leaves CF alone, so the adc chain continues
- jnz L(sloop)
-
-L(squit):
- adc %ecx, %ecx
- mov %ecx, (rp)
- adc %eax, %eax C eax is 0 here, so the return value is the bit shifted out of the top limb
-
- mov SAVE_UP, up
- pop rp FRAME_popl()
- ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/lshiftc.asm b/gmp/mpn/x86/atom/lshiftc.asm
deleted file mode 100644
index 5be53ed19d..0000000000
--- a/gmp/mpn/x86/atom/lshiftc.asm
+++ /dev/null
@@ -1,159 +0,0 @@
-dnl Intel Atom mpn_lshiftc -- mpn left shift with complement.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C mp_limb_t mpn_lshiftc (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C unsigned cnt);
-
-C cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood)
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom 5.5
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-defframe(PARAM_CNT, 16)
-defframe(PARAM_SIZE,12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-dnl re-use parameter space
-define(SAVE_UP,`PARAM_CNT')
-define(VAR_COUNT,`PARAM_SIZE')
-define(SAVE_EBX,`PARAM_SRC')
-define(SAVE_EBP,`PARAM_DST')
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`cnt', `%ecx')
-
-ASM_START()
- TEXT
-
-PROLOGUE(mpn_lshiftc)
-deflit(`FRAME',0)
- mov PARAM_CNT, cnt
- mov PARAM_SIZE, %edx
- mov up, SAVE_UP
- mov PARAM_SRC, up
- push rp FRAME_pushl()
- mov PARAM_DST, rp
-
- lea -4(up,%edx,4), up
- mov %ebx, SAVE_EBX
- lea -4(rp,%edx,4), rp
-
- shr %edx
- mov (up), %eax
- mov %edx, VAR_COUNT
- jnc L(evn)
-
- mov %eax, %ebx
- shl %cl, %ebx
- neg cnt
- shr %cl, %eax
- test %edx, %edx
- jnz L(gt1)
- not %ebx
- mov %ebx, (rp)
- jmp L(quit)
-
-L(gt1): mov %ebp, SAVE_EBP
- push %eax
- mov -4(up), %eax
- mov %eax, %ebp
- shr %cl, %eax
- jmp L(lo1)
-
-L(evn): mov %ebp, SAVE_EBP
- neg cnt
- mov %eax, %ebp
- mov -4(up), %edx
- shr %cl, %eax
- mov %edx, %ebx
- shr %cl, %edx
- neg cnt
- decl VAR_COUNT
- lea 4(rp), rp
- lea -4(up), up
- jz L(end)
- push %eax FRAME_pushl()
-
-L(top): shl %cl, %ebp
- or %ebp, %edx
- shl %cl, %ebx
- neg cnt
- not %edx
- mov -4(up), %eax
- mov %eax, %ebp
- mov %edx, -4(rp)
- shr %cl, %eax
- lea -8(rp), rp
-L(lo1): mov -8(up), %edx
- or %ebx, %eax
- mov %edx, %ebx
- shr %cl, %edx
- not %eax
- lea -8(up), up
- neg cnt
- mov %eax, (rp)
- decl VAR_COUNT
- jg L(top)
-
- pop %eax FRAME_popl()
-L(end):
- shl %cl, %ebp
- shl %cl, %ebx
- or %ebp, %edx
- mov SAVE_EBP, %ebp
- not %edx
- not %ebx
- mov %edx, -4(rp)
- mov %ebx, -8(rp)
-
-L(quit):
- mov SAVE_UP, up
- mov SAVE_EBX, %ebx
- pop rp FRAME_popl()
- ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/mmx/copyd.asm b/gmp/mpn/x86/atom/mmx/copyd.asm
deleted file mode 100644
index b80fb033fe..0000000000
--- a/gmp/mpn/x86/atom/mmx/copyd.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl Intel Atom mpn_copyd -- copy limb vector, decrementing.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_copyd)
-include_mpn(`x86/k7/mmx/copyd.asm')
diff --git a/gmp/mpn/x86/atom/mmx/copyi.asm b/gmp/mpn/x86/atom/mmx/copyi.asm
deleted file mode 100644
index 49b6b8d662..0000000000
--- a/gmp/mpn/x86/atom/mmx/copyi.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl Intel Atom mpn_copyi -- copy limb vector, incrementing.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_copyi)
-include_mpn(`x86/k7/mmx/copyi.asm')
diff --git a/gmp/mpn/x86/atom/mmx/hamdist.asm b/gmp/mpn/x86/atom/mmx/hamdist.asm
deleted file mode 100644
index 3fe8253240..0000000000
--- a/gmp/mpn/x86/atom/mmx/hamdist.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl Intel Atom mpn_hamdist -- hamming distance.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_hamdist)
-include_mpn(`x86/k7/mmx/popham.asm')
diff --git a/gmp/mpn/x86/atom/mod_34lsub1.asm b/gmp/mpn/x86/atom/mod_34lsub1.asm
deleted file mode 100644
index 6d57ba385d..0000000000
--- a/gmp/mpn/x86/atom/mod_34lsub1.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl Intel Atom mpn_mod_34lsub1 -- remainder modulo 2^24-1.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_mod_34lsub1)
-include_mpn(`x86/p6/mod_34lsub1.asm')
diff --git a/gmp/mpn/x86/atom/mode1o.asm b/gmp/mpn/x86/atom/mode1o.asm
deleted file mode 100644
index c9ee6bd2db..0000000000
--- a/gmp/mpn/x86/atom/mode1o.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl Intel Atom mpn_modexact_1_odd -- exact division style remainder.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_modexact_1_odd mpn_modexact_1c_odd)
-include_mpn(`x86/pentium/mode1o.asm')
diff --git a/gmp/mpn/x86/atom/rshift.asm b/gmp/mpn/x86/atom/rshift.asm
deleted file mode 100644
index 1cb5dbefe9..0000000000
--- a/gmp/mpn/x86/atom/rshift.asm
+++ /dev/null
@@ -1,152 +0,0 @@
-dnl Intel Atom mpn_rshift -- mpn right shift.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl Converted from AMD64 by Marco Bodrato.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C unsigned cnt);
-
-C cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood)
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom 5
-C AMD K6
-C AMD K7
-C AMD K8
-C AMD K10
-
-defframe(PARAM_CNT, 16)
-defframe(PARAM_SIZE,12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-dnl re-use parameter space
-define(SAVE_UP,`PARAM_CNT')
-define(VAR_COUNT,`PARAM_SIZE')
-define(SAVE_EBX,`PARAM_SRC')
-define(SAVE_EBP,`PARAM_DST')
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`cnt', `%ecx')
-
-ASM_START()
- TEXT
- ALIGN(8)
-deflit(`FRAME',0)
-PROLOGUE(mpn_rshift)
- mov PARAM_CNT, cnt
- mov PARAM_SIZE, %edx
- mov up, SAVE_UP
- mov PARAM_SRC, up
- push rp FRAME_pushl()
- mov PARAM_DST, rp
- mov %ebx, SAVE_EBX
-
- shr %edx
- mov (up), %eax
- mov %edx, VAR_COUNT
- jnc L(evn)
-
- mov %eax, %ebx
- shr %cl, %ebx
- neg cnt
- shl %cl, %eax
- test %edx, %edx
- jnz L(gt1)
- mov %ebx, (rp)
- jmp L(quit)
-
-L(gt1): mov %ebp, SAVE_EBP
- push %eax
- mov 4(up), %eax
- mov %eax, %ebp
- shl %cl, %eax
- jmp L(lo1)
-
-L(evn): mov %ebp, SAVE_EBP
- neg cnt
- mov %eax, %ebp
- mov 4(up), %edx
- shl %cl, %eax
- mov %edx, %ebx
- shl %cl, %edx
- neg cnt
- decl VAR_COUNT
- lea -4(rp), rp
- lea 4(up), up
- jz L(end)
- push %eax FRAME_pushl()
-
- ALIGN(8)
-L(top): shr %cl, %ebp
- or %ebp, %edx
- shr %cl, %ebx
- neg cnt
- mov 4(up), %eax
- mov %eax, %ebp
- mov %edx, 4(rp)
- shl %cl, %eax
- lea 8(rp), rp
-L(lo1): mov 8(up), %edx
- or %ebx, %eax
- mov %edx, %ebx
- shl %cl, %edx
- lea 8(up), up
- neg cnt
- mov %eax, (rp)
- decl VAR_COUNT
- jg L(top)
-
- pop %eax FRAME_popl()
-L(end):
- shr %cl, %ebp
- shr %cl, %ebx
- or %ebp, %edx
- mov SAVE_EBP, %ebp
- mov %edx, 4(rp)
- mov %ebx, 8(rp)
-
-L(quit):
- mov SAVE_UP, up
- mov SAVE_EBX, %ebx
- pop rp FRAME_popl()
- ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/sse2/aorsmul_1.asm b/gmp/mpn/x86/atom/sse2/aorsmul_1.asm
deleted file mode 100644
index 969a14a919..0000000000
--- a/gmp/mpn/x86/atom/sse2/aorsmul_1.asm
+++ /dev/null
@@ -1,174 +0,0 @@
-dnl x86-32 mpn_addmul_1 and mpn_submul_1 optimised for Intel Atom.
-
-dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C cycles/limb
-C P5 -
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood)
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom 8
-C AMD K6
-C AMD K7 -
-C AMD K8
-C AMD K10
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`n', `%ecx')
-
-ifdef(`OPERATION_addmul_1',`
- define(ADDSUB, add)
- define(func_1, mpn_addmul_1)
- define(func_1c, mpn_addmul_1c)')
-ifdef(`OPERATION_submul_1',`
- define(ADDSUB, sub)
- define(func_1, mpn_submul_1)
- define(func_1c, mpn_submul_1c)')
-
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
-
- TEXT
- ALIGN(16)
-PROLOGUE(func_1)
- xor %edx, %edx
-L(ent): push %edi
- push %esi
- push %ebx
- mov 16(%esp), rp
- mov 20(%esp), up
- mov 24(%esp), n
- movd 28(%esp), %mm7
- test $1, n
- jz L(fi0or2)
- movd (up), %mm0
- pmuludq %mm7, %mm0
- shr $2, n
- jnc L(fi1)
-
-L(fi3): lea -8(up), up
- lea -8(rp), rp
- movd 12(up), %mm1
- movd %mm0, %ebx
- pmuludq %mm7, %mm1
- add $1, n C increment and clear carry
- jmp L(lo3)
-
-L(fi1): movd %mm0, %ebx
- jz L(wd1)
- movd 4(up), %mm1
- pmuludq %mm7, %mm1
- jmp L(lo1)
-
-L(fi0or2):
- movd (up), %mm1
- pmuludq %mm7, %mm1
- shr $2, n
- movd 4(up), %mm0
- jc L(fi2)
- lea -4(up), up
- lea -4(rp), rp
- movd %mm1, %eax
- pmuludq %mm7, %mm0
- jmp L(lo0)
-
-L(fi2): lea 4(up), up
- add $1, n C increment and clear carry
- movd %mm1, %eax
- lea -12(rp), rp
- jmp L(lo2)
-
-C ALIGN(16) C alignment seems irrelevant
-L(top): movd 4(up), %mm1
- adc $0, %edx
- ADDSUB %eax, 12(rp)
- movd %mm0, %ebx
- pmuludq %mm7, %mm1
- lea 16(rp), rp
-L(lo1): psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %edx
- movd %mm1, %eax
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- adc $0, %edx
- ADDSUB %ebx, (rp)
-L(lo0): psrlq $32, %mm1
- adc %edx, %eax
- movd %mm1, %edx
- movd %mm0, %ebx
- movd 12(up), %mm1
- pmuludq %mm7, %mm1
- adc $0, %edx
- ADDSUB %eax, 4(rp)
-L(lo3): psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %edx
- movd %mm1, %eax
- lea 16(up), up
- movd (up), %mm0
- adc $0, %edx
- ADDSUB %ebx, 8(rp)
-L(lo2): psrlq $32, %mm1
- adc %edx, %eax
- movd %mm1, %edx
- pmuludq %mm7, %mm0
- dec n
- jnz L(top)
-
-L(end): adc n, %edx C n is zero here
- ADDSUB %eax, 12(rp)
- movd %mm0, %ebx
- lea 16(rp), rp
-L(wd1): psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %eax
- adc n, %eax
- ADDSUB %ebx, (rp)
- emms
- adc n, %eax
- pop %ebx
- pop %esi
- pop %edi
- ret
-EPILOGUE()
-PROLOGUE(func_1c)
- mov 20(%esp), %edx C carry
- jmp L(ent)
-EPILOGUE()
diff --git a/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm b/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm
deleted file mode 100644
index 782e914019..0000000000
--- a/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl Intel Atom mpn_bdiv_dbm1c.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_bdiv_dbm1c)
-include_mpn(`x86/pentium4/sse2/bdiv_dbm1c.asm')
diff --git a/gmp/mpn/x86/atom/sse2/divrem_1.asm b/gmp/mpn/x86/atom/sse2/divrem_1.asm
deleted file mode 100644
index f84709a22e..0000000000
--- a/gmp/mpn/x86/atom/sse2/divrem_1.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl Intel Atom mpn_divrem_1 -- mpn by limb division.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_preinv_divrem_1 mpn_divrem_1c mpn_divrem_1)
-include_mpn(`x86/pentium4/sse2/divrem_1.asm')
diff --git a/gmp/mpn/x86/atom/sse2/mod_1_1.asm b/gmp/mpn/x86/atom/sse2/mod_1_1.asm
deleted file mode 100644
index ae6581d9b6..0000000000
--- a/gmp/mpn/x86/atom/sse2/mod_1_1.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl Intel Atom/SSE2 mpn_mod_1_1.
-
-dnl Copyright 2009, 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_mod_1_1p)
-include_mpn(`x86/pentium4/sse2/mod_1_1.asm')
diff --git a/gmp/mpn/x86/atom/sse2/mod_1_4.asm b/gmp/mpn/x86/atom/sse2/mod_1_4.asm
deleted file mode 100644
index 31faa3f0a3..0000000000
--- a/gmp/mpn/x86/atom/sse2/mod_1_4.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl Intel Atom/SSE2 mpn_mod_1_4.
-
-dnl Copyright 2009, 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_mod_1s_4p)
-include_mpn(`x86/pentium4/sse2/mod_1_4.asm')
diff --git a/gmp/mpn/x86/atom/sse2/mul_1.asm b/gmp/mpn/x86/atom/sse2/mul_1.asm
deleted file mode 100644
index aa3bb974bb..0000000000
--- a/gmp/mpn/x86/atom/sse2/mul_1.asm
+++ /dev/null
@@ -1,124 +0,0 @@
-dnl Intel Atom mpn_mul_1.
-
-dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C cycles/limb
-C P5 -
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood)
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom 7.5
-C AMD K6 -
-C AMD K7 -
-C AMD K8
-C AMD K10
-
-defframe(PARAM_CARRY,20)
-defframe(PARAM_MUL, 16)
-defframe(PARAM_SIZE, 12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-define(`rp', `%edx')
-define(`up', `%esi')
-define(`n', `%ecx')
-
-ASM_START()
- TEXT
- ALIGN(16)
-deflit(`FRAME',0)
-
-PROLOGUE(mpn_mul_1c)
- movd PARAM_CARRY, %mm6 C carry
- jmp L(ent)
-EPILOGUE()
-
- ALIGN(8) C for compact code
-PROLOGUE(mpn_mul_1)
- pxor %mm6, %mm6
-L(ent): push %esi FRAME_pushl()
- mov PARAM_SRC, up
- mov PARAM_SIZE, %eax C size
- movd PARAM_MUL, %mm7
- movd (up), %mm0
- mov %eax, n
- and $3, %eax
- pmuludq %mm7, %mm0
- mov PARAM_DST, rp
- jz L(lo0)
- cmp $2, %eax
- lea -16(up,%eax,4),up
- lea -16(rp,%eax,4),rp
- jc L(lo1)
- jz L(lo2)
- jmp L(lo3)
-
- ALIGN(16)
-L(top): movd (up), %mm0
- pmuludq %mm7, %mm0
- psrlq $32, %mm6
- lea 16(rp), rp
-L(lo0): paddq %mm0, %mm6
- movd 4(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, (rp)
- psrlq $32, %mm6
-L(lo3): paddq %mm0, %mm6
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, 4(rp)
- psrlq $32, %mm6
-L(lo2): paddq %mm0, %mm6
- movd 12(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, 8(rp)
- psrlq $32, %mm6
-L(lo1): paddq %mm0, %mm6
- sub $4, n
- movd %mm6, 12(rp)
- lea 16(up), up
- ja L(top)
-
- psrlq $32, %mm6
- movd %mm6, %eax
- emms
- pop %esi FRAME_popl()
- ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/atom/sse2/mul_basecase.asm b/gmp/mpn/x86/atom/sse2/mul_basecase.asm
deleted file mode 100644
index 97d3aeb5ad..0000000000
--- a/gmp/mpn/x86/atom/sse2/mul_basecase.asm
+++ /dev/null
@@ -1,501 +0,0 @@
-dnl x86 mpn_mul_basecase -- Multiply two limb vectors and store the result in
-dnl a third limb vector.
-
-dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C TODO
-C * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the
-C 4 large loops into one; we could use it for the outer loop branch.
-C * Optimise code outside of inner loops.
-C * Write combined addmul_1 feed-in and wind-down code, and use when iterating
-C each outer loop. ("Overlapping software pipelining")
-C * Postpone push of ebx until we know vn > 1. Perhaps use caller-saves regs
-C for inlined mul_1, allowing us to postpone all pushes.
-C * Perhaps write special code for vn <= un < M, for some small M.
-
-C void mpn_mul_basecase (mp_ptr wp,
-C mp_srcptr xp, mp_size_t xn,
-C mp_srcptr yp, mp_size_t yn);
-C
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`un', `%ecx')
-define(`vp', `%ebp')
-define(`vn', `36(%esp)')
-
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_mul_basecase)
- push %edi
- push %esi
- push %ebx
- push %ebp
- mov 20(%esp), rp
- mov 24(%esp), up
- mov 28(%esp), un
- mov 32(%esp), vp
-
- movd (up), %mm0
- movd (vp), %mm7
- pmuludq %mm7, %mm0
- pxor %mm6, %mm6
-
- mov un, %eax
- and $3, %eax
- jz L(of0)
- cmp $2, %eax
- jc L(of1)
- jz L(of2)
-
-C ================================================================
- jmp L(m3)
- ALIGN(16)
-L(lm3): movd -4(up), %mm0
- pmuludq %mm7, %mm0
- psrlq $32, %mm6
- lea 16(rp), rp
- paddq %mm0, %mm6
- movd (up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, -4(rp)
- psrlq $32, %mm6
-L(m3): paddq %mm0, %mm6
- movd 4(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, (rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, 4(rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- sub $4, un
- movd %mm6, 8(rp)
- lea 16(up), up
- ja L(lm3)
-
- psrlq $32, %mm6
- movd %mm6, 12(rp)
-
- decl vn
- jz L(done)
- lea -8(rp), rp
-
-L(ol3): mov 28(%esp), un
- neg un
- lea 4(vp), vp
- movd (vp), %mm7 C read next V limb
- mov 24(%esp), up
- lea 16(rp,un,4), rp
-
- movd (up), %mm0
- pmuludq %mm7, %mm0
- sar $2, un
- movd 4(up), %mm1
- movd %mm0, %ebx
- pmuludq %mm7, %mm1
- lea -8(up), up
- xor %edx, %edx C zero edx and CF
- jmp L(a3)
-
-L(la3): movd 4(up), %mm1
- adc $0, %edx
- add %eax, 12(rp)
- movd %mm0, %ebx
- pmuludq %mm7, %mm1
- lea 16(rp), rp
- psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %edx
- movd %mm1, %eax
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- adc $0, %edx
- add %ebx, (rp)
- psrlq $32, %mm1
- adc %edx, %eax
- movd %mm1, %edx
- movd %mm0, %ebx
- movd 12(up), %mm1
- pmuludq %mm7, %mm1
- adc $0, %edx
- add %eax, 4(rp)
-L(a3): psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %edx
- movd %mm1, %eax
- lea 16(up), up
- movd (up), %mm0
- adc $0, %edx
- add %ebx, 8(rp)
- psrlq $32, %mm1
- adc %edx, %eax
- movd %mm1, %edx
- pmuludq %mm7, %mm0
- inc un
- jnz L(la3)
-
- adc un, %edx C un is zero here
- add %eax, 12(rp)
- movd %mm0, %ebx
- psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %eax
- adc un, %eax
- add %ebx, 16(rp)
- adc un, %eax
- mov %eax, 20(rp)
-
- decl vn
- jnz L(ol3)
- jmp L(done)
-
-C ================================================================
- ALIGN(16)
-L(lm0): movd (up), %mm0
- pmuludq %mm7, %mm0
- psrlq $32, %mm6
- lea 16(rp), rp
-L(of0): paddq %mm0, %mm6
- movd 4(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, (rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, 4(rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- movd 12(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, 8(rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- sub $4, un
- movd %mm6, 12(rp)
- lea 16(up), up
- ja L(lm0)
-
- psrlq $32, %mm6
- movd %mm6, 16(rp)
-
- decl vn
- jz L(done)
- lea -4(rp), rp
-
-L(ol0): mov 28(%esp), un
- neg un
- lea 4(vp), vp
- movd (vp), %mm7 C read next V limb
- mov 24(%esp), up
- lea 20(rp,un,4), rp
-
- movd (up), %mm1
- pmuludq %mm7, %mm1
- sar $2, un
- movd 4(up), %mm0
- lea -4(up), up
- movd %mm1, %eax
- pmuludq %mm7, %mm0
- xor %edx, %edx C zero edx and CF
- jmp L(a0)
-
-L(la0): movd 4(up), %mm1
- adc $0, %edx
- add %eax, 12(rp)
- movd %mm0, %ebx
- pmuludq %mm7, %mm1
- lea 16(rp), rp
- psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %edx
- movd %mm1, %eax
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- adc $0, %edx
- add %ebx, (rp)
-L(a0): psrlq $32, %mm1
- adc %edx, %eax
- movd %mm1, %edx
- movd %mm0, %ebx
- movd 12(up), %mm1
- pmuludq %mm7, %mm1
- adc $0, %edx
- add %eax, 4(rp)
- psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %edx
- movd %mm1, %eax
- lea 16(up), up
- movd (up), %mm0
- adc $0, %edx
- add %ebx, 8(rp)
- psrlq $32, %mm1
- adc %edx, %eax
- movd %mm1, %edx
- pmuludq %mm7, %mm0
- inc un
- jnz L(la0)
-
- adc un, %edx C un is zero here
- add %eax, 12(rp)
- movd %mm0, %ebx
- psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %eax
- adc un, %eax
- add %ebx, 16(rp)
- adc un, %eax
- mov %eax, 20(rp)
-
- decl vn
- jnz L(ol0)
- jmp L(done)
-
-C ================================================================
- ALIGN(16)
-L(lm1): movd -12(up), %mm0
- pmuludq %mm7, %mm0
- psrlq $32, %mm6
- lea 16(rp), rp
- paddq %mm0, %mm6
- movd -8(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, -12(rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- movd -4(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, -8(rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- movd (up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, -4(rp)
- psrlq $32, %mm6
-L(of1): paddq %mm0, %mm6
- sub $4, un
- movd %mm6, (rp)
- lea 16(up), up
- ja L(lm1)
-
- psrlq $32, %mm6
- movd %mm6, 4(rp)
-
- decl vn
- jz L(done)
- lea -16(rp), rp
-
-L(ol1): mov 28(%esp), un
- neg un
- lea 4(vp), vp
- movd (vp), %mm7 C read next V limb
- mov 24(%esp), up
- lea 24(rp,un,4), rp
-
- movd (up), %mm0
- pmuludq %mm7, %mm0
- sar $2, un
- movd %mm0, %ebx
- movd 4(up), %mm1
- pmuludq %mm7, %mm1
- xor %edx, %edx C zero edx and CF
- inc un
- jmp L(a1)
-
-L(la1): movd 4(up), %mm1
- adc $0, %edx
- add %eax, 12(rp)
- movd %mm0, %ebx
- pmuludq %mm7, %mm1
- lea 16(rp), rp
-L(a1): psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %edx
- movd %mm1, %eax
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- adc $0, %edx
- add %ebx, (rp)
- psrlq $32, %mm1
- adc %edx, %eax
- movd %mm1, %edx
- movd %mm0, %ebx
- movd 12(up), %mm1
- pmuludq %mm7, %mm1
- adc $0, %edx
- add %eax, 4(rp)
- psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %edx
- movd %mm1, %eax
- lea 16(up), up
- movd (up), %mm0
- adc $0, %edx
- add %ebx, 8(rp)
- psrlq $32, %mm1
- adc %edx, %eax
- movd %mm1, %edx
- pmuludq %mm7, %mm0
- inc un
- jnz L(la1)
-
- adc un, %edx C un is zero here
- add %eax, 12(rp)
- movd %mm0, %ebx
- psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %eax
- adc un, %eax
- add %ebx, 16(rp)
- adc un, %eax
- mov %eax, 20(rp)
-
- decl vn
- jnz L(ol1)
- jmp L(done)
-
-C ================================================================
- ALIGN(16)
-L(lm2): movd -8(up), %mm0
- pmuludq %mm7, %mm0
- psrlq $32, %mm6
- lea 16(rp), rp
- paddq %mm0, %mm6
- movd -4(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, -8(rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- movd (up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, -4(rp)
- psrlq $32, %mm6
-L(of2): paddq %mm0, %mm6
- movd 4(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, (rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- sub $4, un
- movd %mm6, 4(rp)
- lea 16(up), up
- ja L(lm2)
-
- psrlq $32, %mm6
- movd %mm6, 8(rp)
-
- decl vn
- jz L(done)
- lea -12(rp), rp
-
-L(ol2): mov 28(%esp), un
- neg un
- lea 4(vp), vp
- movd (vp), %mm7 C read next V limb
- mov 24(%esp), up
- lea 12(rp,un,4), rp
-
- movd (up), %mm1
- pmuludq %mm7, %mm1
- sar $2, un
- movd 4(up), %mm0
- lea 4(up), up
- movd %mm1, %eax
- xor %edx, %edx C zero edx and CF
- jmp L(lo2)
-
-L(la2): movd 4(up), %mm1
- adc $0, %edx
- add %eax, 12(rp)
- movd %mm0, %ebx
- pmuludq %mm7, %mm1
- lea 16(rp), rp
- psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %edx
- movd %mm1, %eax
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- adc $0, %edx
- add %ebx, (rp)
- psrlq $32, %mm1
- adc %edx, %eax
- movd %mm1, %edx
- movd %mm0, %ebx
- movd 12(up), %mm1
- pmuludq %mm7, %mm1
- adc $0, %edx
- add %eax, 4(rp)
- psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %edx
- movd %mm1, %eax
- lea 16(up), up
- movd (up), %mm0
- adc $0, %edx
- add %ebx, 8(rp)
-L(lo2): psrlq $32, %mm1
- adc %edx, %eax
- movd %mm1, %edx
- pmuludq %mm7, %mm0
- inc un
- jnz L(la2)
-
- adc un, %edx C un is zero here
- add %eax, 12(rp)
- movd %mm0, %ebx
- psrlq $32, %mm0
- adc %edx, %ebx
- movd %mm0, %eax
- adc un, %eax
- add %ebx, 16(rp)
- adc un, %eax
- mov %eax, 20(rp)
-
- decl vn
- jnz L(ol2)
-C jmp L(done)
-
-C ================================================================
-L(done):
- emms
- pop %ebp
- pop %ebx
- pop %esi
- pop %edi
- ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/atom/sse2/popcount.asm b/gmp/mpn/x86/atom/sse2/popcount.asm
deleted file mode 100644
index 7847aec8e6..0000000000
--- a/gmp/mpn/x86/atom/sse2/popcount.asm
+++ /dev/null
@@ -1,35 +0,0 @@
-dnl Intel Atom mpn_popcount -- population count.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-MULFUNC_PROLOGUE(mpn_popcount)
-include_mpn(`x86/pentium4/sse2/popcount.asm')
diff --git a/gmp/mpn/x86/atom/sse2/sqr_basecase.asm b/gmp/mpn/x86/atom/sse2/sqr_basecase.asm
deleted file mode 100644
index af19ed854d..0000000000
--- a/gmp/mpn/x86/atom/sse2/sqr_basecase.asm
+++ /dev/null
@@ -1,634 +0,0 @@
-dnl x86 mpn_sqr_basecase -- square an mpn number, optimised for atom.
-
-dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C TODO
-C * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the
-C 4 large loops into one; we could use it for the outer loop branch.
-C * Optimise code outside of inner loops.
-C * Write combined addmul_1 feed-in and wind-down code, and use it when
-C iterating each outer loop. ("Overlapping software pipelining")
-C * Perhaps use caller-saves regs for inlined mul_1, allowing us to postpone
-C all pushes.
-C * Perhaps write special code for n < M, for some small M.
-C * Replace inlined addmul_1 with smaller code from aorsmul_1.asm, or perhaps
-C with even less pipelined code.
-C * We run the outer loop until we have a 2-limb by 1-limb addmul_1 left.
-C Consider breaking out earlier, saving the high cost of short loops.
-
-C void mpn_sqr_basecase (mp_ptr wp,
-C mp_srcptr xp, mp_size_t xn);
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`n', `%ecx')
-
-define(`un', `%ebp')
-
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_sqr_basecase)
- push %edi
- push %esi
- mov 12(%esp), rp
- mov 16(%esp), up
- mov 20(%esp), n
-
- lea 4(rp), rp C write triangular product starting at rp[1]
- dec n
- movd (up), %mm7
-
- jz L(one)
- lea 4(up), up
- push %ebx
- push %ebp
- mov n, %eax
-
- movd (up), %mm0
- neg n
- pmuludq %mm7, %mm0
- pxor %mm6, %mm6
- mov n, un
-
- and $3, %eax
- jz L(of0)
- cmp $2, %eax
- jc L(of1)
- jz L(of2)
-
-C ================================================================
- jmp L(m3)
- ALIGN(16)
-L(lm3): movd -4(up), %mm0
- pmuludq %mm7, %mm0
- psrlq $32, %mm6
- lea 16(rp), rp
- paddq %mm0, %mm6
- movd (up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, -4(rp)
- psrlq $32, %mm6
-L(m3): paddq %mm0, %mm6
- movd 4(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, (rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, 4(rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- add $4, un
- movd %mm6, 8(rp)
- lea 16(up), up
- js L(lm3)
-
- psrlq $32, %mm6
- movd %mm6, 12(rp)
-
- inc n
-C jz L(done)
- lea -12(up), up
- lea 4(rp), rp
- jmp L(ol2)
-
-C ================================================================
- ALIGN(16)
-L(lm0): movd (up), %mm0
- pmuludq %mm7, %mm0
- psrlq $32, %mm6
- lea 16(rp), rp
-L(of0): paddq %mm0, %mm6
- movd 4(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, (rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, 4(rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- movd 12(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, 8(rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- add $4, un
- movd %mm6, 12(rp)
- lea 16(up), up
- js L(lm0)
-
- psrlq $32, %mm6
- movd %mm6, 16(rp)
-
- inc n
-C jz L(done)
- lea -8(up), up
- lea 8(rp), rp
- jmp L(ol3)
-
-C ================================================================
- ALIGN(16)
-L(lm1): movd -12(up), %mm0
- pmuludq %mm7, %mm0
- psrlq $32, %mm6
- lea 16(rp), rp
- paddq %mm0, %mm6
- movd -8(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, -12(rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- movd -4(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, -8(rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- movd (up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, -4(rp)
- psrlq $32, %mm6
-L(of1): paddq %mm0, %mm6
- add $4, un
- movd %mm6, (rp)
- lea 16(up), up
- js L(lm1)
-
- psrlq $32, %mm6
- movd %mm6, 4(rp)
-
- inc n
- jz L(done) C goes away when we add special n=2 code
- lea -20(up), up
- lea -4(rp), rp
- jmp L(ol0)
-
-C ================================================================
- ALIGN(16)
-L(lm2): movd -8(up), %mm0
- pmuludq %mm7, %mm0
- psrlq $32, %mm6
- lea 16(rp), rp
- paddq %mm0, %mm6
- movd -4(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, -8(rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- movd (up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, -4(rp)
- psrlq $32, %mm6
-L(of2): paddq %mm0, %mm6
- movd 4(up), %mm0
- pmuludq %mm7, %mm0
- movd %mm6, (rp)
- psrlq $32, %mm6
- paddq %mm0, %mm6
- add $4, un
- movd %mm6, 4(rp)
- lea 16(up), up
- js L(lm2)
-
- psrlq $32, %mm6
- movd %mm6, 8(rp)
-
- inc n
-C jz L(done)
- lea -16(up), up
-C lea (rp), rp
-C jmp L(ol1)
-
-C ================================================================
-
-L(ol1): lea 4(up,n,4), up
- movd (up), %mm7 C read next U invariant limb
- lea 8(rp,n,4), rp
- mov n, un
-
- movd 4(up), %mm1
- pmuludq %mm7, %mm1
- sar $2, un
- movd %mm1, %ebx
- inc un
- jz L(re1)
-
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- xor %edx, %edx C zero edx and CF
- jmp L(a1)
-
-L(la1): adc $0, %edx
- add %ebx, 12(rp)
- movd %mm0, %eax
- pmuludq %mm7, %mm1
- lea 16(rp), rp
- psrlq $32, %mm0
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- adc $0, %edx
- add %eax, (rp)
-L(a1): psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %edx
- movd %mm0, %eax
- movd 12(up), %mm1
- pmuludq %mm7, %mm1
- adc $0, %edx
- add %ebx, 4(rp)
- psrlq $32, %mm0
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- lea 16(up), up
- movd (up), %mm0
- adc $0, %edx
- add %eax, 8(rp)
- psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %edx
- pmuludq %mm7, %mm0
- inc un
- movd 4(up), %mm1
- jnz L(la1)
-
- adc un, %edx C un is zero here
- add %ebx, 12(rp)
- movd %mm0, %eax
- pmuludq %mm7, %mm1
- lea 16(rp), rp
- psrlq $32, %mm0
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- adc un, %edx
- add %eax, (rp)
- psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %eax
- adc un, %eax
- add %ebx, 4(rp)
- adc un, %eax
- mov %eax, 8(rp)
-
- inc n
-
-C ================================================================
-
-L(ol0): lea (up,n,4), up
- movd 4(up), %mm7 C read next U invariant limb
- lea 4(rp,n,4), rp
- mov n, un
-
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- sar $2, un
- movd 12(up), %mm1
- movd %mm0, %eax
- pmuludq %mm7, %mm1
- xor %edx, %edx C zero edx and CF
- jmp L(a0)
-
-L(la0): adc $0, %edx
- add %ebx, 12(rp)
- movd %mm0, %eax
- pmuludq %mm7, %mm1
- lea 16(rp), rp
- psrlq $32, %mm0
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- adc $0, %edx
- add %eax, (rp)
- psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %edx
- movd %mm0, %eax
- movd 12(up), %mm1
- pmuludq %mm7, %mm1
- adc $0, %edx
- add %ebx, 4(rp)
-L(a0): psrlq $32, %mm0
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- lea 16(up), up
- movd (up), %mm0
- adc $0, %edx
- add %eax, 8(rp)
- psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %edx
- pmuludq %mm7, %mm0
- inc un
- movd 4(up), %mm1
- jnz L(la0)
-
- adc un, %edx C un is zero here
- add %ebx, 12(rp)
- movd %mm0, %eax
- pmuludq %mm7, %mm1
- lea 16(rp), rp
- psrlq $32, %mm0
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- adc un, %edx
- add %eax, (rp)
- psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %eax
- adc un, %eax
- add %ebx, 4(rp)
- adc un, %eax
- mov %eax, 8(rp)
-
- inc n
-
-C ================================================================
-
-L(ol3): lea 12(up,n,4), up
- movd -8(up), %mm7 C read next U invariant limb
- lea (rp,n,4), rp C put rp back
- mov n, un
-
- movd -4(up), %mm1
- pmuludq %mm7, %mm1
- sar $2, un
- movd %mm1, %ebx
- movd (up), %mm0
- xor %edx, %edx C zero edx and CF
- jmp L(a3)
-
-L(la3): adc $0, %edx
- add %ebx, 12(rp)
- movd %mm0, %eax
- pmuludq %mm7, %mm1
- lea 16(rp), rp
- psrlq $32, %mm0
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- adc $0, %edx
- add %eax, (rp)
- psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %edx
- movd %mm0, %eax
- movd 12(up), %mm1
- pmuludq %mm7, %mm1
- adc $0, %edx
- add %ebx, 4(rp)
- psrlq $32, %mm0
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- lea 16(up), up
- movd (up), %mm0
- adc $0, %edx
- add %eax, 8(rp)
-L(a3): psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %edx
- pmuludq %mm7, %mm0
- inc un
- movd 4(up), %mm1
- jnz L(la3)
-
- adc un, %edx C un is zero here
- add %ebx, 12(rp)
- movd %mm0, %eax
- pmuludq %mm7, %mm1
- lea 16(rp), rp
- psrlq $32, %mm0
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- adc un, %edx
- add %eax, (rp)
- psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %eax
- adc un, %eax
- add %ebx, 4(rp)
- adc un, %eax
- mov %eax, 8(rp)
-
- inc n
-
-C ================================================================
-
-L(ol2): lea 8(up,n,4), up
- movd -4(up), %mm7 C read next U invariant limb
- lea 12(rp,n,4), rp
- mov n, un
-
- movd (up), %mm0
- pmuludq %mm7, %mm0
- xor %edx, %edx
- sar $2, un
- movd 4(up), %mm1
- test un, un C clear carry
- movd %mm0, %eax
- pmuludq %mm7, %mm1
- inc un
- jnz L(a2)
- jmp L(re2)
-
-L(la2): adc $0, %edx
- add %ebx, 12(rp)
- movd %mm0, %eax
- pmuludq %mm7, %mm1
- lea 16(rp), rp
-L(a2): psrlq $32, %mm0
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- movd 8(up), %mm0
- pmuludq %mm7, %mm0
- adc $0, %edx
- add %eax, (rp)
- psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %edx
- movd %mm0, %eax
- movd 12(up), %mm1
- pmuludq %mm7, %mm1
- adc $0, %edx
- add %ebx, 4(rp)
- psrlq $32, %mm0
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- lea 16(up), up
- movd (up), %mm0
- adc $0, %edx
- add %eax, 8(rp)
- psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %edx
- pmuludq %mm7, %mm0
- inc un
- movd 4(up), %mm1
- jnz L(la2)
-
- adc un, %edx C un is zero here
- add %ebx, 12(rp)
- movd %mm0, %eax
- pmuludq %mm7, %mm1
- lea 16(rp), rp
- psrlq $32, %mm0
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- adc un, %edx
- add %eax, (rp)
- psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %eax
- adc un, %eax
- add %ebx, 4(rp)
- adc un, %eax
- mov %eax, 8(rp)
-
- inc n
- jmp L(ol1)
-
-C ================================================================
-L(re2): psrlq $32, %mm0
- movd (up), %mm7 C read next U invariant limb
- adc %edx, %eax
- movd %mm0, %edx
- movd %mm1, %ebx
- adc un, %edx
- add %eax, (rp)
- lea 4(rp), rp
- psrlq $32, %mm1
- adc %edx, %ebx
- movd %mm1, %eax
- movd 4(up), %mm1
- adc un, %eax
- add %ebx, (rp)
- pmuludq %mm7, %mm1
- adc un, %eax
- mov %eax, 4(rp)
- movd %mm1, %ebx
-
-L(re1): psrlq $32, %mm1
- add %ebx, 4(rp)
- movd %mm1, %eax
- adc un, %eax
- xor n, n C make n zeroness assumption below true
- mov %eax, 8(rp)
-
-L(done): C n is zero here
- mov 24(%esp), up
- mov 28(%esp), %eax
-
- movd (up), %mm0
- inc %eax
- pmuludq %mm0, %mm0
- lea 4(up), up
- mov 20(%esp), rp
- shr %eax
- movd %mm0, (rp)
- psrlq $32, %mm0
- lea -12(rp), rp
- mov %eax, 28(%esp)
- jnc L(odd)
-
- movd %mm0, %ebp
- movd (up), %mm0
- lea 8(rp), rp
- pmuludq %mm0, %mm0
- lea -4(up), up
- add 8(rp), %ebp
- movd %mm0, %edx
- adc 12(rp), %edx
- rcr n
- jmp L(ent)
-
-C ALIGN(16) C alignment seems irrelevant
-L(top): movd (up), %mm1
- adc n, n
- movd %mm0, %eax
- pmuludq %mm1, %mm1
- movd 4(up), %mm0
- adc (rp), %eax
- movd %mm1, %ebx
- pmuludq %mm0, %mm0
- psrlq $32, %mm1
- adc 4(rp), %ebx
- movd %mm1, %ebp
- movd %mm0, %edx
- adc 8(rp), %ebp
- adc 12(rp), %edx
- rcr n C FIXME: isn't this awfully slow on atom???
- adc %eax, (rp)
- adc %ebx, 4(rp)
-L(ent): lea 8(up), up
- adc %ebp, 8(rp)
- psrlq $32, %mm0
- adc %edx, 12(rp)
-L(odd): decl 28(%esp)
- lea 16(rp), rp
- jnz L(top)
-
-L(end): adc n, n
- movd %mm0, %eax
- adc n, %eax
- mov %eax, (rp)
-
-L(rtn): emms
- pop %ebp
- pop %ebx
- pop %esi
- pop %edi
- ret
-
-L(one): pmuludq %mm7, %mm7
- movq %mm7, -4(rp)
- emms
- pop %esi
- pop %edi
- ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/atom/sublsh1_n.asm b/gmp/mpn/x86/atom/sublsh1_n.asm
deleted file mode 100644
index d3e7e5b5cb..0000000000
--- a/gmp/mpn/x86/atom/sublsh1_n.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl Intel Atom mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_sublsh1_n_ip1)
-include_mpn(`x86/k7/sublsh1_n.asm')
diff --git a/gmp/mpn/x86/atom/sublsh2_n.asm b/gmp/mpn/x86/atom/sublsh2_n.asm
deleted file mode 100644
index 79405cf9f4..0000000000
--- a/gmp/mpn/x86/atom/sublsh2_n.asm
+++ /dev/null
@@ -1,57 +0,0 @@
-dnl Intel Atom mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).
-
-dnl Contributed to the GNU project by Marco Bodrato.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 2)
-define(RSH, 30)
-
-ifdef(`OPERATION_addlsh2_n', `
- define(M4_inst, adcl)
- define(M4_opp, subl)
- define(M4_function, mpn_addlsh2_n)
- define(M4_function_c, mpn_addlsh2_nc)
- define(M4_ip_function_c, mpn_addlsh2_nc_ip1)
- define(M4_ip_function, mpn_addlsh2_n_ip1)
-',`ifdef(`OPERATION_sublsh2_n', `
- define(M4_inst, sbbl)
- define(M4_opp, addl)
- define(M4_function, mpn_sublsh2_n)
- define(M4_function_c, mpn_sublsh2_nc)
- define(M4_ip_function_c, mpn_sublsh2_nc_ip1)
- define(M4_ip_function, mpn_sublsh2_n_ip1)
-',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_sublsh2_n
-')')')
-
-MULFUNC_PROLOGUE(mpn_sublsh2_n mpn_sublsh2_nc mpn_sublsh2_n_ip1 mpn_sublsh2_nc_ip1)
-
-include_mpn(`x86/atom/aorslshC_n.asm')
diff --git a/gmp/mpn/x86/bd1/gmp-mparam.h b/gmp/mpn/x86/bd1/gmp-mparam.h
deleted file mode 100644
index 7d80a1cb4c..0000000000
--- a/gmp/mpn/x86/bd1/gmp-mparam.h
+++ /dev/null
@@ -1,208 +0,0 @@
-/* AMD bd1 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2005, 2008-2010, 2014 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 3600 MHz Bulldozer Zambezi */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 3
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 16
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD 3
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 27
-
-#define MUL_TOOM22_THRESHOLD 32
-#define MUL_TOOM33_THRESHOLD 65
-#define MUL_TOOM44_THRESHOLD 154
-#define MUL_TOOM6H_THRESHOLD 230
-#define MUL_TOOM8H_THRESHOLD 354
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 110
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 93
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 102
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 130
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 48
-#define SQR_TOOM3_THRESHOLD 87
-#define SQR_TOOM4_THRESHOLD 204
-#define SQR_TOOM6_THRESHOLD 315
-#define SQR_TOOM8_THRESHOLD 430
-
-#define MULMID_TOOM42_THRESHOLD 48
-
-#define MULMOD_BNM1_THRESHOLD 21
-#define SQRMOD_BNM1_THRESHOLD 23
-
-#define MUL_FFT_MODF_THRESHOLD 840 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 840, 5}, { 28, 6}, { 15, 5}, { 33, 6}, \
- { 28, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \
- { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
- { 47, 7}, { 29, 8}, { 15, 7}, { 31, 6}, \
- { 63, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 51, 8}, { 27, 7}, { 55, 8}, \
- { 31, 7}, { 63, 8}, { 39, 7}, { 79, 9}, \
- { 23, 8}, { 55, 9}, { 31, 8}, { 67, 9}, \
- { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
- { 55,10}, { 31, 9}, { 63, 8}, { 127, 9}, \
- { 79,10}, { 47, 9}, { 103,11}, { 31,10}, \
- { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \
- { 95, 9}, { 191,10}, { 111,11}, { 63,10}, \
- { 127, 9}, { 255,10}, { 159,11}, { 95,10}, \
- { 191,12}, { 63,11}, { 127,10}, { 271,11}, \
- { 159,10}, { 319, 9}, { 639,11}, { 191,10}, \
- { 383, 9}, { 767,11}, { 223,12}, { 127,11}, \
- { 255,10}, { 511,11}, { 287,10}, { 607,11}, \
- { 319,10}, { 639,12}, { 191,11}, { 383,10}, \
- { 799,13}, { 127,12}, { 255,11}, { 543,10}, \
- { 1087,11}, { 607,12}, { 319,11}, { 671,10}, \
- { 1343,11}, { 735,10}, { 1471, 9}, { 2943,12}, \
- { 383,11}, { 799,10}, { 1599,11}, { 863,10}, \
- { 1727,12}, { 447,13}, { 255,12}, { 511,11}, \
- { 1087,12}, { 575,11}, { 1215,10}, { 2431,12}, \
- { 639,11}, { 1343,12}, { 703,11}, { 1471,10}, \
- { 2943,13}, { 383,12}, { 767,11}, { 1599,12}, \
- { 831,11}, { 1727,10}, { 3455,14}, { 255,13}, \
- { 511,12}, { 1087,11}, { 2239,12}, { 1215,11}, \
- { 2431,13}, { 639,12}, { 1471,11}, { 2943,13}, \
- { 767,12}, { 1727,11}, { 3455,13}, { 895,12}, \
- { 1919,11}, { 3839,12}, { 1983,11}, { 3967,10}, \
- { 7935,14}, { 511,13}, { 1023,12}, { 2239,13}, \
- { 1151,12}, { 2495,11}, { 4991,13}, { 1279,12}, \
- { 2623,13}, { 1407,12}, { 2943,14}, { 767,13}, \
- { 1535,12}, { 3071,13}, { 1663,12}, { 3455,13}, \
- { 1791,12}, { 3583,13}, { 1919,12}, { 3967,11}, \
- { 7935,15}, { 511,14}, { 1023,13}, { 2175,12}, \
- { 4479,13}, { 2431,12}, { 4991,14}, { 1279,13}, \
- { 2943,12}, { 6015,14}, { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 160
-#define MUL_FFT_THRESHOLD 7808
-
-#define SQR_FFT_MODF_THRESHOLD 690 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 690, 5}, { 28, 6}, { 15, 5}, { 32, 6}, \
- { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
- { 35, 7}, { 19, 6}, { 40, 7}, { 21, 6}, \
- { 43, 7}, { 23, 6}, { 47, 7}, { 35, 8}, \
- { 19, 7}, { 43, 8}, { 23, 7}, { 49, 8}, \
- { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \
- { 39, 7}, { 79, 8}, { 43, 9}, { 23, 8}, \
- { 55, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
- { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
- { 31, 9}, { 63, 8}, { 127, 9}, { 79,10}, \
- { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
- { 127,10}, { 79, 9}, { 167,10}, { 95, 9}, \
- { 191,10}, { 111,11}, { 63,10}, { 159,11}, \
- { 95,10}, { 191,12}, { 63,11}, { 127,10}, \
- { 271,11}, { 159,10}, { 319, 9}, { 639,11}, \
- { 191,10}, { 383,11}, { 223,12}, { 127,11}, \
- { 255,10}, { 511, 9}, { 1023,10}, { 543,11}, \
- { 287,10}, { 607, 9}, { 1215,11}, { 319,10}, \
- { 639,12}, { 191,11}, { 383,10}, { 799,11}, \
- { 415,13}, { 127,12}, { 255,11}, { 511,10}, \
- { 1023,11}, { 543,10}, { 1087,11}, { 607,10}, \
- { 1215,12}, { 319,11}, { 671,10}, { 1343,11}, \
- { 735,10}, { 1471,12}, { 383,11}, { 799,10}, \
- { 1599,11}, { 863,12}, { 447,11}, { 927,13}, \
- { 255,12}, { 511,11}, { 1055,10}, { 2111,11}, \
- { 1087,12}, { 575,11}, { 1215,10}, { 2431,12}, \
- { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \
- { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \
- { 1727,10}, { 3455,12}, { 895,14}, { 255,13}, \
- { 511,12}, { 1023,11}, { 2111,12}, { 1087,11}, \
- { 2239,10}, { 4479,12}, { 1215,11}, { 2431,13}, \
- { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \
- { 1727,11}, { 3455,13}, { 895,12}, { 1983,11}, \
- { 3967,14}, { 511,13}, { 1023,12}, { 2239,11}, \
- { 4479,13}, { 1151,12}, { 2495,11}, { 4991,10}, \
- { 9983,13}, { 1279,12}, { 2623,13}, { 1407,12}, \
- { 2943,14}, { 767,13}, { 1663,12}, { 3455,13}, \
- { 1791,12}, { 3583,13}, { 1919,12}, { 3967,15}, \
- { 511,14}, { 1023,13}, { 2175,12}, { 4479,13}, \
- { 2431,12}, { 4991,11}, { 9983,14}, { 1279,13}, \
- { 2687,12}, { 5375,13}, { 2943,12}, { 5887,14}, \
- { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 166
-#define SQR_FFT_THRESHOLD 6784
-
-#define MULLO_BASECASE_THRESHOLD 5
-#define MULLO_DC_THRESHOLD 31
-#define MULLO_MUL_N_THRESHOLD 14709
-
-#define DC_DIV_QR_THRESHOLD 53
-#define DC_DIVAPPR_Q_THRESHOLD 230
-#define DC_BDIV_QR_THRESHOLD 50
-#define DC_BDIV_Q_THRESHOLD 136
-
-#define INV_MULMOD_BNM1_THRESHOLD 78
-#define INV_NEWTON_THRESHOLD 202
-#define INV_APPR_THRESHOLD 202
-
-#define BINV_NEWTON_THRESHOLD 236
-#define REDC_1_TO_REDC_N_THRESHOLD 55
-
-#define MU_DIV_QR_THRESHOLD 1442
-#define MU_DIVAPPR_Q_THRESHOLD 1652
-#define MUPI_DIV_QR_THRESHOLD 81
-#define MU_BDIV_QR_THRESHOLD 1787
-#define MU_BDIV_Q_THRESHOLD 1685
-
-#define POWM_SEC_TABLE 1,22,194,376,692,2657
-
-#define MATRIX22_STRASSEN_THRESHOLD 21
-#define HGCD_THRESHOLD 85
-#define HGCD_APPR_THRESHOLD 50
-#define HGCD_REDUCE_THRESHOLD 4455
-#define GCD_DC_THRESHOLD 456
-#define GCDEXT_DC_THRESHOLD 345
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 17
-#define GET_STR_PRECOMPUTE_THRESHOLD 27
-#define SET_STR_DC_THRESHOLD 100
-#define SET_STR_PRECOMPUTE_THRESHOLD 960
-
-#define FAC_DSC_THRESHOLD 208
-#define FAC_ODD_THRESHOLD 26
diff --git a/gmp/mpn/x86/bd2/gmp-mparam.h b/gmp/mpn/x86/bd2/gmp-mparam.h
deleted file mode 100644
index c5a53f2f9f..0000000000
--- a/gmp/mpn/x86/bd2/gmp-mparam.h
+++ /dev/null
@@ -1,209 +0,0 @@
-/* AMD bd2 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2005, 2008-2010, 2014 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 4000 MHz Piledriver Vishera */
-/* FFT tuning limit = 40000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.8 */
-
-#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 3
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 19
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD 3
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 24
-
-#define MUL_TOOM22_THRESHOLD 30
-#define MUL_TOOM33_THRESHOLD 81
-#define MUL_TOOM44_THRESHOLD 153
-#define MUL_TOOM6H_THRESHOLD 222
-#define MUL_TOOM8H_THRESHOLD 357
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 99
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 96
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 130
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 38
-#define SQR_TOOM3_THRESHOLD 89
-#define SQR_TOOM4_THRESHOLD 196
-#define SQR_TOOM6_THRESHOLD 290
-#define SQR_TOOM8_THRESHOLD 454
-
-#define MULMID_TOOM42_THRESHOLD 68
-
-#define MULMOD_BNM1_THRESHOLD 19
-#define SQRMOD_BNM1_THRESHOLD 22
-
-#define MUL_FFT_MODF_THRESHOLD 636 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 636, 5}, { 27, 6}, { 27, 7}, { 15, 6}, \
- { 33, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
- { 39, 7}, { 23, 6}, { 47, 7}, { 29, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 49, 8}, { 27, 7}, { 55, 9}, \
- { 15, 8}, { 31, 7}, { 63, 8}, { 43, 9}, \
- { 23, 8}, { 55, 9}, { 31, 8}, { 67, 9}, \
- { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
- { 55,10}, { 31, 9}, { 63, 8}, { 127, 9}, \
- { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
- { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \
- { 95, 9}, { 191,11}, { 63,10}, { 127, 6}, \
- { 2111, 5}, { 4351, 6}, { 2239, 7}, { 1215, 9}, \
- { 311, 8}, { 639,10}, { 175, 8}, { 703,10}, \
- { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \
- { 511,10}, { 271, 9}, { 543,10}, { 287,11}, \
- { 159, 9}, { 671,11}, { 191,10}, { 383, 9}, \
- { 799,11}, { 223,12}, { 127,11}, { 255,10}, \
- { 543, 9}, { 1087,11}, { 287,10}, { 607,11}, \
- { 319,10}, { 671,12}, { 191,11}, { 383,10}, \
- { 799,11}, { 415,13}, { 127,12}, { 255,11}, \
- { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \
- { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \
- { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \
- { 863,12}, { 447,11}, { 895,13}, { 255,12}, \
- { 511,11}, { 1087,12}, { 575,11}, { 1215,10}, \
- { 2431,12}, { 639,11}, { 1343,12}, { 703,11}, \
- { 1471,13}, { 383,12}, { 767,11}, { 1599,12}, \
- { 831,11}, { 1727,10}, { 3455,12}, { 895,14}, \
- { 255,13}, { 511,12}, { 1023,11}, { 2047,12}, \
- { 1087,11}, { 2239,10}, { 4479,12}, { 1215,11}, \
- { 2431,13}, { 639,12}, { 1471,11}, { 2943,13}, \
- { 767,12}, { 1727,11}, { 3455,13}, { 895,12}, \
- { 1919,14}, { 511,13}, { 1023,12}, { 2239,11}, \
- { 4479,13}, { 1151,12}, { 2495,11}, { 4991,13}, \
- { 1279,12}, { 2623,13}, { 1407,12}, { 2943,14}, \
- { 767,13}, { 1535,12}, { 3071,13}, { 1663,12}, \
- { 3455,13}, { 1919,15}, { 511,14}, { 1023,13}, \
- { 2175,12}, { 4479,13}, { 2431,12}, { 4991,14}, \
- { 1279,13}, { 2943,12}, { 5887,14}, { 1535,13}, \
- { 3455,14}, { 1791,13}, { 3967,12}, { 7935,11}, \
- { 15871,15}, { 1023,14}, { 2047,13}, { 4479,14}, \
- { 2303,13}, { 8192,14}, { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 172
-#define MUL_FFT_THRESHOLD 6784
-
-#define SQR_FFT_MODF_THRESHOLD 606 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 606, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \
- { 29, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \
- { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
- { 47, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
- { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \
- { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \
- { 51, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
- { 79, 9}, { 47, 8}, { 95,10}, { 31, 9}, \
- { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
- { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \
- { 95, 9}, { 191,11}, { 63,10}, { 159,11}, \
- { 95,10}, { 191, 6}, { 3135, 5}, { 6399, 6}, \
- { 3455, 8}, { 895, 9}, { 479, 8}, { 991,10}, \
- { 255, 9}, { 575,11}, { 159, 9}, { 639,10}, \
- { 335, 8}, { 1343,10}, { 351,11}, { 191, 9}, \
- { 799,11}, { 223,12}, { 127,11}, { 255,10}, \
- { 543,11}, { 287,10}, { 607, 9}, { 1215,10}, \
- { 671,12}, { 191,11}, { 383,10}, { 767, 9}, \
- { 1535,10}, { 799,11}, { 415,10}, { 863,13}, \
- { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \
- { 543,10}, { 1087,11}, { 607,12}, { 319,11}, \
- { 671,10}, { 1343,11}, { 735,12}, { 383,11}, \
- { 799,10}, { 1599,11}, { 863,12}, { 447,11}, \
- { 927,13}, { 255,12}, { 511,11}, { 1087,12}, \
- { 575,11}, { 1215,12}, { 639,11}, { 1343,12}, \
- { 703,11}, { 1471,13}, { 383,12}, { 767,11}, \
- { 1599,12}, { 831,11}, { 1727,12}, { 895,11}, \
- { 1791,12}, { 959,14}, { 255,13}, { 511,12}, \
- { 1087,11}, { 2239,10}, { 4479,12}, { 1215,13}, \
- { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \
- { 1727,13}, { 895,12}, { 1919,14}, { 511,13}, \
- { 1023,12}, { 2239,11}, { 4479,13}, { 1151,12}, \
- { 2495,11}, { 4991,13}, { 1279,12}, { 2623,13}, \
- { 1407,12}, { 2943,14}, { 767,13}, { 1663,12}, \
- { 3455,13}, { 1791,12}, { 3583,13}, { 1919,15}, \
- { 511,14}, { 1023,13}, { 2175,12}, { 4479,13}, \
- { 2431,12}, { 4991,14}, { 1279,13}, { 2943,12}, \
- { 5887,14}, { 1535,13}, { 3455,14}, { 1791,13}, \
- { 3967,15}, { 1023,14}, { 2047,13}, { 4479,14}, \
- { 2303,13}, { 8192,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 160
-#define SQR_FFT_THRESHOLD 5760
-
-#define MULLO_BASECASE_THRESHOLD 3
-#define MULLO_DC_THRESHOLD 34
-#define MULLO_MUL_N_THRESHOLD 13463
-
-#define DC_DIV_QR_THRESHOLD 67
-#define DC_DIVAPPR_Q_THRESHOLD 196
-#define DC_BDIV_QR_THRESHOLD 67
-#define DC_BDIV_Q_THRESHOLD 112
-
-#define INV_MULMOD_BNM1_THRESHOLD 70
-#define INV_NEWTON_THRESHOLD 262
-#define INV_APPR_THRESHOLD 222
-
-#define BINV_NEWTON_THRESHOLD 288
-#define REDC_1_TO_REDC_N_THRESHOLD 67
-
-#define MU_DIV_QR_THRESHOLD 1718
-#define MU_DIVAPPR_Q_THRESHOLD 1652
-#define MUPI_DIV_QR_THRESHOLD 122
-#define MU_BDIV_QR_THRESHOLD 1387
-#define MU_BDIV_Q_THRESHOLD 1528
-
-#define POWM_SEC_TABLE 1,16,69,508,1378,2657,2825
-
-#define MATRIX22_STRASSEN_THRESHOLD 19
-#define HGCD_THRESHOLD 61
-#define HGCD_APPR_THRESHOLD 50
-#define HGCD_REDUCE_THRESHOLD 3389
-#define GCD_DC_THRESHOLD 492
-#define GCDEXT_DC_THRESHOLD 345
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 9
-#define GET_STR_PRECOMPUTE_THRESHOLD 21
-#define SET_STR_DC_THRESHOLD 189
-#define SET_STR_PRECOMPUTE_THRESHOLD 541
-
-#define FAC_DSC_THRESHOLD 141
-#define FAC_ODD_THRESHOLD 29
diff --git a/gmp/mpn/x86/bdiv_dbm1c.asm b/gmp/mpn/x86/bdiv_dbm1c.asm
index 0288c475cd..dbee28fd94 100644
--- a/gmp/mpn/x86/bdiv_dbm1c.asm
+++ b/gmp/mpn/x86/bdiv_dbm1c.asm
@@ -1,51 +1,32 @@
dnl x86 mpn_bdiv_dbm1.
-dnl Copyright 2008, 2011 Free Software Foundation, Inc.
+dnl Copyright 2008 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
-include(`../config.m4')
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-C cycles/limb
-C P5
-C P6 model 0-8,10-12)
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 5.1
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood) 13.67
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom
-C AMD K6
-C AMD K7 3.5
-C AMD K8
-C AMD K10
+include(`../config.m4')
+C cycles/limb
+C K7: 3.5
+C P4 m0: ?
+C P4 m1: ?
+C P4 m2: 13.67
+C P4 m3: ?
+C P4 m4: ?
+C P6-13: 5.1
C TODO
C * Optimize for more x86 processors
@@ -76,17 +57,18 @@ PROLOGUE(mpn_bdiv_dbm1c)
cmp $2, %eax
jc L(b1)
jz L(b2)
-
-L(b3): lea -8(%esi), %esi
- lea 8(%edi), %edi
- add $-3, %ebp
- jmp L(3)
+ jmp L(b3)
L(b0): mov 4(%esi), %eax
lea -4(%esi), %esi
lea 12(%edi), %edi
add $-4, %ebp
jmp L(0)
+L(b3):
+ lea -8(%esi), %esi
+ lea 8(%edi), %edi
+ add $-3, %ebp
+ jmp L(3)
L(b2): mov 4(%esi), %eax
lea 4(%esi), %esi
@@ -95,7 +77,8 @@ L(b2): mov 4(%esi), %eax
jmp L(2)
ALIGN(8)
-L(top): mov 4(%esi), %eax
+L(top):
+ mov 4(%esi), %eax
mul %ecx
lea 16(%edi), %edi
sub %eax, %ebx
diff --git a/gmp/mpn/x86/bdiv_q_1.asm b/gmp/mpn/x86/bdiv_q_1.asm
deleted file mode 100644
index 825cd296a1..0000000000
--- a/gmp/mpn/x86/bdiv_q_1.asm
+++ /dev/null
@@ -1,208 +0,0 @@
-dnl x86 mpn_bdiv_q_1 -- mpn by limb exact division.
-
-dnl Rearranged from mpn/x86/dive_1.asm by Marco Bodrato.
-
-dnl Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C cycles/limb
-C P54 30.0
-C P55 29.0
-C P6 13.0 odd divisor, 12.0 even (strangely)
-C K6 14.0
-C K7 12.0
-C P4 42.0
-
-MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
-
-defframe(PARAM_SHIFT, 24)
-defframe(PARAM_INVERSE,20)
-defframe(PARAM_DIVISOR,16)
-defframe(PARAM_SIZE, 12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-dnl re-use parameter space
-define(VAR_INVERSE,`PARAM_SRC')
-
- TEXT
-
-C mp_limb_t
-C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
-C mp_limb_t inverse, int shift)
-
- ALIGN(16)
-PROLOGUE(mpn_pi1_bdiv_q_1)
-deflit(`FRAME',0)
-
- movl PARAM_SHIFT, %ecx
- pushl %ebp FRAME_pushl()
-
- movl PARAM_INVERSE, %eax
- movl PARAM_SIZE, %ebp
- pushl %ebx FRAME_pushl()
-L(common):
- pushl %edi FRAME_pushl()
- pushl %esi FRAME_pushl()
-
- movl PARAM_SRC, %esi
- movl PARAM_DST, %edi
-
- leal (%esi,%ebp,4), %esi C src end
- leal (%edi,%ebp,4), %edi C dst end
- negl %ebp C -size
-
- movl %eax, VAR_INVERSE
- movl (%esi,%ebp,4), %eax C src[0]
-
- xorl %ebx, %ebx
- xorl %edx, %edx
-
- incl %ebp
- jz L(one)
-
- movl (%esi,%ebp,4), %edx C src[1]
-
- shrdl( %cl, %edx, %eax)
-
- movl VAR_INVERSE, %edx
- jmp L(entry)
-
-
- ALIGN(8)
- nop C k6 code alignment
- nop
-L(top):
- C eax q
- C ebx carry bit, 0 or -1
- C ecx shift
- C edx carry limb
- C esi src end
- C edi dst end
- C ebp counter, limbs, negative
-
- movl -4(%esi,%ebp,4), %eax
- subl %ebx, %edx C accumulate carry bit
-
- movl (%esi,%ebp,4), %ebx
-
- shrdl( %cl, %ebx, %eax)
-
- subl %edx, %eax C apply carry limb
- movl VAR_INVERSE, %edx
-
- sbbl %ebx, %ebx
-
-L(entry):
- imull %edx, %eax
-
- movl %eax, -4(%edi,%ebp,4)
- movl PARAM_DIVISOR, %edx
-
- mull %edx
-
- incl %ebp
- jnz L(top)
-
-
- movl -4(%esi), %eax C src high limb
-L(one):
- shrl %cl, %eax
- popl %esi FRAME_popl()
-
- addl %ebx, %eax C apply carry bit
-
- subl %edx, %eax C apply carry limb
-
- imull VAR_INVERSE, %eax
-
- movl %eax, -4(%edi)
-
- popl %edi
- popl %ebx
- popl %ebp
-
- ret
-
-EPILOGUE()
-
-C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C mp_limb_t divisor);
-C
-
- ALIGN(16)
-PROLOGUE(mpn_bdiv_q_1)
-deflit(`FRAME',0)
-
- movl PARAM_DIVISOR, %eax
- pushl %ebp FRAME_pushl()
-
- movl $-1, %ecx C shift count
- movl PARAM_SIZE, %ebp
-
- pushl %ebx FRAME_pushl()
-
-L(strip_twos):
- incl %ecx
-
- shrl %eax
- jnc L(strip_twos)
-
- leal 1(%eax,%eax), %ebx C d without twos
- andl $127, %eax C d/2, 7 bits
-
-ifdef(`PIC',`
- LEA( binvert_limb_table, %edx)
- movzbl (%eax,%edx), %eax C inv 8 bits
-',`
- movzbl binvert_limb_table(%eax), %eax C inv 8 bits
-')
-
- leal (%eax,%eax), %edx C 2*inv
- movl %ebx, PARAM_DIVISOR C d without twos
- imull %eax, %eax C inv*inv
- imull %ebx, %eax C inv*inv*d
- subl %eax, %edx C inv = 2*inv - inv*inv*d
-
- leal (%edx,%edx), %eax C 2*inv
- imull %edx, %edx C inv*inv
- imull %ebx, %edx C inv*inv*d
- subl %edx, %eax C inv = 2*inv - inv*inv*d
-
- ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
- pushl %eax FRAME_pushl()
- imull PARAM_DIVISOR, %eax
- cmpl $1, %eax
- popl %eax FRAME_popl()')
-
- jmp L(common)
-EPILOGUE()
-
diff --git a/gmp/mpn/x86/bobcat/gmp-mparam.h b/gmp/mpn/x86/bobcat/gmp-mparam.h
deleted file mode 100644
index 198081f9fd..0000000000
--- a/gmp/mpn/x86/bobcat/gmp-mparam.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/* x86/bobcat gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 1600 MHz AMD Bobcat Zacate E-350 */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 12
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 16
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 16
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD 2
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 40
-
-#define MUL_TOOM22_THRESHOLD 28
-#define MUL_TOOM33_THRESHOLD 90
-#define MUL_TOOM44_THRESHOLD 154
-#define MUL_TOOM6H_THRESHOLD 270
-#define MUL_TOOM8H_THRESHOLD 490
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 107
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 95
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 110
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 130
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 38
-#define SQR_TOOM3_THRESHOLD 121
-#define SQR_TOOM4_THRESHOLD 212
-#define SQR_TOOM6_THRESHOLD 303
-#define SQR_TOOM8_THRESHOLD 454
-
-#define MULMID_TOOM42_THRESHOLD 74
-
-#define MULMOD_BNM1_THRESHOLD 18
-#define SQRMOD_BNM1_THRESHOLD 23
-
-#define MUL_FFT_MODF_THRESHOLD 660 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 660, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
- { 27, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
- { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
- { 47, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \
- { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \
- { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \
- { 55, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
- { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
- { 31, 9}, { 63, 8}, { 127, 9}, { 79,10}, \
- { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \
- { 135,10}, { 79, 9}, { 159,10}, { 95, 9}, \
- { 191,10}, { 111,11}, { 63,10}, { 159,11}, \
- { 95,10}, { 191,12}, { 63,11}, { 127,10}, \
- { 255, 9}, { 511,10}, { 271, 9}, { 543,11}, \
- { 159, 9}, { 639,10}, { 335, 9}, { 671,11}, \
- { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \
- { 799,11}, { 223,12}, { 127,11}, { 255,10}, \
- { 543, 9}, { 1087,11}, { 287,10}, { 607, 9}, \
- { 1215,10}, { 671,12}, { 191,11}, { 383,10}, \
- { 799, 9}, { 1599,11}, { 415,13}, { 127,12}, \
- { 255,11}, { 543,10}, { 1087,11}, { 607,10}, \
- { 1215,11}, { 671,10}, { 1343,11}, { 735,10}, \
- { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \
- { 863,12}, { 447,11}, { 991,13}, { 255,12}, \
- { 511,11}, { 1087,12}, { 575,11}, { 1215,12}, \
- { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \
- { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \
- { 1727,12}, { 959,14}, { 255,13}, { 511,12}, \
- { 1215,13}, { 639,12}, { 1471,13}, { 767,12}, \
- { 1727,13}, { 895,12}, { 1919,14}, { 511,13}, \
- { 1023,12}, { 2111,13}, { 1151,12}, { 2431,13}, \
- { 1407,14}, { 767,13}, { 1663,12}, { 3455,13}, \
- { 1919,15}, { 511,14}, { 1023,13}, { 2175,12}, \
- { 4479,13}, { 2431,14}, { 1279,13}, { 2943,12}, \
- { 5887,14}, { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 139
-#define MUL_FFT_THRESHOLD 7552
-
-#define SQR_FFT_MODF_THRESHOLD 606 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 606, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \
- { 28, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
- { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
- { 47, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
- { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \
- { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \
- { 55, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
- { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
- { 31, 9}, { 79,10}, { 47, 9}, { 103,11}, \
- { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
- { 159,10}, { 95, 9}, { 191,11}, { 63,10}, \
- { 159,11}, { 95,10}, { 191,12}, { 63,11}, \
- { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
- { 543, 8}, { 1087,11}, { 159,10}, { 319, 9}, \
- { 639,10}, { 335, 9}, { 671, 8}, { 1343,10}, \
- { 351,11}, { 191,10}, { 383, 9}, { 767,10}, \
- { 399, 9}, { 799,10}, { 415, 9}, { 831,12}, \
- { 127,11}, { 255,10}, { 511, 9}, { 1023,10}, \
- { 543, 9}, { 1087,11}, { 287,10}, { 607, 9}, \
- { 1215,11}, { 319,10}, { 671, 9}, { 1343,12}, \
- { 191,11}, { 383,10}, { 799,11}, { 415,10}, \
- { 831,13}, { 127,12}, { 255,11}, { 511,10}, \
- { 1023,11}, { 543,10}, { 1087,11}, { 607,10}, \
- { 1215,12}, { 319,11}, { 671,10}, { 1343,11}, \
- { 735,10}, { 1471,12}, { 383,11}, { 799,10}, \
- { 1599,11}, { 863,12}, { 447,11}, { 991,13}, \
- { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
- { 1215,12}, { 639,11}, { 1343,12}, { 703,11}, \
- { 1471,13}, { 383,12}, { 767,11}, { 1599,12}, \
- { 831,11}, { 1727,12}, { 959,14}, { 255,13}, \
- { 511,12}, { 1215,13}, { 639,12}, { 1471,13}, \
- { 767,12}, { 1727,13}, { 895,12}, { 1983,14}, \
- { 511,13}, { 1023,12}, { 2111,13}, { 1151,12}, \
- { 2431,13}, { 1407,14}, { 767,13}, { 1663,12}, \
- { 3455,13}, { 1919,15}, { 511,14}, { 1023,13}, \
- { 2175,12}, { 4479,13}, { 2431,14}, { 1279,13}, \
- { 2943,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 147
-#define SQR_FFT_THRESHOLD 5760
-
-#define MULLO_BASECASE_THRESHOLD 5
-#define MULLO_DC_THRESHOLD 45
-#define MULLO_MUL_N_THRESHOLD 14281
-
-#define DC_DIV_QR_THRESHOLD 71
-#define DC_DIVAPPR_Q_THRESHOLD 238
-#define DC_BDIV_QR_THRESHOLD 67
-#define DC_BDIV_Q_THRESHOLD 151
-
-#define INV_MULMOD_BNM1_THRESHOLD 66
-#define INV_NEWTON_THRESHOLD 228
-#define INV_APPR_THRESHOLD 222
-
-#define BINV_NEWTON_THRESHOLD 270
-#define REDC_1_TO_REDC_N_THRESHOLD 71
-
-#define MU_DIV_QR_THRESHOLD 1718
-#define MU_DIVAPPR_Q_THRESHOLD 1718
-#define MUPI_DIV_QR_THRESHOLD 91
-#define MU_BDIV_QR_THRESHOLD 1589
-#define MU_BDIV_Q_THRESHOLD 1718
-
-#define POWM_SEC_TABLE 1,16,96,416,1185
-
-#define MATRIX22_STRASSEN_THRESHOLD 17
-#define HGCD_THRESHOLD 88
-#define HGCD_APPR_THRESHOLD 137
-#define HGCD_REDUCE_THRESHOLD 3664
-#define GCD_DC_THRESHOLD 465
-#define GCDEXT_DC_THRESHOLD 345
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 18
-#define GET_STR_PRECOMPUTE_THRESHOLD 34
-#define SET_STR_DC_THRESHOLD 270
-#define SET_STR_PRECOMPUTE_THRESHOLD 828
-
-#define FAC_DSC_THRESHOLD 256
-#define FAC_ODD_THRESHOLD 34
diff --git a/gmp/mpn/x86/cnd_aors_n.asm b/gmp/mpn/x86/cnd_aors_n.asm
deleted file mode 100644
index 74f4917ecc..0000000000
--- a/gmp/mpn/x86/cnd_aors_n.asm
+++ /dev/null
@@ -1,124 +0,0 @@
-dnl X86 mpn_cnd_add_n, mpn_cnd_sub_n
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C P5 ?
-C P6 model 0-8,10-12 ?
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) 5.4
-C P4 model 0-1 (Willamette) ?
-C P4 model 2 (Northwood) 14.5
-C P4 model 3-4 (Prescott) 21
-C Intel atom 11
-C AMD K6 ?
-C AMD K7 3.4
-C AMD K8 ?
-
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`vp', `%ebp')
-define(`n', `%ecx')
-define(`cnd', `20(%esp)')
-define(`cy', `%edx')
-
-ifdef(`OPERATION_cnd_add_n', `
- define(ADDSUB, add)
- define(ADCSBB, adc)
- define(func, mpn_cnd_add_n)')
-ifdef(`OPERATION_cnd_sub_n', `
- define(ADDSUB, sub)
- define(ADCSBB, sbb)
- define(func, mpn_cnd_sub_n)')
-
-MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(func)
- add $-16, %esp
- mov %ebp, (%esp)
- mov %ebx, 4(%esp)
- mov %esi, 8(%esp)
- mov %edi, 12(%esp)
-
- C make cnd into a full mask
- mov cnd, %eax
- neg %eax
- sbb %eax, %eax
- mov %eax, cnd
-
- C load parameters into registers
- mov 24(%esp), rp
- mov 28(%esp), up
- mov 32(%esp), vp
- mov 36(%esp), n
-
- mov (vp), %eax
- mov (up), %ebx
-
- C put operand pointers just beyond their last limb
- lea (vp,n,4), vp
- lea (up,n,4), up
- lea -4(rp,n,4), rp
- neg n
-
- and cnd, %eax
- ADDSUB %eax, %ebx
- sbb cy, cy
- inc n
- je L(end)
-
- ALIGN(16)
-L(top): mov (vp,n,4), %eax
- and cnd, %eax
- mov %ebx, (rp,n,4)
- mov (up,n,4), %ebx
- add cy, cy
- ADCSBB %eax, %ebx
- sbb cy, cy
- inc n
- jne L(top)
-
-L(end): mov %ebx, (rp)
- xor %eax, %eax
- sub cy, %eax
-
- mov (%esp), %ebp
- mov 4(%esp), %ebx
- mov 8(%esp), %esi
- mov 12(%esp), %edi
- add $16, %esp
- ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/copyd.asm b/gmp/mpn/x86/copyd.asm
index 51fa19568b..4ce3bbbc69 100644
--- a/gmp/mpn/x86/copyd.asm
+++ b/gmp/mpn/x86/copyd.asm
@@ -1,42 +1,31 @@
dnl x86 mpn_copyd -- copy limb vector, decrementing.
-dnl Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb startup (approx)
-C P5 1.0 40
-C P6 2.4 70
-C K6 1.0 55
-C K7 1.3 75
-C P4 2.6 175
+C P5: 1.0 40
+C P6 2.4 70
+C K6 1.0 55
+C K7: 1.3 75
+C P4: 2.6 175
C
C (Startup time includes some function call overheads.)
diff --git a/gmp/mpn/x86/copyi.asm b/gmp/mpn/x86/copyi.asm
index f6b0354b4f..c6bbaeee65 100644
--- a/gmp/mpn/x86/copyi.asm
+++ b/gmp/mpn/x86/copyi.asm
@@ -1,42 +1,31 @@
dnl x86 mpn_copyi -- copy limb vector, incrementing.
-dnl Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb startup (approx)
-C P5 1.0 35
-C P6 0.75 45
-C K6 1.0 30
-C K7 1.3 65
-C P4 1.0 120
+C P5: 1.0 35
+C P6 0.75 45
+C K6 1.0 30
+C K7: 1.3 65
+C P4: 1.0 120
C
C (Startup time includes some function call overheads.)
diff --git a/gmp/mpn/x86/core2/gmp-mparam.h b/gmp/mpn/x86/core2/gmp-mparam.h
deleted file mode 100644
index b370eb5877..0000000000
--- a/gmp/mpn/x86/core2/gmp-mparam.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/* x86/core2 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2133 MHz Core 2 (65nm) */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-14, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD 4
-#define MOD_1_UNNORM_THRESHOLD 4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD 3
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 19
-
-#define MUL_TOOM22_THRESHOLD 26
-#define MUL_TOOM33_THRESHOLD 90
-#define MUL_TOOM44_THRESHOLD 144
-#define MUL_TOOM6H_THRESHOLD 286
-#define MUL_TOOM8H_THRESHOLD 430
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 93
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 140
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 89
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 102
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 136
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 34
-#define SQR_TOOM3_THRESHOLD 114
-#define SQR_TOOM4_THRESHOLD 178
-#define SQR_TOOM6_THRESHOLD 262
-#define SQR_TOOM8_THRESHOLD 357
-
-#define MULMID_TOOM42_THRESHOLD 66
-
-#define MULMOD_BNM1_THRESHOLD 15
-#define SQRMOD_BNM1_THRESHOLD 21
-
-#define MUL_FFT_MODF_THRESHOLD 600 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 600, 5}, { 25, 6}, { 13, 5}, { 28, 6}, \
- { 25, 7}, { 13, 6}, { 29, 7}, { 15, 6}, \
- { 33, 7}, { 17, 6}, { 36, 7}, { 19, 6}, \
- { 39, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
- { 19, 7}, { 43, 8}, { 23, 7}, { 47, 8}, \
- { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
- { 43, 9}, { 23, 8}, { 51,10}, { 15, 9}, \
- { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \
- { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \
- { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
- { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \
- { 95, 9}, { 191,11}, { 63,10}, { 159,11}, \
- { 95,10}, { 191,12}, { 63,11}, { 127,10}, \
- { 271, 9}, { 543, 8}, { 1087,11}, { 159,10}, \
- { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \
- { 351,11}, { 191,10}, { 383, 9}, { 767,10}, \
- { 399, 9}, { 799,11}, { 223,12}, { 127,11}, \
- { 255,10}, { 543, 9}, { 1087,11}, { 287,10}, \
- { 607, 9}, { 1215,11}, { 319,10}, { 671,11}, \
- { 351,12}, { 191,11}, { 383,10}, { 799, 9}, \
- { 1599,13}, { 127,12}, { 255,11}, { 543,10}, \
- { 1087,11}, { 607,10}, { 1215,12}, { 319,11}, \
- { 671,10}, { 1343,11}, { 735,12}, { 383,11}, \
- { 799,10}, { 1599,11}, { 863,10}, { 1727,12}, \
- { 447,11}, { 959,13}, { 255,12}, { 511,11}, \
- { 1087,12}, { 575,11}, { 1215,12}, { 639,11}, \
- { 1343,12}, { 703,13}, { 383,12}, { 767,11}, \
- { 1599,12}, { 831,11}, { 1727,12}, { 959,14}, \
- { 255,13}, { 511,12}, { 1087,11}, { 2239,10}, \
- { 4479,12}, { 1215,13}, { 639,12}, { 1471,11}, \
- { 2943,13}, { 767,12}, { 1727,13}, { 895,12}, \
- { 1983,14}, { 511,13}, { 1023,12}, { 2239,11}, \
- { 4479,13}, { 1151,12}, { 2495,13}, { 1279,12}, \
- { 2623,13}, { 1407,12}, { 2815,14}, { 767,13}, \
- { 1663,12}, { 3455,13}, { 1919,15}, { 511,14}, \
- { 1023,13}, { 2175,12}, { 4479,13}, { 2431,14}, \
- { 1279,13}, { 2943,12}, { 5887,14}, { 16384,15}, \
- { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 149
-#define MUL_FFT_THRESHOLD 6784
-
-#define SQR_FFT_MODF_THRESHOLD 500 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 500, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \
- { 28, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \
- { 35, 7}, { 19, 6}, { 39, 7}, { 29, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \
- { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \
- { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
- { 39, 8}, { 79, 9}, { 55,10}, { 31, 9}, \
- { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
- { 63, 9}, { 127,10}, { 79, 9}, { 159,10}, \
- { 95,11}, { 63,10}, { 143, 9}, { 287,10}, \
- { 159,11}, { 95,12}, { 63,11}, { 127,10}, \
- { 271, 9}, { 543,10}, { 287,11}, { 159,10}, \
- { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \
- { 351, 9}, { 703,11}, { 191,10}, { 383, 9}, \
- { 767,10}, { 399, 9}, { 799,10}, { 415, 9}, \
- { 831,10}, { 431,11}, { 223,12}, { 127,11}, \
- { 255,10}, { 543, 9}, { 1087,11}, { 287,10}, \
- { 607,11}, { 319,10}, { 671,11}, { 351,10}, \
- { 703,12}, { 191,11}, { 383,10}, { 799,11}, \
- { 415,10}, { 863,13}, { 127,12}, { 255,11}, \
- { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \
- { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \
- { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \
- { 863,12}, { 447,11}, { 959,13}, { 255,12}, \
- { 511,11}, { 1087,12}, { 575,11}, { 1215,12}, \
- { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \
- { 383,12}, { 831,11}, { 1727,12}, { 959,14}, \
- { 255,13}, { 511,12}, { 1087,11}, { 2239,12}, \
- { 1215,13}, { 639,12}, { 1471,11}, { 2943,13}, \
- { 767,12}, { 1727,13}, { 895,12}, { 1983,14}, \
- { 511,13}, { 1023,12}, { 2239,13}, { 1151,12}, \
- { 2495,13}, { 1407,12}, { 2943,14}, { 767,13}, \
- { 1663,12}, { 3455,13}, { 1919,15}, { 511,14}, \
- { 1023,13}, { 2175,12}, { 4479,13}, { 2431,14}, \
- { 1279,13}, { 2943,12}, { 5887,14}, { 16384,15}, \
- { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 145
-#define SQR_FFT_THRESHOLD 5312
-
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 29
-#define MULLO_MUL_N_THRESHOLD 13463
-
-#define DC_DIV_QR_THRESHOLD 21
-#define DC_DIVAPPR_Q_THRESHOLD 50
-#define DC_BDIV_QR_THRESHOLD 79
-#define DC_BDIV_Q_THRESHOLD 174
-
-#define INV_MULMOD_BNM1_THRESHOLD 50
-#define INV_NEWTON_THRESHOLD 39
-#define INV_APPR_THRESHOLD 37
-
-#define BINV_NEWTON_THRESHOLD 318
-#define REDC_1_TO_REDC_N_THRESHOLD 87
-
-#define MU_DIV_QR_THRESHOLD 1099
-#define MU_DIVAPPR_Q_THRESHOLD 792
-#define MUPI_DIV_QR_THRESHOLD 0 /* always */
-#define MU_BDIV_QR_THRESHOLD 1442
-#define MU_BDIV_Q_THRESHOLD 1589
-
-#define POWM_SEC_TABLE 3,32,95,480,597,2657
-
-#define MATRIX22_STRASSEN_THRESHOLD 21
-#define HGCD_THRESHOLD 83
-#define HGCD_APPR_THRESHOLD 159
-#define HGCD_REDUCE_THRESHOLD 3389
-#define GCD_DC_THRESHOLD 379
-#define GCDEXT_DC_THRESHOLD 309
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 10
-#define GET_STR_PRECOMPUTE_THRESHOLD 25
-#define SET_STR_DC_THRESHOLD 442
-#define SET_STR_PRECOMPUTE_THRESHOLD 1104
-
-#define FAC_DSC_THRESHOLD 155
-#define FAC_ODD_THRESHOLD 34
diff --git a/gmp/mpn/x86/coreihwl/gmp-mparam.h b/gmp/mpn/x86/coreihwl/gmp-mparam.h
deleted file mode 100644
index e2b289cc3c..0000000000
--- a/gmp/mpn/x86/coreihwl/gmp-mparam.h
+++ /dev/null
@@ -1,210 +0,0 @@
-/* x86/coreihwl gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2900 MHz Core i5 Haswell */
-/* FFT tuning limit = 40000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD 16
-#define MOD_1_UNNORM_THRESHOLD 13
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 11
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 9
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 5
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD 15
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 19
-
-#define MUL_TOOM22_THRESHOLD 27
-#define MUL_TOOM33_THRESHOLD 90
-#define MUL_TOOM44_THRESHOLD 218
-#define MUL_TOOM6H_THRESHOLD 318
-#define MUL_TOOM8H_THRESHOLD 490
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 153
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 105
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 101
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 130
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 44
-#define SQR_TOOM3_THRESHOLD 137
-#define SQR_TOOM4_THRESHOLD 242
-#define SQR_TOOM6_THRESHOLD 351
-#define SQR_TOOM8_THRESHOLD 597
-
-#define MULMID_TOOM42_THRESHOLD 98
-
-#define MULMOD_BNM1_THRESHOLD 17
-#define SQRMOD_BNM1_THRESHOLD 21
-
-#define MUL_FFT_MODF_THRESHOLD 630 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 630, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
- { 15, 5}, { 31, 6}, { 28, 7}, { 15, 6}, \
- { 33, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
- { 39, 7}, { 23, 6}, { 47, 7}, { 27, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \
- { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \
- { 55,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
- { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
- { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \
- { 103,11}, { 31,10}, { 63, 9}, { 135,10}, \
- { 79, 9}, { 159,10}, { 95, 9}, { 191,10}, \
- { 111,11}, { 63,10}, { 159,11}, { 95,10}, \
- { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \
- { 511,10}, { 271, 9}, { 543,11}, { 159,10}, \
- { 319, 9}, { 639,10}, { 335, 9}, { 671,11}, \
- { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \
- { 799,11}, { 223,12}, { 127,11}, { 255,10}, \
- { 543,11}, { 287,10}, { 607, 9}, { 1215,11}, \
- { 319,10}, { 671,12}, { 191,11}, { 383,10}, \
- { 799,11}, { 415,13}, { 127,12}, { 255,11}, \
- { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \
- { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \
- { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \
- { 863,10}, { 1727,12}, { 447,11}, { 959,13}, \
- { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
- { 1215,10}, { 2431,12}, { 639,11}, { 1343,12}, \
- { 703,11}, { 1471,13}, { 383,12}, { 767,11}, \
- { 1599,12}, { 831,11}, { 1727,10}, { 3455,12}, \
- { 959,14}, { 255,13}, { 511,12}, { 1087,11}, \
- { 2239,12}, { 1215,11}, { 2431,13}, { 639,12}, \
- { 1471,11}, { 2943,10}, { 5887,13}, { 767,12}, \
- { 1727,11}, { 3455,13}, { 895,12}, { 1983,14}, \
- { 511,13}, { 1023,12}, { 2239,13}, { 1151,12}, \
- { 2495,13}, { 1279,12}, { 2559,13}, { 1407,12}, \
- { 2943,11}, { 5887,14}, { 767,13}, { 1535,12}, \
- { 3071,13}, { 1663,12}, { 3455,13}, { 1919,15}, \
- { 511,14}, { 1023,13}, { 2175,12}, { 4479,13}, \
- { 2431,14}, { 1279,13}, { 2943,12}, { 5887,14}, \
- { 1535,13}, { 3455,14}, { 1791,13}, { 3967,12}, \
- { 7935,15}, { 1023,14}, { 2047,13}, { 4479,14}, \
- { 2303,13}, { 8192,14}, { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 168
-#define MUL_FFT_THRESHOLD 7424
-
-#define SQR_FFT_MODF_THRESHOLD 530 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 530, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \
- { 28, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
- { 36, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
- { 47, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
- { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \
- { 27, 7}, { 55, 9}, { 15, 8}, { 31, 7}, \
- { 63, 8}, { 39, 9}, { 23, 8}, { 55,10}, \
- { 15, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
- { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
- { 31, 9}, { 79,10}, { 47, 9}, { 95,11}, \
- { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
- { 159,10}, { 95, 9}, { 191,10}, { 111,11}, \
- { 63,10}, { 159,11}, { 95,10}, { 191,12}, \
- { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
- { 271, 9}, { 543,11}, { 159,10}, { 319, 9}, \
- { 639,10}, { 335, 9}, { 671,10}, { 351,11}, \
- { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \
- { 799,12}, { 127,11}, { 255,10}, { 511, 9}, \
- { 1023,10}, { 543,11}, { 287,10}, { 607,11}, \
- { 319,10}, { 671,11}, { 351,12}, { 191,11}, \
- { 383,10}, { 799,11}, { 415,10}, { 831,13}, \
- { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \
- { 543,10}, { 1087,11}, { 607,12}, { 319,11}, \
- { 671,10}, { 1343,11}, { 735,10}, { 1471,12}, \
- { 383,11}, { 799,10}, { 1599,11}, { 863,10}, \
- { 1727,12}, { 447,11}, { 991,13}, { 255,12}, \
- { 511,11}, { 1087,12}, { 575,11}, { 1215,12}, \
- { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \
- { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \
- { 1727,12}, { 959,11}, { 1983,14}, { 255,13}, \
- { 511,12}, { 1023,11}, { 2047,12}, { 1087,11}, \
- { 2239,12}, { 1215,11}, { 2431,13}, { 639,12}, \
- { 1471,11}, { 2943,13}, { 767,12}, { 1727,13}, \
- { 895,12}, { 1983,14}, { 511,13}, { 1023,12}, \
- { 2239,13}, { 1151,12}, { 2495,13}, { 1279,12}, \
- { 2623,13}, { 1407,12}, { 2943,14}, { 767,13}, \
- { 1535,12}, { 3071,13}, { 1663,12}, { 3455,13}, \
- { 1919,12}, { 3839,15}, { 511,14}, { 1023,13}, \
- { 2175,12}, { 4479,13}, { 2431,12}, { 4863,14}, \
- { 1279,13}, { 2943,12}, { 5887,14}, { 1535,13}, \
- { 3455,14}, { 1791,13}, { 3967,15}, { 1023,14}, \
- { 2047,13}, { 4479,14}, { 2303,13}, { 8192,14}, \
- { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 170
-#define SQR_FFT_THRESHOLD 5760
-
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 57
-#define MULLO_MUL_N_THRESHOLD 14281
-
-#define DC_DIV_QR_THRESHOLD 23
-#define DC_DIVAPPR_Q_THRESHOLD 63
-#define DC_BDIV_QR_THRESHOLD 87
-#define DC_BDIV_Q_THRESHOLD 204
-
-#define INV_MULMOD_BNM1_THRESHOLD 54
-#define INV_NEWTON_THRESHOLD 75
-#define INV_APPR_THRESHOLD 67
-
-#define BINV_NEWTON_THRESHOLD 296
-#define REDC_1_TO_REDC_N_THRESHOLD 79
-
-#define MU_DIV_QR_THRESHOLD 872
-#define MU_DIVAPPR_Q_THRESHOLD 654
-#define MUPI_DIV_QR_THRESHOLD 0 /* always */
-#define MU_BDIV_QR_THRESHOLD 1858
-#define MU_BDIV_Q_THRESHOLD 2089
-
-#define POWM_SEC_TABLE 1,17,127,508,1603
-
-#define MATRIX22_STRASSEN_THRESHOLD 19
-#define HGCD_THRESHOLD 61
-#define HGCD_APPR_THRESHOLD 60
-#define HGCD_REDUCE_THRESHOLD 3810
-#define GCD_DC_THRESHOLD 263
-#define GCDEXT_DC_THRESHOLD 278
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 11
-#define GET_STR_PRECOMPUTE_THRESHOLD 21
-#define SET_STR_DC_THRESHOLD 527
-#define SET_STR_PRECOMPUTE_THRESHOLD 1178
-
-#define FAC_DSC_THRESHOLD 187
-#define FAC_ODD_THRESHOLD 34
diff --git a/gmp/mpn/x86/coreinhm/gmp-mparam.h b/gmp/mpn/x86/coreinhm/gmp-mparam.h
deleted file mode 100644
index 13289c0c23..0000000000
--- a/gmp/mpn/x86/coreinhm/gmp-mparam.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/* x86/coreinhm gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2667 MHz Core i7 Nehalem */
-/* FFT tuning limit = 100000000 */
-/* Generated by tuneup.c, 2014-03-19, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD 25
-#define MOD_1_UNNORM_THRESHOLD 15
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 8
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 7
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 3
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD 18
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 15
-
-#define MUL_TOOM22_THRESHOLD 26
-#define MUL_TOOM33_THRESHOLD 89
-#define MUL_TOOM44_THRESHOLD 214
-#define MUL_TOOM6H_THRESHOLD 327
-#define MUL_TOOM8H_THRESHOLD 466
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 159
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 95
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 101
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 142
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 44
-#define SQR_TOOM3_THRESHOLD 145
-#define SQR_TOOM4_THRESHOLD 232
-#define SQR_TOOM6_THRESHOLD 342
-#define SQR_TOOM8_THRESHOLD 502
-
-#define MULMID_TOOM42_THRESHOLD 78
-
-#define MULMOD_BNM1_THRESHOLD 17
-#define SQRMOD_BNM1_THRESHOLD 21
-
-#define MUL_FFT_MODF_THRESHOLD 606 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 606, 5}, { 25, 6}, { 13, 5}, { 28, 6}, \
- { 15, 5}, { 33, 6}, { 29, 7}, { 15, 6}, \
- { 33, 7}, { 17, 6}, { 36, 7}, { 19, 6}, \
- { 39, 7}, { 23, 6}, { 47, 7}, { 35, 8}, \
- { 19, 7}, { 43, 8}, { 23, 7}, { 49, 8}, \
- { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \
- { 51, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
- { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
- { 31, 9}, { 79,10}, { 47, 9}, { 95,11}, \
- { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
- { 159,10}, { 95, 9}, { 191,10}, { 111,11}, \
- { 63,10}, { 159,11}, { 95,10}, { 191,12}, \
- { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
- { 271, 9}, { 543,11}, { 159,10}, { 319, 9}, \
- { 639,10}, { 335,11}, { 191,10}, { 383, 9}, \
- { 767,10}, { 399,12}, { 127,11}, { 255,10}, \
- { 511, 9}, { 1023,10}, { 543,11}, { 287,10}, \
- { 607,11}, { 319,10}, { 639,12}, { 191,11}, \
- { 383,10}, { 767,13}, { 127,12}, { 255,11}, \
- { 511,10}, { 1023,11}, { 543,10}, { 1087,11}, \
- { 607,12}, { 319,11}, { 671,10}, { 1343,11}, \
- { 735,12}, { 383,11}, { 799,10}, { 1599,11}, \
- { 863,10}, { 1727,12}, { 447,11}, { 927,10}, \
- { 1855,11}, { 991,13}, { 255,12}, { 511,11}, \
- { 1119,12}, { 575,11}, { 1215,10}, { 2431,12}, \
- { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \
- { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \
- { 1727,12}, { 895,11}, { 1855,12}, { 959,14}, \
- { 255,13}, { 511,12}, { 1023,11}, { 2111,12}, \
- { 1087,11}, { 2239,10}, { 4479,12}, { 1215,11}, \
- { 2431,13}, { 639,12}, { 1471,13}, { 767,12}, \
- { 1727,11}, { 3455,13}, { 895,12}, { 1983,11}, \
- { 3967,14}, { 511,13}, { 1023,12}, { 2239,11}, \
- { 4479,13}, { 1151,12}, { 2495,11}, { 4991,13}, \
- { 1279,12}, { 2623,13}, { 1407,12}, { 2943,14}, \
- { 767,13}, { 1535,12}, { 3071,13}, { 1663,12}, \
- { 3455,13}, { 1919,12}, { 3967,15}, { 511,14}, \
- { 1023,13}, { 2175,12}, { 4479,13}, { 2431,12}, \
- { 4991,14}, { 1279,13}, { 2687,12}, { 5503,13}, \
- { 2943,12}, { 6015,14}, { 1535,13}, { 3455,14}, \
- { 1791,13}, { 3967,12}, { 7935,15}, { 1023,14}, \
- { 2047,13}, { 4479,14}, { 2303,13}, { 4991,12}, \
- { 9983,14}, { 2559,13}, { 5503,14}, { 2815,13}, \
- { 6015,15}, { 1535,14}, { 3839,13}, { 7935,16}, \
- { 1023,15}, { 2047,14}, { 4095,13}, { 8191,12}, \
- { 16383,11}, { 32767,10}, { 65535, 9}, { 131071, 8}, \
- { 256, 9}, { 512,10}, { 1024,11}, { 2048,12}, \
- { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 192
-#define MUL_FFT_THRESHOLD 6784
-
-#define SQR_FFT_MODF_THRESHOLD 555 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 555, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \
- { 16, 5}, { 33, 6}, { 29, 7}, { 15, 6}, \
- { 32, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
- { 39, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
- { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \
- { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \
- { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \
- { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
- { 95, 9}, { 55,10}, { 31, 9}, { 79,10}, \
- { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
- { 135,10}, { 79, 9}, { 159,10}, { 95,11}, \
- { 63,10}, { 143, 9}, { 287,10}, { 159,11}, \
- { 95,12}, { 63,11}, { 127,10}, { 255, 9}, \
- { 511, 8}, { 1023,10}, { 271, 9}, { 543,10}, \
- { 287,11}, { 159,10}, { 319, 9}, { 639,10}, \
- { 335, 9}, { 671,10}, { 351,11}, { 191,10}, \
- { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \
- { 415,12}, { 127,11}, { 255,10}, { 511, 9}, \
- { 1023,10}, { 543,11}, { 287,10}, { 607,11}, \
- { 319,10}, { 671,11}, { 351,12}, { 191,11}, \
- { 383,10}, { 799,11}, { 415,13}, { 127,12}, \
- { 255,11}, { 511,10}, { 1023,11}, { 543,10}, \
- { 1087,11}, { 607,12}, { 319,11}, { 671,10}, \
- { 1343,11}, { 735,10}, { 1471,12}, { 383,11}, \
- { 799,10}, { 1599,11}, { 863,10}, { 1727,12}, \
- { 447,11}, { 991,10}, { 1983,13}, { 255,12}, \
- { 511,11}, { 1023,10}, { 2047,11}, { 1087,12}, \
- { 575,11}, { 1215,10}, { 2431,12}, { 639,11}, \
- { 1343,12}, { 703,11}, { 1471,13}, { 383,12}, \
- { 767,11}, { 1599,12}, { 831,11}, { 1727,10}, \
- { 3455,12}, { 895,11}, { 1791,12}, { 959,11}, \
- { 1983,14}, { 255,13}, { 511,12}, { 1023,11}, \
- { 2111,12}, { 1087,11}, { 2239,10}, { 4479,12}, \
- { 1215,11}, { 2431,13}, { 639,12}, { 1471,11}, \
- { 2943,13}, { 767,12}, { 1727,11}, { 3455,13}, \
- { 895,12}, { 1983,11}, { 3967,14}, { 511,13}, \
- { 1023,12}, { 2239,11}, { 4479,13}, { 1151,12}, \
- { 2495,13}, { 1279,12}, { 2623,13}, { 1407,12}, \
- { 2943,14}, { 767,13}, { 1663,12}, { 3455,13}, \
- { 1919,12}, { 3967,15}, { 511,14}, { 1023,13}, \
- { 2175,12}, { 4479,13}, { 2431,12}, { 4863,14}, \
- { 1279,13}, { 2943,12}, { 5887,14}, { 1535,13}, \
- { 3455,14}, { 1791,13}, { 3967,12}, { 7935,15}, \
- { 1023,14}, { 2047,13}, { 4479,14}, { 2303,13}, \
- { 4991,12}, { 9983,14}, { 2815,13}, { 5887,15}, \
- { 1535,14}, { 3327,13}, { 6655,14}, { 3839,13}, \
- { 7935,16}, { 1023,15}, { 2047,14}, { 4095,13}, \
- { 8191,12}, { 16383,11}, { 32767,10}, { 65535, 9}, \
- { 131071, 8}, { 256, 9}, { 512,10}, { 1024,11}, \
- { 2048,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
- { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 201
-#define SQR_FFT_THRESHOLD 5312
-
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 38
-#define MULLO_MUL_N_THRESHOLD 13463
-
-#define DC_DIV_QR_THRESHOLD 22
-#define DC_DIVAPPR_Q_THRESHOLD 43
-#define DC_BDIV_QR_THRESHOLD 78
-#define DC_BDIV_Q_THRESHOLD 157
-
-#define INV_MULMOD_BNM1_THRESHOLD 50
-#define INV_NEWTON_THRESHOLD 15
-#define INV_APPR_THRESHOLD 18
-
-#define BINV_NEWTON_THRESHOLD 351
-#define REDC_1_TO_REDC_N_THRESHOLD 84
-
-#define MU_DIV_QR_THRESHOLD 889
-#define MU_DIVAPPR_Q_THRESHOLD 483
-#define MUPI_DIV_QR_THRESHOLD 0 /* always */
-#define MU_BDIV_QR_THRESHOLD 1589
-#define MU_BDIV_Q_THRESHOLD 1787
-
-#define POWM_SEC_TABLE 2,25,95,473,1357
-
-#define MATRIX22_STRASSEN_THRESHOLD 20
-#define HGCD_THRESHOLD 52
-#define HGCD_APPR_THRESHOLD 51
-#define HGCD_REDUCE_THRESHOLD 3524
-#define GCD_DC_THRESHOLD 213
-#define GCDEXT_DC_THRESHOLD 249
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 13
-#define GET_STR_PRECOMPUTE_THRESHOLD 24
-#define SET_STR_DC_THRESHOLD 145
-#define SET_STR_PRECOMPUTE_THRESHOLD 545
-
-#define FAC_DSC_THRESHOLD 91
-#define FAC_ODD_THRESHOLD 29
diff --git a/gmp/mpn/x86/coreisbr/gmp-mparam.h b/gmp/mpn/x86/coreisbr/gmp-mparam.h
deleted file mode 100644
index 9b227a71ba..0000000000
--- a/gmp/mpn/x86/coreisbr/gmp-mparam.h
+++ /dev/null
@@ -1,203 +0,0 @@
-/* x86/coreisbr gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 3300 MHz Core i5 Sandy Bridge */
-/* FFT tuning limit = 40000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD 18
-#define MOD_1_UNNORM_THRESHOLD 11
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 9
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 7
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 9
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD 16
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 19
-
-#define MUL_TOOM22_THRESHOLD 28
-#define MUL_TOOM33_THRESHOLD 99
-#define MUL_TOOM44_THRESHOLD 160
-#define MUL_TOOM6H_THRESHOLD 268
-#define MUL_TOOM8H_THRESHOLD 490
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 106
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 140
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 109
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 108
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 137
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 48
-#define SQR_TOOM3_THRESHOLD 105
-#define SQR_TOOM4_THRESHOLD 256
-#define SQR_TOOM6_THRESHOLD 366
-#define SQR_TOOM8_THRESHOLD 562
-
-#define MULMID_TOOM42_THRESHOLD 98
-
-#define MULMOD_BNM1_THRESHOLD 19
-#define SQRMOD_BNM1_THRESHOLD 23
-
-#define MUL_FFT_MODF_THRESHOLD 636 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 636, 5}, { 27, 6}, { 28, 7}, { 15, 6}, \
- { 32, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
- { 39, 7}, { 23, 6}, { 47, 7}, { 27, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \
- { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \
- { 55,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
- { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
- { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \
- { 103,11}, { 31,10}, { 63, 9}, { 135,10}, \
- { 79, 9}, { 159,10}, { 95, 9}, { 191,11}, \
- { 63,10}, { 159,11}, { 95,10}, { 191,12}, \
- { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
- { 271, 9}, { 543,11}, { 159,10}, { 319, 9}, \
- { 639,10}, { 335, 9}, { 671,11}, { 191,10}, \
- { 383, 9}, { 767,10}, { 399, 9}, { 799,11}, \
- { 223,12}, { 127,11}, { 255,10}, { 543, 9}, \
- { 1087,11}, { 287,10}, { 607, 9}, { 1215,11}, \
- { 319,10}, { 671,12}, { 191,11}, { 383,10}, \
- { 799,11}, { 415,13}, { 127,12}, { 255,11}, \
- { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \
- { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \
- { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \
- { 863,12}, { 447,11}, { 959,13}, { 255,12}, \
- { 511,11}, { 1087,12}, { 575,11}, { 1215,10}, \
- { 2431,12}, { 639,11}, { 1343,12}, { 703,11}, \
- { 1471,13}, { 383,12}, { 767,11}, { 1599,12}, \
- { 831,11}, { 1727,12}, { 959,14}, { 255,13}, \
- { 511,12}, { 1087,11}, { 2239,12}, { 1215,11}, \
- { 2431,13}, { 639,12}, { 1471,11}, { 2943,13}, \
- { 767,12}, { 1727,13}, { 895,12}, { 1983,14}, \
- { 511,13}, { 1023,12}, { 2239,13}, { 1151,12}, \
- { 2431,13}, { 1279,12}, { 2559,13}, { 1407,12}, \
- { 2943,14}, { 767,13}, { 1535,12}, { 3071,13}, \
- { 1663,12}, { 3455,13}, { 1919,15}, { 511,14}, \
- { 1023,13}, { 2175,12}, { 4479,13}, { 2431,14}, \
- { 1279,13}, { 2943,12}, { 5887,14}, { 16384,15}, \
- { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 149
-#define MUL_FFT_THRESHOLD 7424
-
-#define SQR_FFT_MODF_THRESHOLD 555 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 555, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \
- { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
- { 36, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
- { 47, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
- { 19, 7}, { 43, 8}, { 23, 7}, { 47, 8}, \
- { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
- { 43, 9}, { 23, 8}, { 51, 9}, { 31, 8}, \
- { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
- { 95, 9}, { 55,10}, { 31, 9}, { 79,10}, \
- { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
- { 135,10}, { 79, 9}, { 159,10}, { 95, 9}, \
- { 191,10}, { 111,11}, { 63,10}, { 159,11}, \
- { 95,10}, { 191,12}, { 63,11}, { 127,10}, \
- { 255, 9}, { 543,11}, { 159,10}, { 319, 9}, \
- { 639,10}, { 335, 9}, { 671,10}, { 351,11}, \
- { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \
- { 799,10}, { 415,12}, { 127,11}, { 255,10}, \
- { 511, 9}, { 1023,10}, { 543,11}, { 287,10}, \
- { 607,11}, { 319,10}, { 671,11}, { 351,12}, \
- { 191,11}, { 383,10}, { 799,11}, { 415,13}, \
- { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \
- { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \
- { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \
- { 1471,12}, { 383,11}, { 799,10}, { 1599,11}, \
- { 863,10}, { 1727,12}, { 447,11}, { 959,10}, \
- { 1919,11}, { 991,13}, { 255,12}, { 511,11}, \
- { 1087,12}, { 575,11}, { 1215,10}, { 2431,12}, \
- { 639,11}, { 1343,12}, { 703,11}, { 1471,13}, \
- { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \
- { 1727,12}, { 959,11}, { 1919,14}, { 255,13}, \
- { 511,12}, { 1023,11}, { 2047,12}, { 1087,11}, \
- { 2239,12}, { 1215,11}, { 2431,13}, { 639,12}, \
- { 1471,11}, { 2943,13}, { 767,12}, { 1727,13}, \
- { 895,12}, { 1983,14}, { 511,13}, { 1023,12}, \
- { 2239,13}, { 1151,12}, { 2495,13}, { 1279,12}, \
- { 2623,13}, { 1407,12}, { 2943,14}, { 767,13}, \
- { 1663,12}, { 3455,13}, { 1919,12}, { 3839,15}, \
- { 511,14}, { 1023,13}, { 2175,12}, { 4479,13}, \
- { 2431,12}, { 4863,14}, { 1279,13}, { 2943,12}, \
- { 5887,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 159
-#define SQR_FFT_THRESHOLD 5760
-
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 62
-#define MULLO_MUL_N_THRESHOLD 14281
-
-#define DC_DIV_QR_THRESHOLD 25
-#define DC_DIVAPPR_Q_THRESHOLD 43
-#define DC_BDIV_QR_THRESHOLD 99
-#define DC_BDIV_Q_THRESHOLD 240
-
-#define INV_MULMOD_BNM1_THRESHOLD 54
-#define INV_NEWTON_THRESHOLD 14
-#define INV_APPR_THRESHOLD 13
-
-#define BINV_NEWTON_THRESHOLD 363
-#define REDC_1_TO_REDC_N_THRESHOLD 90
-
-#define MU_DIV_QR_THRESHOLD 998
-#define MU_DIVAPPR_Q_THRESHOLD 667
-#define MUPI_DIV_QR_THRESHOLD 0 /* always */
-#define MU_BDIV_QR_THRESHOLD 1787
-#define MU_BDIV_Q_THRESHOLD 2130
-
-#define POWM_SEC_TABLE 1,16,126,480,1317
-
-#define MATRIX22_STRASSEN_THRESHOLD 21
-#define HGCD_THRESHOLD 61
-#define HGCD_APPR_THRESHOLD 56
-#define HGCD_REDUCE_THRESHOLD 3810
-#define GCD_DC_THRESHOLD 283
-#define GCDEXT_DC_THRESHOLD 309
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 12
-#define GET_STR_PRECOMPUTE_THRESHOLD 21
-#define SET_STR_DC_THRESHOLD 399
-#define SET_STR_PRECOMPUTE_THRESHOLD 1183
-
-#define FAC_DSC_THRESHOLD 194
-#define FAC_ODD_THRESHOLD 34
diff --git a/gmp/mpn/x86/darwin.m4 b/gmp/mpn/x86/darwin.m4
index f8363db3f7..7ef8dfc105 100644
--- a/gmp/mpn/x86/darwin.m4
+++ b/gmp/mpn/x86/darwin.m4
@@ -1,82 +1,40 @@
divert(-1)
-dnl Copyright 2007, 2011, 2012 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
+dnl Copyright 2007 Free Software Foundation, Inc.
dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
define(`DARWIN')
-
dnl Usage LEA(symbol,reg)
dnl
-dnl We maintain lists of stuff to append in load_eip and darwin_bd. The
-dnl `index' stuff is needed to suppress repeated definitions. To avoid
-dnl getting fooled by "var" and "var1", we add 'bol ' (the end of
-dnl 'indirect_symbol') at the beginning and and a newline at the end. This
-dnl might be a bit fragile.
+dnl FIXME: Only handles one symbol per assembly file because of the
+dnl way EPILOGUE_cpu is handled.
-define(`LEA',
-m4_assert_numargs(2)
-`ifdef(`PIC',`
-ifelse(index(defn(`load_eip'), `$2'),-1,
-`m4append(`load_eip',
-`L(movl_eip_`'substr($2,1)):
+define(`LEA',`
+define(`EPILOGUE_cpu',
+` L(movl_eip_`'substr($2,1)):
movl (%esp), $2
ret_internal
-')')
-ifelse(index(defn(`darwin_bd'), `bol $1
-'),-1,
-`m4append(`darwin_bd',
-` .section __IMPORT,__pointers,non_lazy_symbol_pointers
+ .section __IMPORT,__pointers,non_lazy_symbol_pointers
L($1`'$non_lazy_ptr):
.indirect_symbol $1
.long 0
-')')
+')
call L(movl_eip_`'substr($2,1))
movl L($1`'$non_lazy_ptr)-.($2), $2
-',`
- movl `$'$1, $2
-')')
-
-
-dnl EPILOGUE_cpu
-
-define(`EPILOGUE_cpu',`load_eip`'darwin_bd')
-
-define(`load_eip', `') dnl updated in LEA
-define(`darwin_bd', `') dnl updated in LEA
-
-
-dnl Usage: CALL(funcname)
-dnl
-
-define(`CALL',
-m4_assert_numargs(1)
-`call GSYM_PREFIX`'$1')
-
-undefine(`PIC_WITH_EBX')
+')
divert`'dnl
diff --git a/gmp/mpn/x86/dive_1.asm b/gmp/mpn/x86/dive_1.asm
index 9a6cbb7931..d2d02f9f72 100644
--- a/gmp/mpn/x86/dive_1.asm
+++ b/gmp/mpn/x86/dive_1.asm
@@ -1,32 +1,21 @@
dnl x86 mpn_divexact_1 -- mpn by limb exact division.
dnl Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -108,7 +97,7 @@ ifdef(`PIC',`
subl %edx, %eax C inv = 2*inv - inv*inv*d
- ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+ ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
pushl %eax FRAME_pushl()
imull PARAM_DIVISOR, %eax
cmpl $1, %eax
diff --git a/gmp/mpn/x86/divrem_1.asm b/gmp/mpn/x86/divrem_1.asm
index 255d4935c3..a5fb88071d 100644
--- a/gmp/mpn/x86/divrem_1.asm
+++ b/gmp/mpn/x86/divrem_1.asm
@@ -1,32 +1,22 @@
dnl x86 mpn_divrem_1 -- mpn by limb division extending to fractional quotient.
-dnl Copyright 1999-2003, 2007 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/divrem_2.asm b/gmp/mpn/x86/divrem_2.asm
index 4c38ad0acb..bbadda921c 100644
--- a/gmp/mpn/x86/divrem_2.asm
+++ b/gmp/mpn/x86/divrem_2.asm
@@ -3,30 +3,19 @@ dnl x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
dnl Copyright 2007, 2008 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -92,7 +81,7 @@ PROLOGUE(mpn_divrem_2)
seta %dl
cmp 20(%esp), %ebp
setae %al
- orb %dl, %al C "orb" form to placate Sun tools
+ or %dl, %al
jne L(35)
L(8):
mov 60(%esp), %esi C fn
@@ -185,7 +174,7 @@ L(9): mov 64(%esp), %esi C up
L(fix): seta %dl
cmp 20(%esp), %ebp
setae %al
- orb %dl, %al C "orb" form to placate Sun tools
+ or %dl, %al
je L(bck)
inc %edi
sub 20(%esp), %ebp
diff --git a/gmp/mpn/x86/fat/com.c b/gmp/mpn/x86/fat/com.c
deleted file mode 100644
index d359d4ce73..0000000000
--- a/gmp/mpn/x86/fat/com.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_com.
-
-Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "mpn/generic/com.c"
diff --git a/gmp/mpn/x86/fat/diveby3.c b/gmp/mpn/x86/fat/diveby3.c
new file mode 100644
index 0000000000..7ea0161b72
--- /dev/null
+++ b/gmp/mpn/x86/fat/diveby3.c
@@ -0,0 +1,21 @@
+/* Fat binary fallback mpn_divexact_by3c.
+
+Copyright 2003, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "mpn/generic/diveby3.c"
diff --git a/gmp/mpn/x86/fat/fat.c b/gmp/mpn/x86/fat/fat.c
index 1740813886..c3d1866c69 100644
--- a/gmp/mpn/x86/fat/fat.c
+++ b/gmp/mpn/x86/fat/fat.c
@@ -4,33 +4,22 @@
THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
COMPLETELY IN FUTURE GNU MP RELEASES.
-Copyright 2003, 2004, 2011, 2012 Free Software Foundation, Inc.
+Copyright 2003, 2004 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h> /* for printf */
#include <stdlib.h> /* for getenv */
@@ -42,10 +31,14 @@ see https://www.gnu.org/licenses/. */
/* Change this to "#define TRACE(x) x" for some traces. */
#define TRACE(x)
+/* Change this to 1 to take the cpuid from GMP_CPU_TYPE env var. */
+#define WANT_FAKE_CPUID 0
+
/* fat_entry.asm */
-long __gmpn_cpuid (char [12], int);
-int __gmpn_cpuid_available (void);
+long __gmpn_cpuid __GMP_PROTO ((char dst[12], int id));
+int __gmpn_cpuid_available __GMP_PROTO ((void));
+
#if WANT_FAKE_CPUID
@@ -56,9 +49,8 @@ int __gmpn_cpuid_available (void);
#define __gmpn_cpuid fake_cpuid
#define __gmpn_cpuid_available fake_cpuid_available
-#define MAKE_FMS(family, model) \
- ((((family) & 0xf) << 8) + (((family) & 0xff0) << 20) \
- + (((model) & 0xf) << 4) + (((model) & 0xf0) << 12))
+#define MAKE_FMS(family, model) \
+ (((family) << 8) + ((model << 4)))
static struct {
const char *name;
@@ -72,29 +64,17 @@ static struct {
{ "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) },
{ "pentium2", "GenuineIntel", MAKE_FMS (6, 2) },
{ "pentium3", "GenuineIntel", MAKE_FMS (6, 7) },
- { "pentium4", "GenuineIntel", MAKE_FMS (15, 2) },
- { "prescott", "GenuineIntel", MAKE_FMS (15, 3) },
- { "nocona", "GenuineIntel", MAKE_FMS (15, 4) },
- { "core2", "GenuineIntel", MAKE_FMS (6, 0xf) },
- { "coreinhm", "GenuineIntel", MAKE_FMS (6, 0x1a) },
- { "coreiwsm", "GenuineIntel", MAKE_FMS (6, 0x25) },
- { "coreisbr", "GenuineIntel", MAKE_FMS (6, 0x2a) },
- { "coreihwl", "GenuineIntel", MAKE_FMS (6, 0x3c) },
- { "atom", "GenuineIntel", MAKE_FMS (6, 0x1c) },
+ { "pentium4", "GenuineIntel", MAKE_FMS (7, 0) },
{ "k5", "AuthenticAMD", MAKE_FMS (5, 0) },
{ "k6", "AuthenticAMD", MAKE_FMS (5, 3) },
{ "k62", "AuthenticAMD", MAKE_FMS (5, 8) },
{ "k63", "AuthenticAMD", MAKE_FMS (5, 9) },
{ "athlon", "AuthenticAMD", MAKE_FMS (6, 0) },
- { "k8", "AuthenticAMD", MAKE_FMS (15, 0) },
- { "k10", "AuthenticAMD", MAKE_FMS (16, 0) },
- { "bobcat", "AuthenticAMD", MAKE_FMS (20, 1) },
- { "bulldozer", "AuthenticAMD", MAKE_FMS (21, 1) },
+ { "x86_64", "AuthenticAMD", MAKE_FMS (15, 0) },
{ "viac3", "CentaurHauls", MAKE_FMS (6, 0) },
{ "viac32", "CentaurHauls", MAKE_FMS (6, 9) },
- { "nano", "CentaurHauls", MAKE_FMS (6, 15) },
};
static int
@@ -148,46 +128,28 @@ typedef DECL_preinv_mod_1 ((*preinv_mod_1_t));
struct cpuvec_t __gmpn_cpuvec = {
__MPN(add_n_init),
- 0,
- 0,
__MPN(addmul_1_init),
- 0,
- __MPN(bdiv_dbm1c_init),
- __MPN(cnd_add_n_init),
- __MPN(cnd_sub_n_init),
- __MPN(com_init),
__MPN(copyd_init),
__MPN(copyi_init),
__MPN(divexact_1_init),
+ __MPN(divexact_by3c_init),
__MPN(divrem_1_init),
__MPN(gcd_1_init),
__MPN(lshift_init),
- __MPN(lshiftc_init),
__MPN(mod_1_init),
- __MPN(mod_1_1p_init),
- __MPN(mod_1_1p_cps_init),
- __MPN(mod_1s_2p_init),
- __MPN(mod_1s_2p_cps_init),
- __MPN(mod_1s_4p_init),
- __MPN(mod_1s_4p_cps_init),
__MPN(mod_34lsub1_init),
__MPN(modexact_1c_odd_init),
__MPN(mul_1_init),
__MPN(mul_basecase_init),
- __MPN(mullo_basecase_init),
__MPN(preinv_divrem_1_init),
__MPN(preinv_mod_1_init),
- __MPN(redc_1_init),
- __MPN(redc_2_init),
__MPN(rshift_init),
__MPN(sqr_basecase_init),
__MPN(sub_n_init),
- 0,
__MPN(submul_1_init),
0
};
-int __gmpn_cpuvec_initialized = 0;
/* The following setups start with generic x86, then overwrite with
specifics for a chip, and higher versions of that chip.
@@ -257,107 +219,21 @@ __gmpn_cpuvec_init (void)
case 6:
TRACE (printf (" p6\n"));
CPUVEC_SETUP_p6;
- switch (model)
- {
- case 0x00:
- case 0x01:
- TRACE (printf (" pentiumpro\n"));
- break;
-
- case 0x02:
- case 0x03:
- case 0x04:
- case 0x05:
- case 0x06:
- TRACE (printf (" pentium2\n"));
- CPUVEC_SETUP_p6_mmx;
- break;
-
- case 0x07:
- case 0x08:
- case 0x0a:
- case 0x0b:
- case 0x0c:
- TRACE (printf (" pentium3\n"));
- CPUVEC_SETUP_p6_mmx;
- CPUVEC_SETUP_p6_p3mmx;
- break;
-
- case 0x09: /* Banias */
- case 0x0d: /* Dothan */
- case 0x0e: /* Yonah */
- TRACE (printf (" Banias/Dothan/Yonah\n"));
- CPUVEC_SETUP_p6_mmx;
- CPUVEC_SETUP_p6_p3mmx;
- CPUVEC_SETUP_p6_sse2;
- break;
-
- case 0x0f: /* Conroe Merom Kentsfield Allendale */
- case 0x10:
- case 0x11:
- case 0x12:
- case 0x13:
- case 0x14:
- case 0x15:
- case 0x16:
- case 0x17: /* PNR Wolfdale Yorkfield */
- case 0x18:
- case 0x19:
- case 0x1d: /* PNR Dunnington */
- TRACE (printf (" Conroe\n"));
- CPUVEC_SETUP_p6_mmx;
- CPUVEC_SETUP_p6_p3mmx;
- CPUVEC_SETUP_p6_sse2;
- CPUVEC_SETUP_core2;
- break;
-
- case 0x1c: /* Atom Silverthorne */
- case 0x26: /* Atom Lincroft */
- case 0x27: /* Atom Saltwell */
- case 0x36: /* Atom Cedarview/Saltwell */
- TRACE (printf (" atom\n"));
- CPUVEC_SETUP_atom;
- CPUVEC_SETUP_atom_mmx;
- CPUVEC_SETUP_atom_sse2;
- break;
-
- case 0x1a: /* NHM Gainestown */
- case 0x1b:
- case 0x1e: /* NHM Lynnfield/Jasper */
- case 0x1f:
- case 0x20:
- case 0x21:
- case 0x22:
- case 0x23:
- case 0x24:
- case 0x25: /* WSM Clarkdale/Arrandale */
- case 0x28:
- case 0x29:
- case 0x2b:
- case 0x2c: /* WSM Gulftown */
- case 0x2e: /* NHM Beckton */
- case 0x2f: /* WSM Eagleton */
- TRACE (printf (" nehalem/westmere\n"));
- CPUVEC_SETUP_p6_mmx;
- CPUVEC_SETUP_p6_p3mmx;
- CPUVEC_SETUP_p6_sse2;
- CPUVEC_SETUP_core2;
- CPUVEC_SETUP_coreinhm;
- break;
-
- case 0x2a: /* SBR */
- case 0x2d: /* SBR-EP */
- case 0x3a: /* IBR */
- case 0x3c: /* Haswell */
- TRACE (printf (" sandybridge\n"));
+ if (model >= 2)
+ {
+ TRACE (printf (" pentium2\n"));
CPUVEC_SETUP_p6_mmx;
+ }
+ if (model >= 7)
+ {
+ TRACE (printf (" pentium3\n"));
CPUVEC_SETUP_p6_p3mmx;
+ }
+ if (model >= 0xD || model == 9)
+ {
+ TRACE (printf (" p6 with sse2\n"));
CPUVEC_SETUP_p6_sse2;
- CPUVEC_SETUP_core2;
- CPUVEC_SETUP_coreinhm;
- CPUVEC_SETUP_coreisbr;
- break;
- }
+ }
break;
case 15:
@@ -395,40 +271,13 @@ __gmpn_cpuvec_init (void)
break;
case 6:
TRACE (printf (" athlon\n"));
+ athlon:
CPUVEC_SETUP_k7;
CPUVEC_SETUP_k7_mmx;
break;
-
- case 0x0f: /* k8 */
- case 0x11: /* "fam 11h", mix of k8 and k10 */
- case 0x13: /* unknown, conservatively assume k8 */
- case 0x16: /* unknown, conservatively assume k8 */
- case 0x17: /* unknown, conservatively assume k8 */
- TRACE (printf (" k8\n"));
- CPUVEC_SETUP_k7;
- CPUVEC_SETUP_k7_mmx;
- CPUVEC_SETUP_k8;
- break;
-
- case 0x10: /* k10 */
- case 0x12: /* k10 (llano) */
- TRACE (printf (" k10\n"));
- CPUVEC_SETUP_k7;
- CPUVEC_SETUP_k7_mmx;
- break;
-
- case 0x14: /* bobcat */
- TRACE (printf (" bobcat\n"));
- CPUVEC_SETUP_k7;
- CPUVEC_SETUP_k7_mmx;
- CPUVEC_SETUP_bobcat;
- break;
-
- case 0x15: /* bulldozer */
- TRACE (printf (" bulldozer\n"));
- CPUVEC_SETUP_k7;
- CPUVEC_SETUP_k7_mmx;
- break;
+ case 15:
+ TRACE (printf (" x86_64\n"));
+ goto athlon;
}
}
else if (strcmp (vendor_string, "CentaurHauls") == 0)
@@ -441,11 +290,6 @@ __gmpn_cpuvec_init (void)
{
TRACE (printf (" viac32\n"));
}
- if (model >= 15)
- {
- TRACE (printf (" nano\n"));
- CPUVEC_SETUP_nano;
- }
break;
}
}
@@ -469,5 +313,5 @@ __gmpn_cpuvec_init (void)
/* Set this once the threshold fields are ready.
Use volatile to prevent it getting moved. */
- *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
+ ((volatile struct cpuvec_t *) &__gmpn_cpuvec)->initialized = 1;
}
diff --git a/gmp/mpn/x86/fat/fat_entry.asm b/gmp/mpn/x86/fat/fat_entry.asm
index 6e3cb44dd5..bd46e4e8bd 100644
--- a/gmp/mpn/x86/fat/fat_entry.asm
+++ b/gmp/mpn/x86/fat/fat_entry.asm
@@ -1,32 +1,21 @@
dnl x86 fat binary entrypoints.
-dnl Copyright 2003, 2012 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2003 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -129,7 +118,7 @@ EPILOGUE()
L(fat_init):
C al __gmpn_cpuvec byte offset
- movzbl %al, %eax
+ movsbl %al, %eax
pushl %eax
ifdef(`PIC',`
diff --git a/gmp/mpn/x86/fat/gcd_1.c b/gmp/mpn/x86/fat/gcd_1.c
index f809bd8092..5bd000618c 100644
--- a/gmp/mpn/x86/fat/gcd_1.c
+++ b/gmp/mpn/x86/fat/gcd_1.c
@@ -5,28 +5,17 @@ Copyright 2003 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "mpn/generic/gcd_1.c"
diff --git a/gmp/mpn/x86/fat/gmp-mparam.h b/gmp/mpn/x86/fat/gmp-mparam.h
index 3641a6bafa..9127d1425f 100644
--- a/gmp/mpn/x86/fat/gmp-mparam.h
+++ b/gmp/mpn/x86/fat/gmp-mparam.h
@@ -1,35 +1,25 @@
/* Fat binary x86 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 2000-2003, 2011 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
/* mpn_divexact_1 is faster than mpn_divrem_1 at all sizes. The only time
@@ -44,17 +34,15 @@ see https://www.gnu.org/licenses/. */
preinv. */
#define USE_PREINV_DIVREM_1 1
-#define BMOD_1_TO_MOD_1_THRESHOLD 20
-
/* mpn_sqr_basecase is faster than mpn_mul_basecase at all sizes, no need
- for mpn_sqr to call the latter. */
+ for mpn_sqr_n to call the latter. */
#define SQR_BASECASE_THRESHOLD 0
/* Sensible fallbacks for these, when not taken from a cpu-specific
gmp-mparam.h. */
-#define MUL_TOOM22_THRESHOLD 20
-#define MUL_TOOM33_THRESHOLD 130
-#define SQR_TOOM2_THRESHOLD 30
+#define MUL_KARATSUBA_THRESHOLD 20
+#define MUL_TOOM3_THRESHOLD 130
+#define SQR_KARATSUBA_THRESHOLD 30
#define SQR_TOOM3_THRESHOLD 200
/* These are values more or less in the middle of what the typical x86 chips
diff --git a/gmp/mpn/x86/fat/lshiftc.c b/gmp/mpn/x86/fat/lshiftc.c
deleted file mode 100644
index 9ecf48978f..0000000000
--- a/gmp/mpn/x86/fat/lshiftc.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_lshiftc.
-
-Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "mpn/generic/lshiftc.c"
diff --git a/gmp/mpn/x86/fat/mod_1.c b/gmp/mpn/x86/fat/mod_1.c
deleted file mode 100644
index 4f149cc353..0000000000
--- a/gmp/mpn/x86/fat/mod_1.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_mod_1.
-
-Copyright 2003, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "mpn/generic/mod_1.c"
diff --git a/gmp/mpn/x86/fat/mod_1_1.c b/gmp/mpn/x86/fat/mod_1_1.c
deleted file mode 100644
index 92eaa7a87f..0000000000
--- a/gmp/mpn/x86/fat/mod_1_1.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Fat binary fallback mpn_mod_1_1p.
-
-Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-/*
-PROLOGUE(mpn_mod_1_1p_cps)
-*/
-
-#define OPERATION_mod_1_1_cps 1
-#include "mpn/generic/mod_1_1.c"
diff --git a/gmp/mpn/x86/fat/mod_1_2.c b/gmp/mpn/x86/fat/mod_1_2.c
deleted file mode 100644
index 9095a61c93..0000000000
--- a/gmp/mpn/x86/fat/mod_1_2.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Fat binary fallback mpn_mod_1s_2p.
-
-Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-/*
-PROLOGUE(mpn_mod_1s_2p_cps)
-*/
-
-#define OPERATION_mod_1_2_cps 1
-#include "mpn/generic/mod_1_2.c"
diff --git a/gmp/mpn/x86/fat/mod_1_4.c b/gmp/mpn/x86/fat/mod_1_4.c
deleted file mode 100644
index 51c0def443..0000000000
--- a/gmp/mpn/x86/fat/mod_1_4.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Fat binary fallback mpn_mod_1s_4p.
-
-Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-/*
-PROLOGUE(mpn_mod_1s_4p_cps)
-*/
-
-#define OPERATION_mod_1_4_cps 1
-#include "mpn/generic/mod_1_4.c"
diff --git a/gmp/mpn/x86/fat/mode1o.c b/gmp/mpn/x86/fat/mode1o.c
index 870ddb899b..a5244cae44 100644
--- a/gmp/mpn/x86/fat/mode1o.c
+++ b/gmp/mpn/x86/fat/mode1o.c
@@ -5,28 +5,17 @@ Copyright 2003 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "mpn/generic/mode1o.c"
diff --git a/gmp/mpn/x86/fat/mullo_basecase.c b/gmp/mpn/x86/fat/mullo_basecase.c
deleted file mode 100644
index 7f86be64c5..0000000000
--- a/gmp/mpn/x86/fat/mullo_basecase.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_mullo_basecase.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "mpn/generic/mullo_basecase.c"
diff --git a/gmp/mpn/x86/fat/redc_1.c b/gmp/mpn/x86/fat/redc_1.c
deleted file mode 100644
index 0025403353..0000000000
--- a/gmp/mpn/x86/fat/redc_1.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_redc_1.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "mpn/generic/redc_1.c"
diff --git a/gmp/mpn/x86/fat/redc_2.c b/gmp/mpn/x86/fat/redc_2.c
deleted file mode 100644
index 1932d58323..0000000000
--- a/gmp/mpn/x86/fat/redc_2.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Fat binary fallback mpn_redc_2.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "mpn/generic/redc_2.c"
diff --git a/gmp/mpn/x86/geode/gmp-mparam.h b/gmp/mpn/x86/geode/gmp-mparam.h
deleted file mode 100644
index cc9c9f1789..0000000000
--- a/gmp/mpn/x86/geode/gmp-mparam.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/* Generic x86 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2002, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* Generated by tuneup.c, 2011-01-30, gcc 3.4 */
-
-#define MOD_1_NORM_THRESHOLD 6
-#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 17
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 9
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 14
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define USE_PREINV_DIVREM_1 0
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 42
-
-#define MUL_TOOM22_THRESHOLD 18
-#define MUL_TOOM33_THRESHOLD 66
-#define MUL_TOOM44_THRESHOLD 105
-#define MUL_TOOM6H_THRESHOLD 141
-#define MUL_TOOM8H_THRESHOLD 212
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 62
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 69
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 65
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 67
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 33
-#define SQR_TOOM3_THRESHOLD 60
-#define SQR_TOOM4_THRESHOLD 136
-#define SQR_TOOM6_THRESHOLD 196
-#define SQR_TOOM8_THRESHOLD 292
-
-#define MULMOD_BNM1_THRESHOLD 14
-#define SQRMOD_BNM1_THRESHOLD 16
-
-#define MUL_FFT_MODF_THRESHOLD 468 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 468, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
- { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \
- { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
- { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
- { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \
- { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
- { 39, 9}, { 23, 8}, { 47,10}, { 15, 9}, \
- { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \
- { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \
- { 63, 8}, { 127, 9}, { 79,10}, { 47, 9}, \
- { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
- { 79, 9}, { 159,10}, { 95, 9}, { 191,11}, \
- { 63,10}, { 127, 9}, { 255,10}, { 143, 9}, \
- { 287,10}, { 159,11}, { 95,10}, { 191, 9}, \
- { 383,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
- { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 61
-#define MUL_FFT_THRESHOLD 5504
-
-#define SQR_FFT_MODF_THRESHOLD 396 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 396, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
- { 21, 7}, { 11, 6}, { 24, 7}, { 13, 6}, \
- { 27, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \
- { 11, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
- { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
- { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \
- { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
- { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
- { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \
- { 95,11}, { 31,10}, { 63, 9}, { 127, 8}, \
- { 255, 9}, { 135,10}, { 79, 9}, { 159, 8}, \
- { 319,10}, { 95, 9}, { 191,11}, { 63,10}, \
- { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \
- { 287, 8}, { 575,10}, { 159,11}, { 95,10}, \
- { 191,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
- { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 61
-#define SQR_FFT_THRESHOLD 3712
-
-#define MULLO_BASECASE_THRESHOLD 3
-#define MULLO_DC_THRESHOLD 37
-#define MULLO_MUL_N_THRESHOLD 10950
-
-#define DC_DIV_QR_THRESHOLD 59
-#define DC_DIVAPPR_Q_THRESHOLD 189
-#define DC_BDIV_QR_THRESHOLD 55
-#define DC_BDIV_Q_THRESHOLD 136
-
-#define INV_MULMOD_BNM1_THRESHOLD 50
-#define INV_NEWTON_THRESHOLD 183
-#define INV_APPR_THRESHOLD 181
-
-#define BINV_NEWTON_THRESHOLD 204
-#define REDC_1_TO_REDC_N_THRESHOLD 54
-
-#define MU_DIV_QR_THRESHOLD 1142
-#define MU_DIVAPPR_Q_THRESHOLD 1142
-#define MUPI_DIV_QR_THRESHOLD 81
-#define MU_BDIV_QR_THRESHOLD 889
-#define MU_BDIV_Q_THRESHOLD 998
-
-#define MATRIX22_STRASSEN_THRESHOLD 13
-#define HGCD_THRESHOLD 133
-#define GCD_DC_THRESHOLD 451
-#define GCDEXT_DC_THRESHOLD 318
-#define JACOBI_BASE_METHOD 1
-
-#define GET_STR_DC_THRESHOLD 15
-#define GET_STR_PRECOMPUTE_THRESHOLD 30
-#define SET_STR_DC_THRESHOLD 547
-#define SET_STR_PRECOMPUTE_THRESHOLD 1049
diff --git a/gmp/mpn/x86/gmp-mparam.h b/gmp/mpn/x86/gmp-mparam.h
index 2cb1984889..22ee86f7e1 100644
--- a/gmp/mpn/x86/gmp-mparam.h
+++ b/gmp/mpn/x86/gmp-mparam.h
@@ -1,35 +1,24 @@
/* Generic x86 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 2000-2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
/* Generic x86 mpn_divexact_1 is faster than generic x86 mpn_divrem_1 on all
diff --git a/gmp/mpn/x86/i486/gmp-mparam.h b/gmp/mpn/x86/i486/gmp-mparam.h
index aa7dbad45b..aaddea9f18 100644
--- a/gmp/mpn/x86/i486/gmp-mparam.h
+++ b/gmp/mpn/x86/i486/gmp-mparam.h
@@ -1,46 +1,35 @@
/* 80486 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 2001-2003 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
/* 100MHz DX4 */
/* Generated by tuneup.c, 2003-02-13, gcc 2.95 */
-#define MUL_TOOM22_THRESHOLD 18
-#define MUL_TOOM33_THRESHOLD 228
+#define MUL_KARATSUBA_THRESHOLD 18
+#define MUL_TOOM3_THRESHOLD 228
#define SQR_BASECASE_THRESHOLD 13
-#define SQR_TOOM2_THRESHOLD 49
+#define SQR_KARATSUBA_THRESHOLD 49
#define SQR_TOOM3_THRESHOLD 238
#define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX /* never */
diff --git a/gmp/mpn/x86/k10/gmp-mparam.h b/gmp/mpn/x86/k10/gmp-mparam.h
deleted file mode 100644
index 2a1ae5a6bb..0000000000
--- a/gmp/mpn/x86/k10/gmp-mparam.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/* x86/k10 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2400 MHz K10 Barcelona */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 12
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 7
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 9
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 12
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD 1
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 32
-
-#define MUL_TOOM22_THRESHOLD 24
-#define MUL_TOOM33_THRESHOLD 81
-#define MUL_TOOM44_THRESHOLD 130
-#define MUL_TOOM6H_THRESHOLD 189
-#define MUL_TOOM8H_THRESHOLD 430
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 82
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 90
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 112
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 38
-#define SQR_TOOM3_THRESHOLD 77
-#define SQR_TOOM4_THRESHOLD 184
-#define SQR_TOOM6_THRESHOLD 262
-#define SQR_TOOM8_THRESHOLD 369
-
-#define MULMID_TOOM42_THRESHOLD 56
-
-#define MULMOD_BNM1_THRESHOLD 17
-#define SQRMOD_BNM1_THRESHOLD 18
-
-#define MUL_FFT_MODF_THRESHOLD 765 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 765, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
- { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
- { 31, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
- { 39, 7}, { 23, 6}, { 47, 7}, { 27, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
- { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \
- { 51, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
- { 79, 9}, { 47, 8}, { 95,10}, { 31, 9}, \
- { 63, 8}, { 127, 9}, { 79,10}, { 47, 9}, \
- { 103,11}, { 31,10}, { 63, 9}, { 135,10}, \
- { 79, 9}, { 159,10}, { 95, 9}, { 199,10}, \
- { 111,11}, { 63,10}, { 127, 9}, { 263,10}, \
- { 175,11}, { 95,10}, { 207,12}, { 63,11}, \
- { 127,10}, { 255, 9}, { 543, 8}, { 1087, 9}, \
- { 575,11}, { 159,10}, { 319, 9}, { 671, 8}, \
- { 1343, 9}, { 735,11}, { 191, 9}, { 799, 8}, \
- { 1599,10}, { 415, 9}, { 863,11}, { 223,12}, \
- { 127,11}, { 255,10}, { 543, 9}, { 1087,10}, \
- { 607, 9}, { 1215, 8}, { 2431,11}, { 319,10}, \
- { 671, 9}, { 1343,10}, { 735,12}, { 191,11}, \
- { 383,10}, { 799, 9}, { 1599,11}, { 415,10}, \
- { 863, 9}, { 1727,13}, { 127,12}, { 255,11}, \
- { 543,10}, { 1087,11}, { 607,10}, { 1215, 9}, \
- { 2431,12}, { 319,11}, { 671,10}, { 1343,11}, \
- { 735,10}, { 1471, 9}, { 2943, 8}, { 5887,12}, \
- { 383,11}, { 799,10}, { 1599,11}, { 863,10}, \
- { 1727,12}, { 447,11}, { 959,10}, { 1919,11}, \
- { 991,10}, { 1983,13}, { 255,12}, { 511,11}, \
- { 1087,12}, { 575,11}, { 1215,10}, { 2431,12}, \
- { 639,11}, { 1343,12}, { 703,11}, { 1471,10}, \
- { 2943, 9}, { 5887,13}, { 383,12}, { 767,11}, \
- { 1599,12}, { 831,11}, { 1727,10}, { 3455,12}, \
- { 959,11}, { 1983,14}, { 255,13}, { 511,12}, \
- { 1087,11}, { 2239,12}, { 1215,11}, { 2431,13}, \
- { 639,12}, { 1471,11}, { 2943,10}, { 5887,13}, \
- { 767,12}, { 1727,11}, { 3455,13}, { 895,12}, \
- { 1983,14}, { 511,13}, { 1023,12}, { 2239,13}, \
- { 1151,12}, { 2495,13}, { 1407,12}, { 2943,11}, \
- { 5887,14}, { 767,13}, { 1663,12}, { 3455,13}, \
- { 1919,12}, { 3839,15}, { 511,14}, { 1023,13}, \
- { 2175,12}, { 4351,13}, { 2431,14}, { 1279,13}, \
- { 2943,12}, { 5887,14}, { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 172
-#define MUL_FFT_THRESHOLD 6784
-
-#define SQR_FFT_MODF_THRESHOLD 555 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 555, 5}, { 21, 6}, { 11, 5}, { 25, 6}, \
- { 13, 5}, { 27, 6}, { 27, 7}, { 15, 6}, \
- { 32, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
- { 39, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \
- { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
- { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
- { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \
- { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \
- { 47,10}, { 31, 9}, { 79,10}, { 47, 9}, \
- { 95,11}, { 31,10}, { 63, 9}, { 127,10}, \
- { 79, 9}, { 167,10}, { 95, 9}, { 191,10}, \
- { 111,11}, { 63,10}, { 143, 9}, { 287, 8}, \
- { 575,10}, { 159,11}, { 95,10}, { 191,12}, \
- { 63,11}, { 127,10}, { 255, 9}, { 543, 8}, \
- { 1087,10}, { 287, 9}, { 607,11}, { 159,10}, \
- { 319, 9}, { 671, 8}, { 1343,10}, { 351, 9}, \
- { 735, 8}, { 1471,11}, { 191,10}, { 383, 9}, \
- { 767,10}, { 399, 9}, { 799, 8}, { 1599,10}, \
- { 415, 9}, { 863,11}, { 223,10}, { 479,12}, \
- { 127,11}, { 255,10}, { 543, 9}, { 1087,11}, \
- { 287,10}, { 607, 9}, { 1215, 8}, { 2431,11}, \
- { 319,10}, { 671, 9}, { 1343,11}, { 351,10}, \
- { 735, 9}, { 1471,12}, { 191,11}, { 383,10}, \
- { 799, 9}, { 1599,11}, { 415,10}, { 863, 9}, \
- { 1727,11}, { 479,13}, { 127,12}, { 255,11}, \
- { 511,10}, { 1023,11}, { 543,10}, { 1087,11}, \
- { 607,10}, { 1215, 9}, { 2431,12}, { 319,11}, \
- { 671,10}, { 1343,11}, { 735,10}, { 1471, 9}, \
- { 2943,12}, { 383,11}, { 799,10}, { 1599,11}, \
- { 863,10}, { 1727,12}, { 447,11}, { 959,10}, \
- { 1919,11}, { 991,10}, { 1983,12}, { 511,11}, \
- { 1087,12}, { 575,11}, { 1215,10}, { 2431,12}, \
- { 639,11}, { 1343,12}, { 703,11}, { 1471,10}, \
- { 2943,13}, { 383,12}, { 767,11}, { 1599,12}, \
- { 831,11}, { 1727,10}, { 3455,12}, { 959,11}, \
- { 1983,13}, { 511,12}, { 1215,11}, { 2431,13}, \
- { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \
- { 1727,11}, { 3455,13}, { 895,12}, { 1983,14}, \
- { 511,13}, { 1023,12}, { 2111,13}, { 1151,12}, \
- { 2431,13}, { 1407,12}, { 2943,14}, { 767,13}, \
- { 1663,12}, { 3455,13}, { 1919,12}, { 3839,15}, \
- { 511,14}, { 1023,13}, { 2431,14}, { 1279,13}, \
- { 2943,12}, { 5887,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 172
-#define SQR_FFT_THRESHOLD 5504
-
-#define MULLO_BASECASE_THRESHOLD 7
-#define MULLO_DC_THRESHOLD 40
-#define MULLO_MUL_N_THRESHOLD 13463
-
-#define DC_DIV_QR_THRESHOLD 59
-#define DC_DIVAPPR_Q_THRESHOLD 270
-#define DC_BDIV_QR_THRESHOLD 55
-#define DC_BDIV_Q_THRESHOLD 206
-
-#define INV_MULMOD_BNM1_THRESHOLD 62
-#define INV_NEWTON_THRESHOLD 254
-#define INV_APPR_THRESHOLD 252
-
-#define BINV_NEWTON_THRESHOLD 274
-#define REDC_1_TO_REDC_N_THRESHOLD 74
-
-#define MU_DIV_QR_THRESHOLD 1589
-#define MU_DIVAPPR_Q_THRESHOLD 1589
-#define MUPI_DIV_QR_THRESHOLD 106
-#define MU_BDIV_QR_THRESHOLD 1470
-#define MU_BDIV_Q_THRESHOLD 1558
-
-#define POWM_SEC_TABLE 1,16,114,428,1240
-
-#define MATRIX22_STRASSEN_THRESHOLD 19
-#define HGCD_THRESHOLD 136
-#define HGCD_APPR_THRESHOLD 175
-#define HGCD_REDUCE_THRESHOLD 3389
-#define GCD_DC_THRESHOLD 595
-#define GCDEXT_DC_THRESHOLD 424
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 15
-#define GET_STR_PRECOMPUTE_THRESHOLD 28
-#define SET_STR_DC_THRESHOLD 100
-#define SET_STR_PRECOMPUTE_THRESHOLD 1360
-
-#define FAC_DSC_THRESHOLD 224
-#define FAC_ODD_THRESHOLD 29
diff --git a/gmp/mpn/x86/k6/README b/gmp/mpn/x86/k6/README
index 1d65af3851..f488cbd1d8 100644
--- a/gmp/mpn/x86/k6/README
+++ b/gmp/mpn/x86/k6/README
@@ -3,28 +3,17 @@ Copyright 2000, 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
diff --git a/gmp/mpn/x86/k6/aors_n.asm b/gmp/mpn/x86/k6/aors_n.asm
index 168f9b4ae4..09afd8f688 100644
--- a/gmp/mpn/x86/k6/aors_n.asm
+++ b/gmp/mpn/x86/k6/aors_n.asm
@@ -1,32 +1,21 @@
dnl AMD K6 mpn_add/sub_n -- mpn addition or subtraction.
-dnl Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k6/aorsmul_1.asm b/gmp/mpn/x86/k6/aorsmul_1.asm
index eaa92ebb24..c3795e3abb 100644
--- a/gmp/mpn/x86/k6/aorsmul_1.asm
+++ b/gmp/mpn/x86/k6/aorsmul_1.asm
@@ -1,52 +1,42 @@
dnl AMD K6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
-dnl Copyright 1999-2003, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation,
+dnl Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C P5
-C P6 model 0-8,10-12 5.94
-C P6 model 9 (Banias) 5.51
-C P6 model 13 (Dothan) 5.57
+C cycles/limb
+C P5:
+C P6 model 0-8,10-12) 5.94
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan) 5.57
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C AMD K6 7.65-8.5 (data dependent)
-C AMD K7
-C AMD K8
+C K6: 7.65-8.5 (data dependent)
+C K7:
+C K8:
-dnl K6: large multipliers small multipliers
+dnl K6: large multpliers small multpliers
dnl UNROLL_COUNT cycles/limb cycles/limb
dnl 4 9.5 7.78
dnl 8 9.0 7.78
@@ -257,7 +247,7 @@ C registers at the point of doing the mul for the initial two carry limbs.
C
C The add/adc for the initial carry in %esi is necessary only for the
C mpn_addmul/submul_1c entry points. Duplicating the startup code to
-C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good
+C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good
C idea.
dnl overlapping with parameters already fetched
diff --git a/gmp/mpn/x86/k6/cross.pl b/gmp/mpn/x86/k6/cross.pl
index fc921a56b7..cf476d603b 100755
--- a/gmp/mpn/x86/k6/cross.pl
+++ b/gmp/mpn/x86/k6/cross.pl
@@ -2,31 +2,20 @@
# Copyright 2000, 2001 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
#
-# * the GNU Lesser General Public License as published by the Free
-# Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
#
-# or
-#
-# * the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any
-# later version.
-#
-# or both in parallel, as here.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# for more details.
-#
-# You should have received copies of the GNU General Public License and the
-# GNU Lesser General Public License along with the GNU MP Library. If not,
-# see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
# Usage: cross.pl [filename.o]...
diff --git a/gmp/mpn/x86/k6/divrem_1.asm b/gmp/mpn/x86/k6/divrem_1.asm
index b4cea4fa2a..1c86d9bd6c 100644
--- a/gmp/mpn/x86/k6/divrem_1.asm
+++ b/gmp/mpn/x86/k6/divrem_1.asm
@@ -1,32 +1,22 @@
dnl AMD K6 mpn_divrem_1 -- mpn by limb division.
-dnl Copyright 1999-2003, 2007 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k6/gcd_1.asm b/gmp/mpn/x86/k6/gcd_1.asm
index 0c233ff362..58aff08221 100644
--- a/gmp/mpn/x86/k6/gcd_1.asm
+++ b/gmp/mpn/x86/k6/gcd_1.asm
@@ -1,32 +1,21 @@
dnl AMD K6 mpn_gcd_1 -- mpn by 1 gcd.
-dnl Copyright 2000-2002, 2004 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k6/gmp-mparam.h b/gmp/mpn/x86/k6/gmp-mparam.h
index f03f1b2d91..c04446a573 100644
--- a/gmp/mpn/x86/k6/gmp-mparam.h
+++ b/gmp/mpn/x86/k6/gmp-mparam.h
@@ -1,166 +1,68 @@
/* AMD K6 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 2000-2004, 2009, 2010 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2009
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
-or both in parallel, as here.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+/* 450MHz K6-2 */
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+/* Generated by tuneup.c, 2009-01-05, gcc 3.4 */
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define MUL_KARATSUBA_THRESHOLD 19
+#define MUL_TOOM3_THRESHOLD 73
+#define MUL_TOOM44_THRESHOLD 104
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD 32
+#define SQR_TOOM3_THRESHOLD 105
+#define SQR_TOOM4_THRESHOLD 143
-/* 450MHz K6-2 */
+#define MULLOW_BASECASE_THRESHOLD 0 /* always */
+#define MULLOW_DC_THRESHOLD 64
+#define MULLOW_MUL_N_THRESHOLD 232
+
+#define DIV_SB_PREINV_THRESHOLD 4
+#define DIV_DC_THRESHOLD 67
+#define POWM_THRESHOLD 110
+
+#define MATRIX22_STRASSEN_THRESHOLD 21
+#define HGCD_THRESHOLD 195
+#define GCD_DC_THRESHOLD 602
+#define GCDEXT_DC_THRESHOLD 662
+#define JACOBI_BASE_METHOD 2
+
+#define USE_PREINV_DIVREM_1 0
+#define USE_PREINV_MOD_1 1 /* native */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */
+
+#define GET_STR_DC_THRESHOLD 31
+#define GET_STR_PRECOMPUTE_THRESHOLD 52
+#define SET_STR_DC_THRESHOLD 1127
+#define SET_STR_PRECOMPUTE_THRESHOLD 1795
+
+#define MUL_FFT_TABLE { 336, 672, 1152, 3584, 10240, 24576, 163840, 393216, 0 }
+#define MUL_FFT_MODF_THRESHOLD 352
+#define MUL_FFT_THRESHOLD 7168
-#define MOD_1_NORM_THRESHOLD 12
-#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 41
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 32
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 3
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 128
-#define USE_PREINV_DIVREM_1 0
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
-
-#define MUL_TOOM22_THRESHOLD 20
-#define MUL_TOOM33_THRESHOLD 69
-#define MUL_TOOM44_THRESHOLD 106
-#define MUL_TOOM6H_THRESHOLD 157
-#define MUL_TOOM8H_THRESHOLD 199
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 69
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 65
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 64
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 32
-#define SQR_TOOM3_THRESHOLD 97
-#define SQR_TOOM4_THRESHOLD 143
-#define SQR_TOOM6_THRESHOLD 222
-#define SQR_TOOM8_THRESHOLD 272
-
-#define MULMOD_BNM1_THRESHOLD 13
-#define SQRMOD_BNM1_THRESHOLD 17
-
-#define MUL_FFT_MODF_THRESHOLD 476 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 476, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
- { 11, 5}, { 23, 6}, { 17, 7}, { 9, 6}, \
- { 19, 7}, { 11, 6}, { 23, 7}, { 13, 6}, \
- { 27, 7}, { 15, 6}, { 31, 7}, { 17, 6}, \
- { 35, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \
- { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
- { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \
- { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
- { 47,10}, { 31, 9}, { 79,10}, { 47, 9}, \
- { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
- { 79, 9}, { 167,10}, { 95, 9}, { 191,10}, \
- { 111,11}, { 63,10}, { 127, 9}, { 255,10}, \
- { 143, 9}, { 287,10}, { 159,11}, { 95,10}, \
- { 191, 9}, { 383,12}, { 63,11}, { 127,10}, \
- { 255, 9}, { 511,10}, { 271, 9}, { 543,10}, \
- { 287,11}, { 159,10}, { 351,11}, { 191,10}, \
- { 415, 9}, { 831,11}, { 223,12}, { 127,11}, \
- { 255,10}, { 543,11}, { 287,10}, { 575,11}, \
- { 351,10}, { 703,12}, { 191,11}, { 415,10}, \
- { 831,13}, { 127,12}, { 255,11}, { 543,10}, \
- { 1087,11}, { 575,12}, { 319,11}, { 703,12}, \
- { 383,11}, { 831,12}, { 447,11}, { 895,13}, \
- { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
- { 1151,12}, { 703,13}, { 383,12}, { 959,14}, \
- { 255,13}, { 511,12}, { 1215,13}, { 8192,14}, \
- { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 106
-#define MUL_FFT_THRESHOLD 7424
-
-#define SQR_FFT_MODF_THRESHOLD 432 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 432, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
- { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \
- { 24, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
- { 31, 7}, { 21, 8}, { 11, 7}, { 29, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \
- { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \
- { 39, 9}, { 23, 7}, { 93, 8}, { 47, 7}, \
- { 95, 8}, { 51,10}, { 15, 9}, { 31, 8}, \
- { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
- { 95, 9}, { 55,10}, { 31, 9}, { 71, 8}, \
- { 143, 9}, { 79,10}, { 47, 9}, { 95,11}, \
- { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
- { 167,10}, { 95, 9}, { 191,11}, { 63,10}, \
- { 127, 9}, { 255,10}, { 143, 9}, { 287, 8}, \
- { 575,10}, { 159, 9}, { 319,11}, { 95,10}, \
- { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \
- { 511,10}, { 271, 9}, { 543,10}, { 287,11}, \
- { 159,10}, { 319, 9}, { 639,10}, { 351, 9}, \
- { 703,11}, { 191,10}, { 415,11}, { 223,12}, \
- { 127,11}, { 255,10}, { 543,11}, { 287,10}, \
- { 607,11}, { 319,10}, { 639,11}, { 351,10}, \
- { 703,12}, { 191,11}, { 415,10}, { 831,13}, \
- { 127,12}, { 255,11}, { 543,10}, { 1087,11}, \
- { 607,12}, { 319,11}, { 703,12}, { 383,11}, \
- { 831,12}, { 447,13}, { 255,12}, { 511,11}, \
- { 1087,12}, { 575,11}, { 1215,12}, { 703,13}, \
- { 383,12}, { 895,14}, { 255,13}, { 511,12}, \
- { 1215,13}, { 8192,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 112
-#define SQR_FFT_THRESHOLD 7040
-
-#define MULLO_BASECASE_THRESHOLD 3
-#define MULLO_DC_THRESHOLD 60
-#define MULLO_MUL_N_THRESHOLD 13463
-
-#define DC_DIV_QR_THRESHOLD 78
-#define DC_DIVAPPR_Q_THRESHOLD 252
-#define DC_BDIV_QR_THRESHOLD 84
-#define DC_BDIV_Q_THRESHOLD 171
-
-#define INV_MULMOD_BNM1_THRESHOLD 55
-#define INV_NEWTON_THRESHOLD 234
-#define INV_APPR_THRESHOLD 236
-
-#define BINV_NEWTON_THRESHOLD 268
-#define REDC_1_TO_REDC_N_THRESHOLD 67
-
-#define MU_DIV_QR_THRESHOLD 1308
-#define MU_DIVAPPR_Q_THRESHOLD 1142
-#define MUPI_DIV_QR_THRESHOLD 134
-#define MU_BDIV_QR_THRESHOLD 1164
-#define MU_BDIV_Q_THRESHOLD 1164
-
-#define MATRIX22_STRASSEN_THRESHOLD 15
-#define HGCD_THRESHOLD 182
-#define GCD_DC_THRESHOLD 591
-#define GCDEXT_DC_THRESHOLD 472
-#define JACOBI_BASE_METHOD 2
-
-#define GET_STR_DC_THRESHOLD 24
-#define GET_STR_PRECOMPUTE_THRESHOLD 40
-#define SET_STR_DC_THRESHOLD 834
-#define SET_STR_PRECOMPUTE_THRESHOLD 2042
+#define SQR_FFT_TABLE { 272, 672, 1408, 4608, 10240, 24576, 163840, 393216, 0 }
+#define SQR_FFT_MODF_THRESHOLD 336
+#define SQR_FFT_THRESHOLD 3840
diff --git a/gmp/mpn/x86/k6/k62mmx/copyd.asm b/gmp/mpn/x86/k6/k62mmx/copyd.asm
index f80a5a1cdb..227ed78783 100644
--- a/gmp/mpn/x86/k6/k62mmx/copyd.asm
+++ b/gmp/mpn/x86/k6/k62mmx/copyd.asm
@@ -1,32 +1,21 @@
dnl AMD K6-2 mpn_copyd -- copy limb vector, decrementing.
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k6/k62mmx/lshift.asm b/gmp/mpn/x86/k6/k62mmx/lshift.asm
index c86575feed..e48e73e19a 100644
--- a/gmp/mpn/x86/k6/k62mmx/lshift.asm
+++ b/gmp/mpn/x86/k6/k62mmx/lshift.asm
@@ -1,32 +1,21 @@
dnl AMD K6-2 mpn_lshift -- mpn left shift.
dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k6/k62mmx/rshift.asm b/gmp/mpn/x86/k6/k62mmx/rshift.asm
index f604a7bd52..b3114d0e6e 100644
--- a/gmp/mpn/x86/k6/k62mmx/rshift.asm
+++ b/gmp/mpn/x86/k6/k62mmx/rshift.asm
@@ -1,32 +1,21 @@
dnl AMD K6-2 mpn_rshift -- mpn right shift.
dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k6/mmx/com.asm b/gmp/mpn/x86/k6/mmx/com_n.asm
index b747454627..42e6ab392a 100644
--- a/gmp/mpn/x86/k6/mmx/com.asm
+++ b/gmp/mpn/x86/k6/mmx/com_n.asm
@@ -1,32 +1,21 @@
-dnl AMD K6-2 mpn_com -- mpn bitwise one's complement.
+dnl AMD K6-2 mpn_com_n -- mpn bitwise one's complement.
-dnl Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -39,7 +28,7 @@ C K6-2 1.0 1.18 1.18 1.18 cycles/limb
C K6 1.5 1.85 1.75 1.85
-C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C Take the bitwise ones-complement of src,size and write it to dst,size.
@@ -49,7 +38,7 @@ defframe(PARAM_DST, 4)
TEXT
ALIGN(16)
-PROLOGUE(mpn_com)
+PROLOGUE(mpn_com_n)
deflit(`FRAME',0)
movl PARAM_SIZE, %ecx
diff --git a/gmp/mpn/x86/k6/mmx/dive_1.asm b/gmp/mpn/x86/k6/mmx/dive_1.asm
index b644dca8cd..9cc90d88a5 100644
--- a/gmp/mpn/x86/k6/mmx/dive_1.asm
+++ b/gmp/mpn/x86/k6/mmx/dive_1.asm
@@ -1,32 +1,21 @@
dnl AMD K6 mpn_divexact_1 -- mpn by limb exact division.
-dnl Copyright 2000-2002, 2007 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002, 2007 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -128,7 +117,7 @@ Zdisp( movzbl, 0,(%eax,%ebp), %eax)
subl %ebp, %eax C inv = 2*inv - inv*inv*d
subl $1, %edx C shift amount, and clear carry
- ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+ ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
pushl %eax FRAME_pushl()
imull PARAM_DIVISOR, %eax
cmpl $1, %eax
diff --git a/gmp/mpn/x86/k6/mmx/logops_n.asm b/gmp/mpn/x86/k6/mmx/logops_n.asm
index e17930bb2d..a6272131a2 100644
--- a/gmp/mpn/x86/k6/mmx/logops_n.asm
+++ b/gmp/mpn/x86/k6/mmx/logops_n.asm
@@ -1,33 +1,22 @@
dnl AMD K6-2 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
-dnl Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k6/mmx/lshift.asm b/gmp/mpn/x86/k6/mmx/lshift.asm
index 45be582633..1492025171 100644
--- a/gmp/mpn/x86/k6/mmx/lshift.asm
+++ b/gmp/mpn/x86/k6/mmx/lshift.asm
@@ -1,32 +1,21 @@
dnl AMD K6 mpn_lshift -- mpn left shift.
dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k6/mmx/popham.asm b/gmp/mpn/x86/k6/mmx/popham.asm
index 2b19d0b5ee..a0a651d39c 100644
--- a/gmp/mpn/x86/k6/mmx/popham.asm
+++ b/gmp/mpn/x86/k6/mmx/popham.asm
@@ -1,33 +1,22 @@
dnl AMD K6-2 mpn_popcount, mpn_hamdist -- mpn bit population count and
dnl hamming distance.
-dnl Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k6/mmx/rshift.asm b/gmp/mpn/x86/k6/mmx/rshift.asm
index cd0382f322..80cd6fb05a 100644
--- a/gmp/mpn/x86/k6/mmx/rshift.asm
+++ b/gmp/mpn/x86/k6/mmx/rshift.asm
@@ -1,32 +1,21 @@
dnl AMD K6 mpn_rshift -- mpn right shift.
dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k6/mod_34lsub1.asm b/gmp/mpn/x86/k6/mod_34lsub1.asm
index 7e30503e54..a5b7ee1064 100644
--- a/gmp/mpn/x86/k6/mod_34lsub1.asm
+++ b/gmp/mpn/x86/k6/mod_34lsub1.asm
@@ -1,32 +1,21 @@
dnl AMD K6 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
-dnl Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k6/mode1o.asm b/gmp/mpn/x86/k6/mode1o.asm
index a13f647b81..f299877911 100644
--- a/gmp/mpn/x86/k6/mode1o.asm
+++ b/gmp/mpn/x86/k6/mode1o.asm
@@ -1,32 +1,21 @@
dnl AMD K6 mpn_modexact_1_odd -- exact division style remainder.
-dnl Copyright 2000-2003, 2007 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002, 2003, 2007 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -114,7 +103,7 @@ Zdisp( movzbl, 0,(%ecx,%edi), %edi) C inv 8 bits
subl %ecx, %edi C inv = 2*inv - inv*inv*d
- ASSERT(e,` C d*inv == 1 mod 2^GMP_LIMB_BITS
+ ASSERT(e,` C d*inv == 1 mod 2^BITS_PER_MP_LIMB
pushl %eax
movl %esi, %eax
imull %edi, %eax
diff --git a/gmp/mpn/x86/k6/mul_1.asm b/gmp/mpn/x86/k6/mul_1.asm
index 3ef7ec24fe..e1c468fe34 100644
--- a/gmp/mpn/x86/k6/mul_1.asm
+++ b/gmp/mpn/x86/k6/mul_1.asm
@@ -1,49 +1,38 @@
dnl AMD K6 mpn_mul_1 -- mpn by limb multiply.
dnl Copyright 1999, 2000, 2002, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C P5
-C P6 model 0-8,10-12 5.5
+C cycles/limb
+C P5:
+C P6 model 0-8,10-12 5.5
C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 4.87
+C P6 model 13 (Dothan) 4.87
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C AMD K6 6.25
-C AMD K7
-C AMD K8
+C K6: 6.25
+C K7:
+C K8:
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/gmp/mpn/x86/k6/mul_basecase.asm b/gmp/mpn/x86/k6/mul_basecase.asm
index 7030001c3f..dcd4d70082 100644
--- a/gmp/mpn/x86/k6/mul_basecase.asm
+++ b/gmp/mpn/x86/k6/mul_basecase.asm
@@ -1,32 +1,21 @@
dnl AMD K6 mpn_mul_basecase -- multiply two mpn numbers.
-dnl Copyright 1999-2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k6/pre_mod_1.asm b/gmp/mpn/x86/k6/pre_mod_1.asm
index 34db20d386..3231539bfd 100644
--- a/gmp/mpn/x86/k6/pre_mod_1.asm
+++ b/gmp/mpn/x86/k6/pre_mod_1.asm
@@ -1,32 +1,21 @@
dnl AMD K6 mpn_preinv_mod_1 -- mpn by 1 remainder, with pre-inverted divisor.
dnl Copyright 2000, 2002, 2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k6/sqr_basecase.asm b/gmp/mpn/x86/k6/sqr_basecase.asm
index b7ecb5cc8a..3392d38812 100644
--- a/gmp/mpn/x86/k6/sqr_basecase.asm
+++ b/gmp/mpn/x86/k6/sqr_basecase.asm
@@ -1,32 +1,21 @@
dnl AMD K6 mpn_sqr_basecase -- square an mpn number.
-dnl Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -36,35 +25,35 @@ C product (measured on the speed difference between 17 and 33 limbs,
C which is roughly the Karatsuba recursing range).
-dnl SQR_TOOM2_THRESHOLD_MAX is the maximum SQR_TOOM2_THRESHOLD this
+dnl SQR_KARATSUBA_THRESHOLD_MAX is the maximum SQR_KARATSUBA_THRESHOLD this
dnl code supports. This value is used only by the tune program to know
dnl what it can go up to. (An attempt to compile with a bigger value will
dnl trigger some m4_assert()s in the code, making the build fail.)
dnl
dnl The value is determined by requiring the displacements in the unrolled
dnl addmul to fit in single bytes. This means a maximum UNROLL_COUNT of
-dnl 63, giving a maximum SQR_TOOM2_THRESHOLD of 66.
+dnl 63, giving a maximum SQR_KARATSUBA_THRESHOLD of 66.
-deflit(SQR_TOOM2_THRESHOLD_MAX, 66)
+deflit(SQR_KARATSUBA_THRESHOLD_MAX, 66)
dnl Allow a value from the tune program to override config.m4.
-ifdef(`SQR_TOOM2_THRESHOLD_OVERRIDE',
-`define(`SQR_TOOM2_THRESHOLD',SQR_TOOM2_THRESHOLD_OVERRIDE)')
+ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE',
+`define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)')
dnl UNROLL_COUNT is the number of code chunks in the unrolled addmul. The
-dnl number required is determined by SQR_TOOM2_THRESHOLD, since
-dnl mpn_sqr_basecase only needs to handle sizes < SQR_TOOM2_THRESHOLD.
+dnl number required is determined by SQR_KARATSUBA_THRESHOLD, since
+dnl mpn_sqr_basecase only needs to handle sizes < SQR_KARATSUBA_THRESHOLD.
dnl
dnl The first addmul is the biggest, and this takes the second least
dnl significant limb and multiplies it by the third least significant and
-dnl up. Hence for a maximum operand size of SQR_TOOM2_THRESHOLD-1
-dnl limbs, UNROLL_COUNT needs to be SQR_TOOM2_THRESHOLD-3.
+dnl up. Hence for a maximum operand size of SQR_KARATSUBA_THRESHOLD-1
+dnl limbs, UNROLL_COUNT needs to be SQR_KARATSUBA_THRESHOLD-3.
-m4_config_gmp_mparam(`SQR_TOOM2_THRESHOLD')
-deflit(UNROLL_COUNT, eval(SQR_TOOM2_THRESHOLD-3))
+m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD')
+deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3))
C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
diff --git a/gmp/mpn/x86/k7/README b/gmp/mpn/x86/k7/README
index 5711b612c5..e2c5e0c18d 100644
--- a/gmp/mpn/x86/k7/README
+++ b/gmp/mpn/x86/k7/README
@@ -3,28 +3,17 @@ Copyright 2000, 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
diff --git a/gmp/mpn/x86/k7/addlsh1_n.asm b/gmp/mpn/x86/k7/addlsh1_n.asm
deleted file mode 100644
index a957b6f78e..0000000000
--- a/gmp/mpn/x86/k7/addlsh1_n.asm
+++ /dev/null
@@ -1,196 +0,0 @@
-dnl AMD K7 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C This is an attempt at an addlsh1_n for x86-32, not relying on sse2 insns.
-C The innerloop is 2*3-way unrolled, which is the best we can do with the available
-C registers. It seems tricky to use the same structure for rsblsh1_n, since we
-C cannot feed carry between operations there.
-
-C cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 5.4 (worse than add_n + lshift)
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood)
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom 6
-C AMD K6 ?
-C AMD K7 2.5
-C AMD K8
-
-C This is a basic addlsh1_n for k7, atom, and perhaps some other x86-32
-C processors. It uses 2*3-way unrolling, for good reasons. Unfortunately,
-C that means we need an initial magic multiply.
-C
-C It is not clear how to do sublsh1_n or rsblsh1_n using the same pattern. We
-C cannot do rsblsh1_n since we feed carry from the shift blocks to the
-C add/subtract blocks, which is right for addition but reversed for
-C subtraction. We could perhaps do sublsh1_n, with some extra move insns,
-C without losing any time, since we're not issue limited but carry recurrency
-C latency.
-C
-C Breaking carry recurrency might be a good idea. We would then need separate
-C registers for the shift carry and add/subtract carry, which in turn would
-C force us to 2*2-way unrolling.
-
-defframe(PARAM_SIZE, 16)
-defframe(PARAM_DBLD, 12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-dnl re-use parameter space
-define(VAR_COUNT,`PARAM_DST')
-define(VAR_TMP,`PARAM_DBLD')
-
-ASM_START()
- TEXT
- ALIGN(8)
-PROLOGUE(mpn_addlsh1_n)
-deflit(`FRAME',0)
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`vp', `%ebp')
-
- mov $0x2aaaaaab, %eax C 0x2aaaaaab = ceil(2^32/6), the "magic multiply" constant
-
- push %ebx FRAME_pushl()
- mov PARAM_SIZE, %ebx C size
-
- push rp FRAME_pushl()
- mov PARAM_DST, rp
-
- mul %ebx C edx = high word of size*0x2aaaaaab, i.e. size\6
-
- push up FRAME_pushl()
- mov PARAM_SRC, up
-
- not %edx C count = -(size\6)-1
- mov %edx, VAR_COUNT
-
- push vp FRAME_pushl()
- mov PARAM_DBLD, vp
-
- lea 3(%edx,%edx,2), %ecx C count*3+3 = -(size\6)*3
- xor %edx, %edx
- lea (%ebx,%ecx,2), %ebx C size + (count*3+3)*2 = size % 6
- or %ebx, %ebx
- jz L(exact)
-
-L(oop):
-ifdef(`CPU_P6',`
- shr %edx ') C restore 2nd saved carry bit
- mov (vp), %eax
- adc %eax, %eax
- rcr %edx C restore 1st saved carry bit
- lea 4(vp), vp
- adc (up), %eax
- lea 4(up), up
- adc %edx, %edx C save a carry bit in edx
-ifdef(`CPU_P6',`
- adc %edx, %edx ') C save another carry bit in edx
- dec %ebx C one limb per pass, size % 6 passes in total
- mov %eax, (rp)
- lea 4(rp), rp
- jnz L(oop)
- mov vp, VAR_TMP
-L(exact):
- incl VAR_COUNT
- jz L(end)
-
- ALIGN(16)
-L(top):
-ifdef(`CPU_P6',`
- shr %edx ') C restore 2nd saved carry bit
- mov (vp), %eax
- adc %eax, %eax
- mov 4(vp), %ebx
- adc %ebx, %ebx
- mov 8(vp), %ecx
- adc %ecx, %ecx
-
- rcr %edx C restore 1st saved carry bit
-
- adc (up), %eax
- mov %eax, (rp)
- adc 4(up), %ebx
- mov %ebx, 4(rp)
- adc 8(up), %ecx
- mov %ecx, 8(rp)
-
- mov 12(vp), %eax
- adc %eax, %eax
- mov 16(vp), %ebx
- adc %ebx, %ebx
- mov 20(vp), %ecx
- adc %ecx, %ecx
-
- lea 24(vp), vp
- adc %edx, %edx C save a carry bit in edx
-
- adc 12(up), %eax
- mov %eax, 12(rp)
- adc 16(up), %ebx
- mov %ebx, 16(rp)
- adc 20(up), %ecx
-
- lea 24(up), up
-
-ifdef(`CPU_P6',`
- adc %edx, %edx ') C save another carry bit in edx
- mov %ecx, 20(rp)
- incl VAR_COUNT C negative count steps up to zero, 6 limbs per pass
- lea 24(rp), rp
- jne L(top)
-
-L(end):
- pop vp FRAME_popl()
- pop up FRAME_popl()
-
-ifdef(`CPU_P6',`
- xor %eax, %eax
- shr $1, %edx
- adc %edx, %eax
-',`
- adc $0, %edx
- mov %edx, %eax
-')
- pop rp FRAME_popl()
- pop %ebx FRAME_popl()
- ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/k7/aors_n.asm b/gmp/mpn/x86/k7/aors_n.asm
index 1a08072029..d84de3ee98 100644
--- a/gmp/mpn/x86/k7/aors_n.asm
+++ b/gmp/mpn/x86/k7/aors_n.asm
@@ -1,32 +1,21 @@
dnl AMD K7 mpn_add_n/mpn_sub_n -- mpn add or subtract.
-dnl Copyright 1999-2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k7/aorsmul_1.asm b/gmp/mpn/x86/k7/aorsmul_1.asm
index eec8df6de2..b247c29131 100644
--- a/gmp/mpn/x86/k7/aorsmul_1.asm
+++ b/gmp/mpn/x86/k7/aorsmul_1.asm
@@ -1,49 +1,39 @@
dnl AMD K7 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
-dnl Copyright 1999-2002, 2005, 2008 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002, 2005, 2008 Free Software Foundation,
+dnl Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9 (Banias) 6.5
+C cycles/limb
+C P5:
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
C P6 model 13 (Dothan)
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C AMD K6
-C AMD K7 3.75
-C AMD K8
+C K6:
+C K7: 3.75
+C K8:
C TODO
C * Improve feed-in and wind-down code. We beat the old code for all n != 1,
diff --git a/gmp/mpn/x86/k7/bdiv_q_1.asm b/gmp/mpn/x86/k7/bdiv_q_1.asm
deleted file mode 100644
index df3477f539..0000000000
--- a/gmp/mpn/x86/k7/bdiv_q_1.asm
+++ /dev/null
@@ -1,244 +0,0 @@
-dnl AMD K7 mpn_bdiv_q_1 -- mpn by limb exact division.
-
-dnl Rearranged from mpn/x86/k7/dive_1.asm by Marco Bodrato.
-
-dnl Copyright 2001, 2002, 2004, 2007, 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C cycles/limb
-C Athlon: 11.0
-C Hammer: 9.0
-
-
-C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C mp_limb_t divisor);
-C
-C The dependent chain is mul+imul+sub for 11 cycles and that speed is
-C achieved with no special effort. The load and shrld latencies are hidden
-C by out of order execution.
-C
-C It's a touch faster on size==1 to use the mul-by-inverse than divl.
-
-defframe(PARAM_SHIFT, 24)
-defframe(PARAM_INVERSE,20)
-defframe(PARAM_DIVISOR,16)
-defframe(PARAM_SIZE, 12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-defframe(SAVE_EBX, -4)
-defframe(SAVE_ESI, -8)
-defframe(SAVE_EDI, -12)
-defframe(SAVE_EBP, -16)
-defframe(VAR_INVERSE, -20)
-defframe(VAR_DST_END, -24)
-
-deflit(STACK_SPACE, 24)
-
- TEXT
-
-C mp_limb_t
-C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
-C mp_limb_t inverse, int shift)
- ALIGN(16)
-PROLOGUE(mpn_pi1_bdiv_q_1)
-deflit(`FRAME',0)
-
- subl $STACK_SPACE, %esp deflit(`FRAME',STACK_SPACE)
- movl PARAM_SHIFT, %ecx C shift count
-
- movl %ebp, SAVE_EBP
- movl PARAM_SIZE, %ebp
-
- movl %esi, SAVE_ESI
- movl PARAM_SRC, %esi
-
- movl %edi, SAVE_EDI
- movl PARAM_DST, %edi
-
- movl %ebx, SAVE_EBX
-
- leal (%esi,%ebp,4), %esi C src end
- leal (%edi,%ebp,4), %edi C dst end
- negl %ebp C -size
-
- movl PARAM_INVERSE, %eax C inv
-
-L(common):
- movl %eax, VAR_INVERSE
- movl (%esi,%ebp,4), %eax C src[0]
-
- incl %ebp
- jz L(one)
-
- movl (%esi,%ebp,4), %edx C src[1]
-
- shrdl( %cl, %edx, %eax)
-
- movl %edi, VAR_DST_END
- xorl %ebx, %ebx
- jmp L(entry)
-
- ALIGN(8)
-L(top):
- C eax q
- C ebx carry bit, 0 or 1
- C ecx shift
- C edx
- C esi src end
- C edi dst end
- C ebp counter, limbs, negative
-
- mull PARAM_DIVISOR C carry limb in edx
-
- movl -4(%esi,%ebp,4), %eax
- movl (%esi,%ebp,4), %edi
-
- shrdl( %cl, %edi, %eax)
-
- subl %ebx, %eax C apply carry bit
- setc %bl
- movl VAR_DST_END, %edi
-
- subl %edx, %eax C apply carry limb
- adcl $0, %ebx
-
-L(entry):
- imull VAR_INVERSE, %eax
-
- movl %eax, -4(%edi,%ebp,4)
- incl %ebp
- jnz L(top)
-
-
- mull PARAM_DIVISOR C carry limb in edx
-
- movl -4(%esi), %eax C src high limb
- shrl %cl, %eax
- movl SAVE_ESI, %esi
-
- subl %ebx, %eax C apply carry bit
- movl SAVE_EBX, %ebx
- movl SAVE_EBP, %ebp
-
- subl %edx, %eax C apply carry limb
-
- imull VAR_INVERSE, %eax
-
- movl %eax, -4(%edi)
- movl SAVE_EDI, %edi
- addl $STACK_SPACE, %esp
-
- ret
-
-L(one):
- shrl %cl, %eax
- movl SAVE_ESI, %esi
- movl SAVE_EBX, %ebx
-
- imull VAR_INVERSE, %eax
-
- movl SAVE_EBP, %ebp
-
- movl %eax, -4(%edi)
- movl SAVE_EDI, %edi
- addl $STACK_SPACE, %esp
-
- ret
-EPILOGUE()
-
-C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C mp_limb_t divisor);
-C
-
- ALIGN(16)
-PROLOGUE(mpn_bdiv_q_1)
-deflit(`FRAME',0)
-
- movl PARAM_DIVISOR, %eax
- subl $STACK_SPACE, %esp deflit(`FRAME',STACK_SPACE)
- movl $-1, %ecx C shift count
-
- movl %ebp, SAVE_EBP
- movl PARAM_SIZE, %ebp
-
- movl %esi, SAVE_ESI
- movl %edi, SAVE_EDI
-
- C If there's usually only one or two trailing zero bits then this
- C should be faster than bsfl.
-L(strip_twos):
- incl %ecx
- shrl %eax
- jnc L(strip_twos)
-
- movl %ebx, SAVE_EBX
- leal 1(%eax,%eax), %ebx C d without twos
- andl $127, %eax C d/2, 7 bits
-
-ifdef(`PIC',`
- LEA( binvert_limb_table, %edx)
- movzbl (%eax,%edx), %eax C inv 8 bits
-',`
- movzbl binvert_limb_table(%eax), %eax C inv 8 bits
-')
-
- leal (%eax,%eax), %edx C 2*inv
- movl %ebx, PARAM_DIVISOR C d without twos
-
- imull %eax, %eax C inv*inv
-
- movl PARAM_SRC, %esi
- movl PARAM_DST, %edi
-
- imull %ebx, %eax C inv*inv*d
-
- subl %eax, %edx C inv = 2*inv - inv*inv*d
- leal (%edx,%edx), %eax C 2*inv
-
- imull %edx, %edx C inv*inv
-
- leal (%esi,%ebp,4), %esi C src end
- leal (%edi,%ebp,4), %edi C dst end
- negl %ebp C -size
-
- imull %ebx, %edx C inv*inv*d
-
- subl %edx, %eax C inv = 2*inv - inv*inv*d
-
- ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
- pushl %eax FRAME_pushl()
- imull PARAM_DIVISOR, %eax
- cmpl $1, %eax
- popl %eax FRAME_popl()')
-
- jmp L(common)
-EPILOGUE()
diff --git a/gmp/mpn/x86/k7/dive_1.asm b/gmp/mpn/x86/k7/dive_1.asm
index 8eb4f45ac0..c994e0fb06 100644
--- a/gmp/mpn/x86/k7/dive_1.asm
+++ b/gmp/mpn/x86/k7/dive_1.asm
@@ -1,32 +1,21 @@
dnl AMD K7 mpn_divexact_1 -- mpn by limb exact division.
dnl Copyright 2001, 2002, 2004, 2007 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -116,7 +105,7 @@ ifdef(`PIC',`
subl %edx, %eax C inv = 2*inv - inv*inv*d
- ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+ ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
pushl %eax FRAME_pushl()
imull PARAM_DIVISOR, %eax
cmpl $1, %eax
diff --git a/gmp/mpn/x86/k7/gcd_1.asm b/gmp/mpn/x86/k7/gcd_1.asm
index c7d12c83c0..f912f43730 100644
--- a/gmp/mpn/x86/k7/gcd_1.asm
+++ b/gmp/mpn/x86/k7/gcd_1.asm
@@ -1,186 +1,369 @@
-dnl x86 mpn_gcd_1 optimised for AMD K7.
+dnl AMD K7 mpn_gcd_1 -- mpn by 1 gcd.
-dnl Contributed to the GNU project by Kevin Ryde. Rehacked by Torbjorn
-dnl Granlund.
-
-dnl Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/bit (approx)
-C AMD K7 5.31
-C AMD K8,K9 5.33
-C AMD K10 5.30
-C AMD bd1 ?
-C AMD bobcat 7.02
-C Intel P4-2 10.1
-C Intel P4-3/4 10.0
-C Intel P6/13 5.88
-C Intel core2 6.26
-C Intel NHM 6.83
-C Intel SBR 8.50
-C Intel atom 8.90
-C VIA nano ?
-C Numbers measured with: speed -CD -s16-32 -t16 mpn_gcd_1
-
-C TODO
-C * Tune overhead, this takes 2-3 cycles more than old code when v0 is tiny.
-C * Stream things better through registers, avoiding some copying.
-
-C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+C K7: 6.75 cycles/bit (approx) 1x1 gcd
+C 11.0 cycles/limb Nx1 reduction (modexact_1_odd)
+
+
+dnl Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y,
+dnl where x is the larger of the two. See tune/README for more.
+dnl
+dnl divl at 40 cycles compared to the gcd at about 7 cycles/bitpair
+dnl suggests 40/7*2=11.4 but 7 seems to be about right.
+
+deflit(DIV_THRESHOLD, 7)
+
+C table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+C
+C This is mixed in with the code, but as per the k7 optimization manual it's
+C a full cache line and suitably aligned so it won't get swapped between
+C code and data. Having it in TEXT rather than RODATA saves needing a GOT
+C entry when PIC.
+C
+C Actually, there doesn't seem to be a measurable difference between this in
+C its own cache line or plonked in the middle of the code. Presumably
+C since TEXT is read-only there's no worries about coherency.
+
+deflit(MASK, 63)
deflit(MAXSHIFT, 6)
-deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
-DEF_OBJECT(ctz_table,64)
+ TEXT
+ ALIGN(64)
+L(table):
.byte MAXSHIFT
forloop(i,1,MASK,
` .byte m4_count_trailing_zeros(i)
')
-END_OBJECT(ctz_table)
-C Threshold of when to call bmod when U is one limb. Should be about
-C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
-define(`DIV_THRES_LOG2', 7)
+C mp_limb_t mpn_gcd_1 (mp_srcptr src, mp_size_t size, mp_limb_t limb);
+C
+
+defframe(PARAM_LIMB, 12)
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
-define(`up', `%edi')
-define(`n', `%esi')
-define(`v0', `%edx')
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+defframe(CALL_DIVISOR,-20)
+defframe(CALL_SIZE, -24)
+defframe(CALL_SRC, -28)
+deflit(STACK_SPACE, 28)
-ASM_START()
TEXT
ALIGN(16)
+
PROLOGUE(mpn_gcd_1)
- push %edi
- push %esi
+deflit(`FRAME',0)
+
+ ASSERT(ne, `cmpl $0, PARAM_LIMB') C y!=0
+ ASSERT(ae, `cmpl $1, PARAM_SIZE') C size>=1
+
+ movl PARAM_SRC, %eax
+ movl PARAM_LIMB, %edx
+ subl $STACK_SPACE, %esp deflit(`FRAME',STACK_SPACE)
- mov 12(%esp), up
- mov 16(%esp), n
- mov 20(%esp), v0
+ movl %esi, SAVE_ESI
+ movl %ebx, SAVE_EBX
- mov (up), %eax C U low limb
- or v0, %eax C x | y
- mov $-1, %ecx
+ movl (%eax), %esi C src low limb
+
+ifdef(`PIC',`
+ movl %edi, SAVE_EDI
+ call L(movl_eip_to_edi)
+L(here):
+ addl $L(table)-L(here), %edi
+')
+
+ movl %esi, %ebx
+ orl %edx, %esi C x|y
+ movl $-1, %ecx
L(twos):
- inc %ecx
- shr %eax
- jnc L(twos)
+ incl %ecx
+ shrl %esi
+ jnc L(twos) C 3/4 chance of x or y odd already
- shr %cl, v0
- mov %ecx, %eax C common twos
+ shrl %cl, %ebx
+ shrl %cl, %edx
+ movl %ecx, %esi C common twos
-L(divide_strip_y):
- shr v0
- jnc L(divide_strip_y)
- adc v0, v0
-
- push %eax
- push v0
-
- cmp $1, n
- jnz L(reduce_nby1)
-
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
- mov (up), %ecx
- mov %ecx, %eax
- shr $DIV_THRES_LOG2, %ecx
- cmp %ecx, v0
- ja L(reduced)
-
- mov v0, %esi
- xor %edx, %edx
- div %esi
- mov %edx, %eax
- jmp L(reduced)
-
-L(reduce_nby1):
-ifdef(`PIC_WITH_EBX',`
- push %ebx
- call L(movl_eip_to_ebx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
+ movl PARAM_SIZE, %ecx
+ cmpl $1, %ecx
+ ja L(divide)
+
+
+ C eax
+ C ebx x
+ C ecx
+ C edx y
+ C esi common twos
+ C edi [PIC] L(table)
+ C ebp
+
+ movl %edx, %eax
+ cmpl %ebx, %edx
+
+ cmovb( %ebx, %eax) C swap to make x bigger than y
+ cmovb( %edx, %ebx)
+
+
+L(strip_y):
+ C eax x
+ C ebx y
+ C ecx
+ C edx
+ C esi common twos
+ C edi [PIC] L(table)
+ C ebp
+
+ ASSERT(nz,`orl %ebx,%ebx')
+ shrl %ebx
+ jnc L(strip_y)
+ rcll %ebx
+
+
+ C eax x
+ C ebx y (odd)
+ C ecx
+ C edx
+ C esi common twos
+ C edi [PIC] L(table)
+ C ebp
+
+ movl %eax, %ecx
+ movl %ebx, %edx
+ shrl $DIV_THRESHOLD, %eax
+
+ cmpl %eax, %ebx
+ movl %ecx, %eax
+ ja L(strip_x_entry) C do x%y if x much bigger than y
+
+
+ xorl %edx, %edx
+
+ divl %ebx
+
+ orl %edx, %edx
+ movl %edx, %eax C remainder -> x
+ movl %ebx, %edx C y
+
+ jz L(done_ebx)
+ jmp L(strip_x)
+
+
+ C Offset 0x9D here for non-PIC. About 0.4 cycles/bit is saved by
+ C ensuring the end of the jnz at the end of this loop doesn't cross
+ C into the next cache line at 0xC0.
+ C
+ C PIC on the other hand is offset 0xAC here and extends to 0xC9, so
+ C it crosses but doesn't suffer any measurable slowdown.
+
+L(top):
+ C eax x
+ C ebx y-x
+ C ecx x-y
+ C edx y
+ C esi twos, for use at end
+ C edi [PIC] L(table)
+
+ cmovc( %ebx, %ecx) C if x-y gave carry, use x and y-x
+ cmovc( %eax, %edx)
+
+L(strip_x):
+ movl %ecx, %eax
+L(strip_x_entry):
+ andl $MASK, %ecx
+
+ ASSERT(nz, `orl %eax, %eax')
+
+ifdef(`PIC',`
+ movb (%ecx,%edi), %cl
+',`
+ movb L(table) (%ecx), %cl
')
- push v0 C param 3
- push n C param 2
- push up C param 1
- cmp $BMOD_1_TO_MOD_1_THRESHOLD, n
- jl L(bmod)
- CALL( mpn_mod_1)
- jmp L(called)
-L(bmod):
- CALL( mpn_modexact_1_odd)
-
-L(called):
- add $12, %esp C deallocate params
-ifdef(`PIC_WITH_EBX',`
- pop %ebx
+
+ shrl %cl, %eax
+ cmpb $MAXSHIFT, %cl
+
+ movl %eax, %ecx
+ movl %edx, %ebx
+ je L(strip_x)
+
+ ASSERT(nz, `testl $1, %eax') C both odd
+ ASSERT(nz, `testl $1, %edx')
+
+ subl %eax, %ebx
+ subl %edx, %ecx
+ jnz L(top)
+
+
+L(done):
+ movl %esi, %ecx
+ movl SAVE_ESI, %esi
+ifdef(`PIC',`
+ movl SAVE_EDI, %edi
')
-L(reduced):
- pop %edx
-
- LEA( ctz_table, %esi)
- test %eax, %eax
- mov %eax, %ecx
- jnz L(mid)
- jmp L(end)
-
- ALIGN(16) C K8 BC P4 NHM SBR
-L(top): cmovc( %ecx, %eax) C if x-y < 0 0
- cmovc( %edi, %edx) C use x,y-x 0
-L(mid): and $MASK, %ecx C 0
- movzbl (%esi,%ecx), %ecx C 1
- jz L(shift_alot) C 1
- shr %cl, %eax C 3
- mov %eax, %edi C 4
- mov %edx, %ecx C 3
- sub %eax, %ecx C 4
- sub %edx, %eax C 4
- jnz L(top) C 5
-
-L(end): pop %ecx
- mov %edx, %eax
- shl %cl, %eax
- pop %esi
- pop %edi
- ret
-L(shift_alot):
- shr $MAXSHIFT, %eax
- mov %eax, %ecx
- jmp L(mid)
+ shll %cl, %eax
+ movl SAVE_EBX, %ebx
+ addl $FRAME, %esp
-ifdef(`PIC_WITH_EBX',`
-L(movl_eip_to_ebx):
- mov (%esp), %ebx
ret
+
+
+
+C -----------------------------------------------------------------------------
+C two or more limbs
+
+dnl MODEXACT_THRESHOLD is the size at which it's better to call
+dnl mpn_modexact_1_odd than do an inline loop.
+
+deflit(MODEXACT_THRESHOLD, ifdef(`PIC',6,5))
+
+L(divide):
+ C eax src
+ C ebx
+ C ecx size
+ C edx y
+ C esi common twos
+ C edi [PIC] L(table)
+ C ebp
+
+L(divide_strip_y):
+ ASSERT(nz,`orl %edx,%edx')
+ shrl %edx
+ jnc L(divide_strip_y)
+ leal 1(%edx,%edx), %ebx C y now odd
+
+ movl %ebp, SAVE_EBP
+ movl %eax, %ebp
+ movl -4(%eax,%ecx,4), %eax C src high limb
+
+ cmp $MODEXACT_THRESHOLD, %ecx
+ jae L(modexact)
+
+ cmpl %ebx, %eax C high cmp divisor
+ movl $0, %edx
+
+ cmovc( %eax, %edx) C skip a div if high<divisor
+ sbbl $0, %ecx
+
+
+L(divide_top):
+ C eax scratch (quotient)
+ C ebx y
+ C ecx counter (size to 1, inclusive)
+ C edx carry (remainder)
+ C esi common twos
+ C edi [PIC] L(table)
+ C ebp src
+
+ movl -4(%ebp,%ecx,4), %eax
+
+ divl %ebx
+
+ decl %ecx
+ jnz L(divide_top)
+
+
+ C eax
+ C ebx y (odd)
+ C ecx
+ C edx x
+ C esi common twos
+ C edi [PIC] L(table)
+ C ebp
+
+ orl %edx, %edx
+ movl SAVE_EBP, %ebp
+ movl %edx, %eax
+
+ movl %edx, %ecx
+ movl %ebx, %edx
+ jnz L(strip_x_entry)
+
+
+L(done_ebx):
+ movl %ebx, %eax
+ jmp L(done)
+
+
+
+L(modexact):
+ C eax
+ C ebx y
+ C ecx size
+ C edx
+ C esi common twos
+ C edi [PIC] L(table)
+ C ebp src
+
+ifdef(`PIC',`
+ movl %ebp, CALL_SRC
+ movl %ebx, %ebp C y
+ movl %edi, %ebx C L(table)
+
+ addl $_GLOBAL_OFFSET_TABLE_+[.-L(table)], %ebx
+ movl %ebp, CALL_DIVISOR
+ movl %ecx, CALL_SIZE
+
+ call GSYM_PREFIX`'mpn_modexact_1_odd@PLT
+',`
+dnl non-PIC
+ movl %ebx, CALL_DIVISOR
+ movl %ebp, CALL_SRC
+ movl %ecx, CALL_SIZE
+
+ call GSYM_PREFIX`'mpn_modexact_1_odd
')
+
+ C eax x
+ C ebx [non-PIC] y
+ C ecx
+ C edx
+ C esi common twos
+ C edi [PIC] L(table)
+ C ebp [PIC] y
+
+ orl %eax, %eax
+ movl ifdef(`PIC',`%ebp',`%ebx'), %edx
+ movl SAVE_EBP, %ebp
+
+ movl %eax, %ecx
+ jnz L(strip_x_entry)
+
+ movl %edx, %eax
+ jmp L(done)
+
+
+ifdef(`PIC', `
+L(movl_eip_to_edi):
+ movl (%esp), %edi
+ ret_internal
+')
+
EPILOGUE()
diff --git a/gmp/mpn/x86/k7/gmp-mparam.h b/gmp/mpn/x86/k7/gmp-mparam.h
index 9977a113e2..ced0c020f7 100644
--- a/gmp/mpn/x86/k7/gmp-mparam.h
+++ b/gmp/mpn/x86/k7/gmp-mparam.h
@@ -1,241 +1,73 @@
/* AMD K7 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 2000-2005, 2008-2010, 2014 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2008 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
-or both in parallel, as here.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2083 MHz K7 Barton */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-13, gcc 4.2 */
-
-#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 3
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 24
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD 3
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 24
-
-#define MUL_TOOM22_THRESHOLD 28
-#define MUL_TOOM33_THRESHOLD 85
-#define MUL_TOOM44_THRESHOLD 147
-#define MUL_TOOM6H_THRESHOLD 216
-#define MUL_TOOM8H_THRESHOLD 309
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 85
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 99
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 98
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 102
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 124
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 50
-#define SQR_TOOM3_THRESHOLD 81
-#define SQR_TOOM4_THRESHOLD 216
-#define SQR_TOOM6_THRESHOLD 306
-#define SQR_TOOM8_THRESHOLD 446
-
-#define MULMID_TOOM42_THRESHOLD 56
-
-#define MULMOD_BNM1_THRESHOLD 17
-#define SQRMOD_BNM1_THRESHOLD 17
-
-#define MUL_FFT_MODF_THRESHOLD 904 /* k = 6 */
-#define MUL_FFT_TABLE3 \
- { { 904, 6}, { 21, 7}, { 11, 6}, { 25, 7}, \
- { 13, 6}, { 27, 7}, { 15, 6}, { 31, 7}, \
- { 17, 6}, { 35, 7}, { 19, 6}, { 39, 7}, \
- { 23, 6}, { 47, 7}, { 27, 8}, { 15, 7}, \
- { 31, 6}, { 63, 7}, { 35, 8}, { 19, 7}, \
- { 39, 8}, { 23, 7}, { 47, 8}, { 31, 7}, \
- { 63, 8}, { 39, 7}, { 79, 9}, { 23, 8}, \
- { 47, 7}, { 95, 8}, { 51, 9}, { 31, 8}, \
- { 71, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
- { 95, 9}, { 55,10}, { 31, 9}, { 63, 8}, \
- { 127, 9}, { 71, 8}, { 143, 9}, { 79, 8}, \
- { 159,10}, { 47, 9}, { 95, 8}, { 191, 9}, \
- { 103,11}, { 31,10}, { 63, 9}, { 127, 8}, \
- { 255, 9}, { 143,10}, { 79, 9}, { 167,10}, \
- { 95, 9}, { 199,10}, { 111,11}, { 63,10}, \
- { 127, 9}, { 255,10}, { 143, 9}, { 287,10}, \
- { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \
- { 383,10}, { 207,12}, { 63,11}, { 127,10}, \
- { 255, 9}, { 511,10}, { 271, 8}, { 1087,10}, \
- { 287,11}, { 159,10}, { 319, 9}, { 639,11}, \
- { 191,10}, { 383, 9}, { 767, 8}, { 1535, 9}, \
- { 799, 8}, { 1599,11}, { 223,12}, { 127,11}, \
- { 255,10}, { 511, 9}, { 1023,10}, { 543, 9}, \
- { 1087,11}, { 287,10}, { 575, 9}, { 1151,10}, \
- { 607, 9}, { 1215, 8}, { 2431,11}, { 319,10}, \
- { 639, 9}, { 1279,10}, { 671, 9}, { 1343,12}, \
- { 191,11}, { 383,10}, { 767, 9}, { 1535,10}, \
- { 799, 9}, { 1599,10}, { 831, 9}, { 1663,10}, \
- { 863,13}, { 127,12}, { 255,11}, { 511,10}, \
- { 1023,11}, { 543,10}, { 1087,11}, { 575,10}, \
- { 1151,11}, { 607,10}, { 1215, 9}, { 2431,12}, \
- { 319,11}, { 639,10}, { 1407,11}, { 735,10}, \
- { 1471, 9}, { 2943,12}, { 383,11}, { 767,10}, \
- { 1535,11}, { 799,10}, { 1599,11}, { 831,10}, \
- { 1663,11}, { 895,10}, { 1791,11}, { 959,10}, \
- { 1919,13}, { 255,12}, { 511,11}, { 1023,10}, \
- { 2047,11}, { 1087,12}, { 575,11}, { 1151,10}, \
- { 2303,11}, { 1215,10}, { 2431,12}, { 639,11}, \
- { 1279,10}, { 2559,11}, { 1407,10}, { 2815,11}, \
- { 1471,10}, { 2943,13}, { 383,12}, { 767,11}, \
- { 1599,12}, { 831,11}, { 1663,12}, { 895,11}, \
- { 1791,10}, { 3583,12}, { 959,11}, { 1919,10}, \
- { 3839,14}, { 255,13}, { 511,12}, { 1023,11}, \
- { 2047,12}, { 1087,11}, { 2175,12}, { 1151,11}, \
- { 2303,12}, { 1215,11}, { 2431,13}, { 639,12}, \
- { 1407,11}, { 2815,12}, { 1471,11}, { 2943,13}, \
- { 767,12}, { 1663,11}, { 3327,13}, { 895,12}, \
- { 1791,11}, { 3583,12}, { 1919,11}, { 3839,12}, \
- { 1983,11}, { 3967,14}, { 511,13}, { 1023,12}, \
- { 2239,13}, { 1151,12}, { 2495,13}, { 1279,12}, \
- { 2559,13}, { 1407,12}, { 2943,11}, { 5887,14}, \
- { 767,13}, { 1535,12}, { 3071,13}, { 1663,12}, \
- { 3327,13}, { 1791,12}, { 3583,13}, { 1919,12}, \
- { 3967,15}, { 511,14}, { 1023,13}, { 2047,12}, \
- { 4095,13}, { 2175,12}, { 4351,13}, { 2431,12}, \
- { 4863,14}, { 1279,13}, { 2559,12}, { 5119,13}, \
- { 2943,12}, { 5887,14}, { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 228
-#define MUL_FFT_THRESHOLD 7808
-
-#define SQR_FFT_MODF_THRESHOLD 888 /* k = 6 */
-#define SQR_FFT_TABLE3 \
- { { 888, 6}, { 21, 7}, { 11, 6}, { 25, 7}, \
- { 13, 6}, { 27, 7}, { 15, 6}, { 31, 7}, \
- { 17, 6}, { 35, 7}, { 19, 6}, { 39, 7}, \
- { 23, 6}, { 47, 7}, { 27, 8}, { 15, 7}, \
- { 31, 6}, { 63, 7}, { 35, 8}, { 19, 7}, \
- { 39, 8}, { 23, 7}, { 47, 8}, { 31, 7}, \
- { 63, 8}, { 39, 9}, { 23, 8}, { 47, 7}, \
- { 95, 8}, { 51, 9}, { 31, 8}, { 67, 9}, \
- { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
- { 55,10}, { 31, 9}, { 63, 8}, { 127, 9}, \
- { 79,10}, { 47, 9}, { 95, 8}, { 191,11}, \
- { 31,10}, { 63, 9}, { 127, 8}, { 255, 9}, \
- { 143,10}, { 79, 9}, { 167,10}, { 95, 9}, \
- { 191,10}, { 111,11}, { 63,10}, { 127, 9}, \
- { 255, 8}, { 511,10}, { 143, 9}, { 287, 8}, \
- { 575,10}, { 159,11}, { 95,10}, { 191, 9}, \
- { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \
- { 511,10}, { 271, 9}, { 543, 8}, { 1087,10}, \
- { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \
- { 639, 8}, { 1279, 9}, { 671,11}, { 191,10}, \
- { 383, 9}, { 799, 8}, { 1599, 9}, { 831,11}, \
- { 223,12}, { 127,11}, { 255,10}, { 543, 9}, \
- { 1087,11}, { 287,10}, { 575, 9}, { 1215, 8}, \
- { 2431,11}, { 319,10}, { 639, 9}, { 1279,10}, \
- { 671, 9}, { 1407,12}, { 191,10}, { 799, 9}, \
- { 1599,10}, { 831, 9}, { 1663,10}, { 863, 9}, \
- { 1727,11}, { 447,13}, { 127,12}, { 255,11}, \
- { 511,10}, { 1023,11}, { 543,10}, { 1087, 9}, \
- { 2175,10}, { 1119,11}, { 575,10}, { 1151,11}, \
- { 607,10}, { 1215, 9}, { 2431,12}, { 319,11}, \
- { 639,10}, { 1279,11}, { 671,10}, { 1343, 9}, \
- { 2687,11}, { 703,10}, { 1407,11}, { 735,10}, \
- { 1471, 9}, { 2943,10}, { 1503,12}, { 383,11}, \
- { 767,10}, { 1535,11}, { 799,10}, { 1599,11}, \
- { 863,10}, { 1727,12}, { 447,11}, { 895,10}, \
- { 1791,11}, { 959,10}, { 1919,13}, { 255,12}, \
- { 511,11}, { 1023,10}, { 2047,11}, { 1087,10}, \
- { 2175,11}, { 1119,12}, { 575,11}, { 1151,10}, \
- { 2303,11}, { 1215,10}, { 2431,12}, { 639,11}, \
- { 1407,10}, { 2815,11}, { 1471,10}, { 2943,12}, \
- { 767,11}, { 1599,12}, { 831,11}, { 1663,10}, \
- { 3327,12}, { 895,11}, { 1791,10}, { 3583,12}, \
- { 959,11}, { 1919,10}, { 3839,11}, { 1983,14}, \
- { 255,13}, { 511,12}, { 1023,11}, { 2047,12}, \
- { 1087,11}, { 2175,12}, { 1151,11}, { 2303,12}, \
- { 1215,11}, { 2431,13}, { 639,12}, { 1407,11}, \
- { 2815,12}, { 1471,11}, { 2943,13}, { 767,12}, \
- { 1663,11}, { 3327,12}, { 1727,13}, { 895,12}, \
- { 1791,11}, { 3583,12}, { 1919,11}, { 3839,12}, \
- { 1983,11}, { 3967,14}, { 511,13}, { 1023,12}, \
- { 2175,13}, { 1151,12}, { 2495,13}, { 1279,12}, \
- { 2559,13}, { 1407,12}, { 2943,11}, { 5887,14}, \
- { 767,13}, { 1535,12}, { 3071,13}, { 1663,12}, \
- { 3327,13}, { 1791,12}, { 3583,13}, { 1919,12}, \
- { 3967,15}, { 511,14}, { 1023,13}, { 2047,12}, \
- { 4095,13}, { 2175,12}, { 4351,13}, { 2431,14}, \
- { 1279,13}, { 2943,12}, { 5887,14}, { 16384,15}, \
- { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 229
-#define SQR_FFT_THRESHOLD 7552
-
-#define MULLO_BASECASE_THRESHOLD 8
-#define MULLO_DC_THRESHOLD 36
-#define MULLO_MUL_N_THRESHOLD 13463
-
-#define DC_DIV_QR_THRESHOLD 45
-#define DC_DIVAPPR_Q_THRESHOLD 208
-#define DC_BDIV_QR_THRESHOLD 43
-#define DC_BDIV_Q_THRESHOLD 140
-
-#define INV_MULMOD_BNM1_THRESHOLD 62
-#define INV_NEWTON_THRESHOLD 204
-#define INV_APPR_THRESHOLD 204
-
-#define BINV_NEWTON_THRESHOLD 230
-#define REDC_1_TO_REDC_N_THRESHOLD 59
-
-#define MU_DIV_QR_THRESHOLD 1752
-#define MU_DIVAPPR_Q_THRESHOLD 1528
-#define MUPI_DIV_QR_THRESHOLD 82
-#define MU_BDIV_QR_THRESHOLD 1360
-#define MU_BDIV_Q_THRESHOLD 1470
-
-#define POWM_SEC_TABLE 1,16,102,336,1221
-
-#define MATRIX22_STRASSEN_THRESHOLD 16
-#define HGCD_THRESHOLD 120
-#define HGCD_APPR_THRESHOLD 143
-#define HGCD_REDUCE_THRESHOLD 4818
-#define GCD_DC_THRESHOLD 474
-#define GCDEXT_DC_THRESHOLD 345
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 15
-#define GET_STR_PRECOMPUTE_THRESHOLD 33
-#define SET_STR_DC_THRESHOLD 298
-#define SET_STR_PRECOMPUTE_THRESHOLD 1187
-
-#define FAC_DSC_THRESHOLD 602
-#define FAC_ODD_THRESHOLD 29
+/* 2083 MHz Athlon */
+
+/* Generated by tuneup.c, 2008-12-23, gcc 3.4 */
+
+#define MUL_KARATSUBA_THRESHOLD 28
+#define MUL_TOOM3_THRESHOLD 89
+#define MUL_TOOM44_THRESHOLD 130
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD 52
+#define SQR_TOOM3_THRESHOLD 89
+#define SQR_TOOM4_THRESHOLD 196
+
+#define MULLOW_BASECASE_THRESHOLD 10
+#define MULLOW_DC_THRESHOLD 96
+#define MULLOW_MUL_N_THRESHOLD 234
+
+#define DIV_SB_PREINV_THRESHOLD 0 /* always */
+#define DIV_DC_THRESHOLD 86
+#define POWM_THRESHOLD 134
+#define MATRIX22_STRASSEN_THRESHOLD 18
+#define HGCD_THRESHOLD 163
+#define GCD_DC_THRESHOLD 665
+#define GCDEXT_DC_THRESHOLD 605
+#define JACOBI_BASE_METHOD 1
+
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define USE_PREINV_MOD_1 1 /* native */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */
+
+#define GET_STR_DC_THRESHOLD 19
+#define GET_STR_PRECOMPUTE_THRESHOLD 35
+#define SET_STR_DC_THRESHOLD 826
+#define SET_STR_PRECOMPUTE_THRESHOLD 1691
+
+#define MUL_FFT_TABLE { 432, 864, 1664, 4608, 10240, 40960, 163840, 655360, 0 }
+#define MUL_FFT_MODF_THRESHOLD 496
+#define MUL_FFT_THRESHOLD 4864
+
+#define SQR_FFT_TABLE { 432, 864, 1664, 4608, 10240, 40960, 98304, 655360, 0 }
+#define SQR_FFT_MODF_THRESHOLD 432
+#define SQR_FFT_THRESHOLD 3840
+
+/* These tables need to be updated. */
+
+#define MUL_FFT_TABLE2 {{1, 4}, {401, 5}, {801, 6}, {817, 5}, {865, 6}, {1025, 5}, {1057, 6}, {1601, 7}, {1633, 6}, {1729, 7}, {1921, 6}, {2113, 7}, {2177, 6}, {2241, 7}, {2433, 6}, {2497, 7}, {2945, 6}, {3009, 7}, {3457, 8}, {3521, 7}, {4481, 8}, {4865, 7}, {5249, 8}, {5889, 7}, {6017, 8}, {7553, 9}, {7681, 8}, {9985, 9}, {11777, 8}, {13057, 9}, {13825, 8}, {14081, 9}, {15873, 8}, {16641, 9}, {16897, 8}, {17153, 9}, {19969, 8}, {20225, 9}, {20737, 8}, {20993, 9}, {24065, 8}, {24577, 9}, {25089, 8}, {25345, 9}, {27393, 10}, {27649, 9}, {28161, 10}, {31745, 9}, {38913, 10}, {39425, 9}, {40449, 10}, {48129, 9}, {48641, 11}, {63489, 10}, {98305, 11}, {99329, 10}, {100353, 11}, {101377, 10}, {103425, 11}, {104449, 10}, {110593, 11}, {112641, 10}, {113665, 11}, {129025, 10}, {162817, 11}, {194561, 10}, {195585, 12}, {258049, 11}, {391169, 12}, {520193, 11}, {718849, 12}, {782337, 11}, {849921, 13}, {1040385, 12}, {2879489, 13}, {3137537, 12}, {3928065, 13}, {4186113, 12}, {4976641, 13}, {5234689, 12}, {6025217, 13}, {6283265, 12}, {MP_SIZE_T_MAX,0}}
+
+#define SQR_FFT_TABLE2 {{1, 4}, {401, 5}, {417, 4}, {433, 5}, {881, 6}, {961, 5}, {993, 6}, {1857, 7}, {1921, 6}, {2049, 7}, {2177, 6}, {2241, 7}, {2433, 6}, {2497, 7}, {3457, 8}, {3841, 7}, {4481, 8}, {4609, 7}, {4737, 8}, {4865, 7}, {5249, 8}, {5889, 7}, {6273, 8}, {7041, 9}, {7681, 8}, {9985, 9}, {10241, 8}, {10497, 9}, {11777, 8}, {13057, 9}, {15873, 8}, {16385, 9}, {16897, 8}, {17153, 9}, {19969, 8}, {20225, 9}, {20737, 8}, {20993, 9}, {24065, 8}, {24321, 9}, {24577, 10}, {24833, 9}, {25601, 10}, {27137, 9}, {27649, 10}, {31745, 9}, {38401, 10}, {38913, 9}, {40449, 10}, {48129, 9}, {48641, 11}, {63489, 10}, {99329, 11}, {101377, 10}, {103425, 11}, {104449, 10}, {107521, 11}, {110593, 10}, {113665, 11}, {129025, 10}, {154625, 11}, {155649, 10}, {162817, 11}, {194561, 12}, {258049, 11}, {391169, 12}, {520193, 11}, {718849, 12}, {727041, 11}, {729089, 12}, {782337, 11}, {849921, 13}, {1040385, 12}, {2879489, 13}, {3137537, 12}, {3928065, 13}, {4186113, 12}, {4714497, 13}, {5234689, 12}, {6025217, 13}, {6283265, 12}, {7073793, 13}, {7331841, 12}, {MP_SIZE_T_MAX,0}}
diff --git a/gmp/mpn/x86/k7/invert_limb.asm b/gmp/mpn/x86/k7/invert_limb.asm
deleted file mode 100644
index 6cce455a9d..0000000000
--- a/gmp/mpn/x86/k7/invert_limb.asm
+++ /dev/null
@@ -1,193 +0,0 @@
-dnl x86 mpn_invert_limb
-
-dnl Contributed to the GNU project by Niels Möller
-
-dnl Copyright 2009, 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles (approx) div
-C P5 ?
-C P6 model 0-8,10-12 ?
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) ?
-C P4 model 0 (Willamette) ?
-C P4 model 1 (?) ?
-C P4 model 2 (Northwood) ?
-C P4 model 3 (Prescott) ?
-C P4 model 4 (Nocona) ?
-C AMD K6 ?
-C AMD K7 41 53
-C AMD K8 ?
-
-C TODO
-C * These c/l numbers are for a non-PIC build. Consider falling back to using
-C the 'div' instruction for PIC builds.
-C * Perhaps use this file--or at least the algorithm--for more machines than k7.
-
-C Register usage:
-C Input D in %edi
-C Current approximation is in %eax and/or %ecx
-C %ebx and %edx are temporaries
-C %esi and %ebp are unused
-
-defframe(PARAM_DIVISOR,4)
-
-ASM_START()
-
-C Make approx_tab global to work around Apple relocation bug.
-ifdef(`DARWIN',`
- deflit(`approx_tab', MPN(invert_limb_tab))
- GLOBL approx_tab')
-
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_invert_limb)
-deflit(`FRAME', 0)
- mov PARAM_DIVISOR, %eax
- C Avoid push/pop on k7.
- sub $8, %esp FRAME_subl_esp(8)
- mov %ebx, (%esp)
- mov %edi, 4(%esp)
-
- mov %eax, %edi
- shr $22, %eax
-ifdef(`PIC',`
- LEA( approx_tab, %ebx)
- movzwl -1024(%ebx, %eax, 2), %eax
-',`
- movzwl -1024+approx_tab(%eax, %eax), %eax C %eax = v0
-')
-
- C v1 = (v0 << 4) - ((v0*v0*d_21) >> 32) - 1
- mov %eax, %ecx
- imul %eax, %eax
- mov %edi, %ebx
- shr $11, %ebx
- inc %ebx
- mul %ebx
- mov %edi, %ebx C Prepare
- shr %ebx
- sbb %eax, %eax
- sub %eax, %ebx C %ebx = d_31, %eax = mask
- shl $4, %ecx
- dec %ecx
- sub %edx, %ecx C %ecx = v1
-
- C v_2 = (v1 << 15) + ((v1 *(2^48 - v1 * d31 + (v1 >> 1) & mask)) >> 33)
- imul %ecx, %ebx
- and %ecx, %eax
- shr %eax
- sub %ebx, %eax
- mul %ecx
- mov %edi, %eax C Prepare for next mul
- shl $15, %ecx
- shr %edx
- add %edx, %ecx C %ecx = v2
-
- mul %ecx
- add %edi, %eax
- mov %ecx, %eax
- adc %edi, %edx
- sub %edx, %eax C %eax = v3
-
- mov (%esp), %ebx
- mov 4(%esp), %edi
- add $8, %esp
-
- ret
-
-EPILOGUE()
-
-DEF_OBJECT(approx_tab,2)
- .value 0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
- .value 0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
- .value 0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
- .value 0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
- .value 0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
- .value 0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
- .value 0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
- .value 0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
- .value 0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
- .value 0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
- .value 0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
- .value 0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
- .value 0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
- .value 0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
- .value 0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
- .value 0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
- .value 0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
- .value 0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
- .value 0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
- .value 0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
- .value 0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
- .value 0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
- .value 0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
- .value 0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
- .value 0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
- .value 0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
- .value 0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
- .value 0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
- .value 0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
- .value 0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
- .value 0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
- .value 0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
- .value 0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
- .value 0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
- .value 0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
- .value 0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
- .value 0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
- .value 0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
- .value 0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
- .value 0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
- .value 0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
- .value 0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
- .value 0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
- .value 0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
- .value 0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
- .value 0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
- .value 0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
- .value 0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
- .value 0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
- .value 0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
- .value 0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
- .value 0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
- .value 0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
- .value 0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
- .value 0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
- .value 0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
- .value 0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
- .value 0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
- .value 0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
- .value 0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
- .value 0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
- .value 0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
- .value 0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
- .value 0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
-END_OBJECT(approx_tab)
diff --git a/gmp/mpn/x86/k7/mmx/com.asm b/gmp/mpn/x86/k7/mmx/com_n.asm
index a258c224f1..068c01f076 100644
--- a/gmp/mpn/x86/k7/mmx/com.asm
+++ b/gmp/mpn/x86/k7/mmx/com_n.asm
@@ -1,32 +1,21 @@
-dnl AMD Athlon mpn_com -- mpn bitwise one's complement.
+dnl AMD Athlon mpn_com_n -- mpn bitwise one's complement.
dnl Copyright 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -34,7 +23,7 @@ include(`../config.m4')
C K7: 1.0 cycles/limb
-C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C The loop form below is necessary for the claimed speed. It needs to be
C aligned to a 16 byte boundary and only 16 bytes long. Maybe that's so it
@@ -62,7 +51,7 @@ defframe(PARAM_DST, 4)
TEXT
ALIGN(16)
-PROLOGUE(mpn_com)
+PROLOGUE(mpn_com_n)
deflit(`FRAME',0)
movl PARAM_DST, %edx
diff --git a/gmp/mpn/x86/k7/mmx/copyd.asm b/gmp/mpn/x86/k7/mmx/copyd.asm
index 59ece40920..4601fcd75a 100644
--- a/gmp/mpn/x86/k7/mmx/copyd.asm
+++ b/gmp/mpn/x86/k7/mmx/copyd.asm
@@ -1,32 +1,21 @@
dnl AMD K7 mpn_copyd -- copy limb vector, decrementing.
dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k7/mmx/copyi.asm b/gmp/mpn/x86/k7/mmx/copyi.asm
index 9a28f927ec..a17d575ff4 100644
--- a/gmp/mpn/x86/k7/mmx/copyi.asm
+++ b/gmp/mpn/x86/k7/mmx/copyi.asm
@@ -1,32 +1,21 @@
dnl AMD K7 mpn_copyi -- copy limb vector, incrementing.
dnl Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k7/mmx/divrem_1.asm b/gmp/mpn/x86/k7/mmx/divrem_1.asm
index cf343280bb..fa5824c7b9 100644
--- a/gmp/mpn/x86/k7/mmx/divrem_1.asm
+++ b/gmp/mpn/x86/k7/mmx/divrem_1.asm
@@ -1,33 +1,22 @@
dnl AMD K7 mpn_divrem_1, mpn_divrem_1c, mpn_preinv_divrem_1 -- mpn by limb
dnl division.
-dnl Copyright 1999-2002, 2004 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -456,7 +445,7 @@ C chain, and nothing better than 18 cycles has been found when using it.
C The jump is taken only when q1 is 0xFFFFFFFF, and on random data this will
C be an extremely rare event.
C
-C Branch mispredictions will hit random occurrences of q1==0xFFFFFFFF, but
+C Branch mispredictions will hit random occurrances of q1==0xFFFFFFFF, but
C if some special data is coming out with this always, the q1_ff special
C case actually runs at 15 c/l. 0x2FFF...FFFD divided by 3 is a good way to
C induce the q1_ff case, for speed measurements or testing. Note that
@@ -735,12 +724,12 @@ C q1 is the high word of m*n2+b*n2 and the following shows q1<=b-2 always.
C rnd() means rounding down to a multiple of d.
C
C m*n2 + b*n2 <= m*(d-1) + b*(d-1)
-C = m*d + b*d - m - b
-C = floor((b(b-d)-1)/d)*d + b*d - m - b
-C = rnd(b(b-d)-1) + b*d - m - b
-C = rnd(b(b-d)-1 + b*d) - m - b
-C = rnd(b*b-1) - m - b
-C <= (b-2)*b
+C = m*d + b*d - m - b
+C = floor((b(b-d)-1)/d)*d + b*d - m - b
+C = rnd(b(b-d)-1) + b*d - m - b
+C = rnd(b(b-d)-1 + b*d) - m - b
+C = rnd(b*b-1) - m - b
+C <= (b-2)*b
C
C Unchanged from the general case is that the final quotient limb q can be
C either q1 or q1+1, and the q1+1 case occurs often. This can be seen from
diff --git a/gmp/mpn/x86/k7/mmx/lshift.asm b/gmp/mpn/x86/k7/mmx/lshift.asm
index b3383cf2c3..b3bff8ffd1 100644
--- a/gmp/mpn/x86/k7/mmx/lshift.asm
+++ b/gmp/mpn/x86/k7/mmx/lshift.asm
@@ -1,32 +1,21 @@
dnl AMD K7 mpn_lshift -- mpn left shift.
-dnl Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k7/mmx/mod_1.asm b/gmp/mpn/x86/k7/mmx/mod_1.asm
new file mode 100644
index 0000000000..2b42e55caf
--- /dev/null
+++ b/gmp/mpn/x86/k7/mmx/mod_1.asm
@@ -0,0 +1,509 @@
+dnl AMD K7 mpn_mod_1 -- mpn by limb remainder.
+
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C K7: 17.0 cycles/limb.
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t carry);
+C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t inverse);
+C
+C The code here is the same as mpn_divrem_1, but with the quotient
+C discarded. See mpn/x86/k7/mmx/divrem_1.asm for some comments.
+
+
+dnl MUL_THRESHOLD is the size at which the multiply by inverse method is
+dnl used, rather than plain "divl"s. Minimum value 2.
+dnl
+dnl The inverse takes about 50 cycles to calculate, but after that the
+dnl multiply is 17 c/l versus division at 41 c/l.
+dnl
+dnl Using mul or div is about the same speed at 3 limbs, so the threshold
+dnl is set to 4 to get the smaller div code used at 3.
+
+deflit(MUL_THRESHOLD, 4)
+
+
+defframe(PARAM_INVERSE,16) dnl mpn_preinv_mod_1
+defframe(PARAM_CARRY, 16) dnl mpn_mod_1c
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+
+defframe(VAR_NORM, -20)
+defframe(VAR_INVERSE, -24)
+defframe(VAR_SRC_STOP,-28)
+
+deflit(STACK_SPACE, 28)
+
+ TEXT
+
+ ALIGN(32)
+PROLOGUE(mpn_preinv_mod_1)
+deflit(`FRAME',0)
+ movl PARAM_SRC, %ecx
+ movl PARAM_SIZE, %eax
+ subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE)
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %edi, SAVE_EDI
+ movl PARAM_INVERSE, %edx
+
+ movl %esi, SAVE_ESI
+ movl -4(%ecx,%eax,4), %edi C src high limb
+ leal -16(%ecx,%eax,4), %ecx C &src[size-4]
+
+ movl %ebx, SAVE_EBX
+ movl PARAM_INVERSE, %edx
+
+ movl $0, VAR_NORM C l==0
+
+ movl %edi, %esi
+ subl %ebp, %edi C high-divisor
+
+ cmovc( %esi, %edi) C restore if underflow
+ decl %eax
+ jz L(done_edi) C size==1, high-divisor only
+
+ movl 8(%ecx), %esi C src second high limb
+ movl %edx, VAR_INVERSE
+
+ movl $32, %ebx C 32-l
+ decl %eax
+ jz L(inverse_one_left) C size==2, one divide
+
+ movd %ebx, %mm7 C 32-l
+ decl %eax
+ jz L(inverse_two_left) C size==3, two divides
+
+ jmp L(inverse_top) C size>=4
+
+
+L(done_edi):
+ movl SAVE_ESI, %esi
+ movl SAVE_EBP, %ebp
+ movl %edi, %eax
+
+ movl SAVE_EDI, %edi
+ addl $STACK_SPACE, %esp
+
+ ret
+
+EPILOGUE()
+
+
+ ALIGN(32)
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+ movl PARAM_CARRY, %edx
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+ jmp L(start_1c)
+
+EPILOGUE()
+
+
+ ALIGN(32)
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ movl $0, %edx C initial carry (if can't skip a div)
+ subl $STACK_SPACE, %esp
+deflit(`FRAME',STACK_SPACE)
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ orl %ecx, %ecx
+ jz L(divide_done)
+
+ movl -4(%esi,%ecx,4), %eax C src high limb
+
+ cmpl %ebp, %eax C carry flag if high<divisor
+
+ cmovc( %eax, %edx) C src high limb as initial carry
+ sbbl $0, %ecx C size-1 to skip one div
+ jz L(divide_done)
+
+
+ ALIGN(16)
+L(start_1c):
+ C eax
+ C ebx
+ C ecx size
+ C edx carry
+ C esi src
+ C edi
+ C ebp divisor
+
+ cmpl $MUL_THRESHOLD, %ecx
+ jae L(mul_by_inverse)
+
+
+
+C With a MUL_THRESHOLD of 4, this "loop" only ever does 1 to 3 iterations,
+C but it's already fast and compact, and there's nothing to gain by
+C expanding it out.
+C
+C Using PARAM_DIVISOR in the divl is a couple of cycles faster than %ebp.
+
+ orl %ecx, %ecx
+ jz L(divide_done)
+
+
+L(divide_top):
+ C eax scratch (quotient)
+ C ebx
+ C ecx counter, limbs, decrementing
+ C edx scratch (remainder)
+ C esi src
+ C edi
+ C ebp
+
+ movl -4(%esi,%ecx,4), %eax
+
+ divl PARAM_DIVISOR
+
+ decl %ecx
+ jnz L(divide_top)
+
+
+L(divide_done):
+ movl SAVE_ESI, %esi
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ movl %edx, %eax
+
+ ret
+
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+ C eax
+ C ebx
+ C ecx size
+ C edx carry
+ C esi src
+ C edi
+ C ebp divisor
+
+ bsrl %ebp, %eax C 31-l
+
+ movl %ebx, SAVE_EBX
+ movl %ecx, %ebx C size
+
+ movl %edi, SAVE_EDI
+ movl $31, %ecx
+
+ movl %edx, %edi C carry
+ movl $-1, %edx
+
+ C
+
+ xorl %eax, %ecx C l
+ incl %eax C 32-l
+
+ shll %cl, %ebp C d normalized
+ movl %ecx, VAR_NORM
+
+ movd %eax, %mm7 C 32-l
+
+ movl $-1, %eax
+ subl %ebp, %edx C (b-d)-1 so edx:eax = b*(b-d)-1
+
+ divl %ebp C floor (b*(b-d)-1) / d
+
+ C
+
+ movl %eax, VAR_INVERSE
+ leal -12(%esi,%ebx,4), %eax C &src[size-3]
+
+ movl 8(%eax), %esi C src high limb
+ movl 4(%eax), %edx C src second highest limb
+
+ shldl( %cl, %esi, %edi) C n2 = carry,high << l
+
+ shldl( %cl, %edx, %esi) C n10 = high,second << l
+
+ movl %eax, %ecx C &src[size-3]
+
+
+ifelse(MUL_THRESHOLD,2,`
+ cmpl $2, %ebx
+ je L(inverse_two_left)
+')
+
+
+C The dependent chain here is the same as in mpn_divrem_1, but a few
+C instructions are saved by not needing to store the quotient limbs.
+C Unfortunately this doesn't get the code down to the theoretical 16 c/l.
+C
+C There are four dummy instructions in the loop, all of which are necessary
+C for the claimed 17 c/l. It's a 1 to 3 cycle slowdown if any are removed,
+C or changed from load to store or vice versa. They're not completely
+C random, since they correspond to what mpn_divrem_1 has, but there's no
+C obvious reason why they're necessary. Presumably they induce something
+C good in the out of order execution, perhaps through some load/store
+C ordering and/or decoding effects.
+C
+C The q1==0xFFFFFFFF case is handled here the same as in mpn_divrem_1. On
+C special data that comes out as q1==0xFFFFFFFF always, the loop runs at
+C about 13.5 c/l.
+
+ ALIGN(32)
+L(inverse_top):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx src pointer, decrementing
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 scratch (src qword)
+ C mm7 rshift for normalization
+
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+ movl PARAM_SIZE, %ebx C dummy
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movq (%ecx), %mm0 C next src limb and the one below it
+ subl $4, %ecx
+
+ movl %ecx, PARAM_SIZE C dummy
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2+1
+ movl %ebp, %eax C d
+
+ C
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+ jz L(q1_ff)
+ nop C dummy
+
+ mull %ebx C (q1+1)*d
+
+ psrlq %mm7, %mm0
+ leal (%ecx), %ecx C dummy
+
+ C
+
+ C
+
+ subl %eax, %esi C low n - (q1+1)*d
+ movl PARAM_SRC, %eax
+
+ C
+
+ sbbl %edx, %edi C high n - (q1+1)*d, 0 or -1
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ movd %mm0, %esi
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+ cmpl %eax, %ecx
+ jae L(inverse_top)
+
+
+L(inverse_loop_done):
+
+
+C -----------------------------------------------------------------------------
+
+L(inverse_two_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx &src[-1]
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 scratch (src dword)
+ C mm7 rshift
+
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movd 4(%ecx), %mm0 C src low limb
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2+1
+ movl %ebp, %eax C d
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+
+ mull %ebx C (q1+1)*d
+
+ psllq $32, %mm0
+
+ psrlq %mm7, %mm0
+
+ C
+
+ subl %eax, %esi
+
+ C
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ movl %esi, %edi C remainder -> n2
+ leal (%ebp,%esi), %edx
+
+ movd %mm0, %esi
+
+ cmovc( %edx, %edi) C n - q1*d if underflow from using q1+1
+
+
+L(inverse_one_left):
+ C eax scratch
+ C ebx scratch (nadj, q1)
+ C ecx
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C mm0 src limb, shifted
+ C mm7 rshift
+
+ cmpl $0x80000000, %esi C n1 as 0=c, 1=nc
+ movl %edi, %eax C n2
+
+ leal (%ebp,%esi), %ebx
+ cmovc( %esi, %ebx) C nadj = n10 + (-n1 & d), ignoring overflow
+ sbbl $-1, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ movl VAR_NORM, %ecx C for final denorm
+
+ C
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2+1
+ movl %ebp, %eax C d
+
+ C
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx
+
+ mull %ebx C (q1+1)*d
+
+ movl SAVE_EBX, %ebx
+
+ C
+
+ C
+
+ subl %eax, %esi
+
+ movl %esi, %eax C remainder
+ movl SAVE_ESI, %esi
+
+ sbbl %edx, %edi C n - (q1+1)*d
+ leal (%ebp,%eax), %edx
+ movl SAVE_EBP, %ebp
+
+ cmovc( %edx, %eax) C n - q1*d if underflow from using q1+1
+ movl SAVE_EDI, %edi
+
+ shrl %cl, %eax C denorm remainder
+ addl $STACK_SPACE, %esp
+ emms
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d
+
+L(q1_ff):
+ C eax (divisor)
+ C ebx (q1+1 == 0)
+ C ecx src pointer
+ C edx
+ C esi n10
+ C edi (n2)
+ C ebp divisor
+
+ movl PARAM_SRC, %edx
+ leal (%ebp,%esi), %edi C n-q*d remainder -> next n2
+ psrlq %mm7, %mm0
+
+ movd %mm0, %esi C next n10
+
+ cmpl %edx, %ecx
+ jae L(inverse_top)
+ jmp L(inverse_loop_done)
+
+EPILOGUE()
diff --git a/gmp/mpn/x86/k7/mmx/popham.asm b/gmp/mpn/x86/k7/mmx/popham.asm
index 95965b74d4..5dc0a78c42 100644
--- a/gmp/mpn/x86/k7/mmx/popham.asm
+++ b/gmp/mpn/x86/k7/mmx/popham.asm
@@ -1,40 +1,29 @@
dnl AMD K7 mpn_popcount, mpn_hamdist -- population count and hamming
dnl distance.
-dnl Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C popcount hamdist
C P3 generic 6.5 7
-C P3 model 9 (Banias) 5.7 6.1
+C P3 model 9 (Banias) ? ?
C P3 model 13 (Dothan) 5.75 6
C K7 5 6
diff --git a/gmp/mpn/x86/k7/mmx/rshift.asm b/gmp/mpn/x86/k7/mmx/rshift.asm
index 345d23a25e..3566ce85d7 100644
--- a/gmp/mpn/x86/k7/mmx/rshift.asm
+++ b/gmp/mpn/x86/k7/mmx/rshift.asm
@@ -1,32 +1,21 @@
dnl AMD K7 mpn_rshift -- mpn right shift.
-dnl Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k7/mod_1_1.asm b/gmp/mpn/x86/k7/mod_1_1.asm
deleted file mode 100644
index 1bbe6f92d7..0000000000
--- a/gmp/mpn/x86/k7/mod_1_1.asm
+++ /dev/null
@@ -1,221 +0,0 @@
-dnl x86-32 mpn_mod_1_1p, requiring cmov.
-
-dnl Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
-
-dnl Copyright 2010, 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C P5 ?
-C P6 model 0-8,10-12 ?
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) ?
-C P4 model 0 (Willamette) ?
-C P4 model 1 (?) ?
-C P4 model 2 (Northwood) ?
-C P4 model 3 (Prescott) ?
-C P4 model 4 (Nocona) ?
-C AMD K6 ?
-C AMD K7 7
-C AMD K8 ?
-
-define(`B2mb', `%ebx')
-define(`r0', `%esi')
-define(`r2', `%ebp')
-define(`t0', `%edi')
-define(`ap', `%ecx') C Also shift count
-
-C Stack frame
-C pre 36(%esp)
-C b 32(%esp)
-C n 28(%esp)
-C ap 24(%esp)
-C return 20(%esp)
-C %ebp 16(%esp)
-C %edi 12(%esp)
-C %esi 8(%esp)
-C %ebx 4(%esp)
-C B2mod (%esp)
-
-define(`B2modb', `(%esp)')
-define(`n', `28(%esp)')
-define(`b', `32(%esp)')
-define(`pre', `36(%esp)')
-
-C mp_limb_t
-C mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t pre[4])
-C
-C The pre array contains bi, cnt, B1modb, B2modb
-C Note: This implementation needs B1modb only when cnt > 0
-
-ASM_START()
- TEXT
- ALIGN(8)
-PROLOGUE(mpn_mod_1_1p)
- push %ebp
- push %edi
- push %esi
- push %ebx
- mov 32(%esp), %ebp C pre[]
-
- mov 12(%ebp), %eax C B2modb
- push %eax C Put it on stack
-
- mov n, %edx
- mov 24(%esp), ap
-
- lea (ap, %edx, 4), ap
- mov -4(ap), %eax
- cmp $3, %edx
- jnc L(first)
- mov -8(ap), r0
- jmp L(reduce_two)
-
-L(first):
- C First iteration, no r2
- mull B2modb
- mov -12(ap), r0
- add %eax, r0
- mov -8(ap), %eax
- adc %edx, %eax
- sbb r2, r2
- subl $3, n
- lea -16(ap), ap
- jz L(reduce_three)
-
- mov B2modb, B2mb
- sub b, B2mb
- lea (B2mb, r0), t0
- jmp L(mid)
-
- ALIGN(16)
-L(top): C Loopmixed to 7 c/l on k7
- add %eax, r0
- lea (B2mb, r0), t0
- mov r2, %eax
- adc %edx, %eax
- sbb r2, r2
-L(mid): mull B2modb
- and B2modb, r2
- add r0, r2
- decl n
- mov (ap), r0
- cmovc( t0, r2)
- lea -4(ap), ap
- jnz L(top)
-
- add %eax, r0
- mov r2, %eax
- adc %edx, %eax
- sbb r2, r2
-
-L(reduce_three):
- C Eliminate r2
- and b, r2
- sub r2, %eax
-
-L(reduce_two):
- mov pre, %ebp
- movb 4(%ebp), %cl
- test %cl, %cl
- jz L(normalized)
-
- C Unnormalized, use B1modb to reduce to size < B b
- mull 8(%ebp)
- xor t0, t0
- add %eax, r0
- adc %edx, t0
- mov t0, %eax
-
- C Left-shift to normalize
- shld %cl, r0, %eax C Always use shld?
-
- shl %cl, r0
- jmp L(udiv)
-
-L(normalized):
- mov %eax, t0
- sub b, t0
- cmovnc( t0, %eax)
-
-L(udiv):
- lea 1(%eax), t0
- mull (%ebp)
- mov b, %ebx C Needed in register for lea
- add r0, %eax
- adc t0, %edx
- imul %ebx, %edx
- sub %edx, r0
- cmp r0, %eax
- lea (%ebx, r0), %eax
- cmovnc( r0, %eax)
- cmp %ebx, %eax
- jnc L(fix)
-L(ok): shr %cl, %eax
-
- add $4, %esp
- pop %ebx
- pop %esi
- pop %edi
- pop %ebp
-
- ret
-L(fix): sub %ebx, %eax
- jmp L(ok)
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1_1p_cps)
- push %ebp
- mov 12(%esp), %ebp
- push %esi
- bsr %ebp, %ecx
- push %ebx
- xor $31, %ecx
- mov 16(%esp), %esi
- sal %cl, %ebp
- mov %ebp, %edx
- not %edx
- mov $-1, %eax
- div %ebp C On K7, invert_limb would be a few cycles faster.
- mov %eax, (%esi) C store bi
- mov %ecx, 4(%esi) C store cnt
- neg %ebp
- mov $1, %edx
- shld %cl, %eax, %edx
- imul %ebp, %edx
- shr %cl, %edx
- imul %ebp, %eax
- mov %edx, 8(%esi) C store B1modb
- mov %eax, 12(%esi) C store B2modb
- pop %ebx
- pop %esi
- pop %ebp
- ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/k7/mod_1_4.asm b/gmp/mpn/x86/k7/mod_1_4.asm
deleted file mode 100644
index bb7597edd2..0000000000
--- a/gmp/mpn/x86/k7/mod_1_4.asm
+++ /dev/null
@@ -1,260 +0,0 @@
-dnl x86-32 mpn_mod_1s_4p, requiring cmov.
-
-dnl Contributed to the GNU project by Torbjorn Granlund.
-
-dnl Copyright 2009, 2010 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C P5 ?
-C P6 model 0-8,10-12 ?
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) 6
-C P4 model 0 (Willamette) ?
-C P4 model 1 (?) ?
-C P4 model 2 (Northwood) 15.5
-C P4 model 3 (Prescott) ?
-C P4 model 4 (Nocona) ?
-C AMD K6 ?
-C AMD K7 4.75
-C AMD K8 ?
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_mod_1s_4p)
- push %ebp
- push %edi
- push %esi
- push %ebx
- sub $28, %esp
- mov 60(%esp), %edi C cps[]
- mov 8(%edi), %eax
- mov 12(%edi), %edx
- mov 16(%edi), %ecx
- mov 20(%edi), %esi
- mov 24(%edi), %edi
- mov %eax, 4(%esp)
- mov %edx, 8(%esp)
- mov %ecx, 12(%esp)
- mov %esi, 16(%esp)
- mov %edi, 20(%esp)
- mov 52(%esp), %eax C n
- xor %edi, %edi
- mov 48(%esp), %esi C up
- lea -12(%esi,%eax,4), %esi
- and $3, %eax
- je L(b0)
- cmp $2, %eax
- jc L(b1)
- je L(b2)
-
-L(b3): mov 4(%esi), %eax
- mull 4(%esp)
- mov (%esi), %ebp
- add %eax, %ebp
- adc %edx, %edi
- mov 8(%esi), %eax
- mull 8(%esp)
- lea -12(%esi), %esi
- jmp L(m0)
-
-L(b0): mov (%esi), %eax
- mull 4(%esp)
- mov -4(%esi), %ebp
- add %eax, %ebp
- adc %edx, %edi
- mov 4(%esi), %eax
- mull 8(%esp)
- add %eax, %ebp
- adc %edx, %edi
- mov 8(%esi), %eax
- mull 12(%esp)
- lea -16(%esi), %esi
- jmp L(m0)
-
-L(b1): mov 8(%esi), %ebp
- lea -4(%esi), %esi
- jmp L(m1)
-
-L(b2): mov 8(%esi), %edi
- mov 4(%esi), %ebp
- lea -8(%esi), %esi
- jmp L(m1)
-
- ALIGN(16)
-L(top): mov (%esi), %eax
- mull 4(%esp)
- mov -4(%esi), %ebx
- xor %ecx, %ecx
- add %eax, %ebx
- adc %edx, %ecx
- mov 4(%esi), %eax
- mull 8(%esp)
- add %eax, %ebx
- adc %edx, %ecx
- mov 8(%esi), %eax
- mull 12(%esp)
- add %eax, %ebx
- adc %edx, %ecx
- lea -16(%esi), %esi
- mov 16(%esp), %eax
- mul %ebp
- add %eax, %ebx
- adc %edx, %ecx
- mov 20(%esp), %eax
- mul %edi
- mov %ebx, %ebp
- mov %ecx, %edi
-L(m0): add %eax, %ebp
- adc %edx, %edi
-L(m1): subl $4, 52(%esp)
- ja L(top)
-
-L(end): mov 4(%esp), %eax
- mul %edi
- mov 60(%esp), %edi
- add %eax, %ebp
- adc $0, %edx
- mov 4(%edi), %ecx
- mov %edx, %esi
- mov %ebp, %eax
- sal %cl, %esi
- mov %ecx, %ebx
- neg %ecx
- shr %cl, %eax
- or %esi, %eax
- lea 1(%eax), %esi
- mull (%edi)
- mov %ebx, %ecx
- mov %eax, %ebx
- mov %ebp, %eax
- mov 56(%esp), %ebp
- sal %cl, %eax
- add %eax, %ebx
- adc %esi, %edx
- imul %ebp, %edx
- sub %edx, %eax
- lea (%eax,%ebp), %edx
- cmp %eax, %ebx
- cmovc( %edx, %eax)
- mov %eax, %edx
- sub %ebp, %eax
- cmovc( %edx, %eax)
- add $28, %esp
- pop %ebx
- pop %esi
- pop %edi
- pop %ebp
- shr %cl, %eax
- ret
-EPILOGUE()
-
- ALIGN(16)
-PROLOGUE(mpn_mod_1s_4p_cps)
-C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm
- push %ebp
- push %edi
- push %esi
- push %ebx
- mov 20(%esp), %ebp C FIXME: avoid bp for 0-idx
- mov 24(%esp), %ebx
- bsr %ebx, %ecx
- xor $31, %ecx
- sal %cl, %ebx C b << cnt
- mov %ebx, %edx
- not %edx
- mov $-1, %eax
- div %ebx
- xor %edi, %edi
- sub %ebx, %edi
- mov $1, %esi
- mov %eax, (%ebp) C store bi
- mov %ecx, 4(%ebp) C store cnt
- shld %cl, %eax, %esi
- imul %edi, %esi
- mov %eax, %edi
- mul %esi
-
- add %esi, %edx
- shr %cl, %esi
- mov %esi, 8(%ebp) C store B1modb
-
- not %edx
- imul %ebx, %edx
- lea (%edx,%ebx), %esi
- cmp %edx, %eax
- cmovnc( %edx, %esi)
- mov %edi, %eax
- mul %esi
-
- add %esi, %edx
- shr %cl, %esi
- mov %esi, 12(%ebp) C store B2modb
-
- not %edx
- imul %ebx, %edx
- lea (%edx,%ebx), %esi
- cmp %edx, %eax
- cmovnc( %edx, %esi)
- mov %edi, %eax
- mul %esi
-
- add %esi, %edx
- shr %cl, %esi
- mov %esi, 16(%ebp) C store B3modb
-
- not %edx
- imul %ebx, %edx
- lea (%edx,%ebx), %esi
- cmp %edx, %eax
- cmovnc( %edx, %esi)
- mov %edi, %eax
- mul %esi
-
- add %esi, %edx
- shr %cl, %esi
- mov %esi, 20(%ebp) C store B4modb
-
- not %edx
- imul %ebx, %edx
- add %edx, %ebx
- cmp %edx, %eax
- cmovnc( %edx, %ebx)
-
- shr %cl, %ebx
- mov %ebx, 24(%ebp) C store B5modb
-
- pop %ebx
- pop %esi
- pop %edi
- pop %ebp
- ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/k7/mod_34lsub1.asm b/gmp/mpn/x86/k7/mod_34lsub1.asm
index ee3ad04099..f00e84dc42 100644
--- a/gmp/mpn/x86/k7/mod_34lsub1.asm
+++ b/gmp/mpn/x86/k7/mod_34lsub1.asm
@@ -1,32 +1,22 @@
dnl AMD K7 mpn_mod_34lsub1 -- remainder modulo 2^24-1.
-dnl Copyright 2000-2002, 2004, 2005, 2008 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002, 2004, 2005, 2008 Free Software Foundation,
+dnl Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k7/mode1o.asm b/gmp/mpn/x86/k7/mode1o.asm
index 6472ec5949..ef858049a6 100644
--- a/gmp/mpn/x86/k7/mode1o.asm
+++ b/gmp/mpn/x86/k7/mode1o.asm
@@ -1,32 +1,21 @@
dnl AMD K7 mpn_modexact_1_odd -- exact division style remainder.
-dnl Copyright 2000-2002, 2004, 2007 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002, 2004, 2007 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -122,7 +111,7 @@ ifdef(`PIC',`
subl %eax, %edi C inv = 2*inv - inv*inv*d
- ASSERT(e,` C d*inv == 1 mod 2^GMP_LIMB_BITS
+ ASSERT(e,` C d*inv == 1 mod 2^BITS_PER_MP_LIMB
movl %esi, %eax
imull %edi, %eax
cmpl $1, %eax')
diff --git a/gmp/mpn/x86/k7/mul_1.asm b/gmp/mpn/x86/k7/mul_1.asm
index 755cd2ed50..016262d594 100644
--- a/gmp/mpn/x86/k7/mul_1.asm
+++ b/gmp/mpn/x86/k7/mul_1.asm
@@ -1,38 +1,28 @@
dnl AMD K7 mpn_mul_1.
-dnl Copyright 1999-2002, 2005, 2008 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002, 2005, 2008 Free Software Foundation,
+dnl Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C P5
+C cycles/limb
+C P5:
C P6 model 0-8,10-12)
C P6 model 9 (Banias)
C P6 model 13 (Dothan)
@@ -41,9 +31,9 @@ C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C AMD K6
-C AMD K7 3.25
-C AMD K8
+C K6:
+C K7: 3.25
+C K8:
C TODO
C * Improve feed-in and wind-down code. We beat the old code for all n != 1,
diff --git a/gmp/mpn/x86/k7/mul_basecase.asm b/gmp/mpn/x86/k7/mul_basecase.asm
index 4dfb500885..7f4c0002f7 100644
--- a/gmp/mpn/x86/k7/mul_basecase.asm
+++ b/gmp/mpn/x86/k7/mul_basecase.asm
@@ -1,32 +1,21 @@
dnl AMD K7 mpn_mul_basecase -- multiply two mpn numbers.
-dnl Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/k7/sqr_basecase.asm b/gmp/mpn/x86/k7/sqr_basecase.asm
index 7b6a97e0df..408a13dc9b 100644
--- a/gmp/mpn/x86/k7/sqr_basecase.asm
+++ b/gmp/mpn/x86/k7/sqr_basecase.asm
@@ -1,32 +1,21 @@
dnl AMD K7 mpn_sqr_basecase -- square an mpn number.
-dnl Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -39,18 +28,18 @@ C roughly the Karatsuba recursing range).
dnl These are the same as mpn/x86/k6/sqr_basecase.asm, see that code for
dnl some comments.
-deflit(SQR_TOOM2_THRESHOLD_MAX, 66)
+deflit(SQR_KARATSUBA_THRESHOLD_MAX, 66)
-ifdef(`SQR_TOOM2_THRESHOLD_OVERRIDE',
-`define(`SQR_TOOM2_THRESHOLD',SQR_TOOM2_THRESHOLD_OVERRIDE)')
+ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE',
+`define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)')
-m4_config_gmp_mparam(`SQR_TOOM2_THRESHOLD')
-deflit(UNROLL_COUNT, eval(SQR_TOOM2_THRESHOLD-3))
+m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD')
+deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3))
C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
-C With a SQR_TOOM2_THRESHOLD around 50 this code is about 1500 bytes,
+C With a SQR_KARATSUBA_THRESHOLD around 50 this code is about 1500 bytes,
C which is quite a bit, but is considered good value since squares big
C enough to use most of the code will be spending quite a few cycles in it.
diff --git a/gmp/mpn/x86/k7/sublsh1_n.asm b/gmp/mpn/x86/k7/sublsh1_n.asm
deleted file mode 100644
index 523b01218d..0000000000
--- a/gmp/mpn/x86/k7/sublsh1_n.asm
+++ /dev/null
@@ -1,173 +0,0 @@
-dnl AMD K7 mpn_sublsh1_n_ip1 -- rp[] = rp[] - (up[] << 1)
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C This is an attempt at a sublsh1_n for x86-32, not relying on sse2 insns. The
-C innerloop is 2*3-way unrolled, which is best we can do with the available
-C registers. It seems tricky to use the same structure for rsblsh1_n, since we
-C cannot feed carry between operations there.
-
-C cycles/limb
-C P5
-C P6 model 0-8,10-12
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan)
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood)
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom 6.75
-C AMD K6
-C AMD K7
-C AMD K8
-
-C This is a basic sublsh1_n for k7, atom, and perhaps some other x86-32
-C processors. It uses 2*4-way unrolling, for good reasons.
-C
-C Breaking carry recurrency might be a good idea. We would then need separate
-C registers for the shift carry and add/subtract carry, which in turn would
-C force is to 2*2-way unrolling.
-
-defframe(PARAM_SIZE, 12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-dnl re-use parameter space
-define(VAR_COUNT,`PARAM_SIZE')
-define(SAVE_EBX,`PARAM_SRC')
-define(SAVE_EBP,`PARAM_DST')
-
-ASM_START()
- TEXT
- ALIGN(8)
-PROLOGUE(mpn_sublsh1_n_ip1)
-deflit(`FRAME',0)
-
-define(`rp', `%edi')
-define(`up', `%esi')
-
- mov PARAM_SIZE, %eax C size
- push up FRAME_pushl()
- push rp FRAME_pushl()
- xor %edx, %edx
- mov PARAM_SRC, up
- mov PARAM_DST, rp
- mov %ebx, SAVE_EBX
- mov %eax, %ebx
- shr $3, %eax
-
- not %eax C count = -(size\8)-i
- and $7, %ebx C size % 8
- jz L(exact)
-
-L(oop):
-ifdef(`CPU_P6',`
- shr %edx ') C restore 2nd saved carry bit
- mov (up), %ecx
- adc %ecx, %ecx
- rcr %edx C restore 1st saved carry bit
- lea 4(up), up
- sbb %ecx, (rp)
- lea 4(rp), rp
- adc %edx, %edx C save a carry bit in edx
-ifdef(`CPU_P6',`
- adc %edx, %edx ') C save another carry bit in edx
- dec %ebx
- jnz L(oop)
-L(exact):
- inc %eax
- jz L(end)
- mov %eax, VAR_COUNT
- mov %ebp, SAVE_EBP
-
- ALIGN(16)
-L(top):
-ifdef(`CPU_P6',`
- shr %edx ') C restore 2nd saved carry bit
- mov (up), %eax
- adc %eax, %eax
- mov 4(up), %ebx
- adc %ebx, %ebx
- mov 8(up), %ecx
- adc %ecx, %ecx
- mov 12(up), %ebp
- adc %ebp, %ebp
-
- rcr %edx C restore 1st saved carry bit
-
- sbb %eax, (rp)
- sbb %ebx, 4(rp)
- sbb %ecx, 8(rp)
- sbb %ebp, 12(rp)
-
- mov 16(up), %eax
- adc %eax, %eax
- mov 20(up), %ebx
- adc %ebx, %ebx
- mov 24(up), %ecx
- adc %ecx, %ecx
- mov 28(up), %ebp
- adc %ebp, %ebp
-
- lea 32(up), up
- adc %edx, %edx C save a carry bit in edx
-
- sbb %eax, 16(rp)
- sbb %ebx, 20(rp)
- sbb %ecx, 24(rp)
- sbb %ebp, 28(rp)
-
-ifdef(`CPU_P6',`
- adc %edx, %edx ') C save another carry bit in edx
- incl VAR_COUNT
- lea 32(rp), rp
- jne L(top)
-
- mov SAVE_EBP, %ebp
-L(end):
- mov SAVE_EBX, %ebx
-
-ifdef(`CPU_P6',`
- xor %eax, %eax
- shr $1, %edx
- adc %edx, %eax
-',`
- adc $0, %edx
- mov %edx, %eax
-')
- pop rp FRAME_popl()
- pop up FRAME_popl()
- ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/k8/gmp-mparam.h b/gmp/mpn/x86/k8/gmp-mparam.h
deleted file mode 100644
index 8d95fef80b..0000000000
--- a/gmp/mpn/x86/k8/gmp-mparam.h
+++ /dev/null
@@ -1,198 +0,0 @@
-/* x86/k8 gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2500 MHz K8 Brisbane */
-/* FFT tuning limit = 10000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */
-
-#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 11
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 12
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 16
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD 2
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 40
-
-#define MUL_TOOM22_THRESHOLD 24
-#define MUL_TOOM33_THRESHOLD 81
-#define MUL_TOOM44_THRESHOLD 130
-#define MUL_TOOM6H_THRESHOLD 303
-#define MUL_TOOM8H_THRESHOLD 430
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 93
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 92
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 122
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 46
-#define SQR_TOOM3_THRESHOLD 78
-#define SQR_TOOM4_THRESHOLD 202
-#define SQR_TOOM6_THRESHOLD 286
-#define SQR_TOOM8_THRESHOLD 422
-
-#define MULMID_TOOM42_THRESHOLD 56
-
-#define MULMOD_BNM1_THRESHOLD 17
-#define SQRMOD_BNM1_THRESHOLD 18
-
-#define MUL_FFT_MODF_THRESHOLD 848 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 848, 5}, { 27, 6}, { 25, 7}, { 13, 6}, \
- { 27, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \
- { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
- { 47, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \
- { 19, 7}, { 41, 8}, { 23, 7}, { 47, 8}, \
- { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \
- { 55, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
- { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
- { 31, 9}, { 79,10}, { 47, 9}, { 103,11}, \
- { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
- { 167,10}, { 95, 9}, { 199,10}, { 111,11}, \
- { 63,10}, { 127, 9}, { 255,10}, { 143, 9}, \
- { 287,10}, { 159,11}, { 95,10}, { 207,12}, \
- { 63,11}, { 127,10}, { 271, 9}, { 543,10}, \
- { 287,11}, { 159,10}, { 319, 9}, { 639,10}, \
- { 335, 9}, { 671,11}, { 191,10}, { 383, 9}, \
- { 799,11}, { 223,12}, { 127,11}, { 255,10}, \
- { 543,11}, { 287,10}, { 607, 9}, { 1215,11}, \
- { 319,10}, { 671, 9}, { 1343,12}, { 191,11}, \
- { 383,10}, { 799, 9}, { 1599,11}, { 415,10}, \
- { 863, 9}, { 1727,13}, { 127,12}, { 255,11}, \
- { 543,10}, { 1119,11}, { 607,10}, { 1215,12}, \
- { 319,11}, { 671,10}, { 1343,11}, { 735,10}, \
- { 1471, 9}, { 2943,12}, { 383,11}, { 799,10}, \
- { 1599,11}, { 863,10}, { 1727,12}, { 447,11}, \
- { 991,13}, { 255,12}, { 511,11}, { 1023,10}, \
- { 2111,11}, { 1119,12}, { 575,11}, { 1215,10}, \
- { 2431,12}, { 639,11}, { 1343,12}, { 703,11}, \
- { 1471,10}, { 2943,13}, { 383,12}, { 767,11}, \
- { 1599,12}, { 831,11}, { 1727,12}, { 959,11}, \
- { 1919,14}, { 255,13}, { 511,12}, { 1023,11}, \
- { 2047,12}, { 1087,11}, { 2239,12}, { 1215,11}, \
- { 2431,13}, { 639,12}, { 1471,11}, { 2943,13}, \
- { 767,12}, { 1727,13}, { 895,12}, { 1983,14}, \
- { 511,13}, { 1023,12}, { 2239,13}, { 1151,12}, \
- { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 144
-#define MUL_FFT_THRESHOLD 7552
-
-#define SQR_FFT_MODF_THRESHOLD 618 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 618, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
- { 28, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
- { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
- { 47, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \
- { 19, 7}, { 41, 8}, { 23, 7}, { 47, 8}, \
- { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \
- { 51, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
- { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
- { 31, 9}, { 79,10}, { 47, 9}, { 95,11}, \
- { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
- { 167,10}, { 95, 9}, { 191,10}, { 111,11}, \
- { 63,10}, { 159,11}, { 95,10}, { 191,12}, \
- { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
- { 271, 9}, { 543, 8}, { 1087,10}, { 287,11}, \
- { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \
- { 671, 8}, { 1343,10}, { 351,11}, { 191,10}, \
- { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \
- { 415,11}, { 223,12}, { 127,11}, { 255,10}, \
- { 543, 9}, { 1087,11}, { 287,10}, { 607, 9}, \
- { 1215,11}, { 319,10}, { 671, 9}, { 1343,11}, \
- { 351,12}, { 191,11}, { 383,10}, { 799, 9}, \
- { 1599,11}, { 415,10}, { 863, 9}, { 1727,13}, \
- { 127,12}, { 255,11}, { 543,10}, { 1087,11}, \
- { 607,10}, { 1215,12}, { 319,11}, { 671,10}, \
- { 1343,11}, { 735,10}, { 1471,12}, { 383,11}, \
- { 799,10}, { 1599,11}, { 863,10}, { 1727,12}, \
- { 447,11}, { 959,10}, { 1919,11}, { 991,13}, \
- { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
- { 1215,10}, { 2431,12}, { 639,11}, { 1343,12}, \
- { 703,11}, { 1471,13}, { 383,12}, { 767,11}, \
- { 1599,12}, { 831,11}, { 1727,12}, { 959,11}, \
- { 1919,14}, { 255,13}, { 511,12}, { 1087,11}, \
- { 2239,12}, { 1215,11}, { 2431,13}, { 639,12}, \
- { 1471,11}, { 2943,13}, { 767,12}, { 1727,11}, \
- { 3455,13}, { 895,12}, { 1983,14}, { 511,13}, \
- { 1023,12}, { 2239,13}, { 1151,12}, { 4096,13}, \
- { 8192,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 147
-#define SQR_FFT_THRESHOLD 5760
-
-#define MULLO_BASECASE_THRESHOLD 8
-#define MULLO_DC_THRESHOLD 31
-#define MULLO_MUL_N_THRESHOLD 14281
-
-#define DC_DIV_QR_THRESHOLD 91
-#define DC_DIVAPPR_Q_THRESHOLD 280
-#define DC_BDIV_QR_THRESHOLD 87
-#define DC_BDIV_Q_THRESHOLD 222
-
-#define INV_MULMOD_BNM1_THRESHOLD 62
-#define INV_NEWTON_THRESHOLD 268
-#define INV_APPR_THRESHOLD 270
-
-#define BINV_NEWTON_THRESHOLD 260
-#define REDC_1_TO_REDC_N_THRESHOLD 79
-
-#define MU_DIV_QR_THRESHOLD 1718
-#define MU_DIVAPPR_Q_THRESHOLD 1528
-#define MUPI_DIV_QR_THRESHOLD 97
-#define MU_BDIV_QR_THRESHOLD 1470
-#define MU_BDIV_Q_THRESHOLD 1470
-
-#define POWM_SEC_TABLE 1,22,114,416,1464
-
-#define MATRIX22_STRASSEN_THRESHOLD 16
-#define HGCD_THRESHOLD 149
-#define HGCD_APPR_THRESHOLD 204
-#define HGCD_REDUCE_THRESHOLD 4455
-#define GCD_DC_THRESHOLD 599
-#define GCDEXT_DC_THRESHOLD 403
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 13
-#define GET_STR_PRECOMPUTE_THRESHOLD 28
-#define SET_STR_DC_THRESHOLD 270
-#define SET_STR_PRECOMPUTE_THRESHOLD 1367
-
-#define FAC_DSC_THRESHOLD 348
-#define FAC_ODD_THRESHOLD 24
diff --git a/gmp/mpn/x86/lshift.asm b/gmp/mpn/x86/lshift.asm
index 6ee6153cc2..5598599f8b 100644
--- a/gmp/mpn/x86/lshift.asm
+++ b/gmp/mpn/x86/lshift.asm
@@ -1,43 +1,33 @@
dnl x86 mpn_lshift -- mpn left shift.
-dnl Copyright 1992, 1994, 1996, 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C P54 7.5
-C P55 7.0
-C P6 2.5
-C K6 4.5
-C K7 5.0
-C P4 14.5
+C P54: 7.5
+C P55: 7.0
+C P6: 2.5
+C K6: 4.5
+C K7: 5.0
+C P4: 14.5
C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/gmp/mpn/x86/mmx/sec_tabselect.asm b/gmp/mpn/x86/mmx/sec_tabselect.asm
deleted file mode 100644
index aae158abf7..0000000000
--- a/gmp/mpn/x86/mmx/sec_tabselect.asm
+++ /dev/null
@@ -1,163 +0,0 @@
-dnl X86 MMX mpn_sec_tabselect.
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2011-2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb cycles/limb
-C ali,evn n unal,evn n
-C P5
-C P6 model 0-8,10-12
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 1.33 1.87
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood) 2.1 2.63
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona) 1.7 2.57
-C Intel Atom 1.85 2.7
-C AMD K6
-C AMD K7 1.33 1.33
-C AMD K8
-C AMD K10
-
-define(`rp', `%edi')
-define(`tp', `%esi')
-define(`n', `%edx')
-define(`nents', `%ecx')
-define(`which', `')
-
-define(`i', `%ebp')
-define(`j', `%ebx')
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_sec_tabselect)
- push %ebx
- push %esi
- push %edi
- push %ebp
-
- mov 20(%esp), rp C 1st argument: above 4 pushed registers and the return address
- mov 24(%esp), tp
- mov 28(%esp), n
- mov 32(%esp), nents
-
- movd 36(%esp), %mm6
- punpckldq %mm6, %mm6 C 2 copies of `which'
-
- mov $1, %ebx
- movd %ebx, %mm7
- punpckldq %mm7, %mm7 C 2 copies of 1
-
- mov n, j C j tracks limbs still to produce, 4 per outer pass
- add $-4, j
- js L(outer_end) C fewer than 4 limbs, no full 4-limb column
-
-L(outer_top):
- mov nents, i C i = table entries left to scan
- mov tp, %eax C keep the column start in eax
- pxor %mm1, %mm1 C running entry index, 2 copies, starts at 0
- pxor %mm4, %mm4 C result accumulators
- pxor %mm5, %mm5
- ALIGN(16)
-L(top): movq %mm6, %mm0
- pcmpeqd %mm1, %mm0 C all-ones mask when the index equals which, else zero
- paddd %mm7, %mm1 C index + 1
- movq (tp), %mm2
- movq 8(tp), %mm3
- pand %mm0, %mm2
- pand %mm0, %mm3
- por %mm2, %mm4 C merge the masked limbs into the result
- por %mm3, %mm5
- lea (tp,n,4), tp C same column of the next entry, n limbs further on
- add $-1, i
- jne L(top) C every entry is visited, whatever which is
-
- movq %mm4, (rp) C store the 4 selected limbs
- movq %mm5, 8(rp)
-
- lea 16(%eax), tp C back to the first entry, next 4-limb column
- lea 16(rp), rp
- add $-4, j
- jns L(outer_top)
-L(outer_end):
-
- test $2, %dl C bit 1 of n set: a 2-limb column remains
- jz L(b0x)
-
-L(b1x): mov nents, i
- mov tp, %eax
- pxor %mm1, %mm1
- pxor %mm4, %mm4
- ALIGN(16)
-L(tp2): movq %mm6, %mm0
- pcmpeqd %mm1, %mm0
- paddd %mm7, %mm1
- movq (tp), %mm2
- pand %mm0, %mm2
- por %mm2, %mm4
- lea (tp,n,4), tp
- add $-1, i
- jne L(tp2)
-
- movq %mm4, (rp)
-
- lea 8(%eax), tp
- lea 8(rp), rp
-
-L(b0x): test $1, %dl C bit 0 of n set: one last limb remains
- jz L(b00)
-
-L(b01): mov nents, i
- pxor %mm1, %mm1
- pxor %mm4, %mm4
- ALIGN(16)
-L(tp1): movq %mm6, %mm0
- pcmpeqd %mm1, %mm0
- paddd %mm7, %mm1
- movd (tp), %mm2 C single limb load
- pand %mm0, %mm2
- por %mm2, %mm4
- lea (tp,n,4), tp
- add $-1, i
- jne L(tp1)
-
- movd %mm4, (rp)
-
-L(b00): pop %ebp
- pop %edi
- pop %esi
- pop %ebx
- emms C clear the MMX state before returning
- ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/mod_1.asm b/gmp/mpn/x86/mod_1.asm
new file mode 100644
index 0000000000..0fa3ce0def
--- /dev/null
+++ b/gmp/mpn/x86/mod_1.asm
@@ -0,0 +1,163 @@
+dnl x86 mpn_mod_1 -- mpn by limb remainder.
+
+dnl Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C 486 42 approx, maybe
+C P5 44
+C P6 39
+C K6 20
+C K7 41
+C P4 58
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t carry);
+C
+C Essentially this code is the same as the division based part of
+C mpn/generic/mod_1.c, but has the advantage that we get the desired divl
+C instruction even when gcc is not being used (where longlong.h only has the
+C rather slow generic C udiv_qrnnd()).
+C
+C A test is done to see if the high limb is less than the divisor, and if so
+C one less div is done. A div is between 20 and 40 cycles on the various
+C x86s, so assuming high<divisor about half the time, then this test saves
+C half that amount. The branch misprediction penalty on each chip is less
+C than half a div.
+C
+C
+C Notes for K6:
+C
+C Back-to-back div instructions take 20 cycles, the same as the loop here,
+C so it seems there's nothing to gain by rearranging. Pairing the mov and
+C loop instructions was found to gain nothing. Normally we use a loop
+C instruction rather than decl/jnz, but it gains nothing here.
+C
+C A multiply-by-inverse is used in mpn/x86/k6/pre_mod_1.asm, but it saves
+C only 2 c/l so currently we haven't bothered with the same for mpn_mod_1.
+C If an inverse takes about 40 cycles for normalized or perhaps 60 for
+C unnormalized (due to bsfl being slow on k6) then the threshold would be at
+C least 20 or 30 limbs.
+C
+
+defframe(PARAM_CARRY, 16)
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+
+ TEXT
+
+ ALIGN(16)
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ pushl %esi FRAME_pushl()
+
+ orl %ecx, %ecx C size == 0 ?
+ jz L(done_zero) C yes: return 0, the code sits after mpn_mod_1c
+
+ movl PARAM_DIVISOR, %esi
+ movl -4(%ebx,%ecx,4), %eax C src high limb
+
+ cmpl %esi, %eax C carry flag set if high<divisor
+
+ sbbl %edx, %edx C -1 if high<divisor
+
+ addl %edx, %ecx C skip one division if high<divisor
+ jz L(done_eax) C size was 1 and high<divisor: eax is already the remainder
+
+ andl %eax, %edx C carry = high limb if high<divisor, else 0
+
+
+L(top):
+ C eax scratch (quotient)
+ C ebx src
+ C ecx counter
+ C edx carry (remainder)
+ C esi divisor
+ C edi
+ C ebp
+
+ movl -4(%ebx,%ecx,4), %eax C next limb, highest first
+
+ divl %esi C edx:eax / divisor, remainder left in edx
+
+ decl %ecx
+ jnz L(top)
+
+
+ movl %edx, %eax C return the final remainder
+L(done_eax):
+ popl %esi
+
+ popl %ebx
+
+ ret
+
+EPILOGUE()
+
+
+ C This code is located after mpn_mod_1, so the jump to L(top) here is
+ C back and hence will be predicted as taken. (size==0 is considered
+ C unlikely.)
+
+ ALIGN(16)
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_SRC, %ebx
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_DIVISOR, %esi
+ orl %ecx, %ecx C size == 0 ?
+
+ movl PARAM_CARRY, %edx C movl leaves the flags from orl untouched
+ jnz L(top) C size != 0: enter the mpn_mod_1 loop with edx = carry
+
+ popl %esi
+ movl %edx, %eax C size == 0: return the carry as given
+
+ popl %ebx
+
+ ret
+
+
+ C This code is for mpn_mod_1, but is positioned here to save some
+ C space in the alignment padding.
+ C
+L(done_zero):
+ popl %esi
+ xorl %eax, %eax C return 0 for size == 0
+
+ popl %ebx
+
+ ret
+
+EPILOGUE()
diff --git a/gmp/mpn/x86/mod_34lsub1.asm b/gmp/mpn/x86/mod_34lsub1.asm
index e09e702c6f..68b4a73dbc 100644
--- a/gmp/mpn/x86/mod_34lsub1.asm
+++ b/gmp/mpn/x86/mod_34lsub1.asm
@@ -1,42 +1,31 @@
dnl Generic x86 mpn_mod_34lsub1 -- mpn remainder modulo 2^24-1.
-dnl Copyright 2000-2002, 2004 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C P5 3.0
-C P6 3.66
-C K6 3.0
-C K7 1.3
-C P4 9
+C P5: 3.0
+C P6: 3.66
+C K6: 3.0
+C K7: 1.3
+C P4: 9
C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
diff --git a/gmp/mpn/x86/mul_1.asm b/gmp/mpn/x86/mul_1.asm
index 421de62225..1d715ece7e 100644
--- a/gmp/mpn/x86/mul_1.asm
+++ b/gmp/mpn/x86/mul_1.asm
@@ -1,50 +1,40 @@
dnl x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector
dnl with a limb and store the result in a second limb vector.
-dnl Copyright 1992, 1994, 1997-2002, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1992, 1994, 1997, 1998, 1999, 2000, 2001, 2002, 2005 Free
+dnl Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C P5 12.5
-C P6 model 0-8,10-12 5.5
+C cycles/limb
+C P5: 12.5
+C P6 model 0-8,10-12 5.5
C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 5.25
-C P4 model 0 (Willamette) 19.0
-C P4 model 1 (?) 19.0
-C P4 model 2 (Northwood) 19.0
+C P6 model 13 (Dothan) 5.25
+C P4 model 0 (Willamette) 19.0
+C P4 model 1 (?) 19.0
+C P4 model 2 (Northwood) 19.0
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C AMD K6 10.5
-C AMD K7 4.5
-C AMD K8
+C K6: 10.5
+C K7: 4.5
+C K8:
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/gmp/mpn/x86/mul_basecase.asm b/gmp/mpn/x86/mul_basecase.asm
index 8339732a80..7918ea07f3 100644
--- a/gmp/mpn/x86/mul_basecase.asm
+++ b/gmp/mpn/x86/mul_basecase.asm
@@ -1,43 +1,33 @@
dnl x86 mpn_mul_basecase -- Multiply two limb vectors and store the result
dnl in a third limb vector.
-dnl Copyright 1996-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002 Free Software
+dnl Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/crossproduct
-C P5 15
-C P6 7.5
-C K6 12.5
-C K7 5.5
-C P4 24
+C P5: 15
+C P6: 7.5
+C K6: 12.5
+C K7: 5.5
+C P4: 24
C void mpn_mul_basecase (mp_ptr wp,
diff --git a/gmp/mpn/x86/nano/gmp-mparam.h b/gmp/mpn/x86/nano/gmp-mparam.h
deleted file mode 100644
index cd8ac4e1d6..0000000000
--- a/gmp/mpn/x86/nano/gmp-mparam.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/* x86/nano gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 2000-2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */
-
-#define MOD_1_1P_METHOD 1
-#define MOD_1_NORM_THRESHOLD 3
-#define MOD_1_UNNORM_THRESHOLD 3
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 10
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 9
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 53
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 12
-#define USE_PREINV_DIVREM_1 1
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 32
-
-#define MUL_TOOM22_THRESHOLD 16
-#define MUL_TOOM33_THRESHOLD 132
-#define MUL_TOOM44_THRESHOLD 195
-#define MUL_TOOM6H_THRESHOLD 270
-#define MUL_TOOM8H_THRESHOLD 478
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 129
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 138
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 130
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 135
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 28
-#define SQR_TOOM3_THRESHOLD 194
-#define SQR_TOOM4_THRESHOLD 502
-#define SQR_TOOM6_THRESHOLD 746
-#define SQR_TOOM8_THRESHOLD 1005
-
-#define MULMID_TOOM42_THRESHOLD 40
-
-#define MULMOD_BNM1_THRESHOLD 14
-#define SQRMOD_BNM1_THRESHOLD 19
-
-#define POWM_SEC_TABLE 4,23,258,828,2246
-
-#define MUL_FFT_MODF_THRESHOLD 308 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 308, 5}, { 13, 6}, { 7, 5}, { 17, 6}, \
- { 9, 5}, { 19, 6}, { 11, 5}, { 23, 6}, \
- { 13, 7}, { 7, 6}, { 17, 7}, { 9, 6}, \
- { 19, 7}, { 11, 6}, { 24, 7}, { 15, 6}, \
- { 31, 7}, { 19, 8}, { 11, 7}, { 25, 8}, \
- { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \
- { 23, 7}, { 47, 9}, { 15, 8}, { 31, 7}, \
- { 63, 8}, { 39, 9}, { 23, 8}, { 47,10}, \
- { 15, 9}, { 31, 8}, { 63, 9}, { 47,10}, \
- { 31, 9}, { 71,10}, { 47, 9}, { 95,11}, \
- { 31,10}, { 63, 9}, { 127, 8}, { 255,10}, \
- { 79, 9}, { 159,10}, { 95, 9}, { 191,11}, \
- { 63,10}, { 127, 9}, { 255, 8}, { 543, 9}, \
- { 287, 8}, { 575, 7}, { 1215,10}, { 159,11}, \
- { 95,10}, { 191,12}, { 63,11}, { 127,10}, \
- { 255, 9}, { 543, 8}, { 1087,10}, { 287, 9}, \
- { 607, 8}, { 1215,11}, { 159,10}, { 319, 9}, \
- { 639,10}, { 351, 9}, { 703, 8}, { 1407, 9}, \
- { 735, 8}, { 1471,11}, { 191,10}, { 383, 9}, \
- { 767,10}, { 415, 9}, { 831,11}, { 223,10}, \
- { 447, 9}, { 895,10}, { 479, 9}, { 959, 8}, \
- { 1919,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
- { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 89
-#define MUL_FFT_THRESHOLD 1856
-
-#define SQR_FFT_MODF_THRESHOLD 396 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 396, 5}, { 13, 6}, { 7, 5}, { 21, 6}, \
- { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \
- { 25, 7}, { 15, 6}, { 31, 7}, { 19, 6}, \
- { 39, 7}, { 21, 8}, { 11, 7}, { 23, 6}, \
- { 47, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
- { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
- { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
- { 39, 9}, { 23, 8}, { 47,10}, { 15, 9}, \
- { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \
- { 47,10}, { 31, 9}, { 79,10}, { 47, 9}, \
- { 95,11}, { 31,10}, { 63, 9}, { 127,10}, \
- { 79, 9}, { 159,10}, { 95,11}, { 63,10}, \
- { 127, 9}, { 255, 8}, { 543,10}, { 143, 9}, \
- { 287, 8}, { 607, 7}, { 1215, 6}, { 2431,10}, \
- { 159, 8}, { 639,11}, { 95,10}, { 191,12}, \
- { 63,11}, { 127,10}, { 255, 9}, { 543, 8}, \
- { 1087,10}, { 287, 9}, { 607, 8}, { 1215,11}, \
- { 159,10}, { 319, 9}, { 671,10}, { 351, 9}, \
- { 703, 8}, { 1407, 9}, { 735, 8}, { 1471, 7}, \
- { 2943,11}, { 191,10}, { 383, 9}, { 799,10}, \
- { 415, 9}, { 895,10}, { 479,12}, { 4096,13}, \
- { 8192,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 87
-#define SQR_FFT_THRESHOLD 2368
-
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 51
-#define MULLO_MUL_N_THRESHOLD 3369
-
-#define DC_DIV_QR_THRESHOLD 56
-#define DC_DIVAPPR_Q_THRESHOLD 183
-#define DC_BDIV_QR_THRESHOLD 55
-#define DC_BDIV_Q_THRESHOLD 118
-
-#define INV_MULMOD_BNM1_THRESHOLD 30
-#define INV_NEWTON_THRESHOLD 266
-#define INV_APPR_THRESHOLD 218
-
-#define BINV_NEWTON_THRESHOLD 268
-#define REDC_1_TO_REDC_N_THRESHOLD 56
-
-#define MU_DIV_QR_THRESHOLD 1308
-#define MU_DIVAPPR_Q_THRESHOLD 1528
-#define MUPI_DIV_QR_THRESHOLD 124
-#define MU_BDIV_QR_THRESHOLD 855
-#define MU_BDIV_Q_THRESHOLD 1334
-
-#define MATRIX22_STRASSEN_THRESHOLD 14
-#define HGCD_THRESHOLD 104
-#define HGCD_APPR_THRESHOLD 139
-#define HGCD_REDUCE_THRESHOLD 2121
-#define GCD_DC_THRESHOLD 456
-#define GCDEXT_DC_THRESHOLD 321
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 11
-#define GET_STR_PRECOMPUTE_THRESHOLD 25
-#define SET_STR_DC_THRESHOLD 542
-#define SET_STR_PRECOMPUTE_THRESHOLD 840
diff --git a/gmp/mpn/x86/p6/README b/gmp/mpn/x86/p6/README
index f19d47b94f..1ded4e7177 100644
--- a/gmp/mpn/x86/p6/README
+++ b/gmp/mpn/x86/p6/README
@@ -3,28 +3,17 @@ Copyright 2000, 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
@@ -63,7 +52,7 @@ Some of these might be able to be improved.
mpn_mul_basecase 8.2 cycles/crossproduct (approx)
mpn_sqr_basecase 4.0 cycles/crossproduct (approx)
- or 7.75 cycles/triangleproduct (approx)
+ or 7.75 cycles/triangleproduct (approx)
Pentium II and III have MMX and get the following improvements.
diff --git a/gmp/mpn/x86/p6/aors_n.asm b/gmp/mpn/x86/p6/aors_n.asm
index df51c2e6f7..f4652ec2cb 100644
--- a/gmp/mpn/x86/p6/aors_n.asm
+++ b/gmp/mpn/x86/p6/aors_n.asm
@@ -1,43 +1,32 @@
dnl Intel P6 mpn_add_n/mpn_sub_n -- mpn add or subtract.
dnl Copyright 2006 Free Software Foundation, Inc.
-
+dnl
dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C TODO:
-C * Avoid indexed addressing, it makes us stall on the two-ported register
+C * Avoid indexed addressing, it makes us stall on the two-ported register
C file.
-C cycles/limb
-C P6 model 0-8,10-12 3.17
-C P6 model 9 (Banias) 2.15
-C P6 model 13 (Dothan) 2.25
+C cycles/limb
+C P6 model 0-8,10-12 3.17
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) 2.25
define(`rp', `%edi')
diff --git a/gmp/mpn/x86/p6/aorsmul_1.asm b/gmp/mpn/x86/p6/aorsmul_1.asm
index bc8c49c62e..746bf05f12 100644
--- a/gmp/mpn/x86/p6/aorsmul_1.asm
+++ b/gmp/mpn/x86/p6/aorsmul_1.asm
@@ -1,49 +1,38 @@
dnl Intel P6 mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
-dnl Copyright 1999-2002, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C P5
-C P6 model 0-8,10-12 6.44
-C P6 model 9 (Banias) 6.15
-C P6 model 13 (Dothan) 6.11
+C cycles/limb
+C P5:
+C P6 model 0-8,10-12 6.44
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan) 6.11
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C AMD K6
-C AMD K7
-C AMD K8
+C K6:
+C K7:
+C K8:
dnl P6 UNROLL_COUNT cycles/limb
@@ -181,7 +170,7 @@ C registers when doing the mul for the initial two carry limbs.
C
C The add/adc for the initial carry in %ebx is necessary only for the
C mpn_add/submul_1c entry points. Duplicating the startup code to
-C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good
+C eliminate this for the plain mpn_add/submul_1 doesn't seem like a good
C idea.
dnl overlapping with parameters already fetched
diff --git a/gmp/mpn/x86/p6/bdiv_q_1.asm b/gmp/mpn/x86/p6/bdiv_q_1.asm
deleted file mode 100644
index 2cc179c238..0000000000
--- a/gmp/mpn/x86/p6/bdiv_q_1.asm
+++ /dev/null
@@ -1,286 +0,0 @@
-dnl Intel P6 mpn_modexact_1_odd -- exact division style remainder.
-
-dnl Rearranged from mpn/x86/p6/dive_1.asm by Marco Bodrato.
-
-dnl Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C odd even divisor
-C P6: 10.0 12.0 cycles/limb
-
-C MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
-
-C The odd case is basically the same as mpn_modexact_1_odd, just with an
-C extra store, and it runs at the same 10 cycles which is the dependent
-C chain.
-C
-C The shifts for the even case aren't on the dependent chain so in principle
-C it could run the same too, but nothing running at 10 has been found.
-C Perhaps there's too many uops (an extra 4 over the odd case).
-
-defframe(PARAM_SHIFT, 24)
-defframe(PARAM_INVERSE,20)
-defframe(PARAM_DIVISOR,16)
-defframe(PARAM_SIZE, 12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-defframe(SAVE_EBX, -4)
-defframe(SAVE_ESI, -8)
-defframe(SAVE_EDI, -12)
-defframe(SAVE_EBP, -16)
-deflit(STACK_SPACE, 16)
-
-dnl re-use parameter space
-define(VAR_INVERSE,`PARAM_SRC')
-
- TEXT
-
-C mp_limb_t
-C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
-C mp_limb_t inverse, int shift)
-
- ALIGN(16)
-PROLOGUE(mpn_pi1_bdiv_q_1)
-deflit(`FRAME',0)
-
- subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE)
-
- movl %esi, SAVE_ESI
- movl PARAM_SRC, %esi
-
- movl %ebx, SAVE_EBX
- movl PARAM_SIZE, %ebx
-
- movl %ebp, SAVE_EBP
- movl PARAM_INVERSE, %ebp
-
- movl PARAM_SHIFT, %ecx C trailing twos
-
-L(common):
- movl %edi, SAVE_EDI
- movl PARAM_DST, %edi
-
- leal (%esi,%ebx,4), %esi C src end
-
- leal (%edi,%ebx,4), %edi C dst end
- negl %ebx C -size
-
- movl (%esi,%ebx,4), %eax C src[0]
-
- orl %ecx, %ecx
- jz L(odd_entry)
-
- movl %edi, PARAM_DST
- movl %ebp, VAR_INVERSE
-
-L(even):
- C eax src[0]
- C ebx counter, limbs, negative
- C ecx shift
- C edx
- C esi
- C edi
- C ebp
-
- xorl %ebp, %ebp C initial carry bit
- xorl %edx, %edx C initial carry limb (for size==1)
-
- incl %ebx
- jz L(even_one)
-
- movl (%esi,%ebx,4), %edi C src[1]
-
- shrdl( %cl, %edi, %eax)
-
- jmp L(even_entry)
-
-
-L(even_top):
- C eax scratch
- C ebx counter, limbs, negative
- C ecx shift
- C edx scratch
- C esi &src[size]
- C edi &dst[size] and scratch
- C ebp carry bit
-
- movl (%esi,%ebx,4), %edi
-
- mull PARAM_DIVISOR
-
- movl -4(%esi,%ebx,4), %eax
- shrdl( %cl, %edi, %eax)
-
- subl %ebp, %eax
-
- sbbl %ebp, %ebp
- subl %edx, %eax
-
- sbbl $0, %ebp
-
-L(even_entry):
- imull VAR_INVERSE, %eax
-
- movl PARAM_DST, %edi
- negl %ebp
-
- movl %eax, -4(%edi,%ebx,4)
- incl %ebx
- jnz L(even_top)
-
- mull PARAM_DIVISOR
-
- movl -4(%esi), %eax
-
-L(even_one):
- shrl %cl, %eax
- movl SAVE_ESI, %esi
-
- subl %ebp, %eax
- movl SAVE_EBP, %ebp
-
- subl %edx, %eax
- movl SAVE_EBX, %ebx
-
- imull VAR_INVERSE, %eax
-
- movl %eax, -4(%edi)
- movl SAVE_EDI, %edi
- addl $STACK_SPACE, %esp
-
- ret
-
-C The dependent chain here is
-C
-C subl %edx, %eax 1
-C imull %ebp, %eax 4
-C mull PARAM_DIVISOR 5
-C ----
-C total 10
-C
-C and this is the measured speed. No special scheduling is necessary, out
-C of order execution hides the load latency.
-
-L(odd_top):
- C eax scratch (src limb)
- C ebx counter, limbs, negative
- C ecx carry bit
- C edx carry limb, high of last product
- C esi &src[size]
- C edi &dst[size]
- C ebp inverse
-
- mull PARAM_DIVISOR
-
- movl (%esi,%ebx,4), %eax
- subl %ecx, %eax
-
- sbbl %ecx, %ecx
- subl %edx, %eax
-
- sbbl $0, %ecx
-
-L(odd_entry):
- imull %ebp, %eax
-
- movl %eax, (%edi,%ebx,4)
- negl %ecx
-
- incl %ebx
- jnz L(odd_top)
-
-
- movl SAVE_ESI, %esi
-
- movl SAVE_EDI, %edi
-
- movl SAVE_EBP, %ebp
-
- movl SAVE_EBX, %ebx
- addl $STACK_SPACE, %esp
-
- ret
-
-EPILOGUE()
-
-C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C mp_limb_t divisor);
-C
-
- ALIGN(16)
-PROLOGUE(mpn_bdiv_q_1)
-deflit(`FRAME',0)
-
- movl PARAM_DIVISOR, %eax
- subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE)
-
- movl %esi, SAVE_ESI
- movl PARAM_SRC, %esi
-
- movl %ebx, SAVE_EBX
- movl PARAM_SIZE, %ebx
-
- bsfl %eax, %ecx C trailing twos
-
- movl %ebp, SAVE_EBP
-
- shrl %cl, %eax C d without twos
-
- movl %eax, %edx
- shrl %eax C d/2 without twos
-
- movl %edx, PARAM_DIVISOR
- andl $127, %eax
-
-ifdef(`PIC',`
- LEA( binvert_limb_table, %ebp)
- movzbl (%eax,%ebp), %ebp C inv 8 bits
-',`
- movzbl binvert_limb_table(%eax), %ebp C inv 8 bits
-')
-
- leal (%ebp,%ebp), %eax C 2*inv
-
- imull %ebp, %ebp C inv*inv
- imull %edx, %ebp C inv*inv*d
-
- subl %ebp, %eax C inv = 2*inv - inv*inv*d
- leal (%eax,%eax), %ebp C 2*inv
-
- imull %eax, %eax C inv*inv
- imull %edx, %eax C inv*inv*d
-
- subl %eax, %ebp C inv = 2*inv - inv*inv*d
-
- jmp L(common)
-
-EPILOGUE()
diff --git a/gmp/mpn/x86/p6/copyd.asm b/gmp/mpn/x86/p6/copyd.asm
index 1be7636835..2946f51e7a 100644
--- a/gmp/mpn/x86/p6/copyd.asm
+++ b/gmp/mpn/x86/p6/copyd.asm
@@ -1,32 +1,21 @@
dnl Intel P6 mpn_copyd -- copy limb vector backwards.
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/p6/dive_1.asm b/gmp/mpn/x86/p6/dive_1.asm
index aa7ba880c9..e8efc28eac 100644
--- a/gmp/mpn/x86/p6/dive_1.asm
+++ b/gmp/mpn/x86/p6/dive_1.asm
@@ -1,32 +1,21 @@
dnl Intel P6 mpn_modexact_1_odd -- exact division style remainder.
dnl Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -117,7 +106,7 @@ ifdef(`PIC',`
subl %eax, %ebp C inv = 2*inv - inv*inv*d
- ASSERT(e,` C d*inv == 1 mod 2^GMP_LIMB_BITS
+ ASSERT(e,` C d*inv == 1 mod 2^BITS_PER_MP_LIMB
movl PARAM_DIVISOR, %eax
imull %ebp, %eax
cmpl $1, %eax')
@@ -138,7 +127,7 @@ C subl %edx, %eax 1
C imull %ebp, %eax 4
C mull PARAM_DIVISOR 5
C ----
-C total 10
+C total 10
C
C and this is the measured speed. No special scheduling is necessary, out
C of order execution hides the load latency.
diff --git a/gmp/mpn/x86/p6/gcd_1.asm b/gmp/mpn/x86/p6/gcd_1.asm
deleted file mode 100644
index f6518f6e19..0000000000
--- a/gmp/mpn/x86/p6/gcd_1.asm
+++ /dev/null
@@ -1,156 +0,0 @@
-dnl x86 mpn_gcd_1 optimised for processors with fast BSF.
-
-dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked by Torbjorn Granlund.
-
-dnl Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C cycles/bit (approx)
-C AMD K7 7.80
-C AMD K8,K9 7.79
-C AMD K10 4.08
-C AMD bd1 ?
-C AMD bobcat 7.82
-C Intel P4-2 14.9
-C Intel P4-3/4 14.0
-C Intel P6/13 5.09
-C Intel core2 4.22
-C Intel NHM 5.00
-C Intel SBR 5.00
-C Intel atom 17.1
-C VIA nano ?
-C Numbers measured with: speed -CD -s16-32 -t16 mpn_gcd_1
-
-C Threshold of when to call bmod when U is one limb. Should be about
-C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
-define(`BMOD_THRES_LOG2', 6)
-
-
-define(`up', `%edi')
-define(`n', `%esi')
-define(`v0', `%edx')
-
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_gcd_1)
- push %edi
- push %esi
-
- mov 12(%esp), up
- mov 16(%esp), n
- mov 20(%esp), v0
-
- mov (up), %eax C U low limb
- or v0, %eax
- bsf %eax, %eax C min(ctz(u0),ctz(v0))
-
- bsf v0, %ecx
- shr %cl, v0
-
- push %eax C preserve common twos over call
- push v0 C preserve v0 argument over call
-
- cmp $1, n
- jnz L(reduce_nby1)
-
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
- mov (up), %ecx
- mov %ecx, %eax
- shr $BMOD_THRES_LOG2, %ecx
- cmp %ecx, v0
- ja L(reduced)
- jmp L(bmod)
-
-L(reduce_nby1):
- cmp $BMOD_1_TO_MOD_1_THRESHOLD, n
- jl L(bmod)
-ifdef(`PIC_WITH_EBX',`
- push %ebx
- call L(movl_eip_to_ebx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
-')
- push v0 C param 3
- push n C param 2
- push up C param 1
- CALL( mpn_mod_1)
- jmp L(called)
-
-L(bmod):
-ifdef(`PIC_WITH_EBX',`dnl
- push %ebx
- call L(movl_eip_to_ebx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
-')
- push v0 C param 3
- push n C param 2
- push up C param 1
- CALL( mpn_modexact_1_odd)
-
-L(called):
- add $12, %esp C deallocate params
-ifdef(`PIC_WITH_EBX',`dnl
- pop %ebx
-')
-L(reduced):
- pop %edx
-
- bsf %eax, %ecx
-C test %eax, %eax C FIXME: does this lower latency?
- jnz L(mid)
- jmp L(end)
-
- ALIGN(16) C K10 BD C2 NHM SBR
-L(top): cmovc( %esi, %eax) C if x-y < 0 0,3 0,3 0,6 0,5 0,5
- cmovc( %edi, %edx) C use x,y-x 0,3 0,3 2,8 1,7 1,7
-L(mid): shr %cl, %eax C 1,7 1,6 2,8 2,8 2,8
- mov %edx, %esi C 1 1 4 3 3
- sub %eax, %esi C 2 2 5 4 4
- bsf %esi, %ecx C 3 3 6 5 5
- mov %eax, %edi C 2 2 3 3 4
- sub %edx, %eax C 2 2 4 3 4
- jnz L(top) C
-
-L(end): pop %ecx
- mov %edx, %eax
- shl %cl, %eax
-
- pop %esi
- pop %edi
- ret
-
-ifdef(`PIC_WITH_EBX',`dnl
-L(movl_eip_to_ebx):
- mov (%esp), %ebx
- ret
-')
-EPILOGUE()
diff --git a/gmp/mpn/x86/p6/gmp-mparam.h b/gmp/mpn/x86/p6/gmp-mparam.h
index 96c96fd558..a85c500275 100644
--- a/gmp/mpn/x86/p6/gmp-mparam.h
+++ b/gmp/mpn/x86/p6/gmp-mparam.h
@@ -1,194 +1,70 @@
/* Intel P6 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 1999-2003, 2008-2010, 2012 Free Software
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-or both in parallel, as here.
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-
-/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be more than the
- value in mpn/x86/p6/gmp-mparam.h. The latter is used as a hard limit in
- mpn/x86/p6/sqr_basecase.asm. */
-
-
-/* 1867 MHz P6 model 13 */
-
-#define MOD_1_NORM_THRESHOLD 4
-#define MOD_1_UNNORM_THRESHOLD 4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 21
-
-#define MUL_TOOM22_THRESHOLD 20
-#define MUL_TOOM33_THRESHOLD 74
-#define MUL_TOOM44_THRESHOLD 181
-#define MUL_TOOM6H_THRESHOLD 252
-#define MUL_TOOM8H_THRESHOLD 363
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 115
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 80
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 30
-#define SQR_TOOM3_THRESHOLD 101
-#define SQR_TOOM4_THRESHOLD 154
-#define SQR_TOOM6_THRESHOLD 222
-#define SQR_TOOM8_THRESHOLD 527
-
-#define MULMID_TOOM42_THRESHOLD 58
-
-#define MULMOD_BNM1_THRESHOLD 13
-#define SQRMOD_BNM1_THRESHOLD 17
-
-#define POWM_SEC_TABLE 4,23,258,768,2388
-
-#define MUL_FFT_MODF_THRESHOLD 565 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 565, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
- { 25, 7}, { 13, 6}, { 28, 7}, { 15, 6}, \
- { 31, 7}, { 17, 6}, { 35, 7}, { 27, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
- { 31, 7}, { 63, 8}, { 39, 9}, { 23, 5}, \
- { 383, 4}, { 991, 5}, { 511, 6}, { 267, 7}, \
- { 157, 8}, { 91, 9}, { 47, 8}, { 111, 9}, \
- { 63, 8}, { 127, 9}, { 79,10}, { 47, 9}, \
- { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
- { 79, 9}, { 159,10}, { 95,11}, { 63,10}, \
- { 143, 9}, { 287,10}, { 159,11}, { 95,10}, \
- { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \
- { 511,10}, { 271, 9}, { 543,10}, { 287,11}, \
- { 159,10}, { 335, 9}, { 671,11}, { 191,10}, \
- { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \
- { 415,11}, { 223,12}, { 127,11}, { 255,10}, \
- { 543, 9}, { 1087,11}, { 287,10}, { 607,11}, \
- { 319,10}, { 671,12}, { 191,11}, { 383,10}, \
- { 799,11}, { 415,10}, { 831,13}, { 127,12}, \
- { 255,11}, { 543,10}, { 1087,11}, { 607,10}, \
- { 1215,12}, { 319,11}, { 671,10}, { 1343,11}, \
- { 735,10}, { 1471,12}, { 383,11}, { 799,10}, \
- { 1599,11}, { 863,12}, { 447,11}, { 959,13}, \
- { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
- { 1215,12}, { 639,11}, { 1343,12}, { 703,11}, \
- { 1471,13}, { 383,12}, { 831,11}, { 1727,12}, \
- { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \
- { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \
- { 1727,13}, { 895,12}, { 1919,14}, { 511,13}, \
- { 1023,12}, { 2111,13}, { 1151,12}, { 2431,13}, \
- { 1407,12}, { 2815,14}, { 767,13}, { 1663,12}, \
- { 3455,13}, { 8192,14}, { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 132
-#define MUL_FFT_THRESHOLD 6784
-
-#define SQR_FFT_MODF_THRESHOLD 472 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 472, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
- { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
- { 31, 7}, { 17, 6}, { 35, 7}, { 27, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \
- { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \
- { 31, 8}, { 63, 4}, { 1023, 8}, { 67, 9}, \
- { 39, 5}, { 639, 4}, { 1471, 6}, { 383, 7}, \
- { 209, 8}, { 119, 9}, { 63, 7}, { 255, 8}, \
- { 139, 9}, { 71, 8}, { 143, 9}, { 79,10}, \
- { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
- { 135,10}, { 79, 9}, { 159, 8}, { 319, 9}, \
- { 167,10}, { 95,11}, { 63,10}, { 143, 9}, \
- { 287,10}, { 159,11}, { 95,10}, { 191,12}, \
- { 63,11}, { 127,10}, { 255, 9}, { 543, 8}, \
- { 1087,10}, { 287, 9}, { 575,11}, { 159,10}, \
- { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \
- { 351, 9}, { 703,11}, { 191,10}, { 383, 9}, \
- { 767,10}, { 399, 9}, { 799,10}, { 415, 9}, \
- { 831,11}, { 223,12}, { 127,11}, { 255,10}, \
- { 543, 9}, { 1087,11}, { 287,10}, { 607, 9}, \
- { 1215,11}, { 319,10}, { 671, 9}, { 1343,11}, \
- { 351,10}, { 703,12}, { 191,11}, { 383,10}, \
- { 799,11}, { 415,10}, { 831,13}, { 127,12}, \
- { 255,11}, { 543,10}, { 1087,11}, { 607,12}, \
- { 319,11}, { 671,10}, { 1343,11}, { 735,12}, \
- { 383,11}, { 799,10}, { 1599,11}, { 863,12}, \
- { 447,11}, { 959,13}, { 255,12}, { 511,11}, \
- { 1087,12}, { 575,11}, { 1215,12}, { 639,11}, \
- { 1343,12}, { 703,11}, { 1471,13}, { 383,12}, \
- { 767,11}, { 1599,12}, { 831,11}, { 1727,12}, \
- { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \
- { 639,12}, { 1471,13}, { 767,12}, { 1727,13}, \
- { 895,12}, { 1919,14}, { 511,13}, { 1023,12}, \
- { 2111,13}, { 1151,12}, { 2431,13}, { 1407,14}, \
- { 767,13}, { 1663,12}, { 3455,13}, { 8192,14}, \
- { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 146
-#define SQR_FFT_THRESHOLD 5760
-
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 33
-#define MULLO_MUL_N_THRESHOLD 13463
-
-#define DC_DIV_QR_THRESHOLD 20
-#define DC_DIVAPPR_Q_THRESHOLD 56
-#define DC_BDIV_QR_THRESHOLD 60
-#define DC_BDIV_Q_THRESHOLD 134
-
-#define INV_MULMOD_BNM1_THRESHOLD 38
-#define INV_NEWTON_THRESHOLD 66
-#define INV_APPR_THRESHOLD 63
-
-#define BINV_NEWTON_THRESHOLD 250
-#define REDC_1_TO_REDC_N_THRESHOLD 63
-
-#define MU_DIV_QR_THRESHOLD 1164
-#define MU_DIVAPPR_Q_THRESHOLD 979
-#define MUPI_DIV_QR_THRESHOLD 38
-#define MU_BDIV_QR_THRESHOLD 1442
-#define MU_BDIV_Q_THRESHOLD 1470
-
-#define MATRIX22_STRASSEN_THRESHOLD 17
-#define HGCD_THRESHOLD 64
-#define HGCD_APPR_THRESHOLD 105
-#define HGCD_REDUCE_THRESHOLD 3524
-#define GCD_DC_THRESHOLD 386
-#define GCDEXT_DC_THRESHOLD 309
-#define JACOBI_BASE_METHOD 1
-
-#define GET_STR_DC_THRESHOLD 13
-#define GET_STR_PRECOMPUTE_THRESHOLD 26
-#define SET_STR_DC_THRESHOLD 587
-#define SET_STR_PRECOMPUTE_THRESHOLD 1104
+
+/* NOTE: In a fat binary build SQR_KARATSUBA_THRESHOLD here cannot be
+ smaller than the value in mpn/x86/p6/mmx/gmp-mparam.h. The former is
+ used as a hard limit in mpn/x86/p6/sqr_basecase.asm, and that file will
+ be run by the p6/mmx cpus (pentium2, pentium3). */
+
+
+/* 200MHz Pentium Pro */
+
+/* Generated by tuneup.c, 2003-02-12, gcc 2.95 */
+
+#define MUL_KARATSUBA_THRESHOLD 23
+#define MUL_TOOM3_THRESHOLD 140
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_KARATSUBA_THRESHOLD 52
+#define SQR_TOOM3_THRESHOLD 189
+
+#define DIV_SB_PREINV_THRESHOLD 0 /* always */
+#define DIV_DC_THRESHOLD 116
+#define POWM_THRESHOLD 131
+
+#define GCD_ACCEL_THRESHOLD 3
+#define JACOBI_BASE_METHOD 1
+
+#define USE_PREINV_DIVREM_1 0
+#define USE_PREINV_MOD_1 1 /* native */
+#define DIVREM_2_THRESHOLD 0 /* always */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always */
+
+#define GET_STR_DC_THRESHOLD 18
+#define GET_STR_PRECOMPUTE_THRESHOLD 23
+#define SET_STR_THRESHOLD 6093
+
+#define MUL_FFT_TABLE { 464, 928, 1920, 3584, 10240, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD 360
+#define MUL_FFT_THRESHOLD 2816
+
+#define SQR_FFT_TABLE { 528, 1184, 1920, 4608, 14336, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD 440
+#define SQR_FFT_THRESHOLD 2816
+
+#define MUL_FFT_TABLE2 {{1,4}, {305,5}, {321,4}, {337,5}, {353,4}, {369,5}, {801,6}, {833,5}, {865,6}, {897,5}, {929,6}, {961,5}, {993,6}, {1345,7}, {1409,6}, {1537,7}, {1665,6}, {1729,7}, {2689,8}, {2817,7}, {3201,8}, {3329,7}, {3457,8}, {3841,7}, {3969,8}, {4097,7}, {4225,8}, {4353,7}, {4481,8}, {5889,7}, {6017,8}, {6401,7}, {6529,8}, {6913,9}, {7681,8}, {8961,9}, {9729,8}, {9985,9}, {10241,8}, {11009,9}, {11777,8}, {12289,9}, {13825,10}, {15361,9}, {15873,8}, {16129,9}, {19969,10}, {23553,9}, {24065,8}, {24321,9}, {26113,10}, {27649,11}, {28673,10}, {31745,9}, {34305,10}, {34817,9}, {35329,10}, {39937,9}, {40449,10}, {48129,11}, {55297,10}, {56321,11}, {63489,10}, {80897,11}, {96257,10}, {97281,12}, {126977,11}, {129025,10}, {130049,9}, {130561,10}, {131073,11}, {133121,10}, {134145,11}, {137217,10}, {138241,11}, {161793,10}, {162817,11}, {194561,12}, {258049,11}, {260097,10}, {261121,9}, {261633,10}, {266241,11}, {268289,10}, {277505,11}, {292865,10}, {293889,9}, {294401,10}, {310273,9}, {310785,11}, {325633,10}, {326657,12}, {389121,13}, {516097,12}, {520193,11}, {522241,10}, {523265,11}, {555009,10}, {556033,11}, {587777,10}, {588801,11}, {620545,10}, {621569,9}, {622081,11}, {622593,12}, {651265,11}, {653313,10}, {654337,11}, {655361,10}, {657409,11}, {663553,10}, {664577,11}, {686081,10}, {687105,11}, {718849,10}, {719873,11}, {720897,10}, {722945,11}, {737281,10}, {740353,11}, {745473,10}, {749569,11}, {751617,10}, {752641,9}, {753153,11}, {753665,12}, {770049,11}, {774145,12}, {782337,11}, {786433,10}, {787457,11}, {817153,10}, {818177,11}, {849921,10}, {850945,11}, {854017,10}, {855041,11}, {862209,10}, {863233,11}, {866305,10}, {867329,11}, {876545,10}, {877569,11}, {882689,10}, {883713,9}, {884225,11}, {884737,13}, {1040385,12}, {1044481,11}, {1112065,10}, {1113089,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,10}, {1375233,12}, {1437697,11}, {1505281,10}, {1506305,12}, {1515521,13}, {1523713,12}, {1527809,13}, {1540097,12}, {1544193,13}, 
{1548289,12}, {1568769,11}, {1636353,10}, {1637377,12}, {1699841,11}, {MP_SIZE_T_MAX,0}}
+
+#define SQR_FFT_TABLE2 {{1,4}, {273,5}, {289,4}, {305,5}, {673,6}, {705,5}, {737,6}, {769,5}, {801,6}, {1345,7}, {1409,6}, {1537,7}, {1665,6}, {1729,7}, {2689,8}, {2817,7}, {3201,8}, {3329,7}, {3713,8}, {3841,7}, {4225,8}, {4865,7}, {4993,9}, {5121,8}, {6657,9}, {7681,8}, {8961,9}, {11777,8}, {12033,10}, {12289,8}, {12545,9}, {13825,10}, {14337,9}, {14849,10}, {15361,9}, {19969,10}, {23553,9}, {24577,11}, {30721,10}, {31745,9}, {32257,10}, {37889,9}, {38401,10}, {39937,9}, {40449,10}, {48129,11}, {63489,10}, {80897,11}, {96257,12}, {126977,11}, {129025,10}, {130049,11}, {194561,12}, {208897,11}, {210945,12}, {258049,11}, {260097,9}, {269313,10}, {277505,9}, {278017,11}, {278529,10}, {280577,11}, {282625,10}, {283649,11}, {284673,10}, {285697,11}, {286721,10}, {289793,11}, {290817,10}, {293889,9}, {294401,10}, {310273,9}, {310785,8}, {311041,10}, {311297,11}, {315393,10}, {321537,12}, {323585,11}, {325633,10}, {326657,12}, {331777,10}, {332801,12}, {389121,10}, {392193,9}, {392705,10}, {413697,9}, {414209,10}, {418817,9}, {419841,10}, {424961,9}, {425473,10}, {441345,9}, {441857,10}, {449537,9}, {450561,10}, {452609,9}, {453121,10}, {454657,9}, {455169,10}, {490497,12}, {491521,13}, {516097,12}, {520193,10}, {523265,11}, {555009,10}, {556033,11}, {587777,10}, {588801,11}, {620545,10}, {621569,9}, {622081,11}, {624641,12}, {626689,11}, {653313,10}, {654337,11}, {686081,10}, {687105,11}, {718849,10}, {720897,11}, {722945,10}, {724993,11}, {729089,10}, {734209,11}, {737281,10}, {744449,11}, {745473,10}, {747521,11}, {749569,10}, {752641,11}, {784385,10}, {785409,11}, {808961,10}, {809985,11}, {817153,10}, {818177,11}, {849921,10}, {850945,11}, {851969,10}, {852993,11}, {858113,10}, {859137,11}, {860161,10}, {861185,11}, {882689,10}, {883713,11}, {980993,13}, {1040385,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,10}, {1375233,12}, {1437697,11}, {1505281,10}, {1506305,12}, {1568769,11}, {1636353,10}, {1637377,12}, {MP_SIZE_T_MAX,0}}
diff --git a/gmp/mpn/x86/p6/lshsub_n.asm b/gmp/mpn/x86/p6/lshsub_n.asm
index 7ada213644..a3086bdbc2 100644
--- a/gmp/mpn/x86/p6/lshsub_n.asm
+++ b/gmp/mpn/x86/p6/lshsub_n.asm
@@ -1,38 +1,27 @@
dnl Intel P6 mpn_lshsub_n -- mpn papillion support.
dnl Copyright 2006 Free Software Foundation, Inc.
-
+dnl
dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C P6/13: 3.35 cycles/limb (separate mpn_sub_n + mpn_lshift needs 4.12)
-C (1) The loop is not scheduled in any way, and scheduling attempts have not
+C (1) The loop is not scheduled in any way, and scheduling attempts have not
C improved speed on P6/13. Presumably, the K7 will want scheduling, if it
C at all wants to use MMX.
C (2) We could save a register by not alternatingly using eax and edx in the
diff --git a/gmp/mpn/x86/p6/mmx/divrem_1.asm b/gmp/mpn/x86/p6/mmx/divrem_1.asm
index 5300616c14..8891f3a843 100644
--- a/gmp/mpn/x86/p6/mmx/divrem_1.asm
+++ b/gmp/mpn/x86/p6/mmx/divrem_1.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium-II mpn_divrem_1 -- mpn by limb division.
-dnl Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/p6/mmx/gmp-mparam.h b/gmp/mpn/x86/p6/mmx/gmp-mparam.h
index 35c3aadfc1..47602f562e 100644
--- a/gmp/mpn/x86/p6/mmx/gmp-mparam.h
+++ b/gmp/mpn/x86/p6/mmx/gmp-mparam.h
@@ -1,198 +1,79 @@
/* Intel P6/mmx gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 1999-2005, 2009, 2010 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2009
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-or both in parallel, as here.
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-
-/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be more than the
- value in mpn/x86/p6/gmp-mparam.h. The latter is used as a hard limit in
- mpn/x86/p6/sqr_basecase.asm. */
-
-
-/* 800 MHz P6 model 8 */
-
-#define MOD_1_NORM_THRESHOLD 4
-#define MOD_1_UNNORM_THRESHOLD 4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 9
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 7
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 10
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 17
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 49
-
-#define MUL_TOOM22_THRESHOLD 22
-#define MUL_TOOM33_THRESHOLD 73
-#define MUL_TOOM44_THRESHOLD 193
-#define MUL_TOOM6H_THRESHOLD 254
-#define MUL_TOOM8H_THRESHOLD 381
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 122
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 73
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 80
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 30
-#define SQR_TOOM3_THRESHOLD 81
-#define SQR_TOOM4_THRESHOLD 142
-#define SQR_TOOM6_THRESHOLD 258
-#define SQR_TOOM8_THRESHOLD 399
-
-#define MULMOD_BNM1_THRESHOLD 15
-#define SQRMOD_BNM1_THRESHOLD 18
-
-#define MUL_FFT_MODF_THRESHOLD 476 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 476, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
- { 21, 7}, { 11, 6}, { 25, 7}, { 13, 6}, \
- { 27, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \
- { 11, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \
- { 19, 7}, { 41, 8}, { 23, 7}, { 47, 8}, \
- { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
- { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \
- { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \
- { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \
- { 63, 8}, { 127, 9}, { 79,10}, { 47, 9}, \
- { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
- { 79, 9}, { 167,10}, { 95, 9}, { 199,10}, \
- { 111,11}, { 63,10}, { 127, 9}, { 255, 8}, \
- { 511,10}, { 143, 9}, { 287, 8}, { 575,10}, \
- { 159,11}, { 95,10}, { 191, 9}, { 383,10}, \
- { 207,12}, { 63,11}, { 127,10}, { 255, 9}, \
- { 511,10}, { 271, 9}, { 543, 8}, { 1087,10}, \
- { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \
- { 639,10}, { 351, 9}, { 703,11}, { 191,10}, \
- { 383, 9}, { 767,10}, { 415, 9}, { 831,11}, \
- { 223,10}, { 447,12}, { 127,11}, { 255,10}, \
- { 543, 9}, { 1087,11}, { 287,10}, { 607, 9}, \
- { 1215,11}, { 319,10}, { 671,11}, { 351,10}, \
- { 703,12}, { 191,11}, { 383,10}, { 767,11}, \
- { 415,10}, { 831,11}, { 447,13}, { 127,12}, \
- { 255,11}, { 543,10}, { 1087,11}, { 607,10}, \
- { 1215,12}, { 319,11}, { 671,10}, { 1343,11}, \
- { 703,10}, { 1407,11}, { 735,12}, { 383,11}, \
- { 831,12}, { 447,11}, { 959,10}, { 1919,13}, \
- { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
- { 1215,10}, { 2431,12}, { 639,11}, { 1343,12}, \
- { 703,11}, { 1471,13}, { 383,12}, { 767,11}, \
- { 1535,12}, { 831,11}, { 1727,12}, { 959,11}, \
- { 1919,14}, { 255,13}, { 511,12}, { 1215,11}, \
- { 2431,13}, { 639,12}, { 1471,11}, { 2943,13}, \
- { 767,12}, { 1727,13}, { 895,12}, { 1919,11}, \
- { 3839,14}, { 511,13}, { 1023,12}, { 2111,13}, \
- { 1151,12}, { 2431,13}, { 1279,12}, { 2559,13}, \
- { 1407,12}, { 2943,14}, { 767,13}, { 1663,12}, \
- { 3327,13}, { 1919,12}, { 3839,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 160
-#define MUL_FFT_THRESHOLD 7040
-
-#define SQR_FFT_MODF_THRESHOLD 376 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 376, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
- { 21, 7}, { 11, 6}, { 24, 7}, { 13, 6}, \
- { 27, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \
- { 11, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
- { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
- { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \
- { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
- { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
- { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \
- { 95,11}, { 31,10}, { 63, 9}, { 127, 8}, \
- { 255, 9}, { 135,10}, { 79, 9}, { 167,10}, \
- { 95, 9}, { 191, 8}, { 383,10}, { 111,11}, \
- { 63,10}, { 127, 9}, { 255, 8}, { 511, 9}, \
- { 271,10}, { 143, 9}, { 287, 8}, { 575, 9}, \
- { 303, 8}, { 607,10}, { 159, 9}, { 319,11}, \
- { 95,10}, { 191, 9}, { 383,10}, { 207,12}, \
- { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
- { 271, 9}, { 543,10}, { 287, 9}, { 575,10}, \
- { 303,11}, { 159,10}, { 319, 9}, { 639,10}, \
- { 351, 9}, { 703,11}, { 191,10}, { 383, 9}, \
- { 767,10}, { 415, 9}, { 831,11}, { 223,10}, \
- { 479,12}, { 127,11}, { 255,10}, { 543, 9}, \
- { 1087,11}, { 287,10}, { 607, 9}, { 1215,11}, \
- { 319,10}, { 671,11}, { 351,10}, { 703,12}, \
- { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
- { 831,11}, { 479,13}, { 127,12}, { 255,11}, \
- { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \
- { 319,11}, { 671,10}, { 1343,11}, { 703,10}, \
- { 1407,11}, { 735,12}, { 383,11}, { 831,12}, \
- { 447,11}, { 959,10}, { 1919,13}, { 255,12}, \
- { 511,11}, { 1087,12}, { 575,11}, { 1215,10}, \
- { 2431,12}, { 639,11}, { 1343,12}, { 703,11}, \
- { 1407,13}, { 383,12}, { 831,11}, { 1727,12}, \
- { 959,11}, { 1919,14}, { 255,13}, { 511,12}, \
- { 1215,11}, { 2431,13}, { 639,12}, { 1471,11}, \
- { 2943,13}, { 767,12}, { 1727,13}, { 895,12}, \
- { 1919,11}, { 3839,14}, { 511,13}, { 1023,12}, \
- { 2111,13}, { 1151,12}, { 2431,13}, { 1407,12}, \
- { 2943,14}, { 767,13}, { 1535,12}, { 3071,13}, \
- { 1663,12}, { 3455,13}, { 1919,12}, { 3839,15}, \
- { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 161
-#define SQR_FFT_THRESHOLD 3712
-
-#define MULLO_BASECASE_THRESHOLD 8
-#define MULLO_DC_THRESHOLD 60
-#define MULLO_MUL_N_THRESHOLD 13765
-
-#define DC_DIV_QR_THRESHOLD 83
-#define DC_DIVAPPR_Q_THRESHOLD 246
-#define DC_BDIV_QR_THRESHOLD 76
-#define DC_BDIV_Q_THRESHOLD 175
-
-#define INV_MULMOD_BNM1_THRESHOLD 42
-#define INV_NEWTON_THRESHOLD 268
-#define INV_APPR_THRESHOLD 250
-
-#define BINV_NEWTON_THRESHOLD 276
-#define REDC_1_TO_REDC_N_THRESHOLD 74
-
-#define MU_DIV_QR_THRESHOLD 1442
-#define MU_DIVAPPR_Q_THRESHOLD 1442
-#define MUPI_DIV_QR_THRESHOLD 132
-#define MU_BDIV_QR_THRESHOLD 1142
-#define MU_BDIV_Q_THRESHOLD 1334
-
-#define MATRIX22_STRASSEN_THRESHOLD 18
-#define HGCD_THRESHOLD 121
-#define GCD_DC_THRESHOLD 478
-#define GCDEXT_DC_THRESHOLD 361
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 13
-#define GET_STR_PRECOMPUTE_THRESHOLD 26
-#define SET_STR_DC_THRESHOLD 272
-#define SET_STR_PRECOMPUTE_THRESHOLD 1074
+
+/* NOTE: In a fat binary build SQR_KARATSUBA_THRESHOLD here cannot be more
+ than the value in mpn/x86/p6/gmp-mparam.h. The latter is used as a hard
+ limit in mpn/x86/p6/sqr_basecase.asm. */
+
+
+/* 1867 MHz P6 model 13 */
+
+/* Generated by tuneup.c, 2009-03-02, gcc 4.3 */
+
+#define MUL_KARATSUBA_THRESHOLD 20
+#define MUL_TOOM3_THRESHOLD 74
+#define MUL_TOOM44_THRESHOLD 166
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD 30
+#define SQR_TOOM3_THRESHOLD 101
+#define SQR_TOOM4_THRESHOLD 154
+
+#define MULLOW_BASECASE_THRESHOLD 7
+#define MULLOW_DC_THRESHOLD 39
+#define MULLOW_MUL_N_THRESHOLD 230
+
+#define DIV_SB_PREINV_THRESHOLD 0 /* always */
+#define DIV_DC_THRESHOLD 21
+#define POWM_THRESHOLD 154
+
+#define MATRIX22_STRASSEN_THRESHOLD 23
+#define HGCD_THRESHOLD 72
+#define GCD_DC_THRESHOLD 321
+#define GCDEXT_DC_THRESHOLD 416
+#define JACOBI_BASE_METHOD 1
+
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define USE_PREINV_MOD_1 1 /* native */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */
+
+#define GET_STR_DC_THRESHOLD 15
+#define GET_STR_PRECOMPUTE_THRESHOLD 24
+#define SET_STR_DC_THRESHOLD 587
+#define SET_STR_PRECOMPUTE_THRESHOLD 1083
+
+#define MUL_FFT_TABLE { 400, 928, 1664, 4608, 10240, 57344, 163840, 393216, 0 }
+#define MUL_FFT_MODF_THRESHOLD 496
+#define MUL_FFT_THRESHOLD 7168
+
+#define SQR_FFT_TABLE { 432, 928, 1664, 3584, 10240, 40960, 98304, 393216, 0 }
+#define SQR_FFT_MODF_THRESHOLD 448
+#define SQR_FFT_THRESHOLD 3840
+
+/* These tables need updating */
+#define MUL_FFT_TABLE2 {{1,4}, {305,5}, {321,4}, {337,5}, {353,4}, {369,5}, {801,6}, {833,5}, {865,6}, {897,5}, {929,6}, {961,5}, {993,6}, {1345,7}, {1409,6}, {1537,7}, {1665,6}, {1729,7}, {2689,8}, {2817,7}, {3201,8}, {3329,7}, {3457,8}, {3841,7}, {3969,8}, {4097,7}, {4225,8}, {4353,7}, {4481,8}, {5889,7}, {6017,8}, {6401,7}, {6529,8}, {6913,9}, {7681,8}, {8961,9}, {9729,8}, {9985,9}, {10241,8}, {11009,9}, {11777,8}, {12289,9}, {13825,10}, {15361,9}, {15873,8}, {16129,9}, {19969,10}, {23553,9}, {24065,8}, {24321,9}, {26113,10}, {27649,11}, {28673,10}, {31745,9}, {34305,10}, {34817,9}, {35329,10}, {39937,9}, {40449,10}, {48129,11}, {55297,10}, {56321,11}, {63489,10}, {80897,11}, {96257,10}, {97281,12}, {126977,11}, {129025,10}, {130049,9}, {130561,10}, {131073,11}, {133121,10}, {134145,11}, {137217,10}, {138241,11}, {161793,10}, {162817,11}, {194561,12}, {258049,11}, {260097,10}, {261121,9}, {261633,10}, {266241,11}, {268289,10}, {277505,11}, {292865,10}, {293889,9}, {294401,10}, {310273,9}, {310785,11}, {325633,10}, {326657,12}, {389121,13}, {516097,12}, {520193,11}, {522241,10}, {523265,11}, {555009,10}, {556033,11}, {587777,10}, {588801,11}, {620545,10}, {621569,9}, {622081,11}, {622593,12}, {651265,11}, {653313,10}, {654337,11}, {655361,10}, {657409,11}, {663553,10}, {664577,11}, {686081,10}, {687105,11}, {718849,10}, {719873,11}, {720897,10}, {722945,11}, {737281,10}, {740353,11}, {745473,10}, {749569,11}, {751617,10}, {752641,9}, {753153,11}, {753665,12}, {770049,11}, {774145,12}, {782337,11}, {786433,10}, {787457,11}, {817153,10}, {818177,11}, {849921,10}, {850945,11}, {854017,10}, {855041,11}, {862209,10}, {863233,11}, {866305,10}, {867329,11}, {876545,10}, {877569,11}, {882689,10}, {883713,9}, {884225,11}, {884737,13}, {1040385,12}, {1044481,11}, {1112065,10}, {1113089,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,10}, {1375233,12}, {1437697,11}, {1505281,10}, {1506305,12}, {1515521,13}, {1523713,12}, {1527809,13}, {1540097,12}, {1544193,13}, 
{1548289,12}, {1568769,11}, {1636353,10}, {1637377,12}, {1699841,11}, {MP_SIZE_T_MAX,0}}
+
+#define SQR_FFT_TABLE2 {{1,4}, {273,5}, {289,4}, {305,5}, {673,6}, {705,5}, {737,6}, {769,5}, {801,6}, {1345,7}, {1409,6}, {1537,7}, {1665,6}, {1729,7}, {2689,8}, {2817,7}, {3201,8}, {3329,7}, {3713,8}, {3841,7}, {4225,8}, {4865,7}, {4993,9}, {5121,8}, {6657,9}, {7681,8}, {8961,9}, {11777,8}, {12033,10}, {12289,8}, {12545,9}, {13825,10}, {14337,9}, {14849,10}, {15361,9}, {19969,10}, {23553,9}, {24577,11}, {30721,10}, {31745,9}, {32257,10}, {37889,9}, {38401,10}, {39937,9}, {40449,10}, {48129,11}, {63489,10}, {80897,11}, {96257,12}, {126977,11}, {129025,10}, {130049,11}, {194561,12}, {208897,11}, {210945,12}, {258049,11}, {260097,9}, {269313,10}, {277505,9}, {278017,11}, {278529,10}, {280577,11}, {282625,10}, {283649,11}, {284673,10}, {285697,11}, {286721,10}, {289793,11}, {290817,10}, {293889,9}, {294401,10}, {310273,9}, {310785,8}, {311041,10}, {311297,11}, {315393,10}, {321537,12}, {323585,11}, {325633,10}, {326657,12}, {331777,10}, {332801,12}, {389121,10}, {392193,9}, {392705,10}, {413697,9}, {414209,10}, {418817,9}, {419841,10}, {424961,9}, {425473,10}, {441345,9}, {441857,10}, {449537,9}, {450561,10}, {452609,9}, {453121,10}, {454657,9}, {455169,10}, {490497,12}, {491521,13}, {516097,12}, {520193,10}, {523265,11}, {555009,10}, {556033,11}, {587777,10}, {588801,11}, {620545,10}, {621569,9}, {622081,11}, {624641,12}, {626689,11}, {653313,10}, {654337,11}, {686081,10}, {687105,11}, {718849,10}, {720897,11}, {722945,10}, {724993,11}, {729089,10}, {734209,11}, {737281,10}, {744449,11}, {745473,10}, {747521,11}, {749569,10}, {752641,11}, {784385,10}, {785409,11}, {808961,10}, {809985,11}, {817153,10}, {818177,11}, {849921,10}, {850945,11}, {851969,10}, {852993,11}, {858113,10}, {859137,11}, {860161,10}, {861185,11}, {882689,10}, {883713,11}, {980993,13}, {1040385,11}, {1112065,12}, {1175553,11}, {1243137,12}, {1306625,11}, {1374209,10}, {1375233,12}, {1437697,11}, {1505281,10}, {1506305,12}, {1568769,11}, {1636353,10}, {1637377,12}, {MP_SIZE_T_MAX,0}}
diff --git a/gmp/mpn/x86/p6/mmx/lshift.asm b/gmp/mpn/x86/p6/mmx/lshift.asm
index febd1c0e6c..e325b67d64 100644
--- a/gmp/mpn/x86/p6/mmx/lshift.asm
+++ b/gmp/mpn/x86/p6/mmx/lshift.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium-II mpn_lshift -- mpn left shift.
dnl Copyright 2001 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
dnl The P55 code runs well on P-II/III, but could stand some minor tweaks
diff --git a/gmp/mpn/x86/p6/mmx/popham.asm b/gmp/mpn/x86/p6/mmx/popham.asm
index fd340e4b45..421daa5308 100644
--- a/gmp/mpn/x86/p6/mmx/popham.asm
+++ b/gmp/mpn/x86/p6/mmx/popham.asm
@@ -2,32 +2,21 @@ dnl Intel Pentium-II mpn_popcount, mpn_hamdist -- population count and
dnl hamming distance.
dnl Copyright 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/p6/mmx/rshift.asm b/gmp/mpn/x86/p6/mmx/rshift.asm
index 77aa1909fa..b1543cdf52 100644
--- a/gmp/mpn/x86/p6/mmx/rshift.asm
+++ b/gmp/mpn/x86/p6/mmx/rshift.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium-II mpn_rshift -- mpn right shift.
dnl Copyright 2001 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
dnl The P55 code runs well on P-II/III, but could stand some minor tweaks
diff --git a/gmp/mpn/x86/p6/mod_1.asm b/gmp/mpn/x86/p6/mod_1.asm
new file mode 100644
index 0000000000..b6eacf7e82
--- /dev/null
+++ b/gmp/mpn/x86/p6/mod_1.asm
@@ -0,0 +1,472 @@
+dnl Intel P6 mpn_mod_1 -- mpn by limb remainder.
+
+dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P6: 21.5 cycles/limb
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t carry);
+C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t inverse);
+C
+C The code here is in two parts, a simple divl loop and a mul-by-inverse.
+C The divl is used by mod_1 and mod_1c for small sizes, until the savings in
+C the mul-by-inverse can overcome the time to calculate an inverse.
+C preinv_mod_1 goes straight to the mul-by-inverse.
+C
+C The mul-by-inverse normalizes the divisor (or for preinv_mod_1 it's
+C already normalized). The calculation done is r=a%(d*2^n) followed by a
+C final (r*2^n)%(d*2^n), where a is the dividend, d the divisor, and n is
+C the number of leading zero bits on d. This means there are no bit shifts in
+C the main loop, at the cost of an extra divide step at the end.
+C
+C The simple divl for mod_1 is able to skip one divide step if high<divisor.
+C For mod_1c the carry parameter is the high of the first divide step, and
+C no attempt is made to skip that step since carry==0 will be very rare.
+C
+C The mul-by-inverse always skips one divide step, but then needs an extra
+C step at the end, unless the divisor was already normalized (n==0). This
+C leads to different mul-by-inverse thresholds for normalized and
+C unnormalized divisors, in mod_1 and mod_1c.
+C
+C Alternatives:
+C
+C If n is small then the extra divide step could be done by a few shift and
+C trial subtract steps instead of a full divide. That would probably be 3
+C or 4 cycles/bit, so say up to n=8 might benefit from that over a 21 cycle
+C divide. However it's considered that small divisors, meaning biggish n,
+C are more likely than small n, and that it's not worth the branch
+C mispredicts of a loop.
+C
+C Past:
+C
+C There used to be some MMX based code for P-II and P-III, roughly following
+C the K7 form, but it was slower (about 24.0 c/l) than the code here. That
+C code did have an advantage that mod_1 was able to do one less divide step
+C when high<divisor and the divisor unnormalized, but the speed advantage of
+C the current code soon overcomes that.
+C
+C Future:
+C
+C It's not clear whether what's here is optimal. A rough count of micro-ops
+C on the dependent chain would suggest a couple of cycles could be shaved,
+C perhaps.
+
+
+dnl The following thresholds are the sizes where the multiply by inverse
+dnl method is used instead of plain divl's. Minimum value 2 each.
+dnl
+dnl MUL_NORM_THRESHOLD is for normalized divisors (high bit set),
+dnl MUL_UNNORM_THRESHOLD for unnormalized divisors.
+dnl
+dnl With the divl loop at 39 c/l, and the inverse loop at 21.5 c/l but
+dnl setups for the inverse of about 50, the threshold should be around
+dnl 50/(39-21.5)==2.85. An unnormalized divisor gets an extra divide step
+dnl at the end, so if that's about 25 cycles then that threshold might be
+dnl around (50+25)/(39-21.5) == 4.3.
+
+deflit(MUL_NORM_THRESHOLD, 4)
+deflit(MUL_UNNORM_THRESHOLD, 5)
+
+deflit(MUL_NORM_DELTA, eval(MUL_NORM_THRESHOLD - MUL_UNNORM_THRESHOLD)) dnl added to MUL_UNNORM_THRESHOLD when the divisor high bit is set
+
+
+defframe(PARAM_INVERSE, 16) dnl mpn_preinv_mod_1
+defframe(PARAM_CARRY, 16) dnl mpn_mod_1c
+defframe(PARAM_DIVISOR, 12)
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+
+defframe(SAVE_EBX, -4) dnl slot holding the entry value of ebx
+defframe(SAVE_ESI, -8) dnl slot holding the entry value of esi
+defframe(SAVE_EDI, -12) dnl slot holding the entry value of edi
+defframe(SAVE_EBP, -16) dnl slot holding the entry value of ebp
+
+defframe(VAR_NORM, -20) dnl divisor normalization shift count, 0 for mpn_preinv_mod_1
+defframe(VAR_INVERSE, -24) dnl inverse multiplier read by the mul-by-inverse loop
+
+deflit(STACK_SPACE, 24) dnl bytes for the four SAVE_ and two VAR_ slots above
+
+ TEXT
+
+ ALIGN(16)
+PROLOGUE(mpn_preinv_mod_1)
+deflit(`FRAME',0)
+C Entry with a caller-supplied inverse: set up registers, join L(preinv_entry).
+ movl PARAM_SRC, %edx C edx = src
+ subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE)
+
+ movl %ebx, SAVE_EBX
+ movl PARAM_SIZE, %ebx C ebx = size
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp C ebp = divisor
+
+ movl %esi, SAVE_ESI
+ movl PARAM_INVERSE, %eax C eax = inverse, stored at L(preinv_entry)
+
+ movl %edi, SAVE_EDI
+ movl -4(%edx,%ebx,4), %edi C src high limb
+
+ movl $0, VAR_NORM C shift count 0, divisor used as given
+ leal -8(%edx,%ebx,4), %ecx C &src[size-2]
+
+ C
+
+ movl %edi, %esi C copy of high limb for the restore below
+ subl %ebp, %edi C high-divisor
+
+ cmovc( %esi, %edi) C restore if underflow
+ decl %ebx C high limb consumed, size-1 limbs remain
+ jnz L(preinv_entry) C limbs remain: enter the mul-by-inverse loop
+
+ jmp L(done_edi) C size==1: no loop iterations needed
+
+EPILOGUE()
+
+
+ ALIGN(16)
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+C Entry with an initial carry: picks the divl loop or mul-by-inverse by size.
+ movl PARAM_SIZE, %ecx C ecx = size
+ subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE)
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %eax C divisor copy for the high-bit test below
+
+ movl %esi, SAVE_ESI
+ movl PARAM_CARRY, %edx C edx = carry, the initial remainder
+
+ movl PARAM_SRC, %esi C esi = src
+ orl %ecx, %ecx C set flags on size
+ jz L(done_edx) C result==carry if size==0
+
+ sarl $31, %eax C all ones if divisor high bit set, else 0
+ movl PARAM_DIVISOR, %ebp C ebp = divisor
+
+ andl $MUL_NORM_DELTA, %eax C delta if divisor normalized, else 0
+
+ addl $MUL_UNNORM_THRESHOLD, %eax C eax = threshold for this divisor
+
+ cmpl %eax, %ecx
+ jb L(divide_top) C size below threshold: plain divl loop
+
+
+ C The carry parameter pretends to be the src high limb.
+
+ movl %ebx, SAVE_EBX
+ leal 1(%ecx), %ebx C size+1
+
+ movl %edx, %eax C carry
+ jmp L(mul_by_inverse_1c)
+
+EPILOGUE()
+
+
+ ALIGN(16)
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE)
+ movl $0, %edx C initial carry (if can't skip a div)
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %eax
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_DIVISOR, %ebp
+
+ movl PARAM_DIVISOR, %esi
+ orl %ecx, %ecx
+ jz L(done_edx)
+
+ movl -4(%eax,%ecx,4), %eax C src high limb
+
+ sarl $31, %ebp
+
+ andl $MUL_NORM_DELTA, %ebp
+
+ addl $MUL_UNNORM_THRESHOLD, %ebp
+ cmpl %esi, %eax C carry flag if high<divisor
+
+ cmovc( %eax, %edx) C src high limb as initial carry
+ movl PARAM_SRC, %esi
+
+ sbbl $0, %ecx C size-1 to skip one div
+ jz L(done_eax) C done if had size==1
+
+ cmpl %ebp, %ecx
+ movl PARAM_DIVISOR, %ebp
+ jae L(mul_by_inverse)
+
+
+L(divide_top):
+ C eax scratch (quotient)
+ C ebx
+ C ecx counter, limbs, decrementing
+ C edx scratch (remainder)
+ C esi src
+ C edi
+ C ebp divisor
+
+ movl -4(%esi,%ecx,4), %eax
+
+ divl %ebp
+
+ decl %ecx
+ jnz L(divide_top)
+
+
+L(done_edx):
+ movl %edx, %eax
+L(done_eax):
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+ C eax src high limb
+ C ebx
+ C ecx
+ C edx
+ C esi src
+ C edi
+ C ebp divisor
+ C
+ C Setup for the multiply by inverse loop: normalize the divisor,
+ C form the initial n2, compute the inverse and point ecx at the
+ C first limb the loop will read.
+
+ movl %ebx, SAVE_EBX
+ movl PARAM_SIZE, %ebx
+
+ C mpn_mod_1c joins here with SAVE_EBX already stored, ebx = size+1
+ C and eax = carry, so the leal below then yields &src[size-1].
+
+L(mul_by_inverse_1c):
+ bsrl %ebp, %ecx C 31-l
+
+ movl %edi, SAVE_EDI
+ xorl $31, %ecx C l
+
+ movl %ecx, VAR_NORM
+ shll %cl, %ebp C d normalized
+
+ movl %eax, %edi C src high -> n2
+ subl %ebp, %eax
+
+ cmovnc( %eax, %edi) C n2-divisor if no underflow
+
+ movl $-1, %eax
+ movl $-1, %edx
+
+ subl %ebp, %edx C (b-d)-1 so edx:eax = b*(b-d)-1
+ leal -8(%esi,%ebx,4), %ecx C &src[size-2]
+
+ divl %ebp C floor (b*(b-d)-1) / d
+
+ C NOTE(review): nothing in the visible part of this file jumps to
+ C preinv_entry, confirm whether it is still needed.
+
+L(preinv_entry):
+ movl %eax, VAR_INVERSE
+
+
+
+C No special scheduling of loads is necessary in this loop, out of order
+C execution hides the latencies already.
+C
+C The way q1+1 is generated in %ebx and d is moved to %eax for the multiply
+C seems fastest. The obvious change to generate q1+1 in %eax and then just
+C multiply by %ebp (as per mpn/x86/pentium/mod_1.asm in fact) runs 1 cycle
+C slower, for no obvious reason.
+
+
+ ALIGN(16)
+L(inverse_top):
+ C eax n10 (then scratch)
+ C ebx scratch (nadj, q1)
+ C ecx src pointer, decrementing
+ C edx scratch
+ C esi n10
+ C edi n2
+ C ebp divisor
+ C
+ C One iteration per src limb. On entry edi holds the running
+ C remainder n2, on exit edi holds the new remainder after the limb
+ C at ecx has been folded in, and ecx has stepped down by one limb.
+
+ movl (%ecx), %eax C next src limb
+ movl %eax, %esi
+
+ sarl $31, %eax C -n1
+ movl %ebp, %ebx
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %edi, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ addl %esi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ subl $4, %ecx C step to the next lower limb
+
+ C
+
+ C leal and movl do not modify flags, so the carry from the addl
+ C below is still intact when the adcl consumes it.
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%edi), %ebx C n2+1
+ movl %ebp, %eax C d
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+ jz L(q1_ff) C q1+1 wrapped to zero, q1 is 0xFFFFFFFF
+
+ mull %ebx C (q1+1)*d
+
+ C
+
+ subl %eax, %esi C low n - (q1+1)*d
+
+ sbbl %edx, %edi C high n - (q1+1)*d, 0 or -1
+
+ andl %ebp, %edi C d if underflow
+
+ addl %esi, %edi C remainder with addback if necessary
+
+ cmpl PARAM_SRC, %ecx
+ jae L(inverse_top) C loop while ecx still points at or above src
+
+
+C -----------------------------------------------------------------------------
+L(inverse_loop_done):
+
+ C %edi is the remainder modulo d*2^n and now must be reduced to
+ C 0<=r<d by calculating r*2^n mod d*2^n and then right shifting by
+ C n. If d was already normalized on entry so that n==0 then nothing
+ C is needed here. The chance of n==0 is low, but it's true of say
+ C PP from gmp-impl.h.
+ C
+ C Here n is the shift count stored in VAR_NORM by the setup code.
+ C
+ C eax
+ C ebx
+ C ecx
+ C edx
+ C esi
+ C edi remainder
+ C ebp divisor (normalized)
+
+ movl VAR_NORM, %ecx
+ movl $0, %esi C n2 starts at zero, shldl shifts bits into it
+
+ orl %ecx, %ecx
+ jz L(done_edi) C shift count zero, edi is already the result
+
+
+ C Here use %edi=n10 and %esi=n2, opposite to the loop above.
+ C
+ C The q1=0xFFFFFFFF case is handled with an sbbl to adjust q1+1
+ C back, rather than q1_ff special case code. This is simpler and
+ C costs only 2 uops.
+
+ shldl( %cl, %edi, %esi)
+
+ shll %cl, %edi
+
+ movl %edi, %eax C n10
+ movl %ebp, %ebx C d
+
+ sarl $31, %eax C -n1
+
+ andl %eax, %ebx C -n1 & d
+ negl %eax C n1
+
+ addl %edi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+ addl %esi, %eax C n2+n1
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ C
+
+ addl %ebx, %eax C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%esi), %ebx C n2+1
+
+ adcl %edx, %ebx C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %ebx C if the adcl carried out, step back to 0xFFFFFFFF
+ movl %ebp, %eax C d
+
+ mull %ebx C (q1+1)*d
+
+ movl SAVE_EBX, %ebx C ebx no longer needed, restore caller value
+
+ C
+
+ subl %eax, %edi C low n - (q1+1)*d is remainder
+
+ sbbl %edx, %esi C high n - (q1+1)*d, 0 or -1
+
+ andl %ebp, %esi C d if underflow
+ movl SAVE_EBP, %ebp
+
+ leal (%esi,%edi), %eax C remainder
+ movl SAVE_ESI, %esi
+
+ shrl %cl, %eax C denorm remainder
+ movl SAVE_EDI, %edi
+ addl $STACK_SPACE, %esp
+
+ ret
+
+
+L(done_edi):
+ C Exit used when the stored shift count is zero. The remainder in
+ C edi is returned as is, and all four saved registers are restored
+ C because the multiply by inverse path stored ebx and edi as well as
+ C esi and ebp.
+
+ movl SAVE_EBX, %ebx
+ movl %edi, %eax C return value
+
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EDI, %edi
+
+ movl SAVE_EBP, %ebp
+ addl $STACK_SPACE, %esp
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C
+C Special case for q1=0xFFFFFFFF, giving q=0xFFFFFFFF meaning the low dword
+C of q*d is simply -d and the remainder n-q*d = n10+d.
+C
+C This is reached only very rarely.
+
+L(q1_ff):
+ C eax (divisor)
+ C ebx (q1+1 == 0)
+ C ecx src pointer
+ C edx
+ C esi n10
+ C edi (n2)
+ C ebp divisor
+ C
+ C Reached from inverse_top after ecx has already been stepped down,
+ C so the loop test is repeated here before either continuing the
+ C loop or going to the final reduction.
+
+ leal (%ebp,%esi), %edi C n-q*d remainder -> next n2
+
+ cmpl PARAM_SRC, %ecx
+ jae L(inverse_top) C more limbs remain
+
+ jmp L(inverse_loop_done)
+
+
+EPILOGUE()
diff --git a/gmp/mpn/x86/p6/mod_34lsub1.asm b/gmp/mpn/x86/p6/mod_34lsub1.asm
index b88ab5d17c..5e854b7274 100644
--- a/gmp/mpn/x86/p6/mod_34lsub1.asm
+++ b/gmp/mpn/x86/p6/mod_34lsub1.asm
@@ -1,32 +1,21 @@
dnl Intel P6 mpn_mod_34lsub1 -- remainder modulo 2^24-1.
-dnl Copyright 2000-2002, 2004 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/p6/mode1o.asm b/gmp/mpn/x86/p6/mode1o.asm
index c62b676e5a..4aff48d7e6 100644
--- a/gmp/mpn/x86/p6/mode1o.asm
+++ b/gmp/mpn/x86/p6/mode1o.asm
@@ -1,32 +1,21 @@
dnl Intel P6 mpn_modexact_1_odd -- exact division style remainder.
-dnl Copyright 2000-2002, 2007 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002, 2007 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -112,7 +101,7 @@ ifdef(`PIC',`
subl %eax, %edi C inv = 2*inv - inv*inv*d
- ASSERT(e,` C d*inv == 1 mod 2^GMP_LIMB_BITS
+ ASSERT(e,` C d*inv == 1 mod 2^BITS_PER_MP_LIMB
movl PARAM_DIVISOR, %eax
imull %edi, %eax
cmpl $1, %eax')
@@ -124,7 +113,7 @@ C subl %edx, %eax 1
C imull %edi, %eax 4
C mull PARAM_DIVISOR 5
C ----
-C total 10
+C total 10
C
C and this is the measured speed. No special scheduling is necessary, out
C of order execution hides the load latency.
diff --git a/gmp/mpn/x86/p6/mul_basecase.asm b/gmp/mpn/x86/p6/mul_basecase.asm
index d87bc12b60..fc1afbdf0e 100644
--- a/gmp/mpn/x86/p6/mul_basecase.asm
+++ b/gmp/mpn/x86/p6/mul_basecase.asm
@@ -1,32 +1,21 @@
dnl Intel P6 mpn_mul_basecase -- multiply two mpn numbers.
-dnl Copyright 1999-2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/p6/p3mmx/popham.asm b/gmp/mpn/x86/p6/p3mmx/popham.asm
index db2f2601c9..2f58968a31 100644
--- a/gmp/mpn/x86/p6/p3mmx/popham.asm
+++ b/gmp/mpn/x86/p6/p3mmx/popham.asm
@@ -2,32 +2,21 @@ dnl Intel Pentium-III mpn_popcount, mpn_hamdist -- population count and
dnl hamming distance.
dnl Copyright 2000, 2002, 2004, 2007 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/p6/sqr_basecase.asm b/gmp/mpn/x86/p6/sqr_basecase.asm
index 8fc7fdf375..05a31f1a15 100644
--- a/gmp/mpn/x86/p6/sqr_basecase.asm
+++ b/gmp/mpn/x86/p6/sqr_basecase.asm
@@ -1,32 +1,21 @@
dnl Intel P6 mpn_sqr_basecase -- square an mpn number.
dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -38,15 +27,15 @@ C which is the Karatsuba recursing range).
dnl These are the same as in mpn/x86/k6/sqr_basecase.asm, see that file for
dnl a description. The only difference here is that UNROLL_COUNT can go up
-dnl to 64 (not 63) making SQR_TOOM2_THRESHOLD_MAX 67.
+dnl to 64 (not 63) making SQR_KARATSUBA_THRESHOLD_MAX 67.
-deflit(SQR_TOOM2_THRESHOLD_MAX, 67)
+deflit(SQR_KARATSUBA_THRESHOLD_MAX, 67)
-ifdef(`SQR_TOOM2_THRESHOLD_OVERRIDE',
-`define(`SQR_TOOM2_THRESHOLD',SQR_TOOM2_THRESHOLD_OVERRIDE)')
+ifdef(`SQR_KARATSUBA_THRESHOLD_OVERRIDE',
+`define(`SQR_KARATSUBA_THRESHOLD',SQR_KARATSUBA_THRESHOLD_OVERRIDE)')
-m4_config_gmp_mparam(`SQR_TOOM2_THRESHOLD')
-deflit(UNROLL_COUNT, eval(SQR_TOOM2_THRESHOLD-3))
+m4_config_gmp_mparam(`SQR_KARATSUBA_THRESHOLD')
+deflit(UNROLL_COUNT, eval(SQR_KARATSUBA_THRESHOLD-3))
C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
diff --git a/gmp/mpn/x86/p6/sse2/addmul_1.asm b/gmp/mpn/x86/p6/sse2/addmul_1.asm
index 144b627aa3..b601c54bcf 100644
--- a/gmp/mpn/x86/p6/sse2/addmul_1.asm
+++ b/gmp/mpn/x86/p6/sse2/addmul_1.asm
@@ -1,32 +1,21 @@
dnl Intel P6/SSE2 mpn_addmul_1.
dnl Copyright 2008 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/p6/sse2/gmp-mparam.h b/gmp/mpn/x86/p6/sse2/gmp-mparam.h
index 69226289a7..843227b99a 100644
--- a/gmp/mpn/x86/p6/sse2/gmp-mparam.h
+++ b/gmp/mpn/x86/p6/sse2/gmp-mparam.h
@@ -1,197 +1,74 @@
/* Intel P6/sse2 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 1999-2003, 2008-2010 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2008, 2009
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-or both in parallel, as here.
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+/* NOTE: In a fat binary build SQR_KARATSUBA_THRESHOLD here cannot be more
+ than the value in mpn/x86/p6/gmp-mparam.h. The latter is used as a hard
+ limit in mpn/x86/p6/sqr_basecase.asm. */
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+/* 1867 MHz P6 model 13 */
+/* Generated by tuneupc, 2008-10-30, gcc 4.3 */
-/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be more than the
- value in mpn/x86/p6/gmp-mparam.h. The latter is used as a hard limit in
- mpn/x86/p6/sqr_basecase.asm. */
+#define MUL_KARATSUBA_THRESHOLD 20
+#define MUL_TOOM3_THRESHOLD 77
+#define MUL_TOOM44_THRESHOLD 142
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD 30
+#define SQR_TOOM3_THRESHOLD 101
+#define SQR_TOOM4_THRESHOLD 154
-/* 1867 MHz P6 model 13 */
+#define MULLOW_BASECASE_THRESHOLD 4
+#define MULLOW_DC_THRESHOLD 38
+#define MULLOW_MUL_N_THRESHOLD 234
+
+#define DIV_SB_PREINV_THRESHOLD 0 /* always */
+#define DIV_DC_THRESHOLD 24
+#define POWM_THRESHOLD 150
+
+#define MATRIX22_STRASSEN_THRESHOLD 23
+#define HGCD_THRESHOLD 95
+#define GCD_DC_THRESHOLD 381
+#define GCDEXT_DC_THRESHOLD 419
+#define JACOBI_BASE_METHOD 1
+
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define USE_PREINV_MOD_1 1 /* native */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */
+
+#define GET_STR_DC_THRESHOLD 14
+#define GET_STR_PRECOMPUTE_THRESHOLD 24
+#define SET_STR_DC_THRESHOLD 276
+#define SET_STR_PRECOMPUTE_THRESHOLD 1078
+
+#define MUL_FFT_TABLE { 400, 928, 1664, 3584, 10240, 40960, 98304, 393216, 1572864, 0 }
+#define MUL_FFT_MODF_THRESHOLD 496
+#define MUL_FFT_THRESHOLD 7168
-#define MOD_1_NORM_THRESHOLD 4
-#define MOD_1_UNNORM_THRESHOLD 4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 21
-
-#define MUL_TOOM22_THRESHOLD 20
-#define MUL_TOOM33_THRESHOLD 77
-#define MUL_TOOM44_THRESHOLD 169
-#define MUL_TOOM6H_THRESHOLD 246
-#define MUL_TOOM8H_THRESHOLD 381
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 97
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 80
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 106
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 30
-#define SQR_TOOM3_THRESHOLD 101
-#define SQR_TOOM4_THRESHOLD 154
-#define SQR_TOOM6_THRESHOLD 222
-#define SQR_TOOM8_THRESHOLD 527
-
-#define MULMID_TOOM42_THRESHOLD 58
-
-#define MULMOD_BNM1_THRESHOLD 13
-#define SQRMOD_BNM1_THRESHOLD 17
-
-#define MUL_FFT_MODF_THRESHOLD 690 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 565, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
- { 25, 7}, { 13, 6}, { 28, 7}, { 15, 6}, \
- { 31, 7}, { 17, 6}, { 35, 7}, { 27, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
- { 31, 7}, { 63, 8}, { 39, 9}, { 23, 5}, \
- { 383, 4}, { 991, 5}, { 511, 6}, { 267, 7}, \
- { 157, 8}, { 91, 9}, { 47, 8}, { 111, 9}, \
- { 63, 8}, { 127, 9}, { 79,10}, { 47, 9}, \
- { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
- { 79, 9}, { 159,10}, { 95,11}, { 63,10}, \
- { 143, 9}, { 287,10}, { 159,11}, { 95,10}, \
- { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \
- { 511,10}, { 271, 9}, { 543,10}, { 287,11}, \
- { 159,10}, { 335, 9}, { 671,11}, { 191,10}, \
- { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \
- { 415,11}, { 223,12}, { 127,11}, { 255,10}, \
- { 543, 9}, { 1087,11}, { 287,10}, { 607,11}, \
- { 319,10}, { 671,12}, { 191,11}, { 383,10}, \
- { 799,11}, { 415,10}, { 831,13}, { 127,12}, \
- { 255,11}, { 543,10}, { 1087,11}, { 607,10}, \
- { 1215,12}, { 319,11}, { 671,10}, { 1343,11}, \
- { 735,10}, { 1471,12}, { 383,11}, { 799,10}, \
- { 1599,11}, { 863,12}, { 447,11}, { 959,13}, \
- { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
- { 1215,12}, { 639,11}, { 1343,12}, { 703,11}, \
- { 1471,13}, { 383,12}, { 831,11}, { 1727,12}, \
- { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \
- { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \
- { 1727,13}, { 895,12}, { 1919,14}, { 511,13}, \
- { 1023,12}, { 2111,13}, { 1151,12}, { 2431,13}, \
- { 1407,12}, { 2815,14}, { 767,13}, { 1663,12}, \
- { 3455,13}, { 8192,14}, { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 132
-#define MUL_FFT_THRESHOLD 7424
-
-#define SQR_FFT_MODF_THRESHOLD 565 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 472, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
- { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
- { 31, 7}, { 17, 6}, { 35, 7}, { 27, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \
- { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \
- { 31, 8}, { 63, 4}, { 1023, 8}, { 67, 9}, \
- { 39, 5}, { 639, 4}, { 1471, 6}, { 383, 7}, \
- { 209, 8}, { 119, 9}, { 63, 7}, { 255, 8}, \
- { 139, 9}, { 71, 8}, { 143, 9}, { 79,10}, \
- { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
- { 135,10}, { 79, 9}, { 159, 8}, { 319, 9}, \
- { 167,10}, { 95,11}, { 63,10}, { 143, 9}, \
- { 287,10}, { 159,11}, { 95,10}, { 191,12}, \
- { 63,11}, { 127,10}, { 255, 9}, { 543, 8}, \
- { 1087,10}, { 287, 9}, { 575,11}, { 159,10}, \
- { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \
- { 351, 9}, { 703,11}, { 191,10}, { 383, 9}, \
- { 767,10}, { 399, 9}, { 799,10}, { 415, 9}, \
- { 831,11}, { 223,12}, { 127,11}, { 255,10}, \
- { 543, 9}, { 1087,11}, { 287,10}, { 607, 9}, \
- { 1215,11}, { 319,10}, { 671, 9}, { 1343,11}, \
- { 351,10}, { 703,12}, { 191,11}, { 383,10}, \
- { 799,11}, { 415,10}, { 831,13}, { 127,12}, \
- { 255,11}, { 543,10}, { 1087,11}, { 607,12}, \
- { 319,11}, { 671,10}, { 1343,11}, { 735,12}, \
- { 383,11}, { 799,10}, { 1599,11}, { 863,12}, \
- { 447,11}, { 959,13}, { 255,12}, { 511,11}, \
- { 1087,12}, { 575,11}, { 1215,12}, { 639,11}, \
- { 1343,12}, { 703,11}, { 1471,13}, { 383,12}, \
- { 767,11}, { 1599,12}, { 831,11}, { 1727,12}, \
- { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \
- { 639,12}, { 1471,13}, { 767,12}, { 1727,13}, \
- { 895,12}, { 1919,14}, { 511,13}, { 1023,12}, \
- { 2111,13}, { 1151,12}, { 2431,13}, { 1407,14}, \
- { 767,13}, { 1663,12}, { 3455,13}, { 8192,14}, \
- { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 146
-#define SQR_FFT_THRESHOLD 5760
-
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 31
-#define MULLO_MUL_N_THRESHOLD 13463
-
-#define DC_DIV_QR_THRESHOLD 25
-#define DC_DIVAPPR_Q_THRESHOLD 55
-#define DC_BDIV_QR_THRESHOLD 60
-#define DC_BDIV_Q_THRESHOLD 132
-
-#define INV_MULMOD_BNM1_THRESHOLD 38
-#define INV_NEWTON_THRESHOLD 65
-#define INV_APPR_THRESHOLD 65
-
-#define BINV_NEWTON_THRESHOLD 252
-#define REDC_1_TO_REDC_N_THRESHOLD 62
-
-#define MU_DIV_QR_THRESHOLD 1164
-#define MU_DIVAPPR_Q_THRESHOLD 748
-#define MUPI_DIV_QR_THRESHOLD 38
-#define MU_BDIV_QR_THRESHOLD 1360
-#define MU_BDIV_Q_THRESHOLD 1470
-
-#define POWM_SEC_TABLE 2,23,258,879,2246
-
-#define MATRIX22_STRASSEN_THRESHOLD 17
-#define HGCD_THRESHOLD 69
-#define HGCD_APPR_THRESHOLD 112
-#define HGCD_REDUCE_THRESHOLD 3389
-#define GCD_DC_THRESHOLD 386
-#define GCDEXT_DC_THRESHOLD 303
-#define JACOBI_BASE_METHOD 1
-
-#define GET_STR_DC_THRESHOLD 13
-#define GET_STR_PRECOMPUTE_THRESHOLD 25
-#define SET_STR_DC_THRESHOLD 582
-#define SET_STR_PRECOMPUTE_THRESHOLD 1118
-
-#define FAC_DSC_THRESHOLD 178
-#define FAC_ODD_THRESHOLD 34
+#define SQR_FFT_TABLE { 432, 928, 1664, 3584, 10240, 40960, 98304, 393216, 1572864, 0 }
+#define SQR_FFT_MODF_THRESHOLD 448
+#define SQR_FFT_THRESHOLD 3840
diff --git a/gmp/mpn/x86/p6/sse2/mod_1_1.asm b/gmp/mpn/x86/p6/sse2/mod_1_1.asm
deleted file mode 100644
index 8b7b7adaa5..0000000000
--- a/gmp/mpn/x86/p6/sse2/mod_1_1.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl Intel P6/SSE2 mpn_mod_1_1.
-
-dnl Copyright 2009, 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_mod_1_1p)
-include_mpn(`x86/pentium4/sse2/mod_1_1.asm')
diff --git a/gmp/mpn/x86/p6/sse2/mod_1_4.asm b/gmp/mpn/x86/p6/sse2/mod_1_4.asm
deleted file mode 100644
index 49c96c60b9..0000000000
--- a/gmp/mpn/x86/p6/sse2/mod_1_4.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-dnl Intel P6/SSE2 mpn_mod_1_4.
-
-dnl Copyright 2009, 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-MULFUNC_PROLOGUE(mpn_mod_1s_4p)
-include_mpn(`x86/pentium4/sse2/mod_1_4.asm')
diff --git a/gmp/mpn/x86/p6/sse2/mul_1.asm b/gmp/mpn/x86/p6/sse2/mul_1.asm
index 50e5b6983a..fc3d4e6414 100644
--- a/gmp/mpn/x86/p6/sse2/mul_1.asm
+++ b/gmp/mpn/x86/p6/sse2/mul_1.asm
@@ -1,32 +1,21 @@
dnl Intel P6/SSE2 mpn_mul_1.
dnl Copyright 2008 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/p6/sse2/mul_basecase.asm b/gmp/mpn/x86/p6/sse2/mul_basecase.asm
index 4687625790..f52ece025f 100644
--- a/gmp/mpn/x86/p6/sse2/mul_basecase.asm
+++ b/gmp/mpn/x86/p6/sse2/mul_basecase.asm
@@ -1,32 +1,21 @@
dnl Intel P6/SSE2 mpn_mul_basecase.
dnl Copyright 2008 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/p6/sse2/popcount.asm b/gmp/mpn/x86/p6/sse2/popcount.asm
index 4c02b93be2..f818d6e230 100644
--- a/gmp/mpn/x86/p6/sse2/popcount.asm
+++ b/gmp/mpn/x86/p6/sse2/popcount.asm
@@ -1,32 +1,21 @@
dnl Intel P6/SSE2 mpn_popcount -- population count.
dnl Copyright 2008 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/p6/sse2/sqr_basecase.asm b/gmp/mpn/x86/p6/sse2/sqr_basecase.asm
index 76b574b6c7..8a7f24974d 100644
--- a/gmp/mpn/x86/p6/sse2/sqr_basecase.asm
+++ b/gmp/mpn/x86/p6/sse2/sqr_basecase.asm
@@ -1,32 +1,21 @@
dnl Intel P6/SSE2 mpn_sqr_basecase.
dnl Copyright 2008 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/p6/sse2/submul_1.asm b/gmp/mpn/x86/p6/sse2/submul_1.asm
index 98a603ce93..ae97fd6346 100644
--- a/gmp/mpn/x86/p6/sse2/submul_1.asm
+++ b/gmp/mpn/x86/p6/sse2/submul_1.asm
@@ -1,32 +1,21 @@
dnl Intel P6/SSE2 mpn_submul_1.
dnl Copyright 2008 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/README b/gmp/mpn/x86/pentium/README
index 305936bbd9..6c4d872c47 100644
--- a/gmp/mpn/x86/pentium/README
+++ b/gmp/mpn/x86/pentium/README
@@ -1,30 +1,19 @@
-Copyright 1996, 1999-2001, 2003 Free Software Foundation, Inc.
+Copyright 1996, 1999, 2000, 2001, 2003 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
diff --git a/gmp/mpn/x86/pentium/aors_n.asm b/gmp/mpn/x86/pentium/aors_n.asm
index 01ebfb96ae..30d0df79b0 100644
--- a/gmp/mpn/x86/pentium/aors_n.asm
+++ b/gmp/mpn/x86/pentium/aors_n.asm
@@ -1,32 +1,22 @@
dnl Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
-dnl Copyright 1992, 1994-1996, 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
+dnl Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -88,13 +78,13 @@ deflit(`FRAME',16)
pushl %edx
FRAME_pushl()
movl PARAM_CARRY,%eax
- shrl %eax C shift bit 0 into carry
+ shrl $1,%eax C shift bit 0 into carry
jmp L(oop)
L(endgo):
deflit(`FRAME',16)
movl PARAM_CARRY,%eax
- shrl %eax C shift bit 0 into carry
+ shrl $1,%eax C shift bit 0 into carry
jmp L(end)
EPILOGUE()
diff --git a/gmp/mpn/x86/pentium/aorsmul_1.asm b/gmp/mpn/x86/pentium/aorsmul_1.asm
index d83cc4513b..a50299b5cf 100644
--- a/gmp/mpn/x86/pentium/aorsmul_1.asm
+++ b/gmp/mpn/x86/pentium/aorsmul_1.asm
@@ -2,32 +2,21 @@ dnl Intel Pentium mpn_addmul_1 -- mpn by limb multiplication.
dnl Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
dnl Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/bdiv_q_1.asm b/gmp/mpn/x86/pentium/bdiv_q_1.asm
deleted file mode 100644
index 9fee3cb87a..0000000000
--- a/gmp/mpn/x86/pentium/bdiv_q_1.asm
+++ /dev/null
@@ -1,260 +0,0 @@
-dnl Intel Pentium mpn_divexact_1 -- mpn by limb exact division.
-
-dnl Rearranged from mpn/x86/pentium/dive_1.asm by Marco Bodrato.
-
-dnl Copyright 2001, 2002, 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C divisor
-C odd even
-C P54: 24.5 30.5 cycles/limb
-C P55: 23.0 28.0
-
-MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
-
-C The P55 speeds noted above, 23 cycles odd or 28 cycles even, are as
-C expected. On P54 in the even case the shrdl pairing nonsense (see
-C mpn/x86/pentium/README) costs 1 cycle, but it's not clear why there's a
-C further 1.5 slowdown for both odd and even.
-
-defframe(PARAM_SHIFT, 24)
-defframe(PARAM_INVERSE,20)
-defframe(PARAM_DIVISOR,16)
-defframe(PARAM_SIZE, 12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-dnl re-use parameter space
-define(VAR_INVERSE,`PARAM_DST')
-
- TEXT
-
- ALIGN(32)
-C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C mp_limb_t divisor);
-C
-PROLOGUE(mpn_bdiv_q_1)
-deflit(`FRAME',0)
-
- movl $-1, %ecx
- movl PARAM_DIVISOR, %eax
-
-L(strip_twos):
- ASSERT(nz, `orl %eax, %eax')
- shrl %eax
- incl %ecx C shift count
-
- jnc L(strip_twos)
-
- leal 1(%eax,%eax), %edx C d
- andl $127, %eax C d/2, 7 bits
-
- pushl %ebx FRAME_pushl()
- pushl %ebp FRAME_pushl()
-
-ifdef(`PIC',`
- call L(here)
-L(here):
- popl %ebp C eip
-
- addl $_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebp
- C AGI
- movl binvert_limb_table@GOT(%ebp), %ebp
- C AGI
- movzbl (%eax,%ebp), %eax
-',`
-
-dnl non-PIC
- movzbl binvert_limb_table(%eax), %eax C inv 8 bits
-')
-
- movl %eax, %ebp C inv
- addl %eax, %eax C 2*inv
-
- imull %ebp, %ebp C inv*inv
-
- imull %edx, %ebp C inv*inv*d
-
- subl %ebp, %eax C inv = 2*inv - inv*inv*d
- movl PARAM_SIZE, %ebx
-
- movl %eax, %ebp
- addl %eax, %eax C 2*inv
-
- imull %ebp, %ebp C inv*inv
-
- imull %edx, %ebp C inv*inv*d
-
- subl %ebp, %eax C inv = 2*inv - inv*inv*d
- movl %edx, PARAM_DIVISOR C d without twos
-
- ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
- pushl %eax FRAME_pushl()
- imull PARAM_DIVISOR, %eax
- cmpl $1, %eax
- popl %eax FRAME_popl()')
-
- jmp L(common)
-EPILOGUE()
-
-C mp_limb_t
-C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
-C mp_limb_t inverse, int shift)
- ALIGN(32)
-PROLOGUE(mpn_pi1_bdiv_q_1)
-deflit(`FRAME',0)
-
- movl PARAM_SHIFT, %ecx
-
- pushl %ebx FRAME_pushl()
- pushl %ebp FRAME_pushl()
-
- movl PARAM_SIZE, %ebx
- movl PARAM_INVERSE, %eax
-
-L(common):
- pushl %esi FRAME_pushl()
- push %edi FRAME_pushl()
-
- movl PARAM_SRC, %esi
- movl PARAM_DST, %edi
- movl %eax, VAR_INVERSE
-
- leal (%esi,%ebx,4), %esi C src end
- leal (%edi,%ebx,4), %edi C dst end
-
- negl %ebx C -size
-
- xorl %ebp, %ebp C initial carry bit
-
- orl %ecx, %ecx C shift
- movl (%esi,%ebx,4), %eax C src low limb
- jz L(odd_entry)
-
- xorl %edx, %edx C initial carry limb (for even, if one)
- incl %ebx
- jz L(one)
-
- movl (%esi,%ebx,4), %edx C src second limb (for even)
- shrdl( %cl, %edx, %eax)
-
- jmp L(even_entry)
-
-
- ALIGN(8)
-L(odd_top):
- C eax scratch
- C ebx counter, limbs, negative
- C ecx
- C edx
- C esi src end
- C edi dst end
- C ebp carry bit, 0 or -1
-
- mull PARAM_DIVISOR
-
- movl (%esi,%ebx,4), %eax
- subl %ebp, %edx
-
- subl %edx, %eax
-
- sbbl %ebp, %ebp
-
-L(odd_entry):
- imull VAR_INVERSE, %eax
-
- movl %eax, (%edi,%ebx,4)
-
- incl %ebx
- jnz L(odd_top)
-
- popl %edi
- popl %esi
-
- popl %ebp
- popl %ebx
-
- ret
-
-L(even_top):
- C eax scratch
- C ebx counter, limbs, negative
- C ecx twos
- C edx
- C esi src end
- C edi dst end
- C ebp carry bit, 0 or -1
-
- mull PARAM_DIVISOR
-
- subl %ebp, %edx C carry bit
- movl -4(%esi,%ebx,4), %eax C src limb
-
- movl (%esi,%ebx,4), %ebp C and one above it
-
- shrdl( %cl, %ebp, %eax)
-
- subl %edx, %eax C carry limb
-
- sbbl %ebp, %ebp
-
-L(even_entry):
- imull VAR_INVERSE, %eax
-
- movl %eax, -4(%edi,%ebx,4)
- incl %ebx
-
- jnz L(even_top)
-
- mull PARAM_DIVISOR
-
- movl -4(%esi), %eax C src high limb
- subl %ebp, %edx
-
-L(one):
- shrl %cl, %eax
-
- subl %edx, %eax C no carry if division is exact
-
- imull VAR_INVERSE, %eax
-
- movl %eax, -4(%edi) C dst high limb
- nop C protect against cache bank clash
-
- popl %edi
- popl %esi
-
- popl %ebp
- popl %ebx
-
- ret
-
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium/com.asm b/gmp/mpn/x86/pentium/com_n.asm
index b0805452a6..c6d2d72e5e 100644
--- a/gmp/mpn/x86/pentium/com.asm
+++ b/gmp/mpn/x86/pentium/com_n.asm
@@ -1,32 +1,21 @@
-dnl Intel Pentium mpn_com -- mpn ones complement.
+dnl Intel Pentium mpn_com_n -- mpn ones complement.
dnl Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -37,7 +26,7 @@ C P5: 1.75 cycles/limb
NAILS_SUPPORT(0-31)
-C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C void mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C This code is similar to mpn_copyi, basically there's just some "xorl
C $GMP_NUMB_MASK"s inserted.
@@ -55,7 +44,7 @@ defframe(PARAM_DST, 4)
TEXT
ALIGN(8)
-PROLOGUE(mpn_com)
+PROLOGUE(mpn_com_n)
deflit(`FRAME',0)
movl PARAM_SRC, %eax
diff --git a/gmp/mpn/x86/pentium/copyd.asm b/gmp/mpn/x86/pentium/copyd.asm
index 72a543b2a3..2be8c765ac 100644
--- a/gmp/mpn/x86/pentium/copyd.asm
+++ b/gmp/mpn/x86/pentium/copyd.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium mpn_copyd -- copy limb vector, decrementing.
dnl Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/copyi.asm b/gmp/mpn/x86/pentium/copyi.asm
index d983d6b46e..9da08e2c06 100644
--- a/gmp/mpn/x86/pentium/copyi.asm
+++ b/gmp/mpn/x86/pentium/copyi.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium mpn_copyi -- copy limb vector, incrementing.
dnl Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/dive_1.asm b/gmp/mpn/x86/pentium/dive_1.asm
index f80632f479..79885244a5 100644
--- a/gmp/mpn/x86/pentium/dive_1.asm
+++ b/gmp/mpn/x86/pentium/dive_1.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium mpn_divexact_1 -- mpn by limb exact division.
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -157,7 +146,7 @@ dnl non-PIC
negl %ebx C -size
- ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+ ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
pushl %eax FRAME_pushl()
imull PARAM_DIVISOR, %eax
cmpl $1, %eax
diff --git a/gmp/mpn/x86/pentium/gmp-mparam.h b/gmp/mpn/x86/pentium/gmp-mparam.h
index befa6e27a9..5c49c4e3cb 100644
--- a/gmp/mpn/x86/pentium/gmp-mparam.h
+++ b/gmp/mpn/x86/pentium/gmp-mparam.h
@@ -1,36 +1,26 @@
/* Intel P54 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 1999-2002, 2004 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
/* For mpn/x86/pentium/mod_1.asm */
@@ -41,11 +31,11 @@ see https://www.gnu.org/licenses/. */
/* Generated by tuneup.c, 2004-02-10, gcc 2.95 */
-#define MUL_TOOM22_THRESHOLD 16
-#define MUL_TOOM33_THRESHOLD 90
+#define MUL_KARATSUBA_THRESHOLD 16
+#define MUL_TOOM3_THRESHOLD 90
#define SQR_BASECASE_THRESHOLD 0 /* always */
-#define SQR_TOOM2_THRESHOLD 22
+#define SQR_KARATSUBA_THRESHOLD 22
#define SQR_TOOM3_THRESHOLD 122
#define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX /* never */
diff --git a/gmp/mpn/x86/pentium/hamdist.asm b/gmp/mpn/x86/pentium/hamdist.asm
index 2d7bc99b12..a129030f74 100644
--- a/gmp/mpn/x86/pentium/hamdist.asm
+++ b/gmp/mpn/x86/pentium/hamdist.asm
@@ -1,32 +1,21 @@
dnl Intel P5 mpn_hamdist -- mpn hamming distance.
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/logops_n.asm b/gmp/mpn/x86/pentium/logops_n.asm
index 18773172e9..0552e55809 100644
--- a/gmp/mpn/x86/pentium/logops_n.asm
+++ b/gmp/mpn/x86/pentium/logops_n.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/lshift.asm b/gmp/mpn/x86/pentium/lshift.asm
index 2a31f36c6e..ece51e06d3 100644
--- a/gmp/mpn/x86/pentium/lshift.asm
+++ b/gmp/mpn/x86/pentium/lshift.asm
@@ -1,32 +1,22 @@
dnl Intel Pentium mpn_lshift -- mpn left shift.
-dnl Copyright 1992, 1994-1996, 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
+dnl Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/mmx/gmp-mparam.h b/gmp/mpn/x86/pentium/mmx/gmp-mparam.h
index 02a0def127..e443c8c300 100644
--- a/gmp/mpn/x86/pentium/mmx/gmp-mparam.h
+++ b/gmp/mpn/x86/pentium/mmx/gmp-mparam.h
@@ -1,37 +1,26 @@
/* Intel P55 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 1999-2002, 2004, 2009, 2010 Free Software
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2009 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
/* For mpn/x86/pentium/mod_1.asm */
@@ -40,124 +29,45 @@ see https://www.gnu.org/licenses/. */
/* 233MHz P55 */
-#define MOD_1_NORM_THRESHOLD 5
-#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 12
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 11
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 63
-#define USE_PREINV_DIVREM_1 0
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 51
-
-#define MUL_TOOM22_THRESHOLD 16
-#define MUL_TOOM33_THRESHOLD 53
-#define MUL_TOOM44_THRESHOLD 128
-#define MUL_TOOM6H_THRESHOLD 189
-#define MUL_TOOM8H_THRESHOLD 260
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 90
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 88
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 20
-#define SQR_TOOM3_THRESHOLD 73
-#define SQR_TOOM4_THRESHOLD 178
-#define SQR_TOOM6_THRESHOLD 210
-#define SQR_TOOM8_THRESHOLD 375
-
-#define MULMOD_BNM1_THRESHOLD 11
-#define SQRMOD_BNM1_THRESHOLD 12
-
-#define MUL_FFT_MODF_THRESHOLD 364 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 364, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \
- { 9, 5}, { 19, 6}, { 17, 7}, { 9, 6}, \
- { 21, 7}, { 11, 6}, { 23, 7}, { 15, 6}, \
- { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
- { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \
- { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
- { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \
- { 47,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
- { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
- { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \
- { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
- { 79, 9}, { 159, 8}, { 319, 9}, { 167,10}, \
- { 95, 9}, { 191, 8}, { 383,11}, { 63,10}, \
- { 127, 9}, { 255,10}, { 143, 9}, { 287,10}, \
- { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \
- { 383,12}, { 63,11}, { 127,10}, { 271, 9}, \
- { 543,10}, { 287,11}, { 159,10}, { 351,11}, \
- { 191,10}, { 415,11}, { 223,12}, { 127,11}, \
- { 255,10}, { 511,11}, { 287,10}, { 575,11}, \
- { 351,12}, { 191,11}, { 415,13}, { 127,12}, \
- { 255,11}, { 575,12}, { 319,11}, { 703,12}, \
- { 383,11}, { 831,12}, { 447,13}, { 8192,14}, \
- { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 90
-#define MUL_FFT_THRESHOLD 3520
-
-#define SQR_FFT_MODF_THRESHOLD 340 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 340, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
- { 17, 7}, { 9, 6}, { 21, 7}, { 11, 6}, \
- { 23, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \
- { 11, 7}, { 29, 8}, { 15, 7}, { 33, 8}, \
- { 19, 7}, { 39, 8}, { 27, 7}, { 55, 9}, \
- { 15, 8}, { 31, 7}, { 65, 8}, { 43, 9}, \
- { 23, 8}, { 47,10}, { 15, 9}, { 31, 8}, \
- { 67, 9}, { 39, 8}, { 83, 9}, { 47, 8}, \
- { 95,10}, { 31, 9}, { 63, 8}, { 127, 9}, \
- { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
- { 63, 9}, { 127, 8}, { 255, 9}, { 135,10}, \
- { 79, 9}, { 159, 8}, { 319,10}, { 95, 9}, \
- { 191,11}, { 63,10}, { 127, 9}, { 255, 8}, \
- { 511, 9}, { 271,10}, { 143, 9}, { 287, 8}, \
- { 575, 9}, { 303,10}, { 159, 9}, { 319,11}, \
- { 95,10}, { 191, 9}, { 383,10}, { 207,12}, \
- { 63,11}, { 127,10}, { 271, 9}, { 543,10}, \
- { 287, 9}, { 575,10}, { 303,11}, { 159,10}, \
- { 351,11}, { 191,10}, { 415,11}, { 223,10}, \
- { 447,12}, { 127,11}, { 255,10}, { 543,11}, \
- { 287,10}, { 607,11}, { 351,12}, { 191,11}, \
- { 479,13}, { 127,12}, { 255,11}, { 575,12}, \
- { 319,11}, { 703,12}, { 383,11}, { 767,12}, \
- { 447,13}, { 8192,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 96
-#define SQR_FFT_THRESHOLD 5504
-
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 48
-#define MULLO_MUL_N_THRESHOLD 6633
-
-#define DC_DIV_QR_THRESHOLD 43
-#define DC_DIVAPPR_Q_THRESHOLD 170
-#define DC_BDIV_QR_THRESHOLD 43
-#define DC_BDIV_Q_THRESHOLD 110
-
-#define INV_MULMOD_BNM1_THRESHOLD 30
-#define INV_NEWTON_THRESHOLD 177
-#define INV_APPR_THRESHOLD 171
-
-#define BINV_NEWTON_THRESHOLD 194
-#define REDC_1_TO_REDC_N_THRESHOLD 50
-
-#define MU_DIV_QR_THRESHOLD 1142
-#define MU_DIVAPPR_Q_THRESHOLD 1142
-#define MUPI_DIV_QR_THRESHOLD 90
-#define MU_BDIV_QR_THRESHOLD 942
-#define MU_BDIV_Q_THRESHOLD 1017
-
-#define MATRIX22_STRASSEN_THRESHOLD 13
-#define HGCD_THRESHOLD 92
-#define GCD_DC_THRESHOLD 283
-#define GCDEXT_DC_THRESHOLD 221
-#define JACOBI_BASE_METHOD 2
-
-#define GET_STR_DC_THRESHOLD 18
-#define GET_STR_PRECOMPUTE_THRESHOLD 31
-#define SET_STR_DC_THRESHOLD 490
-#define SET_STR_PRECOMPUTE_THRESHOLD 994
+/* Generated by tuneup.c, 2009-01-06, gcc 3.4 */
+
+#define MUL_KARATSUBA_THRESHOLD 16
+#define MUL_TOOM3_THRESHOLD 89
+#define MUL_TOOM44_THRESHOLD 131
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD 22
+#define SQR_TOOM3_THRESHOLD 77
+#define SQR_TOOM4_THRESHOLD 168
+
+#define MULLOW_BASECASE_THRESHOLD 0 /* always */
+#define MULLOW_DC_THRESHOLD 40
+#define MULLOW_MUL_N_THRESHOLD 266
+
+#define DIV_SB_PREINV_THRESHOLD 4
+#define DIV_DC_THRESHOLD 43
+#define POWM_THRESHOLD 64
+
+#define MATRIX22_STRASSEN_THRESHOLD 13
+#define HGCD_THRESHOLD 95
+#define GCD_DC_THRESHOLD 316
+#define GCDEXT_DC_THRESHOLD 316
+#define JACOBI_BASE_METHOD 2
+
+#define USE_PREINV_DIVREM_1 0
+#define USE_PREINV_MOD_1 1 /* native */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */
+
+#define GET_STR_DC_THRESHOLD 17
+#define GET_STR_PRECOMPUTE_THRESHOLD 27
+#define SET_STR_DC_THRESHOLD 527
+#define SET_STR_PRECOMPUTE_THRESHOLD 1069
+
+#define MUL_FFT_TABLE { 304, 672, 1152, 3584, 10240, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD 320
+#define MUL_FFT_THRESHOLD 3840
+
+#define SQR_FFT_TABLE { 304, 672, 1152, 4608, 10240, 24576, 0 }
+#define SQR_FFT_MODF_THRESHOLD 320
+#define SQR_FFT_THRESHOLD 3840
diff --git a/gmp/mpn/x86/pentium/mmx/hamdist.asm b/gmp/mpn/x86/pentium/mmx/hamdist.asm
index 72e3196697..185eeaee22 100644
--- a/gmp/mpn/x86/pentium/mmx/hamdist.asm
+++ b/gmp/mpn/x86/pentium/mmx/hamdist.asm
@@ -1,32 +1,21 @@
dnl Intel P55 mpn_hamdist -- mpn hamming distance.
dnl Copyright 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/mmx/lshift.asm b/gmp/mpn/x86/pentium/mmx/lshift.asm
index 04b0ddcc8f..012d794952 100644
--- a/gmp/mpn/x86/pentium/mmx/lshift.asm
+++ b/gmp/mpn/x86/pentium/mmx/lshift.asm
@@ -1,32 +1,21 @@
dnl Intel P5 mpn_lshift -- mpn left shift.
-dnl Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/mmx/mul_1.asm b/gmp/mpn/x86/pentium/mmx/mul_1.asm
index 4ced577b13..b9fe77ed07 100644
--- a/gmp/mpn/x86/pentium/mmx/mul_1.asm
+++ b/gmp/mpn/x86/pentium/mmx/mul_1.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium MMX mpn_mul_1 -- mpn by limb multiplication.
-dnl Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/mmx/rshift.asm b/gmp/mpn/x86/pentium/mmx/rshift.asm
index e3b274bb63..f50b8ab0e0 100644
--- a/gmp/mpn/x86/pentium/mmx/rshift.asm
+++ b/gmp/mpn/x86/pentium/mmx/rshift.asm
@@ -1,32 +1,21 @@
dnl Intel P5 mpn_rshift -- mpn right shift.
dnl Copyright 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/mod_1.asm b/gmp/mpn/x86/pentium/mod_1.asm
new file mode 100644
index 0000000000..408242e7a9
--- /dev/null
+++ b/gmp/mpn/x86/pentium/mod_1.asm
@@ -0,0 +1,454 @@
+dnl Intel P5 mpn_mod_1 -- mpn by limb remainder.
+
+dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P5: 28.0 cycles/limb
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t carry);
+C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t inverse);
+C
+C This code is not unlike mpn/x86/p6/mod_1.asm, it does the same sort of
+C multiply by inverse without on-the-fly shifts. See that code for some
+C general comments.
+C
+C Alternatives:
+C
+C P5 shldl is 4 cycles, so shifting on the fly would be at least 5 cycles
+C slower, probably more depending what it did to register usage. Using MMX
+C on P55 would be better, but still at least 4 or 5 instructions and so 2 or
+C 3 cycles.
+
+
+dnl These thresholds are the sizes where the multiply by inverse method is
+dnl used, rather than plain "divl"s. Minimum value 2.
+dnl
+dnl MUL_NORM_THRESHOLD is for an already normalized divisor (high bit set),
+dnl MUL_UNNORM_THRESHOLD for an unnormalized divisor.
+dnl
+dnl With the divl loop at 44 c/l and the inverse at 28 c/l with about 70
+dnl cycles to setup, the threshold should be about ceil(70/(44-28))==5,
+dnl which is what happens in practice.
+dnl
+dnl An unnormalized divisor gets an extra 40 cycles at the end for the
+dnl final (r*2^n)%(d*2^n) and shift. This increases the threshold by about
+dnl ceil(40/(44-28))=3.
+dnl
+dnl PIC adds between 4 and 7 cycles (not sure why it varies), but this
+dnl doesn't change the thresholds.
+dnl
+dnl The entry sequence code that chooses between MUL_NORM_THRESHOLD and
+dnl MUL_UNNORM_THRESHOLD is a bit horrible, but it adds only 2 cycles
+dnl (branch free) and ensures the choice between div or mul is optimal.
+
+deflit(MUL_NORM_THRESHOLD, ifdef(`PIC',5,5))
+deflit(MUL_UNNORM_THRESHOLD, ifdef(`PIC',8,8))
+
+deflit(MUL_NORM_DELTA, eval(MUL_NORM_THRESHOLD - MUL_UNNORM_THRESHOLD)) dnl ANDed with a 0 or -1 divisor highbit mask, then MUL_UNNORM_THRESHOLD is added
+
+
+defframe(PARAM_INVERSE, 16) dnl mpn_preinv_mod_1
+defframe(PARAM_CARRY, 16) dnl mpn_mod_1c
+defframe(PARAM_DIVISOR, 12)
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+
+dnl re-using parameter space, each slot is written only after its parameter has been loaded into a register
+define(VAR_NORM, `PARAM_DIVISOR')
+define(VAR_INVERSE, `PARAM_SIZE')
+
+ TEXT
+
+ ALIGN(8) C mpn_preinv_mod_1: remainder using the inverse passed in PARAM_INVERSE
+PROLOGUE(mpn_preinv_mod_1)
+deflit(`FRAME',0)
+
+ pushl %ebp FRAME_pushl()
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_SRC, %esi
+ movl PARAM_SIZE, %edx
+
+ pushl %edi FRAME_pushl()
+ pushl %ebx FRAME_pushl()
+
+ movl PARAM_DIVISOR, %ebp
+ movl PARAM_INVERSE, %eax C stored to VAR_INVERSE at L(start_preinv)
+
+ movl -4(%esi,%edx,4), %edi C src high limb
+ leal -8(%esi,%edx,4), %esi C &src[size-2]
+
+ movl $0, VAR_NORM C shift count 0, so the final denormalization is skipped
+ decl %edx C size-1
+
+ jnz L(start_preinv) C size != 1, join the inverse loop inside mpn_mod_1
+
+ subl %ebp, %edi C src-divisor
+ popl %ebx C popl does not alter the borrow from subl
+
+ sbbl %ecx, %ecx C -1 if underflow
+ movl %edi, %eax C src-divisor
+
+ andl %ebp, %ecx C d if underflow
+ popl %edi
+
+ addl %ecx, %eax C remainder, with possible addback
+ popl %esi
+
+ popl %ebp
+
+ ret
+
+EPILOGUE()
+
+
+ ALIGN(8) C mpn_mod_1c: as mpn_mod_1 but starting from the remainder in PARAM_CARRY
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+
+ movl PARAM_DIVISOR, %eax
+ movl PARAM_SIZE, %ecx
+
+ sarl $31, %eax C d highbit
+ movl PARAM_CARRY, %edx
+
+ orl %ecx, %ecx C test size for zero
+ jz L(done_edx) C result==carry if size==0
+
+ andl $MUL_NORM_DELTA, %eax C delta if d normalized, else 0
+ pushl %ebp FRAME_pushl()
+
+ addl $MUL_UNNORM_THRESHOLD, %eax C norm or unnorm thresh
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_SRC, %esi
+ movl PARAM_DIVISOR, %ebp
+
+ cmpl %eax, %ecx C size versus threshold
+ jb L(divide_top) C below threshold, divl loop with edx=carry as initial remainder
+
+ movl %edx, %eax C carry as pretend src high limb
+ leal 1(%ecx), %edx C size+1
+
+ cmpl $0x1000000, %ebp C carry flag is consumed by sbbl at L(mul_by_inverse_1c)
+ jmp L(mul_by_inverse_1c)
+
+EPILOGUE()
+
+
+ ALIGN(8) C mpn_mod_1: picks the divl loop or multiply by inverse from size and divisor
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %ecx
+ pushl %ebp FRAME_pushl()
+
+ orl %ecx, %ecx C test size for zero
+ jz L(done_zero) C size==0 returns 0
+
+ movl PARAM_SRC, %eax
+ movl PARAM_DIVISOR, %ebp
+
+ sarl $31, %ebp C -1 if divisor normalized
+ movl -4(%eax,%ecx,4), %eax C src high limb
+
+ movl PARAM_DIVISOR, %edx
+ pushl %esi FRAME_pushl()
+
+ andl $MUL_NORM_DELTA, %ebp C delta if divisor normalized, else 0
+ cmpl %edx, %eax C carry flag if high<divisor
+
+ sbbl %edx, %edx C -1 if high<divisor
+ addl $MUL_UNNORM_THRESHOLD, %ebp C norm or unnorm thresh
+
+ addl %edx, %ecx C size-1 if high<divisor
+ jz L(done_eax) C single limb below divisor, eax is already the remainder
+
+ cmpl %ebp, %ecx C limbs to process versus threshold
+ movl PARAM_DIVISOR, %ebp C movl keeps the flags from cmpl for jae
+
+ movl PARAM_SRC, %esi
+ jae L(mul_by_inverse)
+
+ andl %eax, %edx C high as initial carry if high<divisor
+
+
+L(divide_top):
+ C eax scratch (quotient)
+ C ebx
+ C ecx counter, limbs, decrementing
+ C edx scratch (remainder)
+ C esi src
+ C edi
+ C ebp divisor
+
+ movl -4(%esi,%ecx,4), %eax C next limb, from high to low
+
+ divl %ebp C edx:eax / d, remainder stays in edx for the next limb
+
+ decl %ecx
+ jnz L(divide_top)
+
+
+ popl %esi
+ popl %ebp
+
+L(done_edx): C also the size==0 exit of mpn_mod_1c, taken before anything is pushed
+ movl %edx, %eax
+
+ ret
+
+
+L(done_zero): C size==0 exit of mpn_mod_1
+ xorl %eax, %eax C remainder 0
+ popl %ebp C only ebp has been pushed on this path
+
+ ret
+
+
+C -----------------------------------------------------------------------------
+C
+C The divisor is normalized using the same code as the pentium
+C count_leading_zeros in longlong.h. Going through the GOT for PIC costs a
+C couple of cycles, but is more or less unavoidable.
+
+
+ ALIGN(8)
+L(mul_by_inverse):
+ C eax src high limb
+ C ebx
+ C ecx size or size-1
+ C edx
+ C esi src
+ C edi
+ C ebp divisor
+
+ movl PARAM_SIZE, %edx
+ cmpl $0x1000000, %ebp
+
+L(mul_by_inverse_1c):
+ sbbl %ecx, %ecx C -1 if d < 0x1000000, else 0
+ cmpl $0x10000, %ebp
+
+ sbbl $0, %ecx C one less if d < 0x10000
+ cmpl $0x100, %ebp
+
+ sbbl $0, %ecx C one less if d < 0x100
+ pushl %edi FRAME_pushl()
+
+ pushl %ebx FRAME_pushl()
+ movl %ebp, %ebx C d
+
+ifdef(`PIC',`
+ call L(here) C pushes the address of L(here)
+L(here):
+ popl %edi C edi = address of L(here)
+ leal 25(,%ecx,8), %ecx C 0,-1,-2,-3 -> 25,17,9,1
+
+ shrl %cl, %ebx C d >> c, index into __clz_tab
+ addl $_GLOBAL_OFFSET_TABLE_+[.-L(here)], %edi
+
+ C AGI
+ movl __clz_tab@GOT(%edi), %edi C address of __clz_tab from its GOT entry
+ addl $-34, %ecx
+
+ C AGI
+ movb (%ebx,%edi), %bl
+
+',`
+ leal 25(,%ecx,8), %ecx C 0,-1,-2,-3 -> 25,17,9,1
+
+ shrl %cl, %ebx C d >> c, index into __clz_tab
+ addl $-34, %ecx
+
+ C AGI
+ movb __clz_tab(%ebx), %bl
+')
+ movl %eax, %edi C carry -> n1
+
+ addl %ebx, %ecx C -34 + c + __clz_tab[d>>c] = -clz-1
+ leal -8(%esi,%edx,4), %esi C &src[size-2]
+
+ xorl $-1, %ecx C clz
+ movl $-1, %edx C b-1
+
+ ASSERT(e,`pushl %eax C clz calculation same as bsrl
+ bsrl %ebp, %eax
+ xorl $31, %eax
+ cmpl %eax, %ecx
+ popl %eax')
+
+ shll %cl, %ebp C d normalized
+ movl %ecx, VAR_NORM C shift count, reloaded for the final denormalization
+
+ subl %ebp, %edx C (b-d)-1, so edx:eax = b*(b-d)-1
+ movl $-1, %eax C low limb of b*(b-d)-1
+
+ divl %ebp C floor (b*(b-d)-1) / d, quotient in eax is the inverse
+
+L(start_preinv):
+ movl %eax, VAR_INVERSE C inverse, from the divl above or from PARAM_INVERSE
+ movl %ebp, %eax C d
+
+ movl %ecx, %edx C fake high, will cancel
+
+
+C For mpn_mod_1 and mpn_preinv_mod_1, the initial carry in %edi is the src
+C high limb, and this may be greater than the divisor and may need one copy
+C of the divisor subtracted (only one, because the divisor is normalized).
+C This is accomplished by having the initial ecx:edi act as a fake previous
+C n2:n10. The initial edx:eax is d, acting as a fake (q1+1)*d which is
+C subtracted from ecx:edi, with the usual addback if it produces an
+C underflow.
+
+
+L(inverse_top):
+ C eax scratch (n10, n1, q1, etc)
+ C ebx scratch (nadj, src limit)
+ C ecx old n2
+ C edx scratch
+ C esi src pointer, &src[size-2] to &src[0]
+ C edi old n10
+ C ebp d
+
+ subl %eax, %edi C low n - (q1+1)*d
+ movl (%esi), %eax C new n10
+
+ sbbl %edx, %ecx C high n - (q1+1)*d, 0 or -1
+ movl %ebp, %ebx C d
+
+ sarl $31, %eax C -n1
+ andl %ebp, %ecx C d if underflow
+
+ addl %edi, %ecx C remainder -> n2, and possible addback
+ ASSERT(b,`cmpl %ebp, %ecx')
+ andl %eax, %ebx C -n1 & d
+
+ movl (%esi), %edi C n10
+ andl $1, %eax C n1
+
+ addl %ecx, %eax C n2+n1
+ addl %edi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ addl %eax, %ebx C low(m*(n2+n1) + nadj), giving carry flag
+ leal 1(%ecx), %eax C 1+n2
+
+ adcl %edx, %eax C 1 + high[n2<<32 + m*(n2+n1) + nadj] = q1+1
+ movl PARAM_SRC, %ebx C src limit for the loop test below
+
+ sbbl $0, %eax C use q1 if q1+1 overflows
+ subl $4, %esi C step src ptr
+
+ mull %ebp C (q1+1)*d
+
+ cmpl %ebx, %esi C loop while the stepped src ptr is still >= &src[0]
+ jae L(inverse_top)
+
+
+
+ C %edi (after subtract and addback) is the remainder modulo d*2^n
+ C and must be reduced to 0<=r<d by calculating r*2^n mod d*2^n and
+ C right shifting by n.
+ C
+ C If d was already normalized on entry so that n==0 then nothing is
+ C needed here. This is always the case for preinv_mod_1. For mod_1
+ C or mod_1c the chance of n==0 is low, but about 40 cycles can be
+ C saved.
+
+ subl %eax, %edi C low n - (q1+1)*d
+ movl %ecx, %ebx C n2
+
+ sbbl %edx, %ebx C high n - (q1+1)*d, 0 or -1
+ xorl %esi, %esi C next n2
+
+ andl %ebp, %ebx C d if underflow
+ movl VAR_NORM, %ecx C shift count n
+
+ addl %ebx, %edi C remainder, with possible addback
+ orl %ecx, %ecx C test n for zero
+
+ jz L(done_mul_edi) C n==0, edi is already the final remainder
+
+
+ C Here using %esi=n2 and %edi=n10, unlike the above
+
+ shldl( %cl, %edi, %esi) C n2
+
+ shll %cl, %edi C n10
+
+ movl %edi, %eax C n10
+ movl %edi, %ebx C n10
+
+ sarl $31, %ebx C -n1
+
+ shrl $31, %eax C n1
+ andl %ebp, %ebx C -n1 & d
+
+ addl %esi, %eax C n2+n1
+ addl %edi, %ebx C nadj = n10 + (-n1 & d), ignoring overflow
+
+ mull VAR_INVERSE C m*(n2+n1)
+
+ addl %eax, %ebx C m*(n2+n1) + nadj, low giving carry flag
+ leal 1(%esi), %eax C 1+n2
+
+ adcl %edx, %eax C 1 + high(n2<<32 + m*(n2+n1) + nadj) = q1+1
+
+ sbbl $0, %eax C use q1 if q1+1 overflows
+
+ mull %ebp C (q1+1)*d
+
+ subl %eax, %edi C low n - (q1+1)*d
+ popl %ebx C popl does not alter the borrow from subl
+
+ sbbl %edx, %esi C high n - (q1+1)*d, 0 or -1
+ movl %edi, %eax C low n - (q1+1)*d, before any addback
+
+ andl %ebp, %esi C d if underflow
+ popl %edi
+
+ addl %esi, %eax C addback if underflow
+ popl %esi
+
+ shrl %cl, %eax C denorm remainder
+ popl %ebp
+
+ ret
+
+
+L(done_mul_edi):
+ movl %edi, %eax C remainder, no denormalization needed
+ popl %ebx
+
+ popl %edi
+L(done_eax): C also reached from mpn_mod_1 with only ebp and esi pushed
+ popl %esi
+
+ popl %ebp
+
+ ret
+
+EPILOGUE()
diff --git a/gmp/mpn/x86/pentium/mod_34lsub1.asm b/gmp/mpn/x86/pentium/mod_34lsub1.asm
index 2d88223b84..201081a437 100644
--- a/gmp/mpn/x86/pentium/mod_34lsub1.asm
+++ b/gmp/mpn/x86/pentium/mod_34lsub1.asm
@@ -1,32 +1,21 @@
dnl Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
-dnl Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/mode1o.asm b/gmp/mpn/x86/pentium/mode1o.asm
index eb2790e1a0..222f64e5cb 100644
--- a/gmp/mpn/x86/pentium/mode1o.asm
+++ b/gmp/mpn/x86/pentium/mode1o.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium mpn_modexact_1_odd -- exact division style remainder.
-dnl Copyright 2000-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -133,7 +122,7 @@ dnl non-PIC
subl %eax, %ecx C inv = 2*inv - inv*inv*d
pushl %esi FRAME_pushl()
- ASSERT(e,` C d*inv == 1 mod 2^GMP_LIMB_BITS
+ ASSERT(e,` C d*inv == 1 mod 2^BITS_PER_MP_LIMB
movl %ecx, %eax
imull PARAM_DIVISOR, %eax
cmpl $1, %eax')
diff --git a/gmp/mpn/x86/pentium/mul_1.asm b/gmp/mpn/x86/pentium/mul_1.asm
index a0858af2b4..c6b255c322 100644
--- a/gmp/mpn/x86/pentium/mul_1.asm
+++ b/gmp/mpn/x86/pentium/mul_1.asm
@@ -2,32 +2,21 @@ dnl Intel Pentium mpn_mul_1 -- mpn by limb multiplication.
dnl Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
dnl Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/mul_2.asm b/gmp/mpn/x86/pentium/mul_2.asm
index 4c7beb5df2..36a025c425 100644
--- a/gmp/mpn/x86/pentium/mul_2.asm
+++ b/gmp/mpn/x86/pentium/mul_2.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium mpn_mul_2 -- mpn by 2-limb multiplication.
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/mul_basecase.asm b/gmp/mpn/x86/pentium/mul_basecase.asm
index 50e15d3567..fd24fdf7fa 100644
--- a/gmp/mpn/x86/pentium/mul_basecase.asm
+++ b/gmp/mpn/x86/pentium/mul_basecase.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.
-dnl Copyright 1996, 1998-2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1996, 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/popcount.asm b/gmp/mpn/x86/pentium/popcount.asm
index b8d84ad2e2..df53bb8842 100644
--- a/gmp/mpn/x86/pentium/popcount.asm
+++ b/gmp/mpn/x86/pentium/popcount.asm
@@ -1,32 +1,21 @@
dnl Intel P5 mpn_popcount -- mpn bit population count.
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/rshift.asm b/gmp/mpn/x86/pentium/rshift.asm
index 2105c4c935..949b0d2e2f 100644
--- a/gmp/mpn/x86/pentium/rshift.asm
+++ b/gmp/mpn/x86/pentium/rshift.asm
@@ -1,32 +1,22 @@
dnl Intel Pentium mpn_rshift -- mpn right shift.
-dnl Copyright 1992, 1994-1996, 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
+dnl Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium/sqr_basecase.asm b/gmp/mpn/x86/pentium/sqr_basecase.asm
index b11d767da2..e4fca7c546 100644
--- a/gmp/mpn/x86/pentium/sqr_basecase.asm
+++ b/gmp/mpn/x86/pentium/sqr_basecase.asm
@@ -1,32 +1,21 @@
dnl Intel P5 mpn_sqr_basecase -- square an mpn number.
-dnl Copyright 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium4/README b/gmp/mpn/x86/pentium4/README
index 90f752e5d5..8dc0479f04 100644
--- a/gmp/mpn/x86/pentium4/README
+++ b/gmp/mpn/x86/pentium4/README
@@ -3,28 +3,17 @@ Copyright 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
diff --git a/gmp/mpn/x86/pentium4/copyd.asm b/gmp/mpn/x86/pentium4/copyd.asm
index 82af81c522..491ad60128 100644
--- a/gmp/mpn/x86/pentium4/copyd.asm
+++ b/gmp/mpn/x86/pentium4/copyd.asm
@@ -1,32 +1,22 @@
dnl Pentium-4 mpn_copyd -- copy limb vector, decrementing.
-
-dnl Copyright 1999-2001 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
+
+dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
dnl The std/rep/movsl/cld is very slow for small blocks on pentium4. Its
diff --git a/gmp/mpn/x86/pentium4/copyi.asm b/gmp/mpn/x86/pentium4/copyi.asm
index b6148879fa..bf812c822b 100644
--- a/gmp/mpn/x86/pentium4/copyi.asm
+++ b/gmp/mpn/x86/pentium4/copyi.asm
@@ -1,32 +1,22 @@
dnl Pentium-4 mpn_copyi -- copy limb vector, incrementing.
-
-dnl Copyright 1999-2001 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
+
+dnl Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
dnl The rep/movsl is very slow for small blocks on pentium4. Its startup
diff --git a/gmp/mpn/x86/pentium4/mmx/lshift.asm b/gmp/mpn/x86/pentium4/mmx/lshift.asm
index b5eca66698..5d316d5da4 100644
--- a/gmp/mpn/x86/pentium4/mmx/lshift.asm
+++ b/gmp/mpn/x86/pentium4/mmx/lshift.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium-4 mpn_lshift -- left shift.
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium4/mmx/popham.asm b/gmp/mpn/x86/pentium4/mmx/popham.asm
index 9563cb57e4..2e79816821 100644
--- a/gmp/mpn/x86/pentium4/mmx/popham.asm
+++ b/gmp/mpn/x86/pentium4/mmx/popham.asm
@@ -1,33 +1,22 @@
dnl Intel Pentium 4 mpn_popcount, mpn_hamdist -- population count and
dnl hamming distance.
-dnl Copyright 2000-2002, 2007 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002, 2007 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium4/mmx/rshift.asm b/gmp/mpn/x86/pentium4/mmx/rshift.asm
index 3ac0094a5a..a7dec54a3a 100644
--- a/gmp/mpn/x86/pentium4/mmx/rshift.asm
+++ b/gmp/mpn/x86/pentium4/mmx/rshift.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium-4 mpn_rshift -- right shift.
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium4/sse2/add_n.asm b/gmp/mpn/x86/pentium4/sse2/add_n.asm
index 8e2380e493..04c0c68d0e 100644
--- a/gmp/mpn/x86/pentium4/sse2/add_n.asm
+++ b/gmp/mpn/x86/pentium4/sse2/add_n.asm
@@ -1,44 +1,36 @@
dnl Intel Pentium-4 mpn_add_n -- mpn addition.
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C dst!=src1,2 dst==src1 dst==src2
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) ?
-C P4 model 0-1 (Willamette) ?
-C P4 model 2 (Northwood) 4 6 6
-C P4 model 3-4 (Prescott) 4.25 7.5 7.5
+C P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2
+C 6.0 cycles/limb if dst==src1 or dst==src2
+C P4 Prescott: >= 5 cycles/limb
+
+C mp_limb_t mpn_add_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t mpn_add_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+C
+C The 4 c/l achieved here isn't particularly good, but is better than 9 c/l
+C for a basic adc loop.
defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
@@ -54,25 +46,29 @@ define(SAVE_EBX,`PARAM_SRC1')
PROLOGUE(mpn_add_nc)
deflit(`FRAME',0)
+
movd PARAM_CARRY, %mm0
jmp L(start_nc)
+
EPILOGUE()
ALIGN(8)
PROLOGUE(mpn_add_n)
deflit(`FRAME',0)
+
pxor %mm0, %mm0
+
L(start_nc):
- mov PARAM_SRC1, %eax
- mov %ebx, SAVE_EBX
- mov PARAM_SRC2, %ebx
- mov PARAM_DST, %edx
- mov PARAM_SIZE, %ecx
+ movl PARAM_SRC1, %eax
+ movl %ebx, SAVE_EBX
+ movl PARAM_SRC2, %ebx
+ movl PARAM_DST, %edx
+ movl PARAM_SIZE, %ecx
- lea (%eax,%ecx,4), %eax C src1 end
- lea (%ebx,%ecx,4), %ebx C src2 end
- lea (%edx,%ecx,4), %edx C dst end
- neg %ecx C -size
+ leal (%eax,%ecx,4), %eax C src1 end
+ leal (%ebx,%ecx,4), %ebx C src2 end
+ leal (%edx,%ecx,4), %edx C dst end
+ negl %ecx C -size
L(top):
C eax src1 end
@@ -90,11 +86,12 @@ L(top):
psrlq $32, %mm0
- add $1, %ecx
+ addl $1, %ecx
jnz L(top)
+
movd %mm0, %eax
- mov SAVE_EBX, %ebx
+ movl SAVE_EBX, %ebx
emms
ret
diff --git a/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm b/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
index 93b63b2018..46b0903c50 100644
--- a/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
+++ b/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
@@ -1,45 +1,33 @@
dnl Intel Pentium-4 mpn_addlsh1_n -- mpn x+2*y.
-dnl Copyright 2001-2004, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
+dnl Copyright 2001, 2002, 2003, 2004, 2006 Free Software Foundation, Inc.
dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C dst!=src1,2 dst==src1 dst==src2
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) ?
-C P4 model 0-1 (Willamette) ?
-C P4 model 2 (Northwood) 4.25 6 6
-C P4 model 3-4 (Prescott) 5 8.5 8.5
+C cycles/limb (approx)
+C dst!=src1,2 dst==src1 dst==src2
+C P4 m2: 4.5 ?7.25 ?6.75
+C P4 m3: 5.3 ? ?
+C mp_limb_t mpn_addlsh1_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C
C The slightly strange combination of indexing and pointer incrementing
C that's used seems to work best. Not sure why, but %ecx,4 with src1 and/or
C src2 is a slowdown.
@@ -63,18 +51,18 @@ define(SAVE_EBX,`PARAM_SRC1')
PROLOGUE(mpn_addlsh1_n)
deflit(`FRAME',0)
- mov PARAM_SRC1, %eax
- mov %ebx, SAVE_EBX
+ movl PARAM_SRC1, %eax
+ movl %ebx, SAVE_EBX
- mov PARAM_SRC2, %ebx
+ movl PARAM_SRC2, %ebx
pxor %mm0, %mm0 C initial carry
- mov PARAM_DST, %edx
+ movl PARAM_DST, %edx
- mov PARAM_SIZE, %ecx
+ movl PARAM_SIZE, %ecx
- lea (%edx,%ecx,4), %edx C dst end
- neg %ecx C -size
+ leal (%edx,%ecx,4), %edx C dst end
+ negl %ecx C -size
L(top):
C eax src1 end
@@ -83,24 +71,24 @@ L(top):
C edx dst end
C mm0 carry
- movd (%ebx), %mm2
movd (%eax), %mm1
+ movd (%ebx), %mm2
psrlq $32, %mm0
- lea 4(%eax), %eax
- lea 4(%ebx), %ebx
+ leal 4(%eax), %eax
+ leal 4(%ebx), %ebx
- psllq $1, %mm2
+ paddq %mm2, %mm1
paddq %mm2, %mm1
paddq %mm1, %mm0
movd %mm0, (%edx,%ecx,4)
- add $1, %ecx
+ addl $1, %ecx
jnz L(top)
psrlq $32, %mm0
- mov SAVE_EBX, %ebx
+ movl SAVE_EBX, %ebx
movd %mm0, %eax
emms
ret
diff --git a/gmp/mpn/x86/pentium4/sse2/addmul_1.asm b/gmp/mpn/x86/pentium4/sse2/addmul_1.asm
index 78102072bf..3a8d0bb9bd 100644
--- a/gmp/mpn/x86/pentium4/sse2/addmul_1.asm
+++ b/gmp/mpn/x86/pentium4/sse2/addmul_1.asm
@@ -1,48 +1,37 @@
dnl mpn_addmul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
-dnl Copyright 2005, 2007, 2011 Free Software Foundation, Inc.
-
+dnl Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl
dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) 5.24
-C P6 model 13 (Dothan) 5.24
-C P4 model 0-1 (Willamette) 5
-C P4 model 2 (Northwood) 5
-C P4 model 3-4 (Prescott) 5
-
C TODO:
C * Tweak eax/edx offsets in loop so as to save some lea's
C * Perhaps software pipeline small-case code
+C cycles/limb
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) 5.24
+C P4 model 0-1 (Willamette): 5
+C P4 model 2 (Northwood): 5
+C P4 model 3-4 (Prescott): 5
+
C INPUT PARAMETERS
C rp sp + 4
C up sp + 8
@@ -51,13 +40,22 @@ C v0 sp + 16
TEXT
ALIGN(16)
+PROLOGUE(mpn_addmul_1c)
+ mov 4(%esp), %edx
+ mov 8(%esp), %eax
+ mov 12(%esp), %ecx
+ movd 16(%esp), %mm7
+ movd 20(%esp), %mm6
+ jmp L(ent)
+EPILOGUE()
+ ALIGN(16)
PROLOGUE(mpn_addmul_1)
- pxor %mm6, %mm6
-L(ent): mov 4(%esp), %edx
+ mov 4(%esp), %edx
mov 8(%esp), %eax
mov 12(%esp), %ecx
movd 16(%esp), %mm7
- cmp $4, %ecx
+ pxor %mm6, %mm6
+L(ent): cmp $4, %ecx
jnc L(big)
L(lp0): movd (%eax), %mm0
@@ -183,7 +181,3 @@ L(end): pmuludq %mm7, %mm2
emms
ret
EPILOGUE()
-PROLOGUE(mpn_addmul_1c)
- movd 20(%esp), %mm6
- jmp L(ent)
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm b/gmp/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm
deleted file mode 100644
index 354300e4de..0000000000
--- a/gmp/mpn/x86/pentium4/sse2/bdiv_dbm1c.asm
+++ /dev/null
@@ -1,141 +0,0 @@
-dnl Intel Atom mpn_bdiv_dbm1c.
-
-dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C cycles/limb
-C P5 -
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) 9.75
-C P6 model 13 (Dothan)
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
-C P4 model 2 (Northwood) 8.25
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C Intel Atom 8
-C AMD K6 -
-C AMD K7 -
-C AMD K8
-C AMD K10
-
-C TODO: This code was optimised for atom-32; consider moving it back to the
-C atom dir (atom currently grabs this code), and write a 4-way version (7 c/l).
-
-defframe(PARAM_CARRY,20)
-defframe(PARAM_MUL, 16)
-defframe(PARAM_SIZE, 12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
-dnl re-use parameter space
-define(SAVE_RP,`PARAM_MUL')
-define(SAVE_UP,`PARAM_SIZE')
-
-define(`rp', `%edi')
-define(`up', `%esi')
-define(`n', `%ecx')
-define(`reg', `%edx')
-define(`cy', `%eax') C contains the return value
-
-ASM_START()
- TEXT
- ALIGN(16)
-deflit(`FRAME',0)
-
-PROLOGUE(mpn_bdiv_dbm1c)
- mov PARAM_SIZE, n C size
- mov up, SAVE_UP
- mov PARAM_SRC, up
- movd PARAM_MUL, %mm7
- mov rp, SAVE_RP
- mov PARAM_DST, rp
-
- movd (up), %mm0
- pmuludq %mm7, %mm0
- shr n
- mov PARAM_CARRY, cy
- jz L(eq1)
-
- movd 4(up), %mm1
- jc L(odd)
-
- lea 4(up), up
- pmuludq %mm7, %mm1
- movd %mm0, reg
- psrlq $32, %mm0
- sub reg, cy
- movd %mm0, reg
- movq %mm1, %mm0
- dec n
- mov cy, (rp)
- lea 4(rp), rp
- jz L(end)
-
-C ALIGN(16)
-L(top): movd 4(up), %mm1
- sbb reg, cy
-L(odd): movd %mm0, reg
- psrlq $32, %mm0
- pmuludq %mm7, %mm1
- sub reg, cy
- lea 8(up), up
- movd %mm0, reg
- movd (up), %mm0
- mov cy, (rp)
- sbb reg, cy
- movd %mm1, reg
- psrlq $32, %mm1
- sub reg, cy
- movd %mm1, reg
- pmuludq %mm7, %mm0
- dec n
- mov cy, 4(rp)
- lea 8(rp), rp
- jnz L(top)
-
-L(end): sbb reg, cy
-
-L(eq1): movd %mm0, reg
- psrlq $32, %mm0
- mov SAVE_UP, up
- sub reg, cy
- movd %mm0, reg
- emms
- mov cy, (rp)
- sbb reg, cy
-
- mov SAVE_RP, rp
- ret
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/x86/pentium4/sse2/bdiv_q_1.asm b/gmp/mpn/x86/pentium4/sse2/bdiv_q_1.asm
deleted file mode 100644
index f7f461d56f..0000000000
--- a/gmp/mpn/x86/pentium4/sse2/bdiv_q_1.asm
+++ /dev/null
@@ -1,233 +0,0 @@
-dnl Intel Pentium-4 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- mpn by limb exact division.
-
-dnl Rearranged from mpn/x86/pentium4/sse2/dive_1.asm by Marco Bodrato.
-
-dnl Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C P4: 19.0 cycles/limb
-
-C Pairs of movd's are used to avoid unaligned loads. Despite the loads not
-C being on the dependent chain and there being plenty of cycles available,
-C using an unaligned movq on every second iteration measured about 23 c/l.
-C
-
-defframe(PARAM_SHIFT, 24)
-defframe(PARAM_INVERSE,20)
-defframe(PARAM_DIVISOR,16)
-defframe(PARAM_SIZE, 12)
-defframe(PARAM_SRC, 8)
-defframe(PARAM_DST, 4)
-
- TEXT
-
-C mp_limb_t
-C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
-C mp_limb_t inverse, int shift)
- ALIGN(32)
-PROLOGUE(mpn_pi1_bdiv_q_1)
-deflit(`FRAME',0)
-
- movl PARAM_SIZE, %edx
-
- movl PARAM_SRC, %eax
-
- movl PARAM_DIVISOR, %ecx
-
- movd %ecx, %mm6
- movl PARAM_SHIFT, %ecx
-
- movd %ecx, %mm7 C shift
-
- C
-
- movl PARAM_INVERSE, %ecx
- movd %ecx, %mm5 C inv
-
- movl PARAM_DST, %ecx
- pxor %mm1, %mm1 C initial carry limb
- pxor %mm0, %mm0 C initial carry bit
-
- subl $1, %edx
- jz L(done)
-
- pcmpeqd %mm4, %mm4
- psrlq $32, %mm4 C 0x00000000FFFFFFFF
-
-C The dependent chain here is as follows.
-C
-C latency
-C psubq s = (src-cbit) - climb 2
-C pmuludq q = s*inverse 8
-C pmuludq prod = q*divisor 8
-C psrlq climb = high(prod) 2
-C --
-C 20
-C
-C Yet the loop measures 19.0 c/l, so obviously there's something gained
-C there over a straight reading of the chip documentation.
-
-L(top):
- C eax src, incrementing
- C ebx
- C ecx dst, incrementing
- C edx counter, size-1 iterations
- C
- C mm0 carry bit
- C mm1 carry limb
- C mm4 0x00000000FFFFFFFF
- C mm5 inverse
- C mm6 divisor
- C mm7 shift
-
- movd (%eax), %mm2
- movd 4(%eax), %mm3
- addl $4, %eax
- punpckldq %mm3, %mm2
-
- psrlq %mm7, %mm2
- pand %mm4, %mm2 C src
- psubq %mm0, %mm2 C src - cbit
-
- psubq %mm1, %mm2 C src - cbit - climb
- movq %mm2, %mm0
- psrlq $63, %mm0 C new cbit
-
- pmuludq %mm5, %mm2 C s*inverse
- movd %mm2, (%ecx) C q
- addl $4, %ecx
-
- movq %mm6, %mm1
- pmuludq %mm2, %mm1 C q*divisor
- psrlq $32, %mm1 C new climb
-
-L(entry):
- subl $1, %edx
- jnz L(top)
-
-L(done):
- movd (%eax), %mm2
- psrlq %mm7, %mm2 C src
- psubq %mm0, %mm2 C src - cbit
-
- psubq %mm1, %mm2 C src - cbit - climb
-
- pmuludq %mm5, %mm2 C s*inverse
- movd %mm2, (%ecx) C q
-
- emms
- ret
-
-EPILOGUE()
-
- ALIGN(16)
-C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C mp_limb_t divisor);
-C
-PROLOGUE(mpn_bdiv_q_1)
-deflit(`FRAME',0)
-
- movl PARAM_SIZE, %edx
-
- movl PARAM_DIVISOR, %ecx
-
- C eax src
- C ebx
- C ecx divisor
- C edx size-1
-
- movl %ecx, %eax
- bsfl %ecx, %ecx C trailing twos
-
- shrl %cl, %eax C d = divisor without twos
- movd %eax, %mm6
- movd %ecx, %mm7 C shift
-
- shrl %eax C d/2
-
- andl $127, %eax C d/2, 7 bits
-
-ifdef(`PIC',`
- LEA( binvert_limb_table, %ecx)
- movzbl (%eax,%ecx), %eax C inv 8 bits
-',`
- movzbl binvert_limb_table(%eax), %eax C inv 8 bits
-')
-
- C
-
- movd %eax, %mm5 C inv
-
- movd %eax, %mm0 C inv
-
- pmuludq %mm5, %mm5 C inv*inv
-
- C
-
- pmuludq %mm6, %mm5 C inv*inv*d
- paddd %mm0, %mm0 C 2*inv
-
- C
-
- psubd %mm5, %mm0 C inv = 2*inv - inv*inv*d
- pxor %mm5, %mm5
-
- paddd %mm0, %mm5
- pmuludq %mm0, %mm0 C inv*inv
-
- pcmpeqd %mm4, %mm4
- psrlq $32, %mm4 C 0x00000000FFFFFFFF
-
- C
-
- pmuludq %mm6, %mm0 C inv*inv*d
- paddd %mm5, %mm5 C 2*inv
-
- movl PARAM_SRC, %eax
- movl PARAM_DST, %ecx
- pxor %mm1, %mm1 C initial carry limb
-
- C
-
- psubd %mm0, %mm5 C inv = 2*inv - inv*inv*d
-
- ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
- pushl %eax FRAME_pushl()
- movq %mm6, %mm0
- pmuludq %mm5, %mm0
- movd %mm0, %eax
- cmpl $1, %eax
- popl %eax FRAME_popl()')
-
- pxor %mm0, %mm0 C initial carry bit
- jmp L(entry)
-
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/cnd_add_n.asm b/gmp/mpn/x86/pentium4/sse2/cnd_add_n.asm
deleted file mode 100644
index b3f3474e67..0000000000
--- a/gmp/mpn/x86/pentium4/sse2/cnd_add_n.asm
+++ /dev/null
@@ -1,95 +0,0 @@
-dnl Intel Pentium-4 mpn_cnd_add_n -- mpn addition.
-
-dnl Copyright 2001, 2002, 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C cycles/limb
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) 4.67
-C P4 model 0-1 (Willamette) ?
-C P4 model 2 (Northwood) 5
-C P4 model 3-4 (Prescott) 5.25
-
-defframe(PARAM_SIZE, 20)
-defframe(PARAM_SRC2, 16)
-defframe(PARAM_SRC1, 12)
-defframe(PARAM_DST, 8)
-defframe(PARAM_CND, 4)
-
-dnl re-use parameter space
-define(SAVE_EBX,`PARAM_SRC1')
-
-define(`cnd', `%mm3')
-
- TEXT
- ALIGN(8)
-
- ALIGN(8)
-PROLOGUE(mpn_cnd_add_n)
-deflit(`FRAME',0)
- pxor %mm0, %mm0
-
- mov PARAM_CND, %eax
- neg %eax
- sbb %eax, %eax
- movd %eax, cnd
-
- mov PARAM_SRC1, %eax
- mov %ebx, SAVE_EBX
- mov PARAM_SRC2, %ebx
- mov PARAM_DST, %edx
- mov PARAM_SIZE, %ecx
-
- lea (%eax,%ecx,4), %eax C src1 end
- lea (%ebx,%ecx,4), %ebx C src2 end
- lea (%edx,%ecx,4), %edx C dst end
- neg %ecx C -size
-
-L(top): movd (%ebx,%ecx,4), %mm2
- movd (%eax,%ecx,4), %mm1
- pand cnd, %mm2
- paddq %mm2, %mm1
-
- paddq %mm1, %mm0
- movd %mm0, (%edx,%ecx,4)
-
- psrlq $32, %mm0
-
- add $1, %ecx
- jnz L(top)
-
- movd %mm0, %eax
- mov SAVE_EBX, %ebx
- emms
- ret
-
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/cnd_sub_n.asm b/gmp/mpn/x86/pentium4/sse2/cnd_sub_n.asm
deleted file mode 100644
index 339a23e0b6..0000000000
--- a/gmp/mpn/x86/pentium4/sse2/cnd_sub_n.asm
+++ /dev/null
@@ -1,114 +0,0 @@
-dnl Intel Pentium-4 mpn_cnd_sub_n -- mpn subtraction.
-
-dnl Copyright 2001, 2002, 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C cycles/limb
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) 4.67
-C P4 model 0-1 (Willamette) ?
-C P4 model 2 (Northwood) 5
-C P4 model 3-4 (Prescott) 5.25
-
-defframe(PARAM_SIZE, 20)
-defframe(PARAM_SRC2, 16)
-defframe(PARAM_SRC1, 12)
-defframe(PARAM_DST, 8)
-defframe(PARAM_CND, 4)
-
-dnl re-use parameter space
-define(SAVE_EBX,`PARAM_SRC1')
-
-define(`cnd', `%mm3')
-
- TEXT
- ALIGN(8)
-
- ALIGN(8)
-PROLOGUE(mpn_cnd_sub_n)
-deflit(`FRAME',0)
- pxor %mm0, %mm0
-
- mov PARAM_CND, %eax
- neg %eax
- sbb %eax, %eax
- movd %eax, cnd
-
- mov PARAM_SRC1, %eax
- mov %ebx, SAVE_EBX
- mov PARAM_SRC2, %ebx
- mov PARAM_DST, %edx
- mov PARAM_SIZE, %ecx
-
- lea (%eax,%ecx,4), %eax C src1 end
- lea (%ebx,%ecx,4), %ebx C src2 end
- lea (%edx,%ecx,4), %edx C dst end
- neg %ecx C -size
-
-L(top): movd (%ebx,%ecx,4), %mm2
- movd (%eax,%ecx,4), %mm1
- pand cnd, %mm2
- psubq %mm2, %mm1
-
- psubq %mm0, %mm1
- movd %mm1, (%edx,%ecx,4)
-
- psrlq $63, %mm1
-
- add $1, %ecx
- jz L(done_mm1)
-
- movd (%ebx,%ecx,4), %mm2
- movd (%eax,%ecx,4), %mm0
- pand cnd, %mm2
- psubq %mm2, %mm0
-
- psubq %mm1, %mm0
- movd %mm0, (%edx,%ecx,4)
-
- psrlq $63, %mm0
-
- add $1, %ecx
- jnz L(top)
-
- movd %mm0, %eax
- mov SAVE_EBX, %ebx
- emms
- ret
-
-L(done_mm1):
- movd %mm1, %eax
- mov SAVE_EBX, %ebx
- emms
- ret
-
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/dive_1.asm b/gmp/mpn/x86/pentium4/sse2/dive_1.asm
index 238f0dd8a5..c50ef7d29e 100644
--- a/gmp/mpn/x86/pentium4/sse2/dive_1.asm
+++ b/gmp/mpn/x86/pentium4/sse2/dive_1.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium-4 mpn_divexact_1 -- mpn by limb exact division.
dnl Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -137,7 +126,7 @@ ifdef(`PIC',`
psubd %mm0, %mm5 C inv = 2*inv - inv*inv*d
- ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+ ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
pushl %eax FRAME_pushl()
movq %mm6, %mm0
pmuludq %mm5, %mm0
@@ -150,13 +139,13 @@ ifdef(`PIC',`
C The dependent chain here is as follows.
C
-C latency
-C psubq s = (src-cbit) - climb 2
-C pmuludq q = s*inverse 8
-C pmuludq prod = q*divisor 8
-C psrlq climb = high(prod) 2
-C --
-C 20
+C latency
+C psubq s = (src-cbit) - climb 2
+C pmuludq q = s*inverse 8
+C pmuludq prod = q*divisor 8
+C psrlq climb = high(prod) 2
+C --
+C 20
C
C Yet the loop measures 19.0 c/l, so obviously there's something gained
C there over a straight reading of the chip documentation.
diff --git a/gmp/mpn/x86/pentium4/sse2/divrem_1.asm b/gmp/mpn/x86/pentium4/sse2/divrem_1.asm
index 0146fab117..7f973dbf98 100644
--- a/gmp/mpn/x86/pentium4/sse2/divrem_1.asm
+++ b/gmp/mpn/x86/pentium4/sse2/divrem_1.asm
@@ -1,32 +1,22 @@
dnl Intel Pentium-4 mpn_divrem_1 -- mpn by limb division.
-dnl Copyright 1999-2004 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation,
+dnl Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium4/sse2/gmp-mparam.h b/gmp/mpn/x86/pentium4/sse2/gmp-mparam.h
index a94ae868b3..5071aae092 100644
--- a/gmp/mpn/x86/pentium4/sse2/gmp-mparam.h
+++ b/gmp/mpn/x86/pentium4/sse2/gmp-mparam.h
@@ -1,206 +1,68 @@
/* Intel Pentium-4 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 2000-2005, 2007-2010, 2014 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008,
+2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-or
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
-or both in parallel, as here.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 2600 MHz P4 Northwood */
-/* FFT tuning limit = 12500000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.2 */
-
-#define MOD_1_NORM_THRESHOLD 24
-#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 6
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 13
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 2
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 2
-#define DIV_QR_1_NORM_THRESHOLD 19
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 20
-
-#define MUL_TOOM22_THRESHOLD 29
-#define MUL_TOOM33_THRESHOLD 113
-#define MUL_TOOM44_THRESHOLD 288
-#define MUL_TOOM6H_THRESHOLD 454
-#define MUL_TOOM8H_THRESHOLD 592
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 118
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 214
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 193
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 186
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 287
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 44
-#define SQR_TOOM3_THRESHOLD 173
-#define SQR_TOOM4_THRESHOLD 390
-#define SQR_TOOM6_THRESHOLD 0 /* always */
-#define SQR_TOOM8_THRESHOLD 915
-
-#define MULMID_TOOM42_THRESHOLD 66
-
-#define MULMOD_BNM1_THRESHOLD 19
-#define SQRMOD_BNM1_THRESHOLD 23
-
-#define MUL_FFT_MODF_THRESHOLD 1147 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 1147, 5}, { 36, 6}, { 19, 5}, { 39, 6}, \
- { 27, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
- { 35, 7}, { 19, 6}, { 40, 7}, { 21, 6}, \
- { 43, 7}, { 23, 6}, { 49, 7}, { 27, 6}, \
- { 55, 7}, { 31, 6}, { 63, 7}, { 35, 8}, \
- { 19, 7}, { 43, 8}, { 23, 7}, { 51, 8}, \
- { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \
- { 39, 7}, { 79, 8}, { 43, 9}, { 23, 8}, \
- { 55, 9}, { 31, 8}, { 71, 9}, { 39, 8}, \
- { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
- { 31, 9}, { 63, 8}, { 127, 9}, { 79,10}, \
- { 47, 9}, { 111,11}, { 31,10}, { 63, 9}, \
- { 143,10}, { 79, 9}, { 159,10}, { 111,11}, \
- { 63,10}, { 127, 9}, { 255,10}, { 159, 9}, \
- { 319,11}, { 95,10}, { 207,12}, { 63,11}, \
- { 127,10}, { 287,11}, { 159,10}, { 335,11}, \
- { 191,10}, { 383,11}, { 223,12}, { 127,11}, \
- { 255,10}, { 511,11}, { 319,10}, { 671,11}, \
- { 351,12}, { 191,11}, { 383,10}, { 799,13}, \
- { 127,12}, { 255,11}, { 511,10}, { 1055, 9}, \
- { 2111,10}, { 1119, 9}, { 2239,11}, { 607,12}, \
- { 319,11}, { 671,10}, { 1407,11}, { 735,10}, \
- { 1471, 9}, { 2943,12}, { 383,11}, { 799,10}, \
- { 1599,11}, { 863,10}, { 1727, 9}, { 3455,12}, \
- { 447,11}, { 895,13}, { 255,12}, { 511,11}, \
- { 1055,10}, { 2111,11}, { 1119,10}, { 2239, 9}, \
- { 4479,12}, { 575,11}, { 1247,10}, { 2495, 9}, \
- { 4991,12}, { 639,11}, { 1471,10}, { 2943,13}, \
- { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \
- { 1727,10}, { 3455,12}, { 895,14}, { 255,13}, \
- { 511,12}, { 1023,11}, { 2047,12}, { 1087,11}, \
- { 2239,10}, { 4479,12}, { 1215,11}, { 2495,10}, \
- { 4991,13}, { 639,12}, { 1471,11}, { 2943,10}, \
- { 5887,11}, { 3007,13}, { 767,12}, { 1727,11}, \
- { 3455,13}, { 895,12}, { 1791,11}, { 3711,12}, \
- { 1983,11}, { 3967,10}, { 7935,14}, { 511,13}, \
- { 1023,12}, { 2239,11}, { 4479,13}, { 1151,12}, \
- { 2495,11}, { 4991,13}, { 1279,12}, { 2623,13}, \
- { 1407,12}, { 2943,11}, { 5887,12}, { 3007,14}, \
- { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 158
-#define MUL_FFT_THRESHOLD 7808
-
-#define SQR_FFT_MODF_THRESHOLD 896 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 896, 5}, { 28, 6}, { 15, 5}, { 33, 6}, \
- { 17, 5}, { 35, 6}, { 19, 5}, { 39, 6}, \
- { 27, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
- { 36, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
- { 47, 7}, { 27, 6}, { 55, 7}, { 31, 6}, \
- { 63, 7}, { 37, 8}, { 19, 7}, { 43, 8}, \
- { 23, 7}, { 51, 8}, { 27, 7}, { 55, 8}, \
- { 31, 7}, { 63, 8}, { 39, 7}, { 79, 8}, \
- { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \
- { 71, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
- { 95, 9}, { 55,10}, { 31, 9}, { 79,10}, \
- { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
- { 127,10}, { 79, 9}, { 159,10}, { 95, 9}, \
- { 191,11}, { 63,10}, { 127, 9}, { 255,10}, \
- { 159,11}, { 95,10}, { 191,12}, { 63,11}, \
- { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
- { 543,11}, { 159,10}, { 319, 9}, { 639,11}, \
- { 191,10}, { 383, 9}, { 767,11}, { 223,12}, \
- { 127,11}, { 255,10}, { 511, 9}, { 1055,10}, \
- { 543,11}, { 287,10}, { 607,11}, { 319,12}, \
- { 191,11}, { 383,10}, { 767,13}, { 127,12}, \
- { 255,11}, { 511,10}, { 1055,11}, { 543,10}, \
- { 1119, 9}, { 2239,11}, { 607,12}, { 319,11}, \
- { 671,10}, { 1407,11}, { 735,10}, { 1471, 9}, \
- { 2943,12}, { 383,11}, { 799,10}, { 1599,11}, \
- { 863,10}, { 1727,12}, { 447,11}, { 991,13}, \
- { 255,12}, { 511,11}, { 1055,10}, { 2111,11}, \
- { 1119,10}, { 2239,12}, { 575,11}, { 1247,10}, \
- { 2495,12}, { 639,11}, { 1471,10}, { 2943,13}, \
- { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \
- { 1727,10}, { 3455,12}, { 959,14}, { 255,13}, \
- { 511,12}, { 1023,11}, { 2111,12}, { 1087,11}, \
- { 2239,10}, { 4479,12}, { 1215,11}, { 2495,13}, \
- { 639,12}, { 1471,11}, { 2943,10}, { 5887,13}, \
- { 767,12}, { 1727,11}, { 3455,13}, { 895,12}, \
- { 1791,11}, { 3711,12}, { 1983,11}, { 3967,10}, \
- { 7935,14}, { 511,13}, { 1023,12}, { 2239,11}, \
- { 4479,13}, { 1151,12}, { 2495,11}, { 4991,13}, \
- { 1279,12}, { 2623,13}, { 1407,12}, { 2943,11}, \
- { 5887,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 159
-#define SQR_FFT_THRESHOLD 7296
-
-#define MULLO_BASECASE_THRESHOLD 12
-#define MULLO_DC_THRESHOLD 55
-#define MULLO_MUL_N_THRESHOLD 14709
-
-#define DC_DIV_QR_THRESHOLD 38
-#define DC_DIVAPPR_Q_THRESHOLD 77
-#define DC_BDIV_QR_THRESHOLD 51
-#define DC_BDIV_Q_THRESHOLD 85
-
-#define INV_MULMOD_BNM1_THRESHOLD 56
-#define INV_NEWTON_THRESHOLD 121
-#define INV_APPR_THRESHOLD 93
-
-#define BINV_NEWTON_THRESHOLD 366
-#define REDC_1_TO_REDC_N_THRESHOLD 64
-
-#define MU_DIV_QR_THRESHOLD 2350
-#define MU_DIVAPPR_Q_THRESHOLD 2172
-#define MUPI_DIV_QR_THRESHOLD 62
-#define MU_BDIV_QR_THRESHOLD 2172
-#define MU_BDIV_Q_THRESHOLD 2304
-
-#define POWM_SEC_TABLE 1,19,102,615,2111
-
-#define MATRIX22_STRASSEN_THRESHOLD 23
-#define HGCD_THRESHOLD 88
-#define HGCD_APPR_THRESHOLD 93
-#define HGCD_REDUCE_THRESHOLD 5010
-#define GCD_DC_THRESHOLD 379
-#define GCDEXT_DC_THRESHOLD 258
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 12
-#define GET_STR_PRECOMPUTE_THRESHOLD 26
-#define SET_STR_DC_THRESHOLD 147
-#define SET_STR_PRECOMPUTE_THRESHOLD 894
-
-#define FAC_DSC_THRESHOLD 906
-#define FAC_ODD_THRESHOLD 28
+/* 2600 MHz Pentium 4 model 2 */
+
+/* Generated by tuneup.c, 2009-01-06, gcc 3.4 */
+
+#define MUL_KARATSUBA_THRESHOLD 31
+#define MUL_TOOM3_THRESHOLD 119
+#define MUL_TOOM44_THRESHOLD 178
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_KARATSUBA_THRESHOLD 49
+#define SQR_TOOM3_THRESHOLD 165
+#define SQR_TOOM4_THRESHOLD 252
+
+#define MULLOW_BASECASE_THRESHOLD 15
+#define MULLOW_DC_THRESHOLD 44
+#define MULLOW_MUL_N_THRESHOLD 363
+
+#define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIV_DC_THRESHOLD 33
+#define POWM_THRESHOLD 95
+
+#define MATRIX22_STRASSEN_THRESHOLD 23
+#define HGCD_THRESHOLD 64
+#define GCD_DC_THRESHOLD 310
+#define GCDEXT_DC_THRESHOLD 310
+#define JACOBI_BASE_METHOD 1
+
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define USE_PREINV_MOD_1 1 /* native */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */
+
+#define GET_STR_DC_THRESHOLD 11
+#define GET_STR_PRECOMPUTE_THRESHOLD 26
+#define SET_STR_DC_THRESHOLD 118
+#define SET_STR_PRECOMPUTE_THRESHOLD 1078
+
+#define MUL_FFT_TABLE { 560, 928, 1920, 5632, 14336, 40960, 0 }
+#define MUL_FFT_MODF_THRESHOLD 720
+#define MUL_FFT_THRESHOLD 9216
+
+#define SQR_FFT_TABLE { 592, 928, 1920, 4608, 14336, 40960, 0 }
+#define SQR_FFT_MODF_THRESHOLD 608
+#define SQR_FFT_THRESHOLD 5888
diff --git a/gmp/mpn/x86/pentium4/sse2/mod_1.asm b/gmp/mpn/x86/pentium4/sse2/mod_1.asm
new file mode 100644
index 0000000000..0e95f13913
--- /dev/null
+++ b/gmp/mpn/x86/pentium4/sse2/mod_1.asm
@@ -0,0 +1,391 @@
+dnl Intel Pentium-4 mpn_mod_1 -- mpn by limb remainder.
+
+dnl Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+dnl P4: 31 cycles/limb.
+
+
+C mp_limb_t mpn_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor);
+C mp_limb_t mpn_mod_1c (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t carry);
+C mp_limb_t mpn_preinv_mod_1 (mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t inverse);
+C
+C An idea was tried in the mul-by-inverse to process the last limb by a jump
+C back to the top of the loop skipping the -4(%esi) fetch. But that seemed
+C to produce slightly strange timings, like 9 and 10 limb operations about
+C the same speed. The jump would be successively taken and not-taken, which
+C in theory should predict ok, but perhaps isn't enjoyed by the chip.
+C Duplicating the loop for the last limb seems to be a couple of cycles
+C quicker too.
+C
+C Enhancements:
+C
+C The loop measures 31 cycles, but the dependent chain would suggest it
+C could be done with 30. Not sure where to start looking for the extra
+C cycle.
+
+
+dnl MUL_THRESHOLD is the size at which the multiply by inverse method is
+dnl used, rather than plain "divl"s. Minimum value 2.
+dnl
+dnl The inverse takes about 80-90 cycles to calculate, but after that the
+dnl multiply is 31 c/l versus division at about 58 c/l.
+
+deflit(MUL_THRESHOLD, 5)
+
+
+defframe(PARAM_INVERSE,16) dnl mpn_preinv_mod_1
+defframe(PARAM_CARRY, 16) dnl mpn_mod_1c
+defframe(PARAM_DIVISOR,12)
+defframe(PARAM_SIZE, 8)
+defframe(PARAM_SRC, 4)
+
+dnl re-use parameter space
+define(SAVE_ESI,`PARAM_SIZE')
+define(SAVE_EBP,`PARAM_SRC')
+
+ TEXT
+
+ ALIGN(16)
+PROLOGUE(mpn_preinv_mod_1)
+deflit(`FRAME',0)
+C Entry point taking a caller-supplied inverse in PARAM_INVERSE.
+C It sets up the MMX register state used by the inverse loop that lives
+C in the mpn_mod_1 body, with the shift count l fixed at 0, reduces the
+C high limb by at most one subtraction of the divisor, and then joins
+C that shared code at the inverse_top or inverse_last label.
+C NOTE(review): l = 0 presumably means the divisor is expected to be
+C already normalized (high bit set) - confirm against the callers.
+C
+C esi and ebp are saved in the incoming parameter slots, since SAVE_ESI
+C is PARAM_SIZE and SAVE_EBP is PARAM_SRC.  Each parameter is therefore
+C read before the register save that overwrites its slot.
+
+ movl PARAM_SIZE, %ecx
+ movl %esi, SAVE_ESI
+ movl $32, %eax
+
+ movd %eax, %mm6 C l = 0, so 32-l = 32
+ movl PARAM_SRC, %esi
+ movl %ebp, SAVE_EBP
+
+ movd PARAM_DIVISOR, %mm5
+ pxor %mm7, %mm7 C l = 0
+
+ movd -4(%esi,%ecx,4), %mm0 C src high limb
+ leal -8(%esi,%ecx,4), %esi C &src[size-2]
+
+ movd PARAM_INVERSE, %mm4
+ subl $2, %ecx C size-2
+
+ C The flags set by the subl above are consumed by the jz and ja
+ C below.  Only MMX instructions sit in between and they leave
+ C EFLAGS alone.
+ C
+ C Conditional subtract, done branch-free: compute high-divisor as
+ C a 64-bit value, turn the borrow into an all-ones low dword, and
+ C use that as a mask to add the divisor back.
+ psubq %mm5, %mm0 C high-divisor
+ movq %mm0, %mm2
+
+ psrlq $32, %mm0 C -1 if underflow
+
+ pand %mm5, %mm0 C divisor if underflow
+
+ paddq %mm2, %mm0 C addback if underflow
+ jz L(inverse_last) C if size==2
+ ja L(inverse_top) C if size>2
+
+
+ C if size==1
+ C The reduced high limb is the whole result.  ebp was stored but
+ C never modified on this path, so only esi needs restoring.
+ movl SAVE_ESI, %esi
+ movd %mm0, %eax
+ emms
+ ret
+
+EPILOGUE()
+
+
+ ALIGN(16)
+PROLOGUE(mpn_mod_1c)
+deflit(`FRAME',0)
+C Variant of mpn_mod_1 with an initial carry in PARAM_CARRY.
+C This is only an entry stub.  It loads the registers the way the
+C start_1c label in the mpn_mod_1 body documents them (ecx size,
+C edx carry, esi src, ebp divisor) and jumps there.  For size==0 it
+C goes straight to the shared divide_done exit, which returns edx,
+C that is the carry unchanged.
+C esi and ebp are saved in the parameter slots SAVE_ESI and SAVE_EBP
+C after the overlapping parameters have been read.
+ movl PARAM_SIZE, %ecx
+ movl %esi, SAVE_ESI
+
+ movl PARAM_SRC, %esi
+ movl %ebp, SAVE_EBP
+
+ movl PARAM_CARRY, %edx
+ orl %ecx, %ecx
+ jz L(divide_done) C result==carry if size==0
+
+ movl PARAM_DIVISOR, %ebp
+ jmp L(start_1c)
+
+EPILOGUE()
+
+
+ ALIGN(16)
+PROLOGUE(mpn_mod_1)
+deflit(`FRAME',0)
+C Remainder of the size-limb number at src divided by a single limb.
+C Limb counts below MUL_THRESHOLD are handled by a plain divl loop,
+C larger counts branch to the multiply-by-inverse code further down.
+C This body also hosts the shared tails used by the other two entry
+C points: mpn_mod_1c joins at start_1c and mpn_preinv_mod_1 joins at
+C inverse_top or inverse_last.
+C esi and ebp are saved in the parameter slots SAVE_ESI and SAVE_EBP
+C after the overlapping parameters have been read.
+
+ movl PARAM_SIZE, %ecx
+ movl %esi, SAVE_ESI
+
+ movl PARAM_SRC, %esi
+ movl %ebp, SAVE_EBP
+
+ movl PARAM_DIVISOR, %ebp
+ xorl %edx, %edx C result 0 if size==0
+
+ orl %ecx, %ecx
+ jz L(divide_done)
+ movl -4(%esi,%ecx,4), %eax C src high limb
+
+ C If the high limb is already below the divisor it is its own
+ C remainder, so it becomes the initial carry and one limb is
+ C skipped.  Both cmovc instructions use the carry from the one
+ C cmpl, which is why edx is cleared with movl and not xorl.
+ leal -1(%ecx), %edx
+ cmpl %ebp, %eax C c if high<divisor
+
+ cmovc( %edx, %ecx) C size-1 if high<divisor
+
+ movl $0, %edx C initial carry
+ cmovc( %eax, %edx) C src high limb if high<divisor
+
+ orl %ecx, %ecx
+ jz L(divide_done) C if size==1 and skip div
+
+
+L(start_1c):
+ C eax
+ C ebx
+ C ecx size
+ C edx carry
+ C esi src
+ C edi
+ C ebp divisor
+
+ leal -4(%esi,%ecx,4), %esi C &src[size-1]
+ cmpl $MUL_THRESHOLD, %ecx
+ jae L(mul_by_inverse)
+
+
+L(divide_top):
+ C eax
+ C ebx
+ C ecx counter, limbs, decrementing
+ C edx remainder
+ C esi src, decrementing
+ C edi
+ C ebp divisor
+
+ movl (%esi), %eax
+ subl $4, %esi
+
+ C divl divides edx:eax by the divisor and leaves the remainder
+ C in edx, where it serves as the high half for the next limb.
+ divl %ebp
+
+ subl $1, %ecx
+ jnz L(divide_top)
+
+
+L(divide_done):
+ C Common exit for every path that has not touched the MMX
+ C registers, hence no emms here.  The result is in edx.
+ movl SAVE_ESI, %esi
+ movl SAVE_EBP, %ebp
+ movl %edx, %eax
+ ret
+
+
+C -----------------------------------------------------------------------------
+
+L(mul_by_inverse):
+ C eax
+ C ebx
+ C ecx size
+ C edx carry
+ C esi src
+ C edi
+ C ebp divisor
+
+ C Setup for the inverse loop: count the leading zeros l of the
+ C divisor, normalize it by shifting left l bits, compute the
+ C inverse m with one divl, and load mm4..mm7 and the initial n2
+ C in mm0 as listed at inverse_top.
+
+ bsrl %ebp, %eax C 31-l
+
+ movd %edx, %mm1 C carry
+ movl %ecx, %edx C size
+ movl $31, %ecx
+
+ C
+
+ xorl %eax, %ecx C l = leading zeros on d
+ addl $1, %eax C 32-l
+
+ shll %cl, %ebp C normalize d
+ movd %ecx, %mm7 C l
+ leal -1(%edx), %ecx C size-1
+
+ movd %eax, %mm6 C 32-l
+ movl $-1, %edx
+ movl $-1, %eax
+
+ C
+
+ subl %ebp, %edx C (b-d)-1 so edx:eax = b*(b-d)-1
+
+ divl %ebp C floor (b*(b-d)-1 / d)
+
+ movd %ebp, %mm5 C d
+ movd (%esi), %mm0 C src high limb
+ punpckldq %mm1, %mm0
+ psrlq %mm6, %mm0 C n2 = high (carry:srchigh << l)
+
+ C
+
+ movd %eax, %mm4 C m
+
+
+C The dependent chain here consists of
+C
+C 2 paddd n1+n2
+C 8 pmuludq m*(n1+n2)
+C 2 paddq n2:nadj + m*(n1+n2)
+C 2 psrlq q1
+C 8 pmuludq d*q1
+C 2 psubq (n-d)-q1*d
+C 2 psrlq high mask
+C 2 pand d masked
+C 2 paddd n2+d addback
+C --
+C 30
+C
+C But it seems to run at 31 cycles, so presumably there's something else
+C going on.
+
+
+ ALIGN(16)
+L(inverse_top):
+ C eax
+ C ebx
+ C ecx counter, size-1 to 1
+ C edx
+ C esi src, decrementing
+ C edi
+ C ebp
+ C
+ C mm0 n2
+ C mm4 m
+ C mm5 d
+ C mm6 32-l
+ C mm7 l
+ C
+ C When entered from mpn_preinv_mod_1 the counter starts at
+ C size-2, because that entry has already consumed the high limb.
+ C
+ C Each pass takes the next 32 bits n10 of the left-shifted source
+ C from two adjacent limbs, estimates the quotient limb q1 by a
+ C multiply with m, and forms n - (q1+1)*d.  The high dword of that
+ C difference is 0 or -1 and is used as a mask to add d back, so
+ C the new n2 is obtained without a branch.
+
+ ASSERT(b,`C n2<d
+ movd %mm0, %eax
+ movd %mm5, %edx
+ cmpl %edx, %eax')
+
+ movd -4(%esi), %mm1 C next src limbs
+ movd (%esi), %mm2
+ leal -4(%esi), %esi
+
+ punpckldq %mm2, %mm1
+ psrlq %mm6, %mm1 C n10
+
+ movq %mm1, %mm2 C n10
+ movq %mm1, %mm3 C n10
+ psrad $31, %mm1 C -n1
+ pand %mm5, %mm1 C -n1 & d
+ paddd %mm2, %mm1 C nadj = n10+(-n1&d), ignore overflow
+
+ psrld $31, %mm2 C n1
+ paddd %mm0, %mm2 C n2+n1
+ punpckldq %mm0, %mm1 C n2:nadj
+
+ pmuludq %mm4, %mm2 C m*(n2+n1)
+
+ paddq %mm2, %mm1 C n2:nadj + m*(n2+n1)
+
+ psrlq $32, %mm1 C q1 = high(n2:nadj + m*(n2+n1))
+
+ pmuludq %mm5, %mm1 C q1*d
+ punpckldq %mm0, %mm3 C n
+ psubq %mm5, %mm3 C n - d
+ pxor %mm0, %mm0
+
+ psubq %mm1, %mm3 C n - (q1+1)*d
+
+ por %mm3, %mm0 C remainder -> n2
+ psrlq $32, %mm3 C high n - (q1+1)*d, 0 or -1
+
+ ASSERT(be,`C 0 or -1
+ movd %mm3, %eax
+ addl $1, %eax
+ cmpl $1, %eax')
+
+ pand %mm5, %mm3 C mask & d
+
+ paddd %mm3, %mm0 C addback if necessary
+
+ subl $1, %ecx
+ jnz L(inverse_top)
+
+
+ C Least significant limb.
+ C Same code as the loop, but there's no -4(%esi) limb to fetch.
+
+L(inverse_last):
+ C eax
+ C ebx
+ C ecx
+ C edx
+ C esi &src[0]
+ C
+ C mm0 n2
+ C mm4 m
+ C mm5 d
+ C mm6 32-l
+ C mm7 l
+
+ movd (%esi), %mm1 C src[0]
+ psllq %mm7, %mm1 C n10
+
+ movq %mm1, %mm2 C n10
+ movq %mm1, %mm3 C n10
+ psrad $31, %mm1 C -n1
+ pand %mm5, %mm1 C -n1 & d
+ paddd %mm2, %mm1 C nadj = n10+(-n1&d), ignore overflow
+
+ psrld $31, %mm2 C n1
+ paddd %mm0, %mm2 C n2+n1
+ punpckldq %mm0, %mm1 C n2:nadj
+
+ pmuludq %mm4, %mm2 C m*(n2+n1)
+
+ paddq %mm2, %mm1 C n2:nadj + m*(n2+n1)
+
+ psrlq $32, %mm1 C q1 = high(n2:nadj + m*(n2+n1))
+
+ pmuludq %mm5, %mm1 C q1*d
+ punpckldq %mm0, %mm3 C n
+ psubq %mm5, %mm3 C n - d
+ pxor %mm0, %mm0
+
+ psubq %mm1, %mm3 C n - (q1+1)*d
+
+ por %mm3, %mm0 C remainder -> n2
+ psrlq $32, %mm3 C high n - (q1+1)*d, 0 or -1
+
+ ASSERT(be,`C 0 or -1
+ movd %mm3, %eax
+ addl $1, %eax
+ cmpl $1, %eax')
+
+ C The register restores are interleaved with the last MMX
+ C operations.  The final psrld by l undoes the normalization
+ C shift so the result is a remainder for the original divisor.
+ movl SAVE_EBP, %ebp
+ pand %mm5, %mm3 C mask & d
+
+ movl SAVE_ESI, %esi
+ paddd %mm3, %mm0 C addback if necessary
+
+ psrld %mm7, %mm0
+
+ movd %mm0, %eax
+
+ emms
+ ret
+
+EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/mod_1_1.asm b/gmp/mpn/x86/pentium4/sse2/mod_1_1.asm
deleted file mode 100644
index ee88babeee..0000000000
--- a/gmp/mpn/x86/pentium4/sse2/mod_1_1.asm
+++ /dev/null
@@ -1,166 +0,0 @@
-dnl x86-32 mpn_mod_1_1p for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
-
-dnl Contributed to the GNU project by Torbjorn Granlund.
-
-dnl Copyright 2009, 2010 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C TODO:
-C * Optimize. The present code was written quite straightforwardly.
-C * Optimize post-loop reduction code; it is from mod_1s_4p, thus overkill.
-C * Write a cps function that uses sse2 insns.
-
-C cycles/limb
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) ?
-C P4 model 0-1 (Willamette) ?
-C P4 model 2 (Northwood) 16
-C P4 model 3-4 (Prescott) 18
-
-C INPUT PARAMETERS
-C ap sp + 4
-C n sp + 8
-C b sp + 12
-C cps sp + 16
-
-define(`B1modb', `%mm1')
-define(`B2modb', `%mm2')
-define(`ap', `%edx')
-define(`n', `%eax')
-
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_mod_1_1p)
- push %ebx
- mov 8(%esp), ap
- mov 12(%esp), n
- mov 20(%esp), %ecx
- movd 8(%ecx), B1modb
- movd 12(%ecx), B2modb
-
- lea -4(ap,n,4), ap
-
-C FIXME: See comment in generic/mod_1_1.c.
- movd (ap), %mm7
- movd -4(ap), %mm4
- pmuludq B1modb, %mm7
- paddq %mm4, %mm7
- add $-2, n
- jz L(end)
-
- ALIGN(8)
-L(top): movq %mm7, %mm6
- psrlq $32, %mm7 C rh
- movd -8(ap), %mm0
- add $-4, ap
- pmuludq B2modb, %mm7
- pmuludq B1modb, %mm6
- add $-1, n
- paddq %mm0, %mm7
- paddq %mm6, %mm7
- jnz L(top)
-
-L(end): pcmpeqd %mm4, %mm4
- psrlq $32, %mm4 C 0x00000000FFFFFFFF
- pand %mm7, %mm4 C rl
- psrlq $32, %mm7 C rh
- pmuludq B1modb, %mm7 C rh,cl
- paddq %mm4, %mm7 C rh,rl
- movd 4(%ecx), %mm4 C cnt
- psllq %mm4, %mm7 C rh,rl normalized
- movq %mm7, %mm2 C rl in low half
- psrlq $32, %mm7 C rh
- movd (%ecx), %mm1 C bi
- pmuludq %mm7, %mm1 C qh,ql
- paddq %mm2, %mm1 C qh-1,ql
- movd %mm1, %ecx C ql
- psrlq $32, %mm1 C qh-1
- movd 16(%esp), %mm3 C b
- pmuludq %mm1, %mm3 C (qh-1) * b
- psubq %mm3, %mm2 C r in low half (could use psubd)
- movd %mm2, %eax C r
- mov 16(%esp), %ebx
- sub %ebx, %eax C r
- cmp %eax, %ecx
- lea (%eax,%ebx), %edx
- cmovc( %edx, %eax)
- movd %mm4, %ecx C cnt
- cmp %ebx, %eax
- jae L(fix)
- emms
- pop %ebx
- shr %cl, %eax
- ret
-
-L(fix): sub %ebx, %eax
- emms
- pop %ebx
- shr %cl, %eax
- ret
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1_1p_cps)
-C CAUTION: This is the same code as in k7/mod_1_1.asm
- push %ebp
- mov 12(%esp), %ebp
- push %esi
- bsr %ebp, %ecx
- push %ebx
- xor $31, %ecx
- mov 16(%esp), %esi
- sal %cl, %ebp
- mov %ebp, %edx
- not %edx
- mov $-1, %eax
- div %ebp
- mov %eax, (%esi) C store bi
- mov %ecx, 4(%esi) C store cnt
- xor %ebx, %ebx
- sub %ebp, %ebx
- mov $1, %edx
- shld %cl, %eax, %edx
- imul %edx, %ebx
- mul %ebx
- add %ebx, %edx
- not %edx
- imul %ebp, %edx
- add %edx, %ebp
- cmp %edx, %eax
- cmovc( %ebp, %edx)
- shr %cl, %ebx
- mov %ebx, 8(%esi) C store B1modb
- shr %cl, %edx
- mov %edx, 12(%esi) C store B2modb
- pop %ebx
- pop %esi
- pop %ebp
- ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/mod_1_4.asm b/gmp/mpn/x86/pentium4/sse2/mod_1_4.asm
deleted file mode 100644
index eb2edb6297..0000000000
--- a/gmp/mpn/x86/pentium4/sse2/mod_1_4.asm
+++ /dev/null
@@ -1,269 +0,0 @@
-dnl x86-32 mpn_mod_1s_4p for Pentium 4 and P6 models with SSE2 (i.e. 9,D,E,F).
-
-dnl Contributed to the GNU project by Torbjorn Granlund.
-
-dnl Copyright 2009, 2010 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C TODO:
-C * Optimize. The present code was written quite straightforwardly.
-C * Optimize post-loop reduction code.
-C * Write a cps function that uses sse2 insns.
-
-C cycles/limb
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) 3.4
-C P4 model 0-1 (Willamette) ?
-C P4 model 2 (Northwood) 4
-C P4 model 3-4 (Prescott) 4.5
-
-C INPUT PARAMETERS
-C ap sp + 4
-C n sp + 8
-C b sp + 12
-C cps sp + 16
-
-define(`B1modb', `%mm1')
-define(`B2modb', `%mm2')
-define(`B3modb', `%mm3')
-define(`B4modb', `%mm4')
-define(`B5modb', `%mm5')
-define(`ap', `%edx')
-define(`n', `%eax')
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_mod_1s_4p)
- push %ebx
- mov 8(%esp), ap
- mov 12(%esp), n
- mov 20(%esp), %ecx
-
- movd 8(%ecx), B1modb
- movd 12(%ecx), B2modb
- movd 16(%ecx), B3modb
- movd 20(%ecx), B4modb
- movd 24(%ecx), B5modb
-
- mov n, %ebx
- lea -4(ap,n,4), ap
- and $3, %ebx
- je L(b0)
- cmp $2, %ebx
- jc L(b1)
- je L(b2)
-
-L(b3): movd -4(ap), %mm7
- pmuludq B1modb, %mm7
- movd -8(ap), %mm6
- paddq %mm6, %mm7
- movd (ap), %mm6
- pmuludq B2modb, %mm6
- paddq %mm6, %mm7
- lea -24(ap), ap
- add $-3, n
- jz L(end)
- jmp L(top)
-
-L(b0): movd -8(ap), %mm7
- pmuludq B1modb, %mm7
- movd -12(ap), %mm6
- paddq %mm6, %mm7
- movd -4(ap), %mm6
- pmuludq B2modb, %mm6
- paddq %mm6, %mm7
- movd (ap), %mm6
- pmuludq B3modb, %mm6
- paddq %mm6, %mm7
- lea -28(ap), ap
- add $-4, n
- jz L(end)
- jmp L(top)
-
-L(b1): movd (ap), %mm7
- lea -16(ap), ap
- dec n
- jz L(x)
- jmp L(top)
-
-L(b2): movd -4(ap), %mm7 C rl
- punpckldq (ap), %mm7 C rh
- lea -20(ap), ap
- add $-2, n
- jz L(end)
-
- ALIGN(8)
-L(top): movd 4(ap), %mm0
- pmuludq B1modb, %mm0
- movd 0(ap), %mm6
- paddq %mm6, %mm0
-
- movd 8(ap), %mm6
- pmuludq B2modb, %mm6
- paddq %mm6, %mm0
-
- movd 12(ap), %mm6
- pmuludq B3modb, %mm6
- paddq %mm6, %mm0
-
- movq %mm7, %mm6
- psrlq $32, %mm7 C rh
- pmuludq B5modb, %mm7
- pmuludq B4modb, %mm6
-
- paddq %mm0, %mm7
- paddq %mm6, %mm7
-
- add $-16, ap
- add $-4, n
- jnz L(top)
-
-L(end): pcmpeqd %mm4, %mm4
- psrlq $32, %mm4 C 0x00000000FFFFFFFF
- pand %mm7, %mm4 C rl
- psrlq $32, %mm7 C rh
- pmuludq B1modb, %mm7 C rh,cl
- paddq %mm4, %mm7 C rh,rl
-L(x): movd 4(%ecx), %mm4 C cnt
- psllq %mm4, %mm7 C rh,rl normalized
- movq %mm7, %mm2 C rl in low half
- psrlq $32, %mm7 C rh
- movd (%ecx), %mm1 C bi
- pmuludq %mm7, %mm1 C qh,ql
- paddq %mm2, %mm1 C qh-1,ql
- movd %mm1, %ecx C ql
- psrlq $32, %mm1 C qh-1
- movd 16(%esp), %mm3 C b
- pmuludq %mm1, %mm3 C (qh-1) * b
- psubq %mm3, %mm2 C r in low half (could use psubd)
- movd %mm2, %eax C r
- mov 16(%esp), %ebx
- sub %ebx, %eax C r
- cmp %eax, %ecx
- lea (%eax,%ebx), %edx
- cmovc( %edx, %eax)
- movd %mm4, %ecx C cnt
- cmp %ebx, %eax
- jae L(fix)
- emms
- pop %ebx
- shr %cl, %eax
- ret
-
-L(fix): sub %ebx, %eax
- emms
- pop %ebx
- shr %cl, %eax
- ret
-EPILOGUE()
-
- ALIGN(16)
-PROLOGUE(mpn_mod_1s_4p_cps)
-C CAUTION: This is the same code as in k7/mod_1_4.asm
- push %ebp
- push %edi
- push %esi
- push %ebx
- mov 20(%esp), %ebp C FIXME: avoid bp for 0-idx
- mov 24(%esp), %ebx
- bsr %ebx, %ecx
- xor $31, %ecx
- sal %cl, %ebx C b << cnt
- mov %ebx, %edx
- not %edx
- mov $-1, %eax
- div %ebx
- xor %edi, %edi
- sub %ebx, %edi
- mov $1, %esi
- mov %eax, (%ebp) C store bi
- mov %ecx, 4(%ebp) C store cnt
- shld %cl, %eax, %esi
- imul %edi, %esi
- mov %eax, %edi
- mul %esi
-
- add %esi, %edx
- shr %cl, %esi
- mov %esi, 8(%ebp) C store B1modb
-
- not %edx
- imul %ebx, %edx
- lea (%edx,%ebx), %esi
- cmp %edx, %eax
- cmovnc( %edx, %esi)
- mov %edi, %eax
- mul %esi
-
- add %esi, %edx
- shr %cl, %esi
- mov %esi, 12(%ebp) C store B2modb
-
- not %edx
- imul %ebx, %edx
- lea (%edx,%ebx), %esi
- cmp %edx, %eax
- cmovnc( %edx, %esi)
- mov %edi, %eax
- mul %esi
-
- add %esi, %edx
- shr %cl, %esi
- mov %esi, 16(%ebp) C store B3modb
-
- not %edx
- imul %ebx, %edx
- lea (%edx,%ebx), %esi
- cmp %edx, %eax
- cmovnc( %edx, %esi)
- mov %edi, %eax
- mul %esi
-
- add %esi, %edx
- shr %cl, %esi
- mov %esi, 20(%ebp) C store B4modb
-
- not %edx
- imul %ebx, %edx
- add %edx, %ebx
- cmp %edx, %eax
- cmovnc( %edx, %ebx)
-
- shr %cl, %ebx
- mov %ebx, 24(%ebp) C store B5modb
-
- pop %ebx
- pop %esi
- pop %edi
- pop %ebp
- ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm b/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm
index 31e25b79bc..1598b41785 100644
--- a/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm
+++ b/gmp/mpn/x86/pentium4/sse2/mod_34lsub1.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium 4 mpn_mod_34lsub1 -- remainder modulo 2^24-1.
-dnl Copyright 2000-2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium4/sse2/mode1o.asm b/gmp/mpn/x86/pentium4/sse2/mode1o.asm
index 778c478169..2f0b177a00 100644
--- a/gmp/mpn/x86/pentium4/sse2/mode1o.asm
+++ b/gmp/mpn/x86/pentium4/sse2/mode1o.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium-4 mpn_modexact_1_odd -- mpn by limb exact remainder.
dnl Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -112,7 +101,7 @@ ifdef(`PIC',`
psubd %mm0, %mm6 C inv = 2*inv - inv*inv*d
- ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+ ASSERT(e,` C expect d*inv == 1 mod 2^BITS_PER_MP_LIMB
pushl %eax FRAME_pushl()
movd %mm6, %eax
imul PARAM_DIVISOR, %eax
@@ -124,13 +113,13 @@ ifdef(`PIC',`
C The dependent chain here is as follows.
C
-C latency
-C psubq s = (src-cbit) - climb 2
-C pmuludq q = s*inverse 8
-C pmuludq prod = q*divisor 8
-C psrlq climb = high(prod) 2
-C --
-C 20
+C latency
+C psubq s = (src-cbit) - climb 2
+C pmuludq q = s*inverse 8
+C pmuludq prod = q*divisor 8
+C psrlq climb = high(prod) 2
+C --
+C 20
C
C Yet the loop measures 19.0 c/l, so obviously there's something gained
C there over a straight reading of the chip documentation.
diff --git a/gmp/mpn/x86/pentium4/sse2/mul_1.asm b/gmp/mpn/x86/pentium4/sse2/mul_1.asm
index 6347b8bf62..07be951921 100644
--- a/gmp/mpn/x86/pentium4/sse2/mul_1.asm
+++ b/gmp/mpn/x86/pentium4/sse2/mul_1.asm
@@ -1,48 +1,37 @@
dnl mpn_mul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
-dnl Copyright 2005, 2007, 2011 Free Software Foundation, Inc.
-
+dnl Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl
dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) 4.17
-C P6 model 13 (Dothan) 4.17
-C P4 model 0-1 (Willamette) 4
-C P4 model 2 (Northwood) 4
-C P4 model 3-4 (Prescott) 4.55
-
C TODO:
C * Tweak eax/edx offsets in loop as to save some lea's
C * Perhaps software pipeline small-case code
+C cycles/limb
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) 4.17
+C P4 model 0-1 (Willamette): 4
+C P4 model 2 (Northwood): 4
+C P4 model 3-4 (Prescott): 4.55
+
C INPUT PARAMETERS
C rp sp + 4
C up sp + 8
@@ -51,13 +40,22 @@ C v0 sp + 16
TEXT
ALIGN(16)
+PROLOGUE(mpn_mul_1c)
+ mov 4(%esp), %edx
+ mov 8(%esp), %eax
+ mov 12(%esp), %ecx
+ movd 16(%esp), %mm7
+ movd 20(%esp), %mm6
+ jmp L(ent)
+EPILOGUE()
+ ALIGN(16)
PROLOGUE(mpn_mul_1)
- pxor %mm6, %mm6
-L(ent): mov 4(%esp), %edx
+ mov 4(%esp), %edx
mov 8(%esp), %eax
mov 12(%esp), %ecx
movd 16(%esp), %mm7
- cmp $4, %ecx
+ pxor %mm6, %mm6
+L(ent): cmp $4, %ecx
jnc L(big)
L(lp0): movd (%eax), %mm0
@@ -158,7 +156,3 @@ L(end): pmuludq %mm7, %mm2
emms
ret
EPILOGUE()
-PROLOGUE(mpn_mul_1c)
- movd 20(%esp), %mm6
- jmp L(ent)
-EPILOGUE()
diff --git a/gmp/mpn/x86/pentium4/sse2/mul_basecase.asm b/gmp/mpn/x86/pentium4/sse2/mul_basecase.asm
index 6e3775ae09..2628e5eb72 100644
--- a/gmp/mpn/x86/pentium4/sse2/mul_basecase.asm
+++ b/gmp/mpn/x86/pentium4/sse2/mul_basecase.asm
@@ -1,32 +1,21 @@
dnl mpn_mul_basecase for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
dnl Copyright 2001, 2002, 2005, 2007 Free Software Foundation, Inc.
-
+dnl
dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium4/sse2/popcount.asm b/gmp/mpn/x86/pentium4/sse2/popcount.asm
index b8238b9b66..cb982ade46 100644
--- a/gmp/mpn/x86/pentium4/sse2/popcount.asm
+++ b/gmp/mpn/x86/pentium4/sse2/popcount.asm
@@ -1,66 +1,52 @@
dnl X86-32 and X86-64 mpn_popcount using SSE2.
-dnl Copyright 2006, 2007, 2011 Free Software Foundation, Inc.
-
+dnl Copyright 2006, 2007 Free Software Foundation, Inc.
+dnl
dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C 32-bit popcount hamdist
-C cycles/limb cycles/limb
-C P5 -
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) 4
-C P4 model 0 (Willamette) ?
-C P4 model 1 (?) ?
-C P4 model 2 (Northwood) 3.9
-C P4 model 3 (Prescott) ?
-C P4 model 4 (Nocona) ?
-C AMD K6 -
-C AMD K7 -
-C AMD K8 ?
-
-C 64-bit popcount hamdist
-C cycles/limb cycles/limb
-C P4 model 4 (Nocona): 8
-C AMD K8,K9 7.5
-C AMD K10 3.5
-C Intel core2 3.68
-C Intel corei 3.15
-C Intel atom 10.8
-C VIA nano 6.5
+C 32-bit popcount hamdist
+C cycles/limb cycles/limb
+C P5: -
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) 4
+C P4 model 0 (Willamette) ?
+C P4 model 1 (?) ?
+C P4 model 2 (Northwood) 3.9
+C P4 model 3 (Prescott) ?
+C P4 model 4 (Nocona) ?
+C K6: -
+C K7: -
+C K8: ?
+
+C 64-bit popcount hamdist
+C cycles/limb cycles/limb
+C P4 model 4 (Nocona): 8
+C K8: 7.5
+C K10: 3.5
+C P6-15: 3.68
C TODO
C * Make a mpn_hamdist based on this. Alignment could either be handled by
C using movdqu for one operand and movdqa for the other, or by painfully
-C shifting as we go. Unfortunately, there seem to be no usable shift
+C shifting as we go. Unfortunately, there seem to be no usable shift
C instruction, except for one that takes an immediate count.
C * It would probably be possible to cut a few cycles/limb using software
C pipelining.
diff --git a/gmp/mpn/x86/pentium4/sse2/rsh1add_n.asm b/gmp/mpn/x86/pentium4/sse2/rsh1add_n.asm
index f421d1323e..bbf43245cb 100644
--- a/gmp/mpn/x86/pentium4/sse2/rsh1add_n.asm
+++ b/gmp/mpn/x86/pentium4/sse2/rsh1add_n.asm
@@ -1,32 +1,21 @@
dnl Intel Pentium-4 mpn_rsh1add_n -- mpn (x+y)/2
-dnl Copyright 2001-2004 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/pentium4/sse2/sqr_basecase.asm b/gmp/mpn/x86/pentium4/sse2/sqr_basecase.asm
index 2dd57d25d9..fc56f164ed 100644
--- a/gmp/mpn/x86/pentium4/sse2/sqr_basecase.asm
+++ b/gmp/mpn/x86/pentium4/sse2/sqr_basecase.asm
@@ -1,32 +1,21 @@
dnl mpn_sqr_basecase for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
dnl Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
-
+dnl
dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -41,7 +30,7 @@ C * Look into different loop alignment, we now expand the code about 50 bytes
C with possibly needless alignment.
C * Use OSP, should solve feed-in latency problems.
C * Address relative slowness for un<=3 for Pentium M. The old code is there
-C considerably faster. (1:20/14, 2:34:32, 3:66/57)
+C considerably faster. (1:20/14, 2:34:32, 3:66/57)
C INPUT PARAMETERS
C rp sp + 4
diff --git a/gmp/mpn/x86/pentium4/sse2/sub_n.asm b/gmp/mpn/x86/pentium4/sse2/sub_n.asm
index 5ba1c018ec..02d5f01474 100644
--- a/gmp/mpn/x86/pentium4/sse2/sub_n.asm
+++ b/gmp/mpn/x86/pentium4/sse2/sub_n.asm
@@ -1,44 +1,37 @@
dnl Intel Pentium-4 mpn_sub_n -- mpn subtraction.
dnl Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C dst!=src1,2 dst==src1 dst==src2
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) ?
-C P4 model 0-1 (Willamette) ?
-C P4 model 2 (Northwood) 4 6 6
-C P4 model 3-4 (Prescott) 4.25 7.5 7.5
+C P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2
+C 6.0 cycles/limb if dst==src1 or dst==src2
+C P4 Prescott: >= 5 cycles/limb
+
+
+C mp_limb_t mpn_sub_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t mpn_sub_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+C
+C The main loop code is 2x unrolled so that the carry bit can alternate
+C between mm0 and mm1.
defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
@@ -54,8 +47,10 @@ define(SAVE_EBX,`PARAM_SRC1')
PROLOGUE(mpn_sub_nc)
deflit(`FRAME',0)
+
movd PARAM_CARRY, %mm0
jmp L(start_nc)
+
EPILOGUE()
ALIGN(8)
@@ -63,16 +58,16 @@ PROLOGUE(mpn_sub_n)
deflit(`FRAME',0)
pxor %mm0, %mm0
L(start_nc):
- mov PARAM_SRC1, %eax
- mov %ebx, SAVE_EBX
- mov PARAM_SRC2, %ebx
- mov PARAM_DST, %edx
- mov PARAM_SIZE, %ecx
+ movl PARAM_SRC1, %eax
+ movl %ebx, SAVE_EBX
+ movl PARAM_SRC2, %ebx
+ movl PARAM_DST, %edx
+ movl PARAM_SIZE, %ecx
- lea (%eax,%ecx,4), %eax C src1 end
- lea (%ebx,%ecx,4), %ebx C src2 end
- lea (%edx,%ecx,4), %edx C dst end
- neg %ecx C -size
+ leal (%eax,%ecx,4), %eax C src1 end
+ leal (%ebx,%ecx,4), %ebx C src2 end
+ leal (%edx,%ecx,4), %edx C dst end
+ negl %ecx C -size
L(top):
C eax src1 end
@@ -90,7 +85,7 @@ L(top):
psrlq $63, %mm1
- add $1, %ecx
+ addl $1, %ecx
jz L(done_mm1)
movd (%eax,%ecx,4), %mm0
@@ -102,17 +97,18 @@ L(top):
psrlq $63, %mm0
- add $1, %ecx
+ addl $1, %ecx
jnz L(top)
+
movd %mm0, %eax
- mov SAVE_EBX, %ebx
+ movl SAVE_EBX, %ebx
emms
ret
L(done_mm1):
movd %mm1, %eax
- mov SAVE_EBX, %ebx
+ movl SAVE_EBX, %ebx
emms
ret
diff --git a/gmp/mpn/x86/pentium4/sse2/submul_1.asm b/gmp/mpn/x86/pentium4/sse2/submul_1.asm
index 020675bd7b..ceb41f2ac0 100644
--- a/gmp/mpn/x86/pentium4/sse2/submul_1.asm
+++ b/gmp/mpn/x86/pentium4/sse2/submul_1.asm
@@ -1,71 +1,60 @@
dnl Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and
dnl subtract the result from a second limb vector.
-dnl Copyright 2001, 2002, 2008, 2010 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) 6.8
-C P6 model 13 (Dothan) 6.9
-C P4 model 0-1 (Willamette) ?
-C P4 model 2 (Northwood) 5.87
-C P4 model 3-4 (Prescott) 6.5
+C P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon
+C (stepping 10).
-C This code represents a step forwards compared to the code available before
-C GMP 5.1, but it is not carefully tuned for either P6 or P4. In fact, it is
-C not good for P6. For P4 it saved a bit over 1 c/l for both Northwood and
-C Prescott compared to the old code.
+
+C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult);
+C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t mult, mp_limb_t carry);
+C
+C This code is not particularly good at 7 c/l. The dependent chain is only
+C 4 c/l and there are only 4 MMX unit instructions, so it's not clear why that
+C speed isn't achieved.
C
C The arrangements made here to get a two instruction dependent chain are
-C slightly subtle. In the loop the carry (or borrow rather) is a negative so
-C that a paddq can be used to give a low limb ready to store, and a high limb
-C ready to become the new carry after a psrlq.
+C slightly subtle. In the loop the carry (or borrow rather) is a negative
+C so that a paddq can be used to give a low limb ready to store, and a high
+C limb ready to become the new carry after a psrlq.
C
-C If the carry was a simple twos complement negative then the psrlq shift would
-C need to bring in 0 bits or 1 bits according to whether the high was zero or
-C non-zero, since a non-zero value would represent a negative needing sign
-C extension. That wouldn't be particularly easy to arrange and certainly would
-C add an instruction to the dependent chain, so instead an offset is applied so
-C that the high limb will be 0xFFFFFFFF+c. With c in the range -0xFFFFFFFF to
-C 0, the value 0xFFFFFFFF+c is in the range 0 to 0xFFFFFFFF and is therefore
-C always positive and can always have 0 bits shifted in, which is what psrlq
-C does.
+C If the carry was a simple twos complement negative then the psrlq shift
+C would need to bring in 0 bits or 1 bits according to whether the high was
+C zero or non-zero, since a non-zero value would represent a negative
+C needing sign extension. That wouldn't be particularly easy to arrange and
+C certainly would add an instruction to the dependent chain, so instead an
+C offset is applied so that the high limb will be 0xFFFFFFFF+c. With c in
+C the range -0xFFFFFFFF to 0, the value 0xFFFFFFFF+c is in the range 0 to
+C 0xFFFFFFFF and is therefore always positive and can always have 0 bits
+C shifted in, which is what psrlq does.
C
C The extra 0xFFFFFFFF must be subtracted before c is used, but that can be
C done off the dependent chain. The total adjustment then is to add
-C 0xFFFFFFFF00000000 to offset the new carry, and subtract 0x00000000FFFFFFFF
-C to remove the offset from the current carry, for a net add of
-C 0xFFFFFFFE00000001. In the code this is applied to the destination limb when
-C fetched.
+C 0xFFFFFFFF00000000 to offset the new carry, and subtract
+C 0x00000000FFFFFFFF to remove the offset from the current carry, for a net
+C add of 0xFFFFFFFE00000001. In the code this is applied to the destination
+C limb when fetched.
C
C It's also possible to view the 0xFFFFFFFF adjustment as a ones-complement
C negative, which is how it's undone for the return value, but that doesn't
@@ -91,16 +80,16 @@ deflit(`FRAME',0)
pxor %mm1, %mm1 C initial borrow
L(start_1c):
- mov PARAM_SRC, %eax
+ movl PARAM_SRC, %eax
pcmpeqd %mm0, %mm0
movd PARAM_MULTIPLIER, %mm7
pcmpeqd %mm6, %mm6
- mov PARAM_DST, %edx
+ movl PARAM_DST, %edx
psrlq $32, %mm0 C 0x00000000FFFFFFFF
- mov PARAM_SIZE, %ecx
+ movl PARAM_SIZE, %ecx
psllq $32, %mm6 C 0xFFFFFFFF00000000
psubq %mm0, %mm6 C 0xFFFFFFFE00000001
@@ -108,75 +97,32 @@ L(start_1c):
psubq %mm1, %mm0 C 0xFFFFFFFF - borrow
- movd (%eax), %mm3 C up
- movd (%edx), %mm4 C rp
-
- add $-1, %ecx
- paddq %mm6, %mm4 C add 0xFFFFFFFE00000001
- pmuludq %mm7, %mm3
- jnz L(gt1)
- psubq %mm3, %mm4 C prod
- paddq %mm4, %mm0 C borrow
- movd %mm0, (%edx) C result
- jmp L(rt)
-
-L(gt1): movd 4(%eax), %mm1 C up
- movd 4(%edx), %mm2 C rp
-
- add $-1, %ecx
- jz L(eev)
-
- ALIGN(16)
-L(top): paddq %mm6, %mm2 C add 0xFFFFFFFE00000001
+ C eax src, incrementing
+ C ebx
+ C ecx loop counter, decrementing
+ C edx dst, incrementing
+ C
+ C mm0 0xFFFFFFFF - borrow
+ C mm6 0xFFFFFFFE00000001
+ C mm7 multiplier
+
+L(loop):
+ movd (%eax), %mm1 C src
+ leal 4(%eax), %eax
+ movd (%edx), %mm2 C dst
+ paddq %mm6, %mm2 C add 0xFFFFFFFE00000001
pmuludq %mm7, %mm1
- psubq %mm3, %mm4 C prod
- movd 8(%eax), %mm3 C up
- paddq %mm4, %mm0 C borrow
- movd 8(%edx), %mm4 C rp
- movd %mm0, (%edx) C result
- psrlq $32, %mm0
-
- add $-1, %ecx
- jz L(eod)
-
- paddq %mm6, %mm4 C add 0xFFFFFFFE00000001
- pmuludq %mm7, %mm3
psubq %mm1, %mm2 C prod
- movd 12(%eax), %mm1 C up
paddq %mm2, %mm0 C borrow
- movd 12(%edx), %mm2 C rp
- movd %mm0, 4(%edx) C result
- psrlq $32, %mm0
-
- lea 8(%eax), %eax
- lea 8(%edx), %edx
- add $-1, %ecx
- jnz L(top)
-
-
-L(eev): paddq %mm6, %mm2 C add 0xFFFFFFFE00000001
- pmuludq %mm7, %mm1
- psubq %mm3, %mm4 C prod
- paddq %mm4, %mm0 C borrow
+ subl $1, %ecx
movd %mm0, (%edx) C result
psrlq $32, %mm0
- psubq %mm1, %mm2 C prod
- paddq %mm2, %mm0 C borrow
- movd %mm0, 4(%edx) C result
-L(rt): psrlq $32, %mm0
+ leal 4(%edx), %edx
+ jnz L(loop)
+
movd %mm0, %eax
- not %eax
+ notl %eax
emms
ret
-L(eod): paddq %mm6, %mm4 C add 0xFFFFFFFE00000001
- pmuludq %mm7, %mm3
- psubq %mm1, %mm2 C prod
- paddq %mm2, %mm0 C borrow
- movd %mm0, 4(%edx) C result
- psrlq $32, %mm0
- psubq %mm3, %mm4 C prod
- paddq %mm4, %mm0 C borrow
- movd %mm0, 8(%edx) C result
- jmp L(rt)
EPILOGUE()
diff --git a/gmp/mpn/x86/rshift.asm b/gmp/mpn/x86/rshift.asm
index a60dcaa4b2..8e33eabd61 100644
--- a/gmp/mpn/x86/rshift.asm
+++ b/gmp/mpn/x86/rshift.asm
@@ -1,43 +1,33 @@
dnl x86 mpn_rshift -- mpn right shift.
-dnl Copyright 1992, 1994, 1996, 1999-2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl Foundation, Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C P54 7.5
-C P55 7.0
-C P6 2.5
-C K6 4.5
-C K7 5.0
-C P4 16.5
+C P54: 7.5
+C P55: 7.0
+C P6: 2.5
+C K6: 4.5
+C K7: 5.0
+C P4: 16.5
C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
diff --git a/gmp/mpn/x86/sec_tabselect.asm b/gmp/mpn/x86/sec_tabselect.asm
deleted file mode 100644
index c7c2e059f1..0000000000
--- a/gmp/mpn/x86/sec_tabselect.asm
+++ /dev/null
@@ -1,115 +0,0 @@
-dnl x86 mpn_sec_tabselect.
-
-dnl Copyright 2011 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C cycles/limb
-C P5 ?
-C P6 model 0-8,10-12 ?
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) ?
-C P4 model 0 (Willamette) ?
-C P4 model 1 (?) ?
-C P4 model 2 (Northwood) 4.5
-C P4 model 3 (Prescott) ?
-C P4 model 4 (Nocona) ?
-C Intel Atom ?
-C AMD K6 ?
-C AMD K7 3.4
-C AMD K8 ?
-C AMD K10 ?
-
-C NOTES
-C * This has not been tuned for any specific processor. Its speed should not
-C be too bad, though.
-C * Using SSE2 could result in many-fold speedup.
-
-C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
-define(`rp', `%edi')
-define(`tp', `%esi')
-define(`n', `%ebx')
-define(`nents', `%ecx')
-define(`which', `36(%esp)')
-
-define(`i', `%ebp')
-define(`maskp', `20(%esp)')
-define(`maskn', `32(%esp)')
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_sec_tabselect)
- push %edi
- push %esi
- push %ebx
- push %ebp
- mov 20(%esp), rp
- mov 24(%esp), tp
- mov 28(%esp), n
- mov 32(%esp), nents
-
- lea (rp,n,4), rp
- lea (tp,n,4), tp
- sub nents, which
-L(outer):
- mov which, %eax
- add nents, %eax
- neg %eax C set CF iff 'which' != k
- sbb %eax, %eax
- mov %eax, maskn
- not %eax
- mov %eax, maskp
-
- mov n, i
- neg i
-
- ALIGN(16)
-L(top): mov (tp,i,4), %eax
- and maskp, %eax
- mov (rp,i,4), %edx
- and maskn, %edx
- or %edx, %eax
- mov %eax, (rp,i,4)
- inc i
- js L(top)
-
-L(end): mov n, %eax
- lea (tp,%eax,4), tp
- dec nents
- jne L(outer)
-
-L(outer_end):
- pop %ebp
- pop %ebx
- pop %esi
- pop %edi
- ret
-EPILOGUE()
diff --git a/gmp/mpn/x86/sqr_basecase.asm b/gmp/mpn/x86/sqr_basecase.asm
index 39f8a89805..9a7e13327b 100644
--- a/gmp/mpn/x86/sqr_basecase.asm
+++ b/gmp/mpn/x86/sqr_basecase.asm
@@ -1,43 +1,32 @@
dnl x86 generic mpn_sqr_basecase -- square an mpn number.
dnl Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
-
+dnl
dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/crossproduct cycles/triangleproduct
-C P5
-C P6
-C K6
-C K7
-C P4
+C P5:
+C P6:
+C K6:
+C K7:
+C P4:
C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
diff --git a/gmp/mpn/x86/t-zdisp.sh b/gmp/mpn/x86/t-zdisp.sh
index 61efdd6c4f..6c55067b6c 100755
--- a/gmp/mpn/x86/t-zdisp.sh
+++ b/gmp/mpn/x86/t-zdisp.sh
@@ -2,31 +2,20 @@
#
# Copyright 2000 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
#
-# * the GNU Lesser General Public License as published by the Free
-# Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
#
-# or
-#
-# * the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any
-# later version.
-#
-# or both in parallel, as here.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# for more details.
-#
-# You should have received copies of the GNU General Public License and the
-# GNU Lesser General Public License along with the GNU MP Library. If not,
-# see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
# Usage: cd $(builddir)/mpn
diff --git a/gmp/mpn/x86/t-zdisp2.pl b/gmp/mpn/x86/t-zdisp2.pl
index b441b6579a..d5e2d93dc0 100755
--- a/gmp/mpn/x86/t-zdisp2.pl
+++ b/gmp/mpn/x86/t-zdisp2.pl
@@ -2,31 +2,20 @@
#
# Copyright 2001, 2002 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
#
-# * the GNU Lesser General Public License as published by the Free
-# Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
#
-# or
-#
-# * the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any
-# later version.
-#
-# or both in parallel, as here.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# for more details.
-#
-# You should have received copies of the GNU General Public License and the
-# GNU Lesser General Public License along with the GNU MP Library. If not,
-# see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
# Usage: cd $(builddir)/mpn
@@ -82,7 +71,7 @@ sub process {
}
}
-# Ensure we're using the right SQR_TOOM2_THRESHOLD for the part of the
+# Ensure we're using the right SQR_KARATSUBA_THRESHOLD for the part of the
# tree being processed.
sub process_mparam {
my $file = "$File::Find::dir/gmp-mparam.h";
@@ -90,10 +79,10 @@ sub process_mparam {
print "$file\n" if $opt{'t'};
open MPARAM, "<$file" or die;
while (<MPARAM>) {
- if (/^#define SQR_TOOM2_THRESHOLD[ \t]*([0-9][0-9]*)/) {
+ if (/^#define SQR_KARATSUBA_THRESHOLD[ \t]*([0-9][0-9]*)/) {
open KARA, ">$tempfile" or die;
- print KARA "define(\`SQR_TOOM2_THRESHOLD',$1)\n\n";
- print "define(\`SQR_TOOM2_THRESHOLD',$1)\n" if $opt{'t'};
+ print KARA "define(\`SQR_KARATSUBA_THRESHOLD',$1)\n\n";
+ print "define(\`SQR_KARATSUBA_THRESHOLD',$1)\n" if $opt{'t'};
close KARA or die;
last;
}
diff --git a/gmp/mpn/x86/udiv.asm b/gmp/mpn/x86/udiv.asm
index a3ee08860f..5c7d3f3533 100644
--- a/gmp/mpn/x86/udiv.asm
+++ b/gmp/mpn/x86/udiv.asm
@@ -1,32 +1,21 @@
dnl x86 mpn_udiv_qrnnd -- 2 by 1 limb division
dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/umul.asm b/gmp/mpn/x86/umul.asm
index 34fe434400..d0116de6d9 100644
--- a/gmp/mpn/x86/umul.asm
+++ b/gmp/mpn/x86/umul.asm
@@ -1,32 +1,21 @@
dnl mpn_umul_ppmm -- 1x1->2 limb multiplication
dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/x86/x86-defs.m4 b/gmp/mpn/x86/x86-defs.m4
index 1538b6820c..5b4a8e1fad 100644
--- a/gmp/mpn/x86/x86-defs.m4
+++ b/gmp/mpn/x86/x86-defs.m4
@@ -4,33 +4,23 @@ divert(-1)
dnl m4 macros for x86 assembler.
-dnl Copyright 1999-2003, 2007, 2010, 2012 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl Inc.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
dnl Notes:
@@ -51,7 +41,7 @@ dnl This is only a problem in macro definitions, not in ordinary text,
dnl and not in macro parameters like text passed to forloop() or ifdef().
-deflit(GMP_LIMB_BYTES, 4)
+deflit(BYTES_PER_MP_LIMB, 4)
dnl Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL. We
@@ -68,41 +58,24 @@ dnl order they appear in that structure.
define(CPUVEC_FUNCS_LIST,
``add_n',
-`addlsh1_n',
-`addlsh2_n',
`addmul_1',
-`addmul_2',
-`bdiv_dbm1c',
-`cnd_add_n',
-`cnd_sub_n',
-`com',
`copyd',
`copyi',
`divexact_1',
+`divexact_by3c',
`divrem_1',
`gcd_1',
`lshift',
-`lshiftc',
`mod_1',
-`mod_1_1p',
-`mod_1_1p_cps',
-`mod_1s_2p',
-`mod_1s_2p_cps',
-`mod_1s_4p',
-`mod_1s_4p_cps',
`mod_34lsub1',
`modexact_1c_odd',
`mul_1',
`mul_basecase',
-`mullo_basecase',
`preinv_divrem_1',
`preinv_mod_1',
-`redc_1',
-`redc_2',
`rshift',
`sqr_basecase',
`sub_n',
-`sublsh1_n',
`submul_1'')
@@ -922,7 +895,7 @@ dnl movl_code_address(L(foo),%eax)
dnl
dnl This macro is only meant for use in ASSERT()s or when testing, since
dnl the PIC sequence it generates will want to be done with a ret balancing
-dnl the call on CPUs with return address branch prediction.
+dnl the call on CPUs with return address branch prediction.
dnl
dnl The addl generated here has a backward reference to the label, and so
dnl won't suffer from the two forwards references bug in old gas (described
@@ -955,9 +928,7 @@ m4_assert_numargs(1)
dnl Usage LEA(symbol,reg)
-define(`LEA',
-m4_assert_numargs(2)
-`ifdef(`PIC',`
+define(`LEA',`
define(`EPILOGUE_cpu',
`
L(movl_eip_`'substr($2,1)):
@@ -965,12 +936,11 @@ L(movl_eip_`'substr($2,1)):
ret_internal
SIZE($'`1, .-$'`1)')
- call L(movl_eip_`'substr($2,1))
- addl $_GLOBAL_OFFSET_TABLE_, $2
- movl $1@GOT($2), $2
-',`
- movl `$'$1, $2
-')')
+ call L(movl_eip_`'substr($2,1))
+ addl $_GLOBAL_OFFSET_TABLE_, $2
+ movl $1@GOT($2), $2
+')
+
define(`DEF_OBJECT',
m4_assert_numargs_range(1,2)
@@ -983,17 +953,4 @@ define(`END_OBJECT',
m4_assert_numargs(1)
` SIZE(`$1',.-`$1')')
-dnl Usage: CALL(funcname)
-dnl
-
-define(`CALL',
-m4_assert_numargs(1)
-`ifdef(`PIC',
- `call GSYM_PREFIX`'$1@PLT',
- `call GSYM_PREFIX`'$1')')
-
-ifdef(`PIC',
-`define(`PIC_WITH_EBX')',
-`undefine(`PIC_WITH_EBX')')
-
divert`'dnl