summary | refs | log | tree | commit | diff
path: root/gmp/mpn/alpha
diff options
context:
space:
mode:
Diffstat (limited to 'gmp/mpn/alpha')
-rw-r--r-- gmp/mpn/alpha/README 34
-rw-r--r-- gmp/mpn/alpha/add_n.asm 239
-rw-r--r-- gmp/mpn/alpha/addmul_1.asm 31
-rw-r--r-- gmp/mpn/alpha/alpha-defs.m4 33
-rw-r--r-- gmp/mpn/alpha/aorslsh1_n.asm 226
-rw-r--r-- gmp/mpn/alpha/aorslsh2_n.asm 167
-rw-r--r-- gmp/mpn/alpha/bdiv_dbm1c.asm 31
-rw-r--r-- gmp/mpn/alpha/cntlz.asm 31
-rw-r--r-- gmp/mpn/alpha/copyd.asm 31
-rw-r--r-- gmp/mpn/alpha/copyi.asm 31
-rw-r--r-- gmp/mpn/alpha/default.m4 49
-rw-r--r-- gmp/mpn/alpha/dive_1.c 27
-rw-r--r-- gmp/mpn/alpha/diveby3.asm (renamed from gmp/mpn/alpha/ev5/diveby3.asm) 40
-rw-r--r-- gmp/mpn/alpha/divrem_2.asm 38
-rw-r--r-- gmp/mpn/alpha/ev5/add_n.asm 146
-rw-r--r-- gmp/mpn/alpha/ev5/com_n.asm (renamed from gmp/mpn/alpha/com.asm) 39
-rw-r--r-- gmp/mpn/alpha/ev5/gmp-mparam.h 242
-rw-r--r-- gmp/mpn/alpha/ev5/lshift.asm 171
-rw-r--r-- gmp/mpn/alpha/ev5/rshift.asm 169
-rw-r--r-- gmp/mpn/alpha/ev5/sub_n.asm 146
-rw-r--r-- gmp/mpn/alpha/ev6/add_n.asm 31
-rw-r--r-- gmp/mpn/alpha/ev6/aorslsh1_n.asm 172
-rw-r--r-- gmp/mpn/alpha/ev6/aorsmul_1.asm 33
-rw-r--r-- gmp/mpn/alpha/ev6/gmp-mparam.h 257
-rw-r--r-- gmp/mpn/alpha/ev6/mod_1_4.asm 337
-rw-r--r-- gmp/mpn/alpha/ev6/mul_1.asm 35
-rw-r--r-- gmp/mpn/alpha/ev6/nails/README 27
-rw-r--r-- gmp/mpn/alpha/ev6/nails/addmul_1.asm 35
-rw-r--r-- gmp/mpn/alpha/ev6/nails/addmul_2.asm 33
-rw-r--r-- gmp/mpn/alpha/ev6/nails/addmul_3.asm 33
-rw-r--r-- gmp/mpn/alpha/ev6/nails/addmul_4.asm 33
-rw-r--r-- gmp/mpn/alpha/ev6/nails/aors_n.asm 33
-rw-r--r-- gmp/mpn/alpha/ev6/nails/gmp-mparam.h 38
-rw-r--r-- gmp/mpn/alpha/ev6/nails/mul_1.asm 37
-rw-r--r-- gmp/mpn/alpha/ev6/nails/submul_1.asm 37
-rw-r--r-- [-rwxr-xr-x] gmp/mpn/alpha/ev6/slot.pl 39
-rw-r--r-- gmp/mpn/alpha/ev6/sqr_diagonal.asm 115
-rw-r--r-- gmp/mpn/alpha/ev6/sub_n.asm 31
-rw-r--r-- gmp/mpn/alpha/ev67/gcd_1.asm 31
-rw-r--r-- gmp/mpn/alpha/ev67/hamdist.asm 31
-rw-r--r-- gmp/mpn/alpha/ev67/popcount.asm 31
-rw-r--r-- gmp/mpn/alpha/gmp-mparam.h 45
-rw-r--r-- gmp/mpn/alpha/invert_limb.asm 399
-rw-r--r-- gmp/mpn/alpha/lshift.asm 193
-rw-r--r-- gmp/mpn/alpha/mod_34lsub1.asm 31
-rw-r--r-- gmp/mpn/alpha/mode1o.asm 33
-rw-r--r-- gmp/mpn/alpha/mul_1.asm 31
-rw-r--r-- gmp/mpn/alpha/rshift.asm 193
-rw-r--r-- gmp/mpn/alpha/sec_tabselect.asm 137
-rw-r--r-- gmp/mpn/alpha/sqr_diag_addlsh1.asm 93
-rw-r--r-- gmp/mpn/alpha/sqr_diagonal.asm 65
-rw-r--r-- gmp/mpn/alpha/sub_n.asm 243
-rw-r--r-- gmp/mpn/alpha/submul_1.asm 31
-rw-r--r-- gmp/mpn/alpha/umul.asm 31
-rw-r--r-- gmp/mpn/alpha/unicos.m4 38
55 files changed, 2127 insertions, 2806 deletions
diff --git a/gmp/mpn/alpha/README b/gmp/mpn/alpha/README
index 09c2f04047..3578c53b85 100644
--- a/gmp/mpn/alpha/README
+++ b/gmp/mpn/alpha/README
@@ -1,30 +1,20 @@
-Copyright 1996, 1997, 1999-2005 Free Software Foundation, Inc.
+Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
@@ -46,7 +36,7 @@ Cray T3 code is very very different...
them to "$6" or "$f6" where necessary.
"0x" introduces a hex constant in gas and DEC as, but on Unicos "^X" is
-required. The X() macro accommodates this difference.
+required. The X() macro accomodates this difference.
"cvttqc" is required by DEC as, "cvttq/c" is required by Unicos, and gas will
accept either. We use cvttqc and have an m4 define expand to cvttq/c where
@@ -70,7 +60,7 @@ RELEVANT OPTIMIZATION ISSUES
EV4
1. This chip has very limited store bandwidth. The on-chip L1 cache is write-
- through, and a cache line is transferred from the store buffer to the off-
+ through, and a cache line is transfered from the store buffer to the off-
chip L2 in as much 15 cycles on most systems. This delay hurts mpn_add_n,
mpn_sub_n, mpn_lshift, and mpn_rshift.
diff --git a/gmp/mpn/alpha/add_n.asm b/gmp/mpn/alpha/add_n.asm
index bc572a57a9..77d4cad2ef 100644
--- a/gmp/mpn/alpha/add_n.asm
+++ b/gmp/mpn/alpha/add_n.asm
@@ -1,164 +1,117 @@
dnl Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
dnl store sum in a third limb vector.
-dnl Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
+dnl Copyright 1995, 2000, 2002, 2005 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C EV4: ?
-C EV5: 4.75
-C EV6: 3
+C EV4: 7.75
+C EV5: 5.75
+C EV6: 4
-dnl INPUT PARAMETERS
-dnl res_ptr r16
-dnl s1_ptr r17
-dnl s2_ptr r18
-dnl size r19
+C INPUT PARAMETERS
+C rp r16
+C up r17
+C vp r18
+C n r19
ASM_START()
-PROLOGUE(mpn_add_nc)
- bis r20,r31,r25
- br L(com)
-EPILOGUE()
PROLOGUE(mpn_add_n)
- bis r31,r31,r25 C clear cy
-L(com): subq r19,4,r19 C decr loop cnt
- blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop
-C Start software pipeline for 1st loop
- ldq r0,0(r18)
- ldq r4,0(r17)
- ldq r1,8(r18)
- ldq r5,8(r17)
- addq r17,32,r17 C update s1_ptr
- addq r0,r4,r28 C 1st main add
- ldq r2,16(r18)
- addq r25,r28,r20 C 1st carry add
- ldq r3,24(r18)
- cmpult r28,r4,r8 C compute cy from last add
- ldq r6,-16(r17)
- cmpult r20,r28,r25 C compute cy from last add
- ldq r7,-8(r17)
- bis r8,r25,r25 C combine cy from the two adds
- subq r19,4,r19 C decr loop cnt
- addq r1,r5,r28 C 2nd main add
- addq r18,32,r18 C update s2_ptr
- addq r28,r25,r21 C 2nd carry add
- cmpult r28,r5,r8 C compute cy from last add
- blt r19,$Lend1 C if less than 4 limbs remain, jump
-C 1st loop handles groups of 4 limbs in a software pipeline
- ALIGN(16)
-$Loop: cmpult r21,r28,r25 C compute cy from last add
- ldq r0,0(r18)
- bis r8,r25,r25 C combine cy from the two adds
- ldq r1,8(r18)
- addq r2,r6,r28 C 3rd main add
- ldq r4,0(r17)
- addq r28,r25,r22 C 3rd carry add
- ldq r5,8(r17)
- cmpult r28,r6,r8 C compute cy from last add
- cmpult r22,r28,r25 C compute cy from last add
- stq r20,0(r16)
- bis r8,r25,r25 C combine cy from the two adds
- stq r21,8(r16)
- addq r3,r7,r28 C 4th main add
- addq r28,r25,r23 C 4th carry add
- cmpult r28,r7,r8 C compute cy from last add
- cmpult r23,r28,r25 C compute cy from last add
- addq r17,32,r17 C update s1_ptr
- bis r8,r25,r25 C combine cy from the two adds
- addq r16,32,r16 C update res_ptr
- addq r0,r4,r28 C 1st main add
- ldq r2,16(r18)
- addq r25,r28,r20 C 1st carry add
- ldq r3,24(r18)
- cmpult r28,r4,r8 C compute cy from last add
- ldq r6,-16(r17)
- cmpult r20,r28,r25 C compute cy from last add
- ldq r7,-8(r17)
- bis r8,r25,r25 C combine cy from the two adds
- subq r19,4,r19 C decr loop cnt
- stq r22,-16(r16)
- addq r1,r5,r28 C 2nd main add
- stq r23,-8(r16)
- addq r25,r28,r21 C 2nd carry add
- addq r18,32,r18 C update s2_ptr
- cmpult r28,r5,r8 C compute cy from last add
- bge r19,$Loop
-C Finish software pipeline for 1st loop
-$Lend1: cmpult r21,r28,r25 C compute cy from last add
- bis r8,r25,r25 C combine cy from the two adds
- addq r2,r6,r28 C 3rd main add
- addq r28,r25,r22 C 3rd carry add
- cmpult r28,r6,r8 C compute cy from last add
- cmpult r22,r28,r25 C compute cy from last add
- stq r20,0(r16)
- bis r8,r25,r25 C combine cy from the two adds
- stq r21,8(r16)
- addq r3,r7,r28 C 4th main add
- addq r28,r25,r23 C 4th carry add
- cmpult r28,r7,r8 C compute cy from last add
- cmpult r23,r28,r25 C compute cy from last add
- bis r8,r25,r25 C combine cy from the two adds
- addq r16,32,r16 C update res_ptr
- stq r22,-16(r16)
- stq r23,-8(r16)
-$Lend2: addq r19,4,r19 C restore loop cnt
- beq r19,$Lret
-C Start software pipeline for 2nd loop
- ldq r0,0(r18)
- ldq r4,0(r17)
+ ldq r3,0(r17)
+ ldq r4,0(r18)
+
subq r19,1,r19
- beq r19,$Lend0
-C 2nd loop handles remaining 1-3 limbs
- ALIGN(16)
-$Loop0: addq r0,r4,r28 C main add
- ldq r0,8(r18)
- cmpult r28,r4,r8 C compute cy from last add
- ldq r4,8(r17)
- addq r28,r25,r20 C carry add
- addq r18,8,r18
+ and r19,4-1,r2 C number of limbs in first loop
+ bis r31,r31,r0
+ beq r2,$L0 C if multiple of 4 limbs, skip first loop
+
+ subq r19,r2,r19
+
+$Loop0: subq r2,1,r2
+ ldq r5,8(r17)
+ addq r4,r0,r4
+ ldq r6,8(r18)
+ cmpult r4,r0,r1
+ addq r3,r4,r4
+ cmpult r4,r3,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+
addq r17,8,r17
- stq r20,0(r16)
- cmpult r20,r28,r25 C compute cy from last add
- subq r19,1,r19 C decr loop cnt
- bis r8,r25,r25 C combine cy from the two adds
+ addq r18,8,r18
+ bis r5,r5,r3
+ bis r6,r6,r4
addq r16,8,r16
- bne r19,$Loop0
-$Lend0: addq r0,r4,r28 C main add
- addq r28,r25,r20 C carry add
- cmpult r28,r4,r8 C compute cy from last add
- cmpult r20,r28,r25 C compute cy from last add
- stq r20,0(r16)
- bis r8,r25,r25 C combine cy from the two adds
-
-$Lret: bis r25,r31,r0 C return cy
+ bne r2,$Loop0
+
+$L0: beq r19,$Lend
+
+ ALIGN(8)
+$Loop: subq r19,4,r19
+
+ ldq r5,8(r17)
+ addq r4,r0,r4
+ ldq r6,8(r18)
+ cmpult r4,r0,r1
+ addq r3,r4,r4
+ cmpult r4,r3,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+
+ ldq r3,16(r17)
+ addq r6,r0,r6
+ ldq r4,16(r18)
+ cmpult r6,r0,r1
+ addq r5,r6,r6
+ cmpult r6,r5,r0
+ stq r6,8(r16)
+ bis r0,r1,r0
+
+ ldq r5,24(r17)
+ addq r4,r0,r4
+ ldq r6,24(r18)
+ cmpult r4,r0,r1
+ addq r3,r4,r4
+ cmpult r4,r3,r0
+ stq r4,16(r16)
+ bis r0,r1,r0
+
+ ldq r3,32(r17)
+ addq r6,r0,r6
+ ldq r4,32(r18)
+ cmpult r6,r0,r1
+ addq r5,r6,r6
+ cmpult r6,r5,r0
+ stq r6,24(r16)
+ bis r0,r1,r0
+
+ addq r17,32,r17
+ addq r18,32,r18
+ addq r16,32,r16
+ bne r19,$Loop
+
+$Lend: addq r4,r0,r4
+ cmpult r4,r0,r1
+ addq r3,r4,r4
+ cmpult r4,r3,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
ret r31,(r26),1
-EPILOGUE()
+EPILOGUE(mpn_add_n)
ASM_END()
diff --git a/gmp/mpn/alpha/addmul_1.asm b/gmp/mpn/alpha/addmul_1.asm
index c4e6834b61..22c41a5c74 100644
--- a/gmp/mpn/alpha/addmul_1.asm
+++ b/gmp/mpn/alpha/addmul_1.asm
@@ -4,30 +4,19 @@ dnl result to a second limb vector.
dnl Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/alpha-defs.m4 b/gmp/mpn/alpha/alpha-defs.m4
index af34c9294c..b2f9a242a8 100644
--- a/gmp/mpn/alpha/alpha-defs.m4
+++ b/gmp/mpn/alpha/alpha-defs.m4
@@ -3,32 +3,21 @@ divert(-1)
dnl m4 macros for Alpha assembler.
dnl Copyright 2003, 2004 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
dnl Usage: ASSERT([reg] [,code])
diff --git a/gmp/mpn/alpha/aorslsh1_n.asm b/gmp/mpn/alpha/aorslsh1_n.asm
index 9525e669db..3694f78761 100644
--- a/gmp/mpn/alpha/aorslsh1_n.asm
+++ b/gmp/mpn/alpha/aorslsh1_n.asm
@@ -1,40 +1,36 @@
dnl Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
-dnl Copyright 2003, 2013 Free Software Foundation, Inc.
+dnl Copyright 2003 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C EV4: ?
+C EV4: 12.5
C EV5: 6.25
-C EV6: 4.5
+C EV6: 4.375 (i.e., worse than separate mpn_lshift and mpn_add_n at 3.875)
+C TODO
+C * Write special version for ev6, as this is a slowdown for 100 < n < 2200
+C compared to separate mpn_lshift and mpn_add_n.
+C * Use addq instead of sll for left shift, and similarly cmplt instead of srl
+C for right shift.
+
+dnl INPUT PARAMETERS
define(`rp',`r16')
define(`up',`r17')
define(`vp',`r18')
@@ -42,8 +38,12 @@ define(`n', `r19')
define(`u0', `r8')
define(`u1', `r1')
+define(`u2', `r2')
+define(`u3', `r3')
define(`v0', `r4')
define(`v1', `r5')
+define(`v2', `r6')
+define(`v3', `r7')
define(`cy0', `r0')
define(`cy1', `r20')
@@ -67,98 +67,168 @@ MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
ASM_START()
PROLOGUE(func)
- and n, 2, cy0
- blbs n, L(bx1)
-L(bx0): ldq v1, 0(vp)
+ lda n, -4(n)
+ bis r31, r31, cy1
+ and n, 3, r1
+ beq r1, $Lb00
+ cmpeq r1, 1, r2
+ bne r2, $Lb01
+ cmpeq r1, 2, r2
+ bne r2, $Lb10
+$Lb11: C n = 3, 7, 11, ...
+ ldq v0, 0(vp)
+ ldq u0, 0(up)
+ ldq v1, 8(vp)
+ ldq u1, 8(up)
+ ldq v2, 16(vp)
+ ldq u2, 16(up)
+ lda vp, 24(vp)
+ lda up, 24(up)
+ bge n, $Loop
+ br r31, $Lcj3
+$Lb10: C n = 2, 6, 10, ...
+ bis r31, r31, cy0
+ ldq v1, 0(vp)
ldq u1, 0(up)
- nop
- bne cy0, L(b10)
-
-L(b00): lda vp, 48(vp)
- lda up, -16(up)
+ ldq v2, 8(vp)
+ ldq u2, 8(up)
lda rp, -8(rp)
- br r31, L(lo0)
-
-L(b10): lda vp, 32(vp)
+ blt n, $Lcj2
+ ldq v3, 16(vp)
+ ldq u3, 16(up)
+ lda vp, 48(vp)
+ lda up, 16(up)
+ br r31, $LL10
+$Lb01: C n = 1, 5, 9, ...
+ ldq v2, 0(vp)
+ ldq u2, 0(up)
+ lda rp, -16(rp)
+ blt n, $Lcj1
+ ldq v3, 8(vp)
+ ldq u3, 8(up)
+ ldq v0, 16(vp)
+ ldq u0, 16(up)
+ lda vp, 40(vp)
+ lda up, 8(up)
+ lda rp, 32(rp)
+ br r31, $LL01
+$Lb00: C n = 4, 8, 12, ...
+ bis r31, r31, cy0
+ ldq v3, 0(vp)
+ ldq u3, 0(up)
+ ldq v0, 8(vp)
+ ldq u0, 8(up)
+ ldq v1, 16(vp)
+ ldq u1, 16(up)
+ lda vp, 32(vp)
lda rp, 8(rp)
- lda cy0, 0(r31)
- br r31, L(lo2)
-
-L(bx1): ldq v0, 0(vp)
- ldq u0, 0(up)
- lda cy1, 0(r31)
- beq cy0, L(b01)
-
-L(b11): lda vp, 40(vp)
- lda up, -24(up)
- lda rp, 16(rp)
- br r31, L(lo3)
-
-L(b01): lda n, -4(n)
- ble n, L(end)
- lda vp, 24(vp)
- lda up, -8(up)
-
+ br r31, $LL00x
ALIGN(16)
-L(top): addq v0, v0, sl C left shift vlimb
- ldq v1, -16(vp)
+C 0
+$Loop: sll v0, 1, sl C left shift vlimb
+ ldq v3, 0(vp)
+C 1
ADDSUB u0, sl, ps C ulimb + (vlimb << 1)
- cmplt v0, r31, cy0 C carry out #1
- ldq u1, 16(up)
+ ldq u3, 0(up)
+C 2
ADDSUB ps, cy1, rr C consume carry from previous operation
+ srl v0, 63, cy0 C carry out #1
+C 3
CARRY( ps, u0, cy) C carry out #2
stq rr, 0(rp)
+C 4
addq cy, cy0, cy0 C combine carry out #1 and #2
CARRY( rr, ps, cy) C carry out #3
+C 5
addq cy, cy0, cy0 C final carry out
lda vp, 32(vp) C bookkeeping
-L(lo0): addq v1, v1, sl
- ldq v0, -40(vp)
+C 6
+$LL10: sll v1, 1, sl
+ ldq v0, -24(vp)
+C 7
ADDSUB u1, sl, ps
- cmplt v1, r31, cy1
- ldq u0, 24(up)
+ ldq u0, 8(up)
+C 8
ADDSUB ps, cy0, rr
+ srl v1, 63, cy1
+C 9
CARRY( ps, u1, cy)
stq rr, 8(rp)
+C 10
addq cy, cy1, cy1
CARRY( rr, ps, cy)
+C 11
addq cy, cy1, cy1
lda rp, 32(rp) C bookkeeping
-L(lo3): addq v0, v0, sl
- ldq v1, -32(vp)
- ADDSUB u0, sl, ps
- cmplt v0, r31, cy0
- ldq u1, 32(up)
+C 12
+$LL01: sll v2, 1, sl
+ ldq v1, -16(vp)
+C 13
+ ADDSUB u2, sl, ps
+ ldq u1, 16(up)
+C 14
ADDSUB ps, cy1, rr
- CARRY( ps, u0, cy)
+ srl v2, 63, cy0
+C 15
+ CARRY( ps, u2, cy)
stq rr, -16(rp)
+C 16
addq cy, cy0, cy0
CARRY( rr, ps, cy)
+C 17
addq cy, cy0, cy0
- lda up, 32(up) C bookkeeping
-L(lo2): addq v1, v1, sl
- ldq v0, -24(vp)
- ADDSUB u1, sl, ps
- cmplt v1, r31, cy1
- ldq u0, 8(up)
+$LL00x: lda up, 32(up) C bookkeeping
+C 18
+ sll v3, 1, sl
+ ldq v2, -8(vp)
+C 19
+ ADDSUB u3, sl, ps
+ ldq u2, -8(up)
+C 20
ADDSUB ps, cy0, rr
- CARRY( ps, u1, cy)
+ srl v3, 63, cy1
+C 21
+ CARRY( ps, u3, cy)
stq rr, -8(rp)
+C 22
addq cy, cy1, cy1
CARRY( rr, ps, cy)
+C 23
addq cy, cy1, cy1
lda n, -4(n) C bookkeeping
- bgt n, L(top)
+C 24
+ bge n, $Loop
-L(end): addq v0, v0, sl
+$Lcj3: sll v0, 1, sl
ADDSUB u0, sl, ps
ADDSUB ps, cy1, rr
- cmplt v0, r31, cy0
+ srl v0, 63, cy0
CARRY( ps, u0, cy)
stq rr, 0(rp)
addq cy, cy0, cy0
CARRY( rr, ps, cy)
- addq cy, cy0, r0
+ addq cy, cy0, cy0
+
+$Lcj2: sll v1, 1, sl
+ ADDSUB u1, sl, ps
+ ADDSUB ps, cy0, rr
+ srl v1, 63, cy1
+ CARRY( ps, u1, cy)
+ stq rr, 8(rp)
+ addq cy, cy1, cy1
+ CARRY( rr, ps, cy)
+ addq cy, cy1, cy1
+
+$Lcj1: sll v2, 1, sl
+ ADDSUB u2, sl, ps
+ ADDSUB ps, cy1, rr
+ srl v2, 63, cy0
+ CARRY( ps, u2, cy)
+ stq rr, 16(rp)
+ addq cy, cy0, cy0
+ CARRY( rr, ps, cy)
+ addq cy, cy0, cy0
+
ret r31,(r26),1
EPILOGUE()
ASM_END()
diff --git a/gmp/mpn/alpha/aorslsh2_n.asm b/gmp/mpn/alpha/aorslsh2_n.asm
deleted file mode 100644
index bdee1d6d02..0000000000
--- a/gmp/mpn/alpha/aorslsh2_n.asm
+++ /dev/null
@@ -1,167 +0,0 @@
-dnl Alpha mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).
-
-dnl Copyright 2003, 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C EV4: ?
-C EV5: 6
-C EV6: 3.75
-
-C TODO
-C * Tune to reach 3.5 c/l on ev6 and 5.75 c/l on ev5.
-
-define(`rp',`r16')
-define(`up',`r17')
-define(`vp',`r18')
-define(`n', `r19')
-
-define(`u0', `r8')
-define(`u1', `r1')
-define(`v0', `r4')
-define(`v1', `r5')
-
-define(`cy0', `r0')
-define(`cy1', `r20')
-define(`cy', `r22')
-define(`rr', `r24')
-define(`ps', `r25')
-define(`sl', `r28')
-
-ifdef(`OPERATION_addlsh2_n',`
- define(ADDSUB, addq)
- define(CARRY, `cmpult $1,$2,$3')
- define(func, mpn_addlsh2_n)
-')
-ifdef(`OPERATION_sublsh2_n',`
- define(ADDSUB, subq)
- define(CARRY, `cmpult $2,$1,$3')
- define(func, mpn_sublsh2_n)
-')
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n)
-
-ASM_START()
-PROLOGUE(func)
- and n, 2, cy0
- blbs n, L(bx1)
-L(bx0): ldq v1, 0(vp)
- ldq u1, 0(up)
- bis r31, r31, r2
- bne cy0, L(b10)
-
-L(b00): lda vp, 48(vp)
- lda up, -16(up)
- lda rp, -8(rp)
- s4addq v1, r31, sl
- br r31, L(lo0)
-
-L(b10): lda vp, 32(vp)
- lda rp, 8(rp)
- lda cy0, 0(r31)
- br r31, L(lo2)
-
-L(bx1): ldq v0, 0(vp)
- ldq u0, 0(up)
- lda cy1, 0(r31)
- bis r31, r31, r3
- nop
- beq cy0, L(b01)
-
-L(b11): lda vp, 40(vp)
- lda up, -24(up)
- lda rp, 16(rp)
- br r31, L(lo3)
-
-L(b01): lda n, -4(n)
- ble n, L(end)
- lda vp, 24(vp)
- lda up, -8(up)
-
- ALIGN(16)
-L(top): s4addq v0, r3, sl C combined vlimb
- ldq v1, -16(vp)
- ADDSUB u0, sl, ps C ulimb + (vlimb << 1)
- ldq u1, 16(up)
- srl v0, 62, r2 C high v bits
- ADDSUB ps, cy1, rr C consume carry from previous operation
- CARRY( ps, u0, cy0) C carry out #2
- stq rr, 0(rp)
- CARRY( rr, ps, cy) C carry out #3
- lda vp, 32(vp) C bookkeeping
- addq cy, cy0, cy0 C final carry out
- s4addq v1, r2, sl
-L(lo0): ldq v0, -40(vp)
- ADDSUB u1, sl, ps
- ldq u0, 24(up)
- srl v1, 62, r3
- ADDSUB ps, cy0, rr
- CARRY( ps, u1, cy1)
- stq rr, 8(rp)
- CARRY( rr, ps, cy)
- lda rp, 32(rp) C bookkeeping
- addq cy, cy1, cy1
-L(lo3): s4addq v0, r3, sl
- ldq v1, -32(vp)
- ADDSUB u0, sl, ps
- ldq u1, 32(up)
- srl v0, 62, r2
- ADDSUB ps, cy1, rr
- CARRY( ps, u0, cy0)
- stq rr, -16(rp)
- CARRY( rr, ps, cy)
- lda up, 32(up) C bookkeeping
- addq cy, cy0, cy0
-L(lo2): s4addq v1, r2, sl
- ldq v0, -24(vp)
- ADDSUB u1, sl, ps
- ldq u0, 8(up)
- srl v1, 62, r3
- ADDSUB ps, cy0, rr
- CARRY( ps, u1, cy1)
- stq rr, -8(rp)
- CARRY( rr, ps, cy)
- lda n, -4(n) C bookkeeping
- addq cy, cy1, cy1
- bgt n, L(top)
-
-L(end): s4addq v0, r3, sl
- ADDSUB u0, sl, ps
- srl v0, 62, r2
- ADDSUB ps, cy1, rr
- CARRY( ps, u0, cy0)
- stq rr, 0(rp)
- CARRY( rr, ps, cy)
- addq cy, cy0, cy0
- addq cy0, r2, r0
-
- ret r31,(r26),1
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/alpha/bdiv_dbm1c.asm b/gmp/mpn/alpha/bdiv_dbm1c.asm
index 472966ca98..e5f11dbf48 100644
--- a/gmp/mpn/alpha/bdiv_dbm1c.asm
+++ b/gmp/mpn/alpha/bdiv_dbm1c.asm
@@ -3,30 +3,19 @@ dnl Alpha mpn_bdiv_dbm1c.
dnl Copyright 2008 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/cntlz.asm b/gmp/mpn/alpha/cntlz.asm
index 25af19b131..2bfd923e5e 100644
--- a/gmp/mpn/alpha/cntlz.asm
+++ b/gmp/mpn/alpha/cntlz.asm
@@ -3,30 +3,19 @@ dnl Alpha auxiliary for longlong.h's count_leading_zeros
dnl Copyright 1997, 2000, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/copyd.asm b/gmp/mpn/alpha/copyd.asm
index b41b5366cc..ba8fa1c633 100644
--- a/gmp/mpn/alpha/copyd.asm
+++ b/gmp/mpn/alpha/copyd.asm
@@ -3,30 +3,19 @@ dnl Alpha mpn_copyd -- copy, decrementing.
dnl Copyright 2002, 2003 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/copyi.asm b/gmp/mpn/alpha/copyi.asm
index f7e2ad6f6a..425804127e 100644
--- a/gmp/mpn/alpha/copyi.asm
+++ b/gmp/mpn/alpha/copyi.asm
@@ -3,30 +3,19 @@ dnl Alpha mpn_copyi -- copy, incrementing.
dnl Copyright 2002, 2003 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/default.m4 b/gmp/mpn/alpha/default.m4
index 8fe7c4e122..e7aae2eeea 100644
--- a/gmp/mpn/alpha/default.m4
+++ b/gmp/mpn/alpha/default.m4
@@ -3,33 +3,22 @@ divert(-1)
dnl m4 macros for alpha assembler (everywhere except unicos).
-dnl Copyright 2000, 2002-2004, 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
+dnl Copyright 2000, 2002, 2003, 2004 Free Software Foundation, Inc.
dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
dnl Usage: ASM_START()
@@ -64,9 +53,8 @@ ifelse(`$2',noalign,,` ALIGN(16)')
.globl $1
.ent $1
$1:
- .frame r30,0,r26,0
-ifelse(`$2',gp,` ldgp r29, 0(r27)
-`$'$1..ng:')
+ifelse(`$2',gp,` ldgp r29,0(r27)')
+ .frame r30,0,r26
.prologue ifelse(`$2',gp,1,0)')
define(`EPILOGUE_cpu',
@@ -102,13 +90,12 @@ forloop(i,0,31,`defreg(`r'i,$i)')
forloop(i,0,31,`deflit(`f'i,``$f''i)')
-dnl Usage: DATASTART(name,align) or DATASTART(name)
+dnl Usage: DATASTART(name)
dnl DATAEND()
define(`DATASTART',
-m4_assert_numargs_range(1,2)
-` RODATA
- ALIGN(ifelse($#,1,2,$2))
+m4_assert_numargs(1)
+` DATA
$1:')
define(`DATAEND',
m4_assert_numargs(0)
@@ -117,7 +104,7 @@ m4_assert_numargs(0)
dnl Load a symbolic address into a register
define(`LEA',
m4_assert_numargs(2)
-`lda $1, $2')
+`lda $1, $2')
dnl Usage: ASM_END()
define(`ASM_END',
diff --git a/gmp/mpn/alpha/dive_1.c b/gmp/mpn/alpha/dive_1.c
index 88b82db2f7..a915c58a9e 100644
--- a/gmp/mpn/alpha/dive_1.c
+++ b/gmp/mpn/alpha/dive_1.c
@@ -4,33 +4,22 @@
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
FUTURE GNU MP RELEASES.
-Copyright 2000-2003 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/mpn/alpha/ev5/diveby3.asm b/gmp/mpn/alpha/diveby3.asm
index 3758188e02..e2d1c6beee 100644
--- a/gmp/mpn/alpha/ev5/diveby3.asm
+++ b/gmp/mpn/alpha/diveby3.asm
@@ -1,42 +1,32 @@
dnl Alpha mpn_divexact_by3c -- mpn division by 3, expecting no remainder.
-dnl Copyright 2004, 2005, 2009 Free Software Foundation, Inc.
+dnl Copyright 2004, 2005 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
C EV4: 22
C EV5: 11.5
-C EV6: 6.3 Note that mpn_bdiv_dbm1c is faster
+C EV6: 6.3
C TODO
-C * Remove the unops, they benefit just ev6, which no longer uses this file.
+C * Trim this to 6.0 c/l for ev6.
+C * Write special ev5 version, should reach 9 c/l, and could be smaller.
C * Try prefetch for destination, using lds.
C * Improve feed-in code, by moving initial mulq earlier; make initial load
C to u0/u0 to save some copying.
@@ -50,7 +40,7 @@ define(`cy', `r19')
ASM_START()
-DATASTART(L(LC),8)
+DATASTART(L(LC))
.quad 0xAAAAAAAAAAAAAAAB
.quad 0x5555555555555555
.quad 0xAAAAAAAAAAAAAAAA
diff --git a/gmp/mpn/alpha/divrem_2.asm b/gmp/mpn/alpha/divrem_2.asm
index 046b246a95..b68468bca0 100644
--- a/gmp/mpn/alpha/divrem_2.asm
+++ b/gmp/mpn/alpha/divrem_2.asm
@@ -1,32 +1,21 @@
dnl Alpha mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
-dnl Copyright 2007, 2008, 2013 Free Software Foundation, Inc.
+dnl Copyright 2007, 2008 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -52,7 +41,8 @@ define(`un_param', `r19')
define(`dp', `r20')
ASM_START()
-PROLOGUE(mpn_divrem_2,gp)
+PROLOGUE(mpn_divrem_2)
+ ldgp r29, 0(r27)
lda r30, -80(r30)
stq r26, 0(r30)
stq r9, 8(r30)
@@ -90,7 +80,7 @@ L(L8): stq r3, 72(r30)
blt r19, L(L10)
bis r31, r12, r16
jsr r26, mpn_invert_limb
- LDGP( r29, 0(r26))
+ ldgp r29, 0(r26)
mulq r0, r12, r4 C t0 = LO(di * d1)
umulh r0, r10, r2 C s1 = HI(di * d0)
addq r4, r10, r4 C t0 += d0
diff --git a/gmp/mpn/alpha/ev5/add_n.asm b/gmp/mpn/alpha/ev5/add_n.asm
new file mode 100644
index 0000000000..626e713ccb
--- /dev/null
+++ b/gmp/mpn/alpha/ev5/add_n.asm
@@ -0,0 +1,146 @@
+dnl Alpha EV5 mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl store sum in a third limb vector.
+
+dnl Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C EV4: ?
+C EV5: 4.75
+C EV6: 3
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl s2_ptr r18
+dnl size r19
+
+ASM_START()
+PROLOGUE(mpn_add_n)
+ bis r31,r31,r25 C clear cy
+ subq r19,4,r19 C decr loop cnt
+ blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+ ldq r0,0(r18) C load s2 limb 0
+ ldq r4,0(r17) C load s1 limb 0
+ ldq r1,8(r18) C load s2 limb 1
+ ldq r5,8(r17) C load s1 limb 1
+ addq r17,32,r17 C update s1_ptr
+ ldq r2,16(r18) C load s2 limb 2
+ addq r0,r4,r20 C 1st main add
+ ldq r3,24(r18) C load s2 limb 3
+ subq r19,4,r19 C decr loop cnt
+ ldq r6,-16(r17) C load s1 limb 2 (s1_ptr already advanced)
+ cmpult r20,r0,r25 C compute cy from last add
+ ldq r7,-8(r17) C load s1 limb 3 (s1_ptr already advanced)
+ addq r1,r5,r28 C 2nd main add
+ addq r18,32,r18 C update s2_ptr
+ addq r28,r25,r21 C 2nd carry add
+ cmpult r28,r5,r8 C compute cy from last add
+ blt r19,$Lend1 C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+ ALIGN(16)
+$Loop: cmpult r21,r28,r25 C compute cy from last add
+ ldq r0,0(r18) C load s2 limb 0 of next group
+ bis r8,r25,r25 C combine cy from the two adds
+ ldq r1,8(r18) C load s2 limb 1 of next group
+ addq r2,r6,r28 C 3rd main add
+ ldq r4,0(r17) C load s1 limb 0 of next group
+ addq r28,r25,r22 C 3rd carry add
+ ldq r5,8(r17) C load s1 limb 1 of next group
+ cmpult r28,r6,r8 C compute cy from last add
+ cmpult r22,r28,r25 C compute cy from last add
+ stq r20,0(r16) C store 1st sum limb
+ bis r8,r25,r25 C combine cy from the two adds
+ stq r21,8(r16) C store 2nd sum limb
+ addq r3,r7,r28 C 4th main add
+ addq r28,r25,r23 C 4th carry add
+ cmpult r28,r7,r8 C compute cy from last add
+ cmpult r23,r28,r25 C compute cy from last add
+ addq r17,32,r17 C update s1_ptr
+ bis r8,r25,r25 C combine cy from the two adds
+ addq r16,32,r16 C update res_ptr
+ addq r0,r4,r28 C 1st main add
+ ldq r2,16(r18) C load s2 limb 2 of next group
+ addq r25,r28,r20 C 1st carry add
+ ldq r3,24(r18) C load s2 limb 3 of next group
+ cmpult r28,r4,r8 C compute cy from last add
+ ldq r6,-16(r17) C load s1 limb 2 of next group
+ cmpult r20,r28,r25 C compute cy from last add
+ ldq r7,-8(r17) C load s1 limb 3 of next group
+ bis r8,r25,r25 C combine cy from the two adds
+ subq r19,4,r19 C decr loop cnt
+ stq r22,-16(r16) C store 3rd sum limb (res_ptr already advanced)
+ addq r1,r5,r28 C 2nd main add
+ stq r23,-8(r16) C store 4th sum limb (res_ptr already advanced)
+ addq r25,r28,r21 C 2nd carry add
+ addq r18,32,r18 C update s2_ptr
+ cmpult r28,r5,r8 C compute cy from last add
+ bge r19,$Loop C loop while at least 4 more limbs remain
+C Finish software pipeline for 1st loop
+$Lend1: cmpult r21,r28,r25 C compute cy from last add
+ bis r8,r25,r25 C combine cy from the two adds
+ addq r2,r6,r28 C 3rd main add
+ addq r28,r25,r22 C 3rd carry add
+ cmpult r28,r6,r8 C compute cy from last add
+ cmpult r22,r28,r25 C compute cy from last add
+ stq r20,0(r16) C store 1st sum limb
+ bis r8,r25,r25 C combine cy from the two adds
+ stq r21,8(r16) C store 2nd sum limb
+ addq r3,r7,r28 C 4th main add
+ addq r28,r25,r23 C 4th carry add
+ cmpult r28,r7,r8 C compute cy from last add
+ cmpult r23,r28,r25 C compute cy from last add
+ bis r8,r25,r25 C combine cy from the two adds
+ addq r16,32,r16 C update res_ptr
+ stq r22,-16(r16) C store 3rd sum limb (res_ptr already advanced)
+ stq r23,-8(r16) C store 4th sum limb (res_ptr already advanced)
+$Lend2: addq r19,4,r19 C restore loop cnt
+ beq r19,$Lret C no limbs left, return cy
+C Start software pipeline for 2nd loop
+ ldq r0,0(r18) C load next s2 limb
+ ldq r4,0(r17) C load next s1 limb
+ subq r19,1,r19 C decr loop cnt
+ beq r19,$Lend0 C only the limb just loaded remains
+C 2nd loop handles remaining 1-3 limbs
+ ALIGN(16)
+$Loop0: addq r0,r4,r28 C main add
+ ldq r0,8(r18) C load following s2 limb
+ cmpult r28,r4,r8 C compute cy from last add
+ ldq r4,8(r17) C load following s1 limb
+ addq r28,r25,r20 C carry add
+ addq r18,8,r18 C update s2_ptr
+ addq r17,8,r17 C update s1_ptr
+ stq r20,0(r16) C store sum limb
+ cmpult r20,r28,r25 C compute cy from last add
+ subq r19,1,r19 C decr loop cnt
+ bis r8,r25,r25 C combine cy from the two adds
+ addq r16,8,r16 C update res_ptr
+ bne r19,$Loop0 C loop until one loaded limb pair is left
+$Lend0: addq r0,r4,r28 C main add
+ addq r28,r25,r20 C carry add
+ cmpult r28,r4,r8 C compute cy from last add
+ cmpult r20,r28,r25 C compute cy from last add
+ stq r20,0(r16) C store final sum limb
+ bis r8,r25,r25 C combine cy from the two adds
+
+$Lret: bis r25,r31,r0 C return cy
+ ret r31,(r26),1 C return to the address in r26
+EPILOGUE(mpn_add_n)
+ASM_END()
diff --git a/gmp/mpn/alpha/com.asm b/gmp/mpn/alpha/ev5/com_n.asm
index f084ab5e96..979e711eb8 100644
--- a/gmp/mpn/alpha/com.asm
+++ b/gmp/mpn/alpha/ev5/com_n.asm
@@ -1,32 +1,21 @@
-dnl Alpha mpn_com -- mpn one's complement.
+dnl Alpha EV5 mpn_com_n -- mpn one's complement.
dnl Copyright 2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -37,7 +26,7 @@ C EV5: 2.0
C EV6: 1.5
-C mp_limb_t mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C mp_limb_t mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C For ev5 the main loop is 7 cycles plus 1 taken branch bubble, for a total
C 2.0 c/l. In general, a pattern like this unrolled to N limbs per loop
@@ -71,7 +60,7 @@ FLOAT64(L(dat), 2.0)
ALIGN(16)
-PROLOGUE(mpn_com,gp)
+PROLOGUE(mpn_com_n,gp)
C r16 dst
C r17 src
diff --git a/gmp/mpn/alpha/ev5/gmp-mparam.h b/gmp/mpn/alpha/ev5/gmp-mparam.h
index b560c20afe..cbedd4f173 100644
--- a/gmp/mpn/alpha/ev5/gmp-mparam.h
+++ b/gmp/mpn/alpha/ev5/gmp-mparam.h
@@ -1,187 +1,81 @@
/* Alpha EV5 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 1999-2002, 2004, 2005, 2008-2010, 2014 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2008, 2009
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
/* 600 MHz 21164A */
-/* FFT tuning limit = 5000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */
-
-#define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */
-#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1_1P_METHOD 2
-#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 22
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15
-#define USE_PREINV_DIVREM_1 1 /* preinv always */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD 76
-
-#define MUL_TOOM22_THRESHOLD 14
-#define MUL_TOOM33_THRESHOLD 50
-#define MUL_TOOM44_THRESHOLD 118
-#define MUL_TOOM6H_THRESHOLD 157
-#define MUL_TOOM8H_THRESHOLD 236
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 77
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 81
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 56
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 70
-
-#define SQR_BASECASE_THRESHOLD 0 /* always */
-#define SQR_TOOM2_THRESHOLD 22
-#define SQR_TOOM3_THRESHOLD 73
-#define SQR_TOOM4_THRESHOLD 178
-#define SQR_TOOM6_THRESHOLD 0 /* always */
-#define SQR_TOOM8_THRESHOLD 260
-
-#define MULMID_TOOM42_THRESHOLD 18
-
-#define MULMOD_BNM1_THRESHOLD 9
-#define SQRMOD_BNM1_THRESHOLD 12
-
-#define MUL_FFT_MODF_THRESHOLD 284 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 284, 5}, { 11, 6}, { 6, 5}, { 13, 6}, \
- { 7, 5}, { 15, 6}, { 13, 7}, { 7, 6}, \
- { 15, 7}, { 8, 6}, { 17, 7}, { 13, 8}, \
- { 7, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \
- { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \
- { 19, 9}, { 11, 8}, { 25,10}, { 7, 9}, \
- { 15, 8}, { 33, 9}, { 19, 8}, { 39, 9}, \
- { 23, 8}, { 47,10}, { 15, 9}, { 39,10}, \
- { 23, 9}, { 47,11}, { 15,10}, { 31, 9}, \
- { 67,10}, { 39, 9}, { 79,10}, { 47, 9}, \
- { 95,10}, { 55,11}, { 31,10}, { 63, 8}, \
- { 255, 7}, { 511,10}, { 71, 9}, { 143, 8}, \
- { 287, 7}, { 575, 9}, { 159, 8}, { 319,11}, \
- { 47,12}, { 31,11}, { 63, 9}, { 255, 8}, \
- { 511,10}, { 143, 9}, { 287,11}, { 79,10}, \
- { 159, 9}, { 319,10}, { 175, 9}, { 351, 8}, \
- { 703,10}, { 191, 9}, { 383,10}, { 207, 9}, \
- { 415,12}, { 63,10}, { 255,11}, { 143,10}, \
- { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \
- { 639,11}, { 175,12}, { 95,11}, { 191,10}, \
- { 383,11}, { 207,10}, { 415,11}, { 223,13}, \
- { 63,11}, { 287,10}, { 575,12}, { 159,11}, \
- { 319,10}, { 639,11}, { 351,12}, { 191,11}, \
- { 415,12}, { 223,11}, { 447,10}, { 895,11}, \
- { 479,12}, { 287,11}, { 575,12}, { 351,13}, \
- { 191,12}, { 479,13}, { 255,12}, { 575,13}, \
- { 319,12}, { 703,13}, { 383,12}, { 831,13}, \
- { 447,14}, { 255,13}, { 8192,14}, { 16384,15}, \
- { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
- { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
- {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 121
-#define MUL_FFT_THRESHOLD 4224
-
-#define SQR_FFT_MODF_THRESHOLD 240 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 240, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
- { 14, 5}, { 29, 7}, { 9, 6}, { 19, 7}, \
- { 13, 6}, { 27, 8}, { 7, 7}, { 21, 8}, \
- { 11, 7}, { 29, 8}, { 19, 9}, { 11, 8}, \
- { 27,10}, { 7, 9}, { 15, 8}, { 33, 9}, \
- { 19, 8}, { 39, 9}, { 23, 8}, { 47,10}, \
- { 15, 9}, { 39,10}, { 23, 9}, { 47,11}, \
- { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
- { 79,10}, { 47,11}, { 31,10}, { 63, 9}, \
- { 127, 8}, { 255,10}, { 71, 9}, { 143, 8}, \
- { 287,10}, { 79,11}, { 47,12}, { 31,11}, \
- { 63,10}, { 127, 9}, { 255,10}, { 143, 9}, \
- { 287,11}, { 79,10}, { 159, 9}, { 319,10}, \
- { 175,11}, { 95,10}, { 191, 9}, { 383,10}, \
- { 207, 9}, { 415,11}, { 111,10}, { 223,12}, \
- { 63,11}, { 175,12}, { 95,11}, { 207,13}, \
- { 63,12}, { 127,11}, { 287,12}, { 159,11}, \
- { 351,12}, { 191,11}, { 415,12}, { 223,11}, \
- { 447,13}, { 127,12}, { 351,13}, { 191,12}, \
- { 383,11}, { 767,12}, { 415,11}, { 831,12}, \
- { 447,14}, { 127,13}, { 255,12}, { 511,11}, \
- { 1087,12}, { 575,13}, { 319,12}, { 703,13}, \
- { 383,12}, { 831,13}, { 447,14}, { 255,13}, \
- { 511,12}, { 1023,13}, { 8192,14}, { 16384,15}, \
- { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
- { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
- {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 105
-#define SQR_FFT_THRESHOLD 3968
-
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 45
-#define MULLO_MUL_N_THRESHOLD 8397
-
-#define DC_DIV_QR_THRESHOLD 47
-#define DC_DIVAPPR_Q_THRESHOLD 168
-#define DC_BDIV_QR_THRESHOLD 47
-#define DC_BDIV_Q_THRESHOLD 110
-
-#define INV_MULMOD_BNM1_THRESHOLD 26
-#define INV_NEWTON_THRESHOLD 189
-#define INV_APPR_THRESHOLD 181
-
-#define BINV_NEWTON_THRESHOLD 196
-#define REDC_1_TO_REDC_N_THRESHOLD 51
-
-#define MU_DIV_QR_THRESHOLD 1558
-#define MU_DIVAPPR_Q_THRESHOLD 1558
-#define MUPI_DIV_QR_THRESHOLD 90
-#define MU_BDIV_QR_THRESHOLD 855
-#define MU_BDIV_Q_THRESHOLD 1078
-
-#define POWM_SEC_TABLE 1,16,90,452,1221
-
-#define MATRIX22_STRASSEN_THRESHOLD 11
-#define HGCD_THRESHOLD 99
-#define HGCD_APPR_THRESHOLD 103
-#define HGCD_REDUCE_THRESHOLD 2899
-#define GCD_DC_THRESHOLD 283
-#define GCDEXT_DC_THRESHOLD 201
-#define JACOBI_BASE_METHOD 3
-
-#define GET_STR_DC_THRESHOLD 13
-#define GET_STR_PRECOMPUTE_THRESHOLD 28
-#define SET_STR_DC_THRESHOLD 426
-#define SET_STR_PRECOMPUTE_THRESHOLD 1505
-
-#define FAC_DSC_THRESHOLD 1404
-#define FAC_ODD_THRESHOLD 0 /* always */
+
+/* Generated by tuneup.c, 2009-01-15, gcc 3.4 */
+
+#define MUL_KARATSUBA_THRESHOLD 14
+#define MUL_TOOM3_THRESHOLD 74
+#define MUL_TOOM44_THRESHOLD 118
+
+#define SQR_BASECASE_THRESHOLD 4
+#define SQR_KARATSUBA_THRESHOLD 28
+#define SQR_TOOM3_THRESHOLD 77
+#define SQR_TOOM4_THRESHOLD 136
+
+#define MULLOW_BASECASE_THRESHOLD 0 /* always */
+#define MULLOW_DC_THRESHOLD 44
+#define MULLOW_MUL_N_THRESHOLD 246
+
+#define DIV_SB_PREINV_THRESHOLD 0 /* preinv always */
+#define DIV_DC_THRESHOLD 53
+#define POWM_THRESHOLD 85
+
+#define MATRIX22_STRASSEN_THRESHOLD 17
+#define HGCD_THRESHOLD 104
+#define GCD_DC_THRESHOLD 321
+#define GCDEXT_DC_THRESHOLD 298
+#define JACOBI_BASE_METHOD 3
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1_THRESHOLD 13
+#define MOD_1_2_THRESHOLD 14
+#define MOD_1_4_THRESHOLD 16
+#define USE_PREINV_DIVREM_1 1 /* preinv always */
+#define USE_PREINV_MOD_1 1 /* preinv always */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always */
+
+#define GET_STR_DC_THRESHOLD 20
+#define GET_STR_PRECOMPUTE_THRESHOLD 32
+#define SET_STR_DC_THRESHOLD 532
+#define SET_STR_PRECOMPUTE_THRESHOLD 1501
+
+#define MUL_FFT_TABLE { 240, 480, 1344, 1792, 5120, 20480, 81920, 196608, 0 }
+#define MUL_FFT_MODF_THRESHOLD 240
+#define MUL_FFT_THRESHOLD 1920
+
+#define SQR_FFT_TABLE { 240, 480, 1216, 1792, 5120, 12288, 81920, 196608, 0 }
+#define SQR_FFT_MODF_THRESHOLD 208
+#define SQR_FFT_THRESHOLD 1408
+
+/* These tables need to be updated. */
+
+#define MUL_FFT_TABLE2 {{1, 4}, {177, 5}, {193, 4}, {209, 5}, {353, 6}, {385, 5}, {417, 6}, {833, 7}, {897, 6}, {961, 7}, {1025, 6}, {1089, 7}, {1665, 8}, {1793, 7}, {2177, 8}, {2305, 7}, {2433, 8}, {2817, 7}, {2945, 8}, {3329, 9}, {3457, 8}, {4865, 9}, {5633, 8}, {6401, 10}, {7169, 9}, {11777, 10}, {12801, 9}, {13825, 10}, {15361, 9}, {19969, 10}, {23553, 9}, {24065, 11}, {30721, 10}, {48129, 11}, {63489, 10}, {72705, 11}, {96257, 12}, {126977, 11}, {194561, 12}, {258049, 11}, {325633, 12}, {389121, 13}, {516097, 12}, {MP_SIZE_T_MAX,0}}
+
+#define SQR_FFT_TABLE2 {{1, 4}, {177, 5}, {193, 4}, {209, 5}, {353, 6}, {385, 5}, {417, 6}, {961, 7}, {1025, 6}, {1089, 7}, {1153, 6}, {1217, 7}, {1665, 8}, {1793, 7}, {2177, 8}, {2305, 7}, {2561, 8}, {2817, 7}, {2945, 8}, {3329, 9}, {3585, 8}, {5377, 9}, {5633, 8}, {6401, 9}, {6657, 10}, {6913, 9}, {11777, 10}, {13313, 9}, {13825, 10}, {15361, 9}, {18945, 10}, {19457, 9}, {19969, 10}, {23553, 9}, {24065, 11}, {30721, 10}, {48129, 11}, {53249, 10}, {56321, 11}, {63489, 10}, {72705, 11}, {73729, 10}, {79873, 11}, {96257, 12}, {126977, 11}, {194561, 12}, {258049, 11}, {325633, 12}, {389121, 13}, {516097, 12}, {1699841, 13}, {1708033, 12}, {1732609, 13}, {1748993, 12}, {1757185, 13}, {1773569, 12}, {1777665, 13}, {1781761, 12}, {1789953, 13}, {1806337, 12}, {1818625, 13}, {1822721, 12}, {1826817, 13}, {1830913, 12}, {1961985, 13}, {MP_SIZE_T_MAX,0}}
diff --git a/gmp/mpn/alpha/ev5/lshift.asm b/gmp/mpn/alpha/ev5/lshift.asm
new file mode 100644
index 0000000000..04385d3484
--- /dev/null
+++ b/gmp/mpn/alpha/ev5/lshift.asm
@@ -0,0 +1,171 @@
+dnl Alpha EV5 mpn_lshift -- Shift a number left.
+
+dnl Copyright 1994, 1995, 2000, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C EV4: ?
+C EV5: 3.25
+C EV6: 1.75
+
+C INPUT PARAMETERS
+C rp r16
+C up r17
+C n r18
+C cnt r19
+
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ s8addq r18,r17,r17 C make r17 point at end of s1
+ ldq r4,-8(r17) C load first limb
+ subq r31,r19,r20 C r20 = 0 - cnt, count for the opposing srl shifts (presumably taken mod 64 by the shifter)
+ s8addq r18,r16,r16 C make r16 point at end of RES
+ subq r18,1,r18 C r18 = n - 1
+ and r18,4-1,r28 C number of limbs in first loop
+ srl r4,r20,r0 C compute function result
+
+ beq r28,$L0 C first loop has nothing to do, skip it
+ subq r18,r28,r18 C r18 = limbs left after first loop
+
+ ALIGN(8)
+$Loop0: ldq r3,-16(r17) C load next lower limb
+ subq r16,8,r16 C step rp down one limb
+ sll r4,r19,r5 C current limb shifted left by cnt
+ subq r17,8,r17 C step up down one limb
+ subq r28,1,r28 C decr first loop cnt
+ srl r3,r20,r6 C next lower limb shifted right by r20
+ bis r3,r3,r4 C next lower limb becomes current limb
+ bis r5,r6,r8 C combine the two parts
+ stq r8,0(r16) C store result limb
+ bne r28,$Loop0 C loop until first loop cnt is zero
+
+$L0: sll r4,r19,r24 C r24 = current limb shifted left by cnt
+ beq r18,$Lend C no limbs left, just store r24
+C warm up phase 1
+ ldq r1,-16(r17) C load next four limbs into r1..r4
+ subq r18,4,r18 C decr loop cnt
+ ldq r2,-24(r17)
+ ldq r3,-32(r17)
+ ldq r4,-40(r17)
+ beq r18,$Lend1 C only these four limbs remain
+C warm up phase 2
+ srl r1,r20,r7 C srl parts go to r7,r8,r5,r6
+ sll r1,r19,r21 C sll parts go to r21,r22,r23,r24
+ srl r2,r20,r8
+ ldq r1,-48(r17) C reload r1..r4 with the following four limbs
+ sll r2,r19,r22
+ ldq r2,-56(r17)
+ srl r3,r20,r5
+ bis r7,r24,r7 C merge with sll part of previous limb
+ sll r3,r19,r23
+ bis r8,r21,r8 C merge with sll part of previous limb
+ srl r4,r20,r6
+ ldq r3,-64(r17)
+ sll r4,r19,r24
+ ldq r4,-72(r17)
+ subq r18,4,r18 C decr loop cnt
+ beq r18,$Lend2 C no further groups of four, skip main loop
+ ALIGN(16)
+C main loop
+$Loop: stq r7,-8(r16) C store two finished result limbs
+ bis r5,r22,r5 C merge with sll part of previous limb
+ stq r8,-16(r16)
+ bis r6,r23,r6 C merge with sll part of previous limb
+
+ srl r1,r20,r7
+ subq r18,4,r18 C decr loop cnt
+ sll r1,r19,r21
+ unop C ldq r31,-96(r17)
+
+ srl r2,r20,r8
+ ldq r1,-80(r17) C load limbs for the next iteration
+ sll r2,r19,r22
+ ldq r2,-88(r17)
+
+ stq r5,-24(r16) C store two more finished result limbs
+ bis r7,r24,r7 C merge with sll part of previous limb
+ stq r6,-32(r16)
+ bis r8,r21,r8 C merge with sll part of previous limb
+
+ srl r3,r20,r5
+ unop C ldq r31,-96(r17)
+ sll r3,r19,r23
+ subq r16,32,r16 C step rp down four limbs
+
+ srl r4,r20,r6
+ ldq r3,-96(r17) C load limbs for the next iteration
+ sll r4,r19,r24
+ ldq r4,-104(r17)
+
+ subq r17,32,r17 C step up down four limbs
+ bne r18,$Loop C loop until loop cnt is zero
+C cool down phase 2/1
+$Lend2: stq r7,-8(r16)
+ bis r5,r22,r5
+ stq r8,-16(r16)
+ bis r6,r23,r6
+ srl r1,r20,r7
+ sll r1,r19,r21
+ srl r2,r20,r8
+ sll r2,r19,r22
+ stq r5,-24(r16)
+ bis r7,r24,r7
+ stq r6,-32(r16)
+ bis r8,r21,r8
+ srl r3,r20,r5
+ sll r3,r19,r23
+ srl r4,r20,r6
+ sll r4,r19,r24
+C cool down phase 2/2
+ stq r7,-40(r16)
+ bis r5,r22,r5
+ stq r8,-48(r16)
+ bis r6,r23,r6
+ stq r5,-56(r16)
+ stq r6,-64(r16)
+C cool down phase 2/3
+ stq r24,-72(r16) C last limb's sll part is stored with nothing merged in
+ ret r31,(r26),1 C return, function result is in r0
+
+C cool down phase 1/1
+$Lend1: srl r1,r20,r7
+ sll r1,r19,r21
+ srl r2,r20,r8
+ sll r2,r19,r22
+ srl r3,r20,r5
+ bis r7,r24,r7
+ sll r3,r19,r23
+ bis r8,r21,r8
+ srl r4,r20,r6
+ sll r4,r19,r24
+C cool down phase 1/2
+ stq r7,-8(r16)
+ bis r5,r22,r5
+ stq r8,-16(r16)
+ bis r6,r23,r6
+ stq r5,-24(r16)
+ stq r6,-32(r16)
+ stq r24,-40(r16) C last limb's sll part is stored with nothing merged in
+ ret r31,(r26),1 C return, function result is in r0
+
+$Lend: stq r24,-8(r16) C store the pending sll part as the last result limb
+ ret r31,(r26),1 C return, function result is in r0
+EPILOGUE(mpn_lshift)
+ASM_END()
diff --git a/gmp/mpn/alpha/ev5/rshift.asm b/gmp/mpn/alpha/ev5/rshift.asm
new file mode 100644
index 0000000000..0244da35a5
--- /dev/null
+++ b/gmp/mpn/alpha/ev5/rshift.asm
@@ -0,0 +1,169 @@
+dnl Alpha EV5 mpn_rshift -- Shift a number right.
+
+dnl Copyright 1994, 1995, 2000 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C EV4: ?
+C EV5: 3.25
+C EV6: 1.75
+
+C INPUT PARAMETERS
+C rp r16
+C up r17
+C n r18
+C cnt r19
+
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ ldq r4,0(r17) C load first limb
+ subq r31,r19,r20 C r20 = -cnt; shifts use only the low 6 count bits, so this acts as 64-cnt
+ subq r18,1,r18 C r18 = n-1
+ and r18,4-1,r28 C number of limbs in first loop
+ sll r4,r20,r0 C compute function result
+
+ beq r28,$L0 C skip Loop0 when n-1 is a multiple of 4
+ subq r18,r28,r18 C r18 = limbs left for the 4-way unrolled code
+
+ ALIGN(8)
+$Loop0: ldq r3,8(r17) C r3 = next limb, r4 = current limb
+ addq r16,8,r16
+ srl r4,r19,r5 C low part comes from the current limb
+ addq r17,8,r17
+ subq r28,1,r28
+ sll r3,r20,r6 C high part comes from the next limb
+ bis r3,r3,r4 C next limb becomes the current limb
+ bis r5,r6,r8 C merge both parts into one result limb
+ stq r8,-8(r16) C rp was already advanced above
+ bne r28,$Loop0
+
+$L0: srl r4,r19,r24 C r24 = shifted-down part of the current limb
+ beq r18,$Lend C no limbs left, r24 is the last result limb
+C warm up phase 1
+ ldq r1,8(r17) C load the next four limbs into r1-r4
+ subq r18,4,r18
+ ldq r2,16(r17)
+ ldq r3,24(r17)
+ ldq r4,32(r17)
+ beq r18,$Lend1 C exactly four limbs were left
+C warm up phase 2
+ sll r1,r20,r7 C r7,r8,r5,r6 = high parts; r21,r22,r23,r24 = low parts
+ srl r1,r19,r21
+ sll r2,r20,r8
+ ldq r1,40(r17)
+ srl r2,r19,r22
+ ldq r2,48(r17)
+ sll r3,r20,r5
+ bis r7,r24,r7 C merge with the low part carried in r24
+ srl r3,r19,r23
+ bis r8,r21,r8
+ sll r4,r20,r6
+ ldq r3,56(r17)
+ srl r4,r19,r24 C r24 again carries the low part for the following limb
+ ldq r4,64(r17)
+ subq r18,4,r18
+ beq r18,$Lend2 C eight limbs loaded and none left to load
+ ALIGN(16)
+C main loop
+$Loop: stq r7,0(r16) C four result limbs per iteration
+ bis r5,r22,r5
+ stq r8,8(r16)
+ bis r6,r23,r6
+
+ sll r1,r20,r7
+ subq r18,4,r18
+ srl r1,r19,r21
+ unop C ldq r31,-96(r17)  NOTE(review): same offset as in lshift although up ascends here
+
+ sll r2,r20,r8
+ ldq r1,72(r17)
+ srl r2,r19,r22
+ ldq r2,80(r17)
+
+ stq r5,16(r16)
+ bis r7,r24,r7
+ stq r6,24(r16)
+ bis r8,r21,r8
+
+ sll r3,r20,r5
+ unop C ldq r31,-96(r17)  NOTE(review): same offset as in lshift although up ascends here
+ srl r3,r19,r23
+ addq r16,32,r16
+
+ sll r4,r20,r6
+ ldq r3,88(r17)
+ srl r4,r19,r24
+ ldq r4,96(r17)
+
+ addq r17,32,r17
+ bne r18,$Loop
+C cool down phase 2/1
+$Lend2: stq r7,0(r16) C drain the pipeline, no more loads from here on
+ bis r5,r22,r5
+ stq r8,8(r16)
+ bis r6,r23,r6
+ sll r1,r20,r7
+ srl r1,r19,r21
+ sll r2,r20,r8
+ srl r2,r19,r22
+ stq r5,16(r16)
+ bis r7,r24,r7
+ stq r6,24(r16)
+ bis r8,r21,r8
+ sll r3,r20,r5
+ srl r3,r19,r23
+ sll r4,r20,r6
+ srl r4,r19,r24
+C cool down phase 2/2
+ stq r7,32(r16)
+ bis r5,r22,r5
+ stq r8,40(r16)
+ bis r6,r23,r6
+ stq r5,48(r16)
+ stq r6,56(r16)
+C cool down phase 2/3
+ stq r24,64(r16) C last result limb has no high part to merge
+ ret r31,(r26),1
+
+C cool down phase 1/1
+$Lend1: sll r1,r20,r7 C only the four limbs from warm up phase 1 remain
+ srl r1,r19,r21
+ sll r2,r20,r8
+ srl r2,r19,r22
+ sll r3,r20,r5
+ bis r7,r24,r7
+ srl r3,r19,r23
+ bis r8,r21,r8
+ sll r4,r20,r6
+ srl r4,r19,r24
+C cool down phase 1/2
+ stq r7,0(r16)
+ bis r5,r22,r5
+ stq r8,8(r16)
+ bis r6,r23,r6
+ stq r5,16(r16)
+ stq r6,24(r16)
+ stq r24,32(r16) C last result limb has no high part to merge
+ ret r31,(r26),1
+
+$Lend: stq r24,0(r16) C store the last result limb
+ ret r31,(r26),1
+EPILOGUE(mpn_rshift)
+ASM_END()
diff --git a/gmp/mpn/alpha/ev5/sub_n.asm b/gmp/mpn/alpha/ev5/sub_n.asm
new file mode 100644
index 0000000000..2c25fad400
--- /dev/null
+++ b/gmp/mpn/alpha/ev5/sub_n.asm
@@ -0,0 +1,146 @@
+dnl Alpha EV5 mpn_sub_n -- Subtract two limb vectors of the same length > 0
+dnl and store difference in a third limb vector.
+
+dnl Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C EV4: ?
+C EV5: 4.75
+C EV6: 3
+
+dnl INPUT PARAMETERS
+dnl res_ptr r16
+dnl s1_ptr r17
+dnl s2_ptr r18
+dnl size r19
+
+ASM_START()
+PROLOGUE(mpn_sub_n)
+ bis r31,r31,r25 C clear cy
+ subq r19,4,r19 C decr loop cnt
+ blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop
+C Start software pipeline for 1st loop
+ ldq r0,0(r18)
+ ldq r4,0(r17)
+ ldq r1,8(r18)
+ ldq r5,8(r17)
+ addq r17,32,r17 C update s1_ptr
+ ldq r2,16(r18)
+ subq r4,r0,r20 C 1st main subtract (incoming cy is zero)
+ ldq r3,24(r18)
+ subq r19,4,r19 C decr loop cnt
+ ldq r6,-16(r17)
+ cmpult r4,r0,r25 C compute cy from last subtract
+ ldq r7,-8(r17)
+ subq r5,r1,r28 C 2nd main subtract
+ addq r18,32,r18 C update s2_ptr
+ subq r28,r25,r21 C 2nd carry subtract
+ cmpult r5,r1,r8 C compute cy from last subtract
+ blt r19,$Lend1 C if less than 4 limbs remain, jump
+C 1st loop handles groups of 4 limbs in a software pipeline
+ ALIGN(16)
+$Loop: cmpult r28,r25,r25 C compute cy from last subtract
+ ldq r0,0(r18)
+ bis r8,r25,r25 C combine cy from the two subtracts
+ ldq r1,8(r18)
+ subq r6,r2,r28 C 3rd main subtract
+ ldq r4,0(r17)
+ subq r28,r25,r22 C 3rd carry subtract
+ ldq r5,8(r17)
+ cmpult r6,r2,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two subtracts
+ stq r21,8(r16)
+ subq r7,r3,r28 C 4th main subtract
+ subq r28,r25,r23 C 4th carry subtract
+ cmpult r7,r3,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ addq r17,32,r17 C update s1_ptr
+ bis r8,r25,r25 C combine cy from the two subtracts
+ addq r16,32,r16 C update res_ptr
+ subq r4,r0,r28 C 1st main subtract
+ ldq r2,16(r18)
+ subq r28,r25,r20 C 1st carry subtract
+ ldq r3,24(r18)
+ cmpult r4,r0,r8 C compute cy from last subtract
+ ldq r6,-16(r17)
+ cmpult r28,r25,r25 C compute cy from last subtract
+ ldq r7,-8(r17)
+ bis r8,r25,r25 C combine cy from the two subtracts
+ subq r19,4,r19 C decr loop cnt
+ stq r22,-16(r16)
+ subq r5,r1,r28 C 2nd main subtract
+ stq r23,-8(r16)
+ subq r28,r25,r21 C 2nd carry subtract
+ addq r18,32,r18 C update s2_ptr
+ cmpult r5,r1,r8 C compute cy from last subtract
+ bge r19,$Loop
+C Finish software pipeline for 1st loop
+$Lend1: cmpult r28,r25,r25 C compute cy from last subtract
+ bis r8,r25,r25 C combine cy from the two subtracts
+ subq r6,r2,r28 C 3rd main subtract
+ subq r28,r25,r22 C 3rd carry subtract
+ cmpult r6,r2,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two subtracts
+ stq r21,8(r16)
+ subq r7,r3,r28 C 4th main subtract
+ subq r28,r25,r23 C 4th carry subtract
+ cmpult r7,r3,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ bis r8,r25,r25 C combine cy from the two subtracts
+ addq r16,32,r16 C update res_ptr
+ stq r22,-16(r16)
+ stq r23,-8(r16)
+$Lend2: addq r19,4,r19 C restore loop cnt
+ beq r19,$Lret C no limbs left, return the cy held in r25
+C Start software pipeline for 2nd loop
+ ldq r0,0(r18)
+ ldq r4,0(r17)
+ subq r19,1,r19
+ beq r19,$Lend0 C a single limb is left, no loop needed
+C 2nd loop handles remaining 1-3 limbs
+ ALIGN(16)
+$Loop0: subq r4,r0,r28 C main subtract
+ cmpult r4,r0,r8 C compute cy from last subtract
+ ldq r0,8(r18)
+ ldq r4,8(r17)
+ subq r28,r25,r20 C carry subtract
+ addq r18,8,r18
+ addq r17,8,r17
+ stq r20,0(r16)
+ cmpult r28,r25,r25 C compute cy from last subtract
+ subq r19,1,r19 C decr loop cnt
+ bis r8,r25,r25 C combine cy from the two subtracts
+ addq r16,8,r16
+ bne r19,$Loop0
+$Lend0: subq r4,r0,r28 C main subtract
+ subq r28,r25,r20 C carry subtract
+ cmpult r4,r0,r8 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
+ stq r20,0(r16)
+ bis r8,r25,r25 C combine cy from the two subtracts
+
+$Lret: bis r25,r31,r0 C return cy
+ ret r31,(r26),1
+EPILOGUE(mpn_sub_n)
+ASM_END()
diff --git a/gmp/mpn/alpha/ev6/add_n.asm b/gmp/mpn/alpha/ev6/add_n.asm
index 9261f31b8a..114af73aa0 100644
--- a/gmp/mpn/alpha/ev6/add_n.asm
+++ b/gmp/mpn/alpha/ev6/add_n.asm
@@ -4,30 +4,19 @@ dnl store sum in a third limb vector.
dnl Copyright 2000, 2003, 2005 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/ev6/aorslsh1_n.asm b/gmp/mpn/alpha/ev6/aorslsh1_n.asm
deleted file mode 100644
index cb966ce021..0000000000
--- a/gmp/mpn/alpha/ev6/aorslsh1_n.asm
+++ /dev/null
@@ -1,172 +0,0 @@
-dnl Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
-
-dnl Copyright 2003, 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C EV4: ?
-C EV5: 7
-C EV6: 4
-
-C TODO
-C * Tune to reach 3.75 c/l on ev6.
-
-define(`rp',`r16')
-define(`up',`r17')
-define(`vp',`r18')
-define(`n', `r19')
-
-define(`u0', `r8')
-define(`u1', `r1')
-define(`v0', `r4')
-define(`v1', `r5')
-
-define(`cy0', `r0')
-define(`cy1', `r20')
-define(`cy', `r22')
-define(`rr', `r24')
-define(`ps', `r25')
-define(`sl', `r28')
-
-ifdef(`OPERATION_addlsh1_n',`
- define(ADDSUB, addq)
- define(CARRY, `cmpult $1,$2,$3')
- define(func, mpn_addlsh1_n)
-')
-ifdef(`OPERATION_sublsh1_n',`
- define(ADDSUB, subq)
- define(CARRY, `cmpult $2,$1,$3')
- define(func, mpn_sublsh1_n)
-')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
-
-ASM_START()
-PROLOGUE(func)
- and n, 2, cy0
- blbs n, L(bx1)
-L(bx0): ldq v1, 0(vp)
- ldq u1, 0(up)
- lda r2, 0(r31)
- bne cy0, L(b10)
-
-L(b00): lda vp, 48(vp)
- lda up, -16(up)
- lda rp, -8(rp)
- lda cy0, 0(r31)
- br r31, L(lo0)
-
-L(b10): lda vp, 32(vp)
- lda rp, 8(rp)
- lda cy0, 0(r31)
- br r31, L(lo2)
-
-L(bx1): ldq v0, 0(vp)
- ldq u0, 0(up)
- lda r3, 0(r31)
- beq cy0, L(b01)
-
-L(b11): lda vp, 40(vp)
- lda up, -24(up)
- lda rp, 16(rp)
- lda cy1, 0(r31)
- br r31, L(lo3)
-
-L(b01): lda n, -4(n)
- lda cy1, 0(r31)
- ble n, L(end)
- lda vp, 24(vp)
- lda up, -8(up)
-
- ALIGN(16)
-L(top): addq v0, v0, r6
- ldq v1, -16(vp)
- addq r6, r3, sl C combined vlimb
- ldq u1, 16(up)
- ADDSUB u0, sl, ps C ulimb + (vlimb << 1)
- cmplt v0, r31, r2 C high v bits
- ADDSUB ps, cy1, rr C consume carry from previous operation
- CARRY( ps, u0, cy0) C carry out #2
- stq rr, 0(rp)
- CARRY( rr, ps, cy) C carry out #3
- lda vp, 32(vp) C bookkeeping
- addq cy, cy0, cy0 C final carry out
-L(lo0): addq v1, v1, r7
- ldq v0, -40(vp)
- addq r7, r2, sl
- ldq u0, 24(up)
- ADDSUB u1, sl, ps
- cmplt v1, r31, r3
- ADDSUB ps, cy0, rr
- CARRY( ps, u1, cy1)
- stq rr, 8(rp)
- CARRY( rr, ps, cy)
- lda rp, 32(rp) C bookkeeping
- addq cy, cy1, cy1
-L(lo3): addq v0, v0, r6
- ldq v1, -32(vp)
- addq r6, r3, sl
- ldq u1, 32(up)
- ADDSUB u0, sl, ps
- cmplt v0, r31, r2
- ADDSUB ps, cy1, rr
- CARRY( ps, u0, cy0)
- stq rr, -16(rp)
- CARRY( rr, ps, cy)
- lda up, 32(up) C bookkeeping
- addq cy, cy0, cy0
-L(lo2): addq v1, v1, r7
- ldq v0, -24(vp)
- addq r7, r2, sl
- ldq u0, 8(up)
- ADDSUB u1, sl, ps
- cmplt v1, r31, r3
- ADDSUB ps, cy0, rr
- CARRY( ps, u1, cy1)
- stq rr, -8(rp)
- CARRY( rr, ps, cy)
- lda n, -4(n) C bookkeeping
- addq cy, cy1, cy1
- bgt n, L(top)
-
-L(end): addq v0, v0, r6
- addq r6, r3, sl
- ADDSUB u0, sl, ps
- cmplt v0, r31, r2
- ADDSUB ps, cy1, rr
- CARRY( ps, u0, cy0)
- stq rr, 0(rp)
- CARRY( rr, ps, cy)
- addq cy, cy0, cy0
- addq cy0, r2, r0
-
- ret r31,(r26),1
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/alpha/ev6/aorsmul_1.asm b/gmp/mpn/alpha/ev6/aorsmul_1.asm
index 0e68e6e7ad..eda092b2d5 100644
--- a/gmp/mpn/alpha/ev6/aorsmul_1.asm
+++ b/gmp/mpn/alpha/ev6/aorsmul_1.asm
@@ -1,32 +1,21 @@
dnl Alpha ev6 mpn_addmul_1 and mpn_submul_1.
-dnl Copyright 2000, 2003-2005, 2008 Free Software Foundation, Inc.
+dnl Copyright 2000, 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/ev6/gmp-mparam.h b/gmp/mpn/alpha/ev6/gmp-mparam.h
index e51d6b0d15..a01e977433 100644
--- a/gmp/mpn/alpha/ev6/gmp-mparam.h
+++ b/gmp/mpn/alpha/ev6/gmp-mparam.h
@@ -1,209 +1,76 @@
/* gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 1999-2002, 2004, 2005, 2008-2010, 2014 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2008, 2009
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
#define DIVEXACT_BY3_METHOD 0 /* override ../diveby3.asm */
-/* 500 MHz 21164 (agnesi.math.su.se) */
-/* FFT tuning limit = 20000000 */
-/* Generated by tuneup.c, 2014-03-14, gcc 3.3 */
-
-#define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */
-#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1_1P_METHOD 2
-#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 21
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 7
-#define USE_PREINV_DIVREM_1 1 /* preinv always */
-#define DIV_QR_1N_PI1_METHOD 2
-#define DIV_QR_1_NORM_THRESHOLD 5
-#define DIV_QR_1_UNNORM_THRESHOLD 1
-#define DIV_QR_2_PI2_THRESHOLD 8
-#define DIVEXACT_1_THRESHOLD 0 /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD 20
-
-#define MUL_TOOM22_THRESHOLD 32
-#define MUL_TOOM33_THRESHOLD 117
-#define MUL_TOOM44_THRESHOLD 124
-#define MUL_TOOM6H_THRESHOLD 230
-#define MUL_TOOM8H_THRESHOLD 357
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 107
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 88
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 105
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 136
-
-#define SQR_BASECASE_THRESHOLD 0 /* always */
-#define SQR_TOOM2_THRESHOLD 59
-#define SQR_TOOM3_THRESHOLD 123
-#define SQR_TOOM4_THRESHOLD 163
-#define SQR_TOOM6_THRESHOLD 333
-#define SQR_TOOM8_THRESHOLD 0 /* always */
-
-#define MULMID_TOOM42_THRESHOLD 52
-
-#define MULMOD_BNM1_THRESHOLD 19
-#define SQRMOD_BNM1_THRESHOLD 5
-
-#define MUL_FFT_MODF_THRESHOLD 468 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 468, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
- { 11, 5}, { 23, 6}, { 19, 7}, { 10, 6}, \
- { 24, 7}, { 13, 6}, { 27, 7}, { 14, 6}, \
- { 29, 7}, { 17, 6}, { 35, 7}, { 29, 8}, \
- { 15, 7}, { 32, 8}, { 17, 7}, { 35, 8}, \
- { 19, 7}, { 39, 8}, { 29, 9}, { 15, 8}, \
- { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
- { 51, 9}, { 27, 8}, { 55, 9}, { 35, 8}, \
- { 71, 9}, { 39,10}, { 23, 9}, { 55,10}, \
- { 31, 9}, { 67,10}, { 39, 9}, { 79,10}, \
- { 47, 9}, { 95,10}, { 55,11}, { 31,10}, \
- { 79,11}, { 47,10}, { 103,12}, { 31,11}, \
- { 63,10}, { 135,11}, { 79,10}, { 167,11}, \
- { 95,10}, { 199,11}, { 111,12}, { 63,11}, \
- { 143,10}, { 287, 9}, { 575,11}, { 159,10}, \
- { 319,12}, { 95,11}, { 191,10}, { 383,11}, \
- { 207,13}, { 63,12}, { 127,11}, { 255,10}, \
- { 511,11}, { 271,10}, { 543,11}, { 287,10}, \
- { 575,12}, { 159,11}, { 319,10}, { 639,11}, \
- { 335,10}, { 671,11}, { 351,10}, { 703,12}, \
- { 191,11}, { 383,10}, { 767,11}, { 415,12}, \
- { 223,11}, { 447,13}, { 127,12}, { 255,11}, \
- { 543,12}, { 287,11}, { 575,10}, { 1151,11}, \
- { 607,12}, { 319,11}, { 671,12}, { 351,11}, \
- { 703,13}, { 191,12}, { 383,11}, { 767,12}, \
- { 415,11}, { 831,12}, { 447,14}, { 127,13}, \
- { 255,12}, { 575,11}, { 1151,12}, { 607,13}, \
- { 319,12}, { 735,13}, { 383,12}, { 767,11}, \
- { 1535,12}, { 831,13}, { 447,12}, { 959,14}, \
- { 255,13}, { 511,12}, { 1087,13}, { 575,12}, \
- { 1215,13}, { 639,12}, { 1343,13}, { 703,12}, \
- { 1407,14}, { 383,13}, { 767,12}, { 1535,13}, \
- { 831,12}, { 1663,13}, { 959,15}, { 255,14}, \
- { 511,13}, { 1215,14}, { 639,13}, { 1407,14}, \
- { 767,13}, { 1663,14}, { 895,13}, { 1855,15}, \
- { 511,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
- { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
- {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 151
-#define MUL_FFT_THRESHOLD 5760
-
-#define SQR_FFT_MODF_THRESHOLD 412 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 412, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
- { 11, 5}, { 23, 6}, { 12, 5}, { 25, 6}, \
- { 27, 7}, { 14, 6}, { 29, 7}, { 28, 8}, \
- { 15, 7}, { 31, 8}, { 17, 7}, { 36, 8}, \
- { 19, 7}, { 39, 8}, { 29, 9}, { 15, 8}, \
- { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
- { 49, 9}, { 27,10}, { 15, 9}, { 39,10}, \
- { 23, 9}, { 51,11}, { 15,10}, { 31, 9}, \
- { 67,10}, { 39, 9}, { 79,10}, { 47, 9}, \
- { 95,10}, { 55,11}, { 31,10}, { 79,11}, \
- { 47,10}, { 95,12}, { 31,11}, { 63,10}, \
- { 127, 9}, { 255,11}, { 79,10}, { 159, 9}, \
- { 319,10}, { 167,11}, { 95,10}, { 191, 9}, \
- { 383,11}, { 111,12}, { 63,11}, { 127,10}, \
- { 271,11}, { 143,10}, { 287, 9}, { 575,10}, \
- { 303,11}, { 159,10}, { 319,12}, { 95,11}, \
- { 191,10}, { 383,11}, { 207,13}, { 63,12}, \
- { 127,11}, { 255,10}, { 511,11}, { 271,10}, \
- { 543,11}, { 287,10}, { 575,11}, { 303,12}, \
- { 159,11}, { 319,10}, { 639,11}, { 335,10}, \
- { 671,11}, { 351,10}, { 703,11}, { 367,12}, \
- { 191,11}, { 383,10}, { 767,11}, { 415,12}, \
- { 223,11}, { 447,13}, { 127,12}, { 255,11}, \
- { 543,12}, { 287,11}, { 575,10}, { 1151,11}, \
- { 607,12}, { 319,11}, { 639,10}, { 1279,11}, \
- { 671,12}, { 351,11}, { 703,13}, { 191,12}, \
- { 383,11}, { 767,12}, { 415,11}, { 831,12}, \
- { 447,11}, { 895,12}, { 479,14}, { 127,13}, \
- { 255,12}, { 575,11}, { 1151,12}, { 607,13}, \
- { 319,12}, { 703,11}, { 1407,12}, { 735,13}, \
- { 383,12}, { 831,13}, { 447,12}, { 959,14}, \
- { 255,13}, { 511,12}, { 1087,13}, { 575,12}, \
- { 1151,13}, { 639,12}, { 1279,13}, { 703,12}, \
- { 1407,14}, { 383,13}, { 767,12}, { 1535,13}, \
- { 831,12}, { 1663,13}, { 959,15}, { 255,14}, \
- { 511,13}, { 1215,14}, { 639,13}, { 1407,14}, \
- { 767,13}, { 1663,14}, { 895,13}, { 1791,15}, \
- { 511,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
- { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
- {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 159
-#define SQR_FFT_THRESHOLD 5056
-
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 100
-#define MULLO_MUL_N_THRESHOLD 11355
-
-#define DC_DIV_QR_THRESHOLD 124
-#define DC_DIVAPPR_Q_THRESHOLD 438
-#define DC_BDIV_QR_THRESHOLD 153
-#define DC_BDIV_Q_THRESHOLD 318
-
-#define INV_MULMOD_BNM1_THRESHOLD 62
-#define INV_NEWTON_THRESHOLD 384
-#define INV_APPR_THRESHOLD 402
-
-#define BINV_NEWTON_THRESHOLD 381
-#define REDC_1_TO_REDC_N_THRESHOLD 110
-
-#define MU_DIV_QR_THRESHOLD 1752
-#define MU_DIVAPPR_Q_THRESHOLD 1895
-#define MUPI_DIV_QR_THRESHOLD 174
-#define MU_BDIV_QR_THRESHOLD 1387
-#define MU_BDIV_Q_THRESHOLD 1787
-
-#define POWM_SEC_TABLE 1,13,66,82,579
-
-#define MATRIX22_STRASSEN_THRESHOLD 15
-#define HGCD_THRESHOLD 318
-#define HGCD_APPR_THRESHOLD 363
-#define HGCD_REDUCE_THRESHOLD 2384
-#define GCD_DC_THRESHOLD 2504
-#define GCDEXT_DC_THRESHOLD 671
-#define JACOBI_BASE_METHOD 3
-
-#define GET_STR_DC_THRESHOLD 14
-#define GET_STR_PRECOMPUTE_THRESHOLD 25
-#define SET_STR_DC_THRESHOLD 3754
-#define SET_STR_PRECOMPUTE_THRESHOLD 8097
-
-#define FAC_DSC_THRESHOLD 951
-#define FAC_ODD_THRESHOLD 24
+/* 500 MHz 21164 */
+
+/* Generated by tuneup.c, 2009-01-12, gcc 3.3 */
+
+#define MUL_KARATSUBA_THRESHOLD 31
+#define MUL_TOOM3_THRESHOLD 101
+#define MUL_TOOM44_THRESHOLD 168
+
+#define SQR_BASECASE_THRESHOLD 6
+#define SQR_KARATSUBA_THRESHOLD 60
+#define SQR_TOOM3_THRESHOLD 102
+#define SQR_TOOM4_THRESHOLD 172
+
+#define MULLOW_BASECASE_THRESHOLD 0 /* always */
+#define MULLOW_DC_THRESHOLD 102
+#define MULLOW_MUL_N_THRESHOLD 399
+
+#define DIV_SB_PREINV_THRESHOLD 0 /* preinv always */
+#define DIV_DC_THRESHOLD 134
+#define POWM_THRESHOLD 257
+
+#define MATRIX22_STRASSEN_THRESHOLD 19
+#define HGCD_THRESHOLD 303
+#define GCD_DC_THRESHOLD 1258
+#define GCDEXT_DC_THRESHOLD 807
+#define JACOBI_BASE_METHOD 3
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1_THRESHOLD 13
+#define MOD_1_2_THRESHOLD 14
+#define MOD_1_4_THRESHOLD 40
+#define USE_PREINV_DIVREM_1 1 /* preinv always */
+#define USE_PREINV_MOD_1 1 /* preinv always */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define MODEXACT_1_ODD_THRESHOLD 0 /* always */
+
+#define GET_STR_DC_THRESHOLD 16
+#define GET_STR_PRECOMPUTE_THRESHOLD 23
+#define SET_STR_DC_THRESHOLD 4615
+#define SET_STR_PRECOMPUTE_THRESHOLD 8178
+
+#define MUL_FFT_TABLE { 432, 864, 1856, 3840, 11264, 28672, 81920, 327680, 0 }
+#define MUL_FFT_MODF_THRESHOLD 448
+#define MUL_FFT_THRESHOLD 4992
+
+#define SQR_FFT_TABLE { 432, 864, 1728, 3840, 9216, 20480, 81920, 327680, 786432, 0 }
+#define SQR_FFT_MODF_THRESHOLD 344
+#define SQR_FFT_THRESHOLD 3712
diff --git a/gmp/mpn/alpha/ev6/mod_1_4.asm b/gmp/mpn/alpha/ev6/mod_1_4.asm
deleted file mode 100644
index 836de07c0f..0000000000
--- a/gmp/mpn/alpha/ev6/mod_1_4.asm
+++ /dev/null
@@ -1,337 +0,0 @@
-dnl Alpha mpn_mod_1s_4p
-
-dnl Contributed to the GNU project by Torbjorn Granlund.
-
-dnl Copyright 2009, 2010 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C TODO:
-C * Optimise. 2.75 c/l should be possible.
-C * Write a proper mpn_mod_1s_4p_cps. The code below was compiler generated.
-C * Optimise feed-in code, starting the sw pipeline in switch code.
-C * Shorten software pipeline. The mul instructions are scheduled too far
-C from their users. Fixing this will allow us to use fewer registers.
-C * If we cannot reduce register usage, write perhaps small-n basecase.
-C * Does this work for PIC?
-
-C cycles/limb
-C EV4: ?
-C EV5: 23
-C EV6: 3
-
-define(`ap', `r16')
-define(`n', `r17')
-define(`pl', `r24')
-define(`ph', `r25')
-define(`rl', `r6')
-define(`rh', `r7')
-define(`B1modb', `r1')
-define(`B2modb', `r2')
-define(`B3modb', `r3')
-define(`B4modb', `r4')
-define(`B5modb', `r5')
-
-ASM_START()
-PROLOGUE(mpn_mod_1s_4p)
- lda r30, -64(r30)
- stq r9, 8(r30)
- ldq B1modb, 16(r19)
- stq r10, 16(r30)
- ldq B2modb, 24(r19)
- stq r11, 24(r30)
- ldq B3modb, 32(r19)
- stq r12, 32(r30)
- ldq B4modb, 40(r19)
- stq r13, 40(r30)
- ldq B5modb, 48(r19)
- s8addq n, ap, ap C point ap at vector end
-
- and n, 3, r0
- lda n, -4(n)
- beq r0, L(b0)
- lda r6, -2(r0)
- blt r6, L(b1)
- beq r6, L(b2)
-
-L(b3): ldq r21, -16(ap)
- ldq r22, -8(ap)
- ldq r20, -24(ap)
- mulq r21, B1modb, r8
- umulh r21, B1modb, r12
- mulq r22, B2modb, r9
- umulh r22, B2modb, r13
- addq r8, r20, pl
- cmpult pl, r8, r0
- addq r0, r12, ph
- addq r9, pl, rl
- cmpult rl, r9, r0
- addq r13, ph, ph
- addq r0, ph, rh
- lda ap, -56(ap)
- br L(com)
-
-L(b0): ldq r21, -24(ap)
- ldq r22, -16(ap)
- ldq r23, -8(ap)
- ldq r20, -32(ap)
- mulq r21, B1modb, r8
- umulh r21, B1modb, r12
- mulq r22, B2modb, r9
- umulh r22, B2modb, r13
- mulq r23, B3modb, r10
- umulh r23, B3modb, r27
- addq r8, r20, pl
- cmpult pl, r8, r0
- addq r0, r12, ph
- addq r9, pl, pl
- cmpult pl, r9, r0
- addq r13, ph, ph
- addq r0, ph, ph
- addq r10, pl, rl
- cmpult rl, r10, r0
- addq r27, ph, ph
- addq r0, ph, rh
- lda ap, -64(ap)
- br L(com)
-
-L(b1): bis r31, r31, rh
- ldq rl, -8(ap)
- lda ap, -40(ap)
- br L(com)
-
-L(b2): ldq rh, -8(ap)
- ldq rl, -16(ap)
- lda ap, -48(ap)
-
-L(com): ble n, L(ed3)
- ldq r21, 8(ap)
- ldq r22, 16(ap)
- ldq r23, 24(ap)
- ldq r20, 0(ap)
- lda n, -4(n)
- lda ap, -32(ap)
- mulq r21, B1modb, r8
- umulh r21, B1modb, r12
- mulq r22, B2modb, r9
- umulh r22, B2modb, r13
- mulq r23, B3modb, r10
- umulh r23, B3modb, r27
- mulq rl, B4modb, r11
- umulh rl, B4modb, r28
- ble n, L(ed2)
-
- ALIGN(16)
-L(top): ldq r21, 8(ap)
- mulq rh, B5modb, rl
- addq r8, r20, pl
- ldq r22, 16(ap)
- cmpult pl, r8, r0
- umulh rh, B5modb, rh
- ldq r23, 24(ap)
- addq r0, r12, ph
- addq r9, pl, pl
- mulq r21, B1modb, r8
- cmpult pl, r9, r0
- addq r13, ph, ph
- umulh r21, B1modb, r12
- lda ap, -32(ap)
- addq r0, ph, ph
- addq r10, pl, pl
- mulq r22, B2modb, r9
- cmpult pl, r10, r0
- addq r27, ph, ph
- addq r11, pl, pl
- umulh r22, B2modb, r13
- addq r0, ph, ph
- cmpult pl, r11, r0
- addq r28, ph, ph
- mulq r23, B3modb, r10
- ldq r20, 32(ap)
- addq pl, rl, rl
- umulh r23, B3modb, r27
- addq r0, ph, ph
- cmpult rl, pl, r0
- mulq rl, B4modb, r11
- addq ph, rh, rh
- umulh rl, B4modb, r28
- addq r0, rh, rh
- lda n, -4(n)
- bgt n, L(top)
-
-L(ed2): mulq rh, B5modb, rl
- addq r8, r20, pl
- umulh rh, B5modb, rh
- cmpult pl, r8, r0
- addq r0, r12, ph
- addq r9, pl, pl
- cmpult pl, r9, r0
- addq r13, ph, ph
- addq r0, ph, ph
- addq r10, pl, pl
- cmpult pl, r10, r0
- addq r27, ph, ph
- addq r11, pl, pl
- addq r0, ph, ph
- cmpult pl, r11, r0
- addq r28, ph, ph
- addq pl, rl, rl
- addq r0, ph, ph
- cmpult rl, pl, r0
- addq ph, rh, rh
- addq r0, rh, rh
-
-L(ed3): mulq rh, B1modb, r8
- umulh rh, B1modb, rh
- addq r8, rl, rl
- cmpult rl, r8, r0
- addq r0, rh, rh
-
- ldq r24, 8(r19) C cnt
- sll rh, r24, rh
- subq r31, r24, r25
- srl rl, r25, r2
- sll rl, r24, rl
- or r2, rh, rh
-
- ldq r23, 0(r19) C bi
- mulq rh, r23, r8
- umulh rh, r23, r9
- addq rh, 1, r7
- addq r8, rl, r8 C ql
- cmpult r8, rl, r0
- addq r9, r7, r9
- addq r0, r9, r9 C qh
- mulq r9, r18, r21 C qh * b
- subq rl, r21, rl
- cmpult r8, rl, r0 C rl > ql
- negq r0, r0
- and r0, r18, r0
- addq rl, r0, rl
- cmpule r18, rl, r0 C rl >= b
- negq r0, r0
- and r0, r18, r0
- subq rl, r0, rl
-
- srl rl, r24, r0
-
- ldq r9, 8(r30)
- ldq r10, 16(r30)
- ldq r11, 24(r30)
- ldq r12, 32(r30)
- ldq r13, 40(r30)
- lda r30, 64(r30)
- ret r31, (r26), 1
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1s_4p_cps,gp)
- lda r30, -32(r30)
- stq r26, 0(r30)
- stq r9, 8(r30)
- stq r10, 16(r30)
- stq r11, 24(r30)
- mov r16, r11
- LEA( r4, __clz_tab)
- lda r10, 65(r31)
- cmpbge r31, r17, r1
- srl r1, 1, r1
- xor r1, 127, r1
- addq r1, r4, r1
- ldq_u r2, 0(r1)
- extbl r2, r1, r2
- s8subq r2, 7, r2
- srl r17, r2, r3
- subq r10, r2, r10
- addq r3, r4, r3
- ldq_u r1, 0(r3)
- extbl r1, r3, r1
- subq r10, r1, r10
- sll r17, r10, r9
- mov r9, r16
- jsr r26, mpn_invert_limb
- ldah r29, 0(r26)
- subq r31, r10, r2
- lda r1, 1(r31)
- sll r1, r10, r1
- subq r31, r9, r3
- srl r0, r2, r2
- ldq r26, 0(r30)
- bis r2, r1, r2
- lda r29, 0(r29)
- stq r0, 0(r11)
- stq r10, 8(r11)
- mulq r2, r3, r2
- srl r2, r10, r3
- umulh r2, r0, r1
- stq r3, 16(r11)
- mulq r2, r0, r3
- ornot r31, r1, r1
- subq r1, r2, r1
- mulq r1, r9, r1
- addq r1, r9, r2
- cmpule r1, r3, r3
- cmoveq r3, r2, r1
- srl r1, r10, r3
- umulh r1, r0, r2
- stq r3, 24(r11)
- mulq r1, r0, r3
- ornot r31, r2, r2
- subq r2, r1, r2
- mulq r2, r9, r2
- addq r2, r9, r1
- cmpule r2, r3, r3
- cmoveq r3, r1, r2
- srl r2, r10, r1
- umulh r2, r0, r3
- stq r1, 32(r11)
- mulq r2, r0, r1
- ornot r31, r3, r3
- subq r3, r2, r3
- mulq r3, r9, r3
- addq r3, r9, r2
- cmpule r3, r1, r1
- cmoveq r1, r2, r3
- srl r3, r10, r2
- umulh r3, r0, r1
- stq r2, 40(r11)
- mulq r3, r0, r0
- ornot r31, r1, r1
- subq r1, r3, r1
- mulq r1, r9, r1
- addq r1, r9, r9
- cmpule r1, r0, r0
- cmoveq r0, r9, r1
- ldq r9, 8(r30)
- srl r1, r10, r1
- ldq r10, 16(r30)
- stq r1, 48(r11)
- ldq r11, 24(r30)
- lda r30, 32(r30)
- ret r31, (r26), 1
-EPILOGUE()
diff --git a/gmp/mpn/alpha/ev6/mul_1.asm b/gmp/mpn/alpha/ev6/mul_1.asm
index 8ee19cd429..841f5083cb 100644
--- a/gmp/mpn/alpha/ev6/mul_1.asm
+++ b/gmp/mpn/alpha/ev6/mul_1.asm
@@ -4,30 +4,19 @@ dnl result in a second limb vector.
dnl Copyright 2000, 2001, 2005 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -60,7 +49,7 @@ C r20,r29,r13-r15 scramble
C
C We're doing 7 of the 8 carry propagations with a br fixup code and 1 with a
C put-the-carry-into-hi. The idea is that these branches are very rarely
-C taken, and since a non-taken branch consumes no resources, that is better
+C taken, and since a non-taken branch consumes no resources, that is better
C than an addq.
C
C Software pipeline: a load in cycle #09, feeds a mul in cycle #16, feeds an
@@ -137,7 +126,7 @@ $L_9_or_more:
mulq r2,r19,r3 C r3 = prod_low
umulh r2,r19,r21 C r21 = prod_high
beq r20,$Le1b C jump if size was == 1
- bis r31, r31, r0 C FIXME: shouldn't need this
+ bis r31, r31, r0 C FIXME: shouldn't need this
ldq r2,0(r17) C r2 = s1_limb
lda r17,8(r17) C s1_ptr++
lda r20,-1(r20) C size--
diff --git a/gmp/mpn/alpha/ev6/nails/README b/gmp/mpn/alpha/ev6/nails/README
index b214ac50ad..8b3b357a77 100644
--- a/gmp/mpn/alpha/ev6/nails/README
+++ b/gmp/mpn/alpha/ev6/nails/README
@@ -2,29 +2,18 @@ Copyright 2002, 2005 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/.
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
diff --git a/gmp/mpn/alpha/ev6/nails/addmul_1.asm b/gmp/mpn/alpha/ev6/nails/addmul_1.asm
index 711d4e66e5..149195c6f4 100644
--- a/gmp/mpn/alpha/ev6/nails/addmul_1.asm
+++ b/gmp/mpn/alpha/ev6/nails/addmul_1.asm
@@ -1,32 +1,21 @@
dnl Alpha ev6 nails mpn_addmul_1.
dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -36,7 +25,7 @@ C EV5: 18
C EV6: 4
C TODO
-C * Reroll loop for 3.75 c/l with current 4-way unrolling.
+C * Reroll loop for 3.75 c/l with current 4-way unrolling.
C * The loop is overscheduled wrt loads and wrt multiplies, in particular
C umulh.
C * Use FP loop count and multiple exit points, that would simplify feed-in lp0
diff --git a/gmp/mpn/alpha/ev6/nails/addmul_2.asm b/gmp/mpn/alpha/ev6/nails/addmul_2.asm
index 6ff6b3ad6b..9edaed8b3a 100644
--- a/gmp/mpn/alpha/ev6/nails/addmul_2.asm
+++ b/gmp/mpn/alpha/ev6/nails/addmul_2.asm
@@ -1,32 +1,21 @@
dnl Alpha ev6 nails mpn_addmul_2.
dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/ev6/nails/addmul_3.asm b/gmp/mpn/alpha/ev6/nails/addmul_3.asm
index a1ffb680ec..1d89769e13 100644
--- a/gmp/mpn/alpha/ev6/nails/addmul_3.asm
+++ b/gmp/mpn/alpha/ev6/nails/addmul_3.asm
@@ -1,32 +1,21 @@
dnl Alpha ev6 nails mpn_addmul_3.
dnl Copyright 2002, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/ev6/nails/addmul_4.asm b/gmp/mpn/alpha/ev6/nails/addmul_4.asm
index 77e02a4316..f19b0232df 100644
--- a/gmp/mpn/alpha/ev6/nails/addmul_4.asm
+++ b/gmp/mpn/alpha/ev6/nails/addmul_4.asm
@@ -1,32 +1,21 @@
dnl Alpha ev6 nails mpn_addmul_4.
dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/ev6/nails/aors_n.asm b/gmp/mpn/alpha/ev6/nails/aors_n.asm
index f6586773f5..4958e81ed9 100644
--- a/gmp/mpn/alpha/ev6/nails/aors_n.asm
+++ b/gmp/mpn/alpha/ev6/nails/aors_n.asm
@@ -1,32 +1,21 @@
dnl Alpha ev6 nails mpn_add_n and mpn_sub_n.
dnl Copyright 2002, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
dnl Runs at 2.5 cycles/limb. It would be possible to reach 2.0 cycles/limb
diff --git a/gmp/mpn/alpha/ev6/nails/gmp-mparam.h b/gmp/mpn/alpha/ev6/nails/gmp-mparam.h
index 7949fe8df8..1bc93b52c6 100644
--- a/gmp/mpn/alpha/ev6/nails/gmp-mparam.h
+++ b/gmp/mpn/alpha/ev6/nails/gmp-mparam.h
@@ -1,43 +1,33 @@
/* gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 1999-2004 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
/* Generated by tuneup.c, 2004-02-07, gcc 3.3 */
-#define MUL_TOOM22_THRESHOLD 40
-#define MUL_TOOM33_THRESHOLD 236
+#define MUL_KARATSUBA_THRESHOLD 40
+#define MUL_TOOM3_THRESHOLD 236
#define SQR_BASECASE_THRESHOLD 7 /* karatsuba */
-#define SQR_TOOM2_THRESHOLD 0 /* never sqr_basecase */
+#define SQR_KARATSUBA_THRESHOLD 0 /* never sqr_basecase */
#define SQR_TOOM3_THRESHOLD 120
#define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX /* no preinv with nails */
diff --git a/gmp/mpn/alpha/ev6/nails/mul_1.asm b/gmp/mpn/alpha/ev6/nails/mul_1.asm
index da2ee3d099..cac3776ba0 100644
--- a/gmp/mpn/alpha/ev6/nails/mul_1.asm
+++ b/gmp/mpn/alpha/ev6/nails/mul_1.asm
@@ -1,32 +1,21 @@
dnl Alpha ev6 nails mpn_mul_1.
dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -36,10 +25,10 @@ C EV5: 18
C EV6: 3.25
C TODO
-C * Reroll loop for 3.0 c/l with current 4-way unrolling.
+C * Reroll loop for 3.0 c/l with current 4-way unrolling.
C * The loop is overscheduled wrt loads and wrt multiplies, in particular
C umulh.
-C * Use FP loop count and multiple exit points, that would simplify feed-in lp0
+C * Use FP loop count and multiple exit points, that would simplify feed-in lp0
C and would work since the loop structure is really regular.
C INPUT PARAMETERS
diff --git a/gmp/mpn/alpha/ev6/nails/submul_1.asm b/gmp/mpn/alpha/ev6/nails/submul_1.asm
index f473a59ba8..4242517a4a 100644
--- a/gmp/mpn/alpha/ev6/nails/submul_1.asm
+++ b/gmp/mpn/alpha/ev6/nails/submul_1.asm
@@ -1,32 +1,21 @@
dnl Alpha ev6 nails mpn_submul_1.
dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
@@ -36,10 +25,10 @@ C EV5: 18
C EV6: 4
C TODO
-C * Reroll loop for 3.75 c/l with current 4-way unrolling.
+C * Reroll loop for 3.75 c/l with current 4-way unrolling.
C * The loop is overscheduled wrt loads and wrt multiplies, in particular
C umulh.
-C * Use FP loop count and multiple exit points, that would simplify feed-in lp0
+C * Use FP loop count and multiple exit points, that would simplify feed-in lp0
C and would work since the loop structure is really regular.
C INPUT PARAMETERS
diff --git a/gmp/mpn/alpha/ev6/slot.pl b/gmp/mpn/alpha/ev6/slot.pl
index a4c8a36882..17967e79a2 100755..100644
--- a/gmp/mpn/alpha/ev6/slot.pl
+++ b/gmp/mpn/alpha/ev6/slot.pl
@@ -1,32 +1,21 @@
#!/usr/bin/perl -w
-# Copyright 2000, 2001, 2003-2005, 2011 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of either:
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published
+# by the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
#
-# * the GNU Lesser General Public License as published by the Free
-# Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
#
-# or
-#
-# * the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any
-# later version.
-#
-# or both in parallel, as here.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# for more details.
-#
-# You should have received copies of the GNU General Public License and the
-# GNU Lesser General Public License along with the GNU MP Library. If not,
-# see https://www.gnu.org/licenses/.
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
# Usage: slot.pl [filename.o]...
@@ -51,12 +40,9 @@ my %optable =
(
'addq' => 'E',
'and' => 'E',
- 'andnot' => 'E',
'beq' => 'U',
'bge' => 'U',
'bgt' => 'U',
- 'bic' => 'E',
- 'bis' => 'E',
'blt' => 'U',
'bne' => 'U',
'br' => 'L',
@@ -85,7 +71,6 @@ my %optable =
'ldt' => 'L',
'ret' => 'L',
'mov' => 'E',
- 'mull' => 'U',
'mulq' => 'U',
'negq' => 'E',
'nop' => 'E',
diff --git a/gmp/mpn/alpha/ev6/sqr_diagonal.asm b/gmp/mpn/alpha/ev6/sqr_diagonal.asm
new file mode 100644
index 0000000000..58d086e624
--- /dev/null
+++ b/gmp/mpn/alpha/ev6/sqr_diagonal.asm
@@ -0,0 +1,115 @@
+dnl Alpha mpn_sqr_diagonal.
+
+dnl Copyright 2001, 2002, 2006 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C EV4: ?
+C EV5: ?
+C EV6: 2.3
+
+C INPUT PARAMETERS
+C rp r16
+C up r17
+C n r18
+
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+ lda r18, -2(r18) C n -= 2
+ ldq r0, 0(r17) C r0 = up[0]; each up[i]^2 goes to rp[2i] (low), rp[2i+1] (high)
+ mulq r0, r0, r4 C r4 = low limb of up[0]^2
+ umulh r0, r0, r20 C r20 = high limb of up[0]^2
+ blt r18, L(ex1) C n was 1: only this square to store
+ ldq r1, 8(r17) C r1 = up[1]
+ mulq r1, r1, r5 C r5 = low limb of up[1]^2
+ umulh r1, r1, r21 C r21 = high limb of up[1]^2
+ beq r18, L(ex2) C n was 2: store both squares
+ lda r18, -2(r18) C n -= 2
+ ldq r0, 16(r17) C r0 = up[2]
+ blt r18, L(ex3) C n was 3: one more limb to square
+ ldq r1, 24(r17) C r1 = up[3]
+ beq r18, L(ex4) C n was 4: two more limbs to square
+
+ ALIGN(16)
+L(top): lda r18, -2(r18) C n -= 2
+ stq r4, 0(r16) C rp[0] = low(up[0]^2), indices relative to current rp/up
+ mulq r0, r0, r4 C r4 = low(up[2]^2)
+ stq r20, 8(r16) C rp[1] = high(up[0]^2)
+ umulh r0, r0, r20 C r20 = high(up[2]^2)
+ ldq r0, 32(r17) C r0 = up[4]
+ blt r18, L(x) C odd count: up[4] is the last limb
+ stq r5, 16(r16) C rp[2] = low(up[1]^2)
+ mulq r1, r1, r5 C r5 = low(up[3]^2)
+ stq r21, 24(r16) C rp[3] = high(up[1]^2)
+ umulh r1, r1, r21 C r21 = high(up[3]^2)
+ ldq r1, 40(r17) C r1 = up[5]
+ lda r16, 32(r16) C rp += 4
+ lda r17, 16(r17) C up += 2
+ bne r18, L(top) C n == 0 falls through into L(ex4)
+
+ ALIGN(16)
+L(ex4): stq r4, 0(r16) C even tail: two squares pending, r0 and r1 still to square
+ mulq r0, r0, r4 C r4 = low(r0^2)
+ stq r20, 8(r16)
+ umulh r0, r0, r20 C r20 = high(r0^2)
+ stq r5, 16(r16)
+ mulq r1, r1, r5 C r5 = low(r1^2)
+ stq r21, 24(r16)
+ umulh r1, r1, r21 C r21 = high(r1^2)
+ stq r4, 32(r16) C last two squares go to rp[4..7]
+ stq r20, 40(r16)
+ stq r5, 48(r16)
+ stq r21, 56(r16)
+ ret r31, (r26), 1
+ ALIGN(16)
+L(x): stq r5, 16(r16) C odd tail: rp[0..1] already stored, rp/up not advanced
+ mulq r1, r1, r5 C r5 = low(up[3]^2)
+ stq r21, 24(r16)
+ umulh r1, r1, r21 C r21 = high(up[3]^2)
+ stq r4, 32(r16) C rp[4] = low(up[2]^2)
+ mulq r0, r0, r4 C r4 = low(up[4]^2), the final limb
+ stq r20, 40(r16) C rp[5] = high(up[2]^2)
+ umulh r0, r0, r20 C r20 = high(up[4]^2)
+ stq r5, 48(r16) C rp[6..7] = up[3]^2
+ stq r21, 56(r16)
+ stq r4, 64(r16) C rp[8..9] = up[4]^2
+ stq r20, 72(r16)
+ ret r31, (r26), 1
+L(ex1): stq r4, 0(r16) C n == 1: rp[0..1] = up[0]^2
+ stq r20, 8(r16)
+ ret r31, (r26), 1
+ ALIGN(16)
+L(ex2): stq r4, 0(r16) C n == 2: rp[0..1] = up[0]^2
+ stq r20, 8(r16)
+ stq r5, 16(r16) C rp[2..3] = up[1]^2
+ stq r21, 24(r16)
+ ret r31, (r26), 1
+ ALIGN(16)
+L(ex3): stq r4, 0(r16) C n == 3: r0 = up[2] still to square
+ mulq r0, r0, r4 C r4 = low(up[2]^2)
+ stq r20, 8(r16)
+ umulh r0, r0, r20 C r20 = high(up[2]^2)
+ stq r5, 16(r16) C rp[2..3] = up[1]^2
+ stq r21, 24(r16)
+ stq r4, 32(r16) C rp[4..5] = up[2]^2
+ stq r20, 40(r16)
+ ret r31, (r26), 1
+EPILOGUE()
+ASM_END()
diff --git a/gmp/mpn/alpha/ev6/sub_n.asm b/gmp/mpn/alpha/ev6/sub_n.asm
index a35ba40d34..f23ad44a15 100644
--- a/gmp/mpn/alpha/ev6/sub_n.asm
+++ b/gmp/mpn/alpha/ev6/sub_n.asm
@@ -4,30 +4,19 @@ dnl and store difference in a third limb vector.
dnl Copyright 2000, 2003, 2005 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/ev67/gcd_1.asm b/gmp/mpn/alpha/ev67/gcd_1.asm
index 55fa7d3673..2e6f0a5e22 100644
--- a/gmp/mpn/alpha/ev67/gcd_1.asm
+++ b/gmp/mpn/alpha/ev67/gcd_1.asm
@@ -4,29 +4,18 @@ dnl Copyright 2003, 2004 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/ev67/hamdist.asm b/gmp/mpn/alpha/ev67/hamdist.asm
index 4b13e9f14f..a72d95e90b 100644
--- a/gmp/mpn/alpha/ev67/hamdist.asm
+++ b/gmp/mpn/alpha/ev67/hamdist.asm
@@ -4,29 +4,18 @@ dnl Copyright 2003, 2005 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/ev67/popcount.asm b/gmp/mpn/alpha/ev67/popcount.asm
index 049c1cd239..6ed79cf158 100644
--- a/gmp/mpn/alpha/ev67/popcount.asm
+++ b/gmp/mpn/alpha/ev67/popcount.asm
@@ -4,29 +4,18 @@ dnl Copyright 2003, 2005 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/gmp-mparam.h b/gmp/mpn/alpha/gmp-mparam.h
index b850bd24b5..6b6d7bd9c8 100644
--- a/gmp/mpn/alpha/gmp-mparam.h
+++ b/gmp/mpn/alpha/gmp-mparam.h
@@ -1,54 +1,43 @@
/* Alpha EV4 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 1999-2002, 2004, 2005, 2009 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2009
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#define GMP_LIMB_BITS 64
-#define GMP_LIMB_BYTES 8
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
/* 175MHz 21064 */
/* Generated by tuneup.c, 2009-01-15, gcc 3.2 */
-#define MUL_TOOM22_THRESHOLD 12
-#define MUL_TOOM33_THRESHOLD 69
+#define MUL_KARATSUBA_THRESHOLD 12
+#define MUL_TOOM3_THRESHOLD 69
#define MUL_TOOM44_THRESHOLD 88
#define SQR_BASECASE_THRESHOLD 4
-#define SQR_TOOM2_THRESHOLD 20
+#define SQR_KARATSUBA_THRESHOLD 20
#define SQR_TOOM3_THRESHOLD 62
#define SQR_TOOM4_THRESHOLD 155
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 40
-#define MULLO_MUL_N_THRESHOLD 202
+#define MULLOW_BASECASE_THRESHOLD 0 /* always */
+#define MULLOW_DC_THRESHOLD 40
+#define MULLOW_MUL_N_THRESHOLD 202
#define DIV_SB_PREINV_THRESHOLD 0 /* preinv always */
#define DIV_DC_THRESHOLD 38
diff --git a/gmp/mpn/alpha/invert_limb.asm b/gmp/mpn/alpha/invert_limb.asm
index afc010f58c..99f51a30d5 100644
--- a/gmp/mpn/alpha/invert_limb.asm
+++ b/gmp/mpn/alpha/invert_limb.asm
@@ -1,95 +1,342 @@
dnl Alpha mpn_invert_limb -- Invert a normalized limb.
-dnl Copyright 1996, 2000-2003, 2007, 2011, 2013 Free Software Foundation, Inc.
-
+dnl Copyright 1996, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
+dnl Inc.
+dnl
dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C EV4: ?
-C EV5: 137/140 (with BWX/without BWX)
-C EV6: 71/72 (with BWX/without BWX)
+C EV4: ~175
+C EV5: ~111-126
+C EV6: ~52-76
-C This was compiler generated, with minimal manual edits. Surely several
-C cycles could be cut with some thought.
+C This is based on ideas of Peter L. Montgomery.
ASM_START()
+
+FLOAT64($C36,9223372036854775808.0) C 2^63
+
PROLOGUE(mpn_invert_limb,gp)
- LEA( r2, approx_tab)
- srl r16, 54, r1
- srl r16, 24, r4
- and r16, 1, r5
- bic r1, 1, r7
- lda r4, 1(r4)
- srl r16, 1, r3
- addq r7, r2, r1
-ifelse(bwx_available_p,1,`
- ldwu r0, -512(r1)
-',`
- ldq_u r0, -512(r1)
- extwl r0, r7, r0
-')
- addq r3, r5, r3
- mull r0, r0, r1
- sll r0, 11, r0
- mulq r1, r4, r1
- srl r1, 40, r1
- subq r0, r1, r0
- lda r0, -1(r0)
- mulq r0, r0, r2
- sll r0, 60, r1
- sll r0, 13, r0
- mulq r2, r4, r2
- subq r1, r2, r1
- srl r1, 47, r1
- addq r0, r1, r0
- mulq r0, r3, r3
- srl r0, 1, r1
- cmoveq r5, 0, r1
- subq r1, r3, r1
- umulh r1, r0, r3
- sll r0, 31, r0
- srl r3, 1, r1
- addq r0, r1, r0
- mulq r0, r16, r2
- umulh r0, r16, r3
- addq r2, r16, r1
- addq r3, r16, r16
- cmpult r1, r2, r1
- addq r16, r1, r3
- subq r0, r3, r0
- ret r31, (r26), 1
-EPILOGUE()
-DATASTART(approx_tab,8)
-forloop(i,256,512-1,dnl
-` .word eval(0x7fd00/i)
-')dnl
- SIZE(approx_tab, 512)
- TYPE(approx_tab, object)
+ lda r30,-16(r30)
+ addq r16,r16,r1
+ bne r1,$73
+ lda r0,-1
+ br r31,$Lend
+$73:
+ srl r16,1,r1
+ stq r1,0(r30)
+ ldt f11,0(r30)
+ cvtqt f11,f1
+ LEA(r1,$C36)
+ ldt f10,0(r1) C f10 = 2^63
+ divt f10,f1,f10 C f10 = 2^63 / (u / 2)
+ LEA(r2,$invtab-4096)
+ srl r16,52,r1 C extract high 12 bits
+ addq r1,r1,r1 C align ...0000bbbbbbbb0
+ addq r1,r2,r1 C compute array offset
+ ldq_u r2,0(r1) C load quadword containing our 16 bits
+bigend(`addq r1,1,r1')
+ extwl r2,r1,r2 C extract desired 16 bits
+ sll r2,48,r0
+ umulh r16,r0,r1
+ addq r16,r1,r3
+ stq r3,0(r30)
+ ldt f11,0(r30)
+ cvtqt f11,f1
+ mult f1,f10,f1
+ cvttqc f1,f1
+ stt f1,0(r30)
+ ldq r4,0(r30)
+ subq r0,r4,r0
+ umulh r16,r0,r1
+ mulq r16,r0,r2
+ addq r16,r1,r3
+ bge r3,$Loop2
+$Loop1: addq r2,r16,r2
+ cmpult r2,r16,r1
+ addq r3,r1,r3
+ addq r0,1,r0
+ blt r3,$Loop1
+$Loop2: cmpult r2,r16,r1
+ subq r0,1,r0
+ subq r3,r1,r3
+ subq r2,r16,r2
+ bge r3,$Loop2
+$Lend:
+ lda r30,16(r30)
+ ret r31,(r26),1
+EPILOGUE(mpn_invert_limb)
+DATASTART($invtab)
+ .word 0xffff,0xffc0,0xff80,0xff40,0xff00,0xfec0,0xfe81,0xfe41
+ .word 0xfe01,0xfdc2,0xfd83,0xfd43,0xfd04,0xfcc5,0xfc86,0xfc46
+ .word 0xfc07,0xfbc8,0xfb8a,0xfb4b,0xfb0c,0xfacd,0xfa8e,0xfa50
+ .word 0xfa11,0xf9d3,0xf994,0xf956,0xf918,0xf8d9,0xf89b,0xf85d
+ .word 0xf81f,0xf7e1,0xf7a3,0xf765,0xf727,0xf6ea,0xf6ac,0xf66e
+ .word 0xf631,0xf5f3,0xf5b6,0xf578,0xf53b,0xf4fd,0xf4c0,0xf483
+ .word 0xf446,0xf409,0xf3cc,0xf38f,0xf352,0xf315,0xf2d8,0xf29c
+ .word 0xf25f,0xf222,0xf1e6,0xf1a9,0xf16d,0xf130,0xf0f4,0xf0b8
+ .word 0xf07c,0xf03f,0xf003,0xefc7,0xef8b,0xef4f,0xef14,0xeed8
+ .word 0xee9c,0xee60,0xee25,0xede9,0xedae,0xed72,0xed37,0xecfb
+ .word 0xecc0,0xec85,0xec4a,0xec0e,0xebd3,0xeb98,0xeb5d,0xeb22
+ .word 0xeae8,0xeaad,0xea72,0xea37,0xe9fd,0xe9c2,0xe988,0xe94d
+ .word 0xe913,0xe8d8,0xe89e,0xe864,0xe829,0xe7ef,0xe7b5,0xe77b
+ .word 0xe741,0xe707,0xe6cd,0xe694,0xe65a,0xe620,0xe5e6,0xe5ad
+ .word 0xe573,0xe53a,0xe500,0xe4c7,0xe48d,0xe454,0xe41b,0xe3e2
+ .word 0xe3a9,0xe370,0xe336,0xe2fd,0xe2c5,0xe28c,0xe253,0xe21a
+ .word 0xe1e1,0xe1a9,0xe170,0xe138,0xe0ff,0xe0c7,0xe08e,0xe056
+ .word 0xe01e,0xdfe5,0xdfad,0xdf75,0xdf3d,0xdf05,0xdecd,0xde95
+ .word 0xde5d,0xde25,0xdded,0xddb6,0xdd7e,0xdd46,0xdd0f,0xdcd7
+ .word 0xdca0,0xdc68,0xdc31,0xdbf9,0xdbc2,0xdb8b,0xdb54,0xdb1d
+ .word 0xdae6,0xdaae,0xda78,0xda41,0xda0a,0xd9d3,0xd99c,0xd965
+ .word 0xd92f,0xd8f8,0xd8c1,0xd88b,0xd854,0xd81e,0xd7e8,0xd7b1
+ .word 0xd77b,0xd745,0xd70e,0xd6d8,0xd6a2,0xd66c,0xd636,0xd600
+ .word 0xd5ca,0xd594,0xd55f,0xd529,0xd4f3,0xd4bd,0xd488,0xd452
+ .word 0xd41d,0xd3e7,0xd3b2,0xd37c,0xd347,0xd312,0xd2dd,0xd2a7
+ .word 0xd272,0xd23d,0xd208,0xd1d3,0xd19e,0xd169,0xd134,0xd100
+ .word 0xd0cb,0xd096,0xd061,0xd02d,0xcff8,0xcfc4,0xcf8f,0xcf5b
+ .word 0xcf26,0xcef2,0xcebe,0xce89,0xce55,0xce21,0xcded,0xcdb9
+ .word 0xcd85,0xcd51,0xcd1d,0xcce9,0xccb5,0xcc81,0xcc4e,0xcc1a
+ .word 0xcbe6,0xcbb3,0xcb7f,0xcb4c,0xcb18,0xcae5,0xcab1,0xca7e
+ .word 0xca4b,0xca17,0xc9e4,0xc9b1,0xc97e,0xc94b,0xc918,0xc8e5
+ .word 0xc8b2,0xc87f,0xc84c,0xc819,0xc7e7,0xc7b4,0xc781,0xc74f
+ .word 0xc71c,0xc6e9,0xc6b7,0xc684,0xc652,0xc620,0xc5ed,0xc5bb
+ .word 0xc589,0xc557,0xc524,0xc4f2,0xc4c0,0xc48e,0xc45c,0xc42a
+ .word 0xc3f8,0xc3c7,0xc395,0xc363,0xc331,0xc300,0xc2ce,0xc29c
+ .word 0xc26b,0xc239,0xc208,0xc1d6,0xc1a5,0xc174,0xc142,0xc111
+ .word 0xc0e0,0xc0af,0xc07e,0xc04d,0xc01c,0xbfeb,0xbfba,0xbf89
+ .word 0xbf58,0xbf27,0xbef6,0xbec5,0xbe95,0xbe64,0xbe33,0xbe03
+ .word 0xbdd2,0xbda2,0xbd71,0xbd41,0xbd10,0xbce0,0xbcb0,0xbc80
+ .word 0xbc4f,0xbc1f,0xbbef,0xbbbf,0xbb8f,0xbb5f,0xbb2f,0xbaff
+ .word 0xbacf,0xba9f,0xba6f,0xba40,0xba10,0xb9e0,0xb9b1,0xb981
+ .word 0xb951,0xb922,0xb8f2,0xb8c3,0xb894,0xb864,0xb835,0xb806
+ .word 0xb7d6,0xb7a7,0xb778,0xb749,0xb71a,0xb6eb,0xb6bc,0xb68d
+ .word 0xb65e,0xb62f,0xb600,0xb5d1,0xb5a2,0xb574,0xb545,0xb516
+ .word 0xb4e8,0xb4b9,0xb48a,0xb45c,0xb42e,0xb3ff,0xb3d1,0xb3a2
+ .word 0xb374,0xb346,0xb318,0xb2e9,0xb2bb,0xb28d,0xb25f,0xb231
+ .word 0xb203,0xb1d5,0xb1a7,0xb179,0xb14b,0xb11d,0xb0f0,0xb0c2
+ .word 0xb094,0xb067,0xb039,0xb00b,0xafde,0xafb0,0xaf83,0xaf55
+ .word 0xaf28,0xaefb,0xaecd,0xaea0,0xae73,0xae45,0xae18,0xadeb
+ .word 0xadbe,0xad91,0xad64,0xad37,0xad0a,0xacdd,0xacb0,0xac83
+ .word 0xac57,0xac2a,0xabfd,0xabd0,0xaba4,0xab77,0xab4a,0xab1e
+ .word 0xaaf1,0xaac5,0xaa98,0xaa6c,0xaa40,0xaa13,0xa9e7,0xa9bb
+ .word 0xa98e,0xa962,0xa936,0xa90a,0xa8de,0xa8b2,0xa886,0xa85a
+ .word 0xa82e,0xa802,0xa7d6,0xa7aa,0xa77e,0xa753,0xa727,0xa6fb
+ .word 0xa6d0,0xa6a4,0xa678,0xa64d,0xa621,0xa5f6,0xa5ca,0xa59f
+ .word 0xa574,0xa548,0xa51d,0xa4f2,0xa4c6,0xa49b,0xa470,0xa445
+ .word 0xa41a,0xa3ef,0xa3c4,0xa399,0xa36e,0xa343,0xa318,0xa2ed
+ .word 0xa2c2,0xa297,0xa26d,0xa242,0xa217,0xa1ed,0xa1c2,0xa197
+ .word 0xa16d,0xa142,0xa118,0xa0ed,0xa0c3,0xa098,0xa06e,0xa044
+ .word 0xa01a,0x9fef,0x9fc5,0x9f9b,0x9f71,0x9f47,0x9f1c,0x9ef2
+ .word 0x9ec8,0x9e9e,0x9e74,0x9e4b,0x9e21,0x9df7,0x9dcd,0x9da3
+ .word 0x9d79,0x9d50,0x9d26,0x9cfc,0x9cd3,0x9ca9,0x9c80,0x9c56
+ .word 0x9c2d,0x9c03,0x9bda,0x9bb0,0x9b87,0x9b5e,0x9b34,0x9b0b
+ .word 0x9ae2,0x9ab9,0x9a8f,0x9a66,0x9a3d,0x9a14,0x99eb,0x99c2
+ .word 0x9999,0x9970,0x9947,0x991e,0x98f6,0x98cd,0x98a4,0x987b
+ .word 0x9852,0x982a,0x9801,0x97d8,0x97b0,0x9787,0x975f,0x9736
+ .word 0x970e,0x96e5,0x96bd,0x9695,0x966c,0x9644,0x961c,0x95f3
+ .word 0x95cb,0x95a3,0x957b,0x9553,0x952b,0x9503,0x94db,0x94b3
+ .word 0x948b,0x9463,0x943b,0x9413,0x93eb,0x93c3,0x939b,0x9374
+ .word 0x934c,0x9324,0x92fd,0x92d5,0x92ad,0x9286,0x925e,0x9237
+ .word 0x920f,0x91e8,0x91c0,0x9199,0x9172,0x914a,0x9123,0x90fc
+ .word 0x90d4,0x90ad,0x9086,0x905f,0x9038,0x9011,0x8fea,0x8fc3
+ .word 0x8f9c,0x8f75,0x8f4e,0x8f27,0x8f00,0x8ed9,0x8eb2,0x8e8b
+ .word 0x8e65,0x8e3e,0x8e17,0x8df1,0x8dca,0x8da3,0x8d7d,0x8d56
+ .word 0x8d30,0x8d09,0x8ce3,0x8cbc,0x8c96,0x8c6f,0x8c49,0x8c23
+ .word 0x8bfc,0x8bd6,0x8bb0,0x8b8a,0x8b64,0x8b3d,0x8b17,0x8af1
+ .word 0x8acb,0x8aa5,0x8a7f,0x8a59,0x8a33,0x8a0d,0x89e7,0x89c1
+ .word 0x899c,0x8976,0x8950,0x892a,0x8904,0x88df,0x88b9,0x8893
+ .word 0x886e,0x8848,0x8823,0x87fd,0x87d8,0x87b2,0x878d,0x8767
+ .word 0x8742,0x871d,0x86f7,0x86d2,0x86ad,0x8687,0x8662,0x863d
+ .word 0x8618,0x85f3,0x85ce,0x85a9,0x8583,0x855e,0x8539,0x8514
+ .word 0x84f0,0x84cb,0x84a6,0x8481,0x845c,0x8437,0x8412,0x83ee
+ .word 0x83c9,0x83a4,0x8380,0x835b,0x8336,0x8312,0x82ed,0x82c9
+ .word 0x82a4,0x8280,0x825b,0x8237,0x8212,0x81ee,0x81ca,0x81a5
+ .word 0x8181,0x815d,0x8138,0x8114,0x80f0,0x80cc,0x80a8,0x8084
+ .word 0x8060,0x803c,0x8018,0x7ff4,0x7fd0,0x7fac,0x7f88,0x7f64
+ .word 0x7f40,0x7f1c,0x7ef8,0x7ed4,0x7eb1,0x7e8d,0x7e69,0x7e45
+ .word 0x7e22,0x7dfe,0x7ddb,0x7db7,0x7d93,0x7d70,0x7d4c,0x7d29
+ .word 0x7d05,0x7ce2,0x7cbf,0x7c9b,0x7c78,0x7c55,0x7c31,0x7c0e
+ .word 0x7beb,0x7bc7,0x7ba4,0x7b81,0x7b5e,0x7b3b,0x7b18,0x7af5
+ .word 0x7ad2,0x7aaf,0x7a8c,0x7a69,0x7a46,0x7a23,0x7a00,0x79dd
+ .word 0x79ba,0x7997,0x7975,0x7952,0x792f,0x790c,0x78ea,0x78c7
+ .word 0x78a4,0x7882,0x785f,0x783c,0x781a,0x77f7,0x77d5,0x77b2
+ .word 0x7790,0x776e,0x774b,0x7729,0x7706,0x76e4,0x76c2,0x76a0
+ .word 0x767d,0x765b,0x7639,0x7617,0x75f5,0x75d2,0x75b0,0x758e
+ .word 0x756c,0x754a,0x7528,0x7506,0x74e4,0x74c2,0x74a0,0x747e
+ .word 0x745d,0x743b,0x7419,0x73f7,0x73d5,0x73b4,0x7392,0x7370
+ .word 0x734f,0x732d,0x730b,0x72ea,0x72c8,0x72a7,0x7285,0x7264
+ .word 0x7242,0x7221,0x71ff,0x71de,0x71bc,0x719b,0x717a,0x7158
+ .word 0x7137,0x7116,0x70f5,0x70d3,0x70b2,0x7091,0x7070,0x704f
+ .word 0x702e,0x700c,0x6feb,0x6fca,0x6fa9,0x6f88,0x6f67,0x6f46
+ .word 0x6f26,0x6f05,0x6ee4,0x6ec3,0x6ea2,0x6e81,0x6e60,0x6e40
+ .word 0x6e1f,0x6dfe,0x6dde,0x6dbd,0x6d9c,0x6d7c,0x6d5b,0x6d3a
+ .word 0x6d1a,0x6cf9,0x6cd9,0x6cb8,0x6c98,0x6c77,0x6c57,0x6c37
+ .word 0x6c16,0x6bf6,0x6bd6,0x6bb5,0x6b95,0x6b75,0x6b54,0x6b34
+ .word 0x6b14,0x6af4,0x6ad4,0x6ab4,0x6a94,0x6a73,0x6a53,0x6a33
+ .word 0x6a13,0x69f3,0x69d3,0x69b3,0x6993,0x6974,0x6954,0x6934
+ .word 0x6914,0x68f4,0x68d4,0x68b5,0x6895,0x6875,0x6855,0x6836
+ .word 0x6816,0x67f6,0x67d7,0x67b7,0x6798,0x6778,0x6758,0x6739
+ .word 0x6719,0x66fa,0x66db,0x66bb,0x669c,0x667c,0x665d,0x663e
+ .word 0x661e,0x65ff,0x65e0,0x65c0,0x65a1,0x6582,0x6563,0x6544
+ .word 0x6524,0x6505,0x64e6,0x64c7,0x64a8,0x6489,0x646a,0x644b
+ .word 0x642c,0x640d,0x63ee,0x63cf,0x63b0,0x6391,0x6373,0x6354
+ .word 0x6335,0x6316,0x62f7,0x62d9,0x62ba,0x629b,0x627c,0x625e
+ .word 0x623f,0x6221,0x6202,0x61e3,0x61c5,0x61a6,0x6188,0x6169
+ .word 0x614b,0x612c,0x610e,0x60ef,0x60d1,0x60b3,0x6094,0x6076
+ .word 0x6058,0x6039,0x601b,0x5ffd,0x5fdf,0x5fc0,0x5fa2,0x5f84
+ .word 0x5f66,0x5f48,0x5f2a,0x5f0b,0x5eed,0x5ecf,0x5eb1,0x5e93
+ .word 0x5e75,0x5e57,0x5e39,0x5e1b,0x5dfd,0x5de0,0x5dc2,0x5da4
+ .word 0x5d86,0x5d68,0x5d4a,0x5d2d,0x5d0f,0x5cf1,0x5cd3,0x5cb6
+ .word 0x5c98,0x5c7a,0x5c5d,0x5c3f,0x5c21,0x5c04,0x5be6,0x5bc9
+ .word 0x5bab,0x5b8e,0x5b70,0x5b53,0x5b35,0x5b18,0x5afb,0x5add
+ .word 0x5ac0,0x5aa2,0x5a85,0x5a68,0x5a4b,0x5a2d,0x5a10,0x59f3
+ .word 0x59d6,0x59b8,0x599b,0x597e,0x5961,0x5944,0x5927,0x590a
+ .word 0x58ed,0x58d0,0x58b3,0x5896,0x5879,0x585c,0x583f,0x5822
+ .word 0x5805,0x57e8,0x57cb,0x57ae,0x5791,0x5775,0x5758,0x573b
+ .word 0x571e,0x5702,0x56e5,0x56c8,0x56ac,0x568f,0x5672,0x5656
+ .word 0x5639,0x561c,0x5600,0x55e3,0x55c7,0x55aa,0x558e,0x5571
+ .word 0x5555,0x5538,0x551c,0x5500,0x54e3,0x54c7,0x54aa,0x548e
+ .word 0x5472,0x5456,0x5439,0x541d,0x5401,0x53e5,0x53c8,0x53ac
+ .word 0x5390,0x5374,0x5358,0x533c,0x5320,0x5304,0x52e8,0x52cb
+ .word 0x52af,0x5293,0x5277,0x525c,0x5240,0x5224,0x5208,0x51ec
+ .word 0x51d0,0x51b4,0x5198,0x517c,0x5161,0x5145,0x5129,0x510d
+ .word 0x50f2,0x50d6,0x50ba,0x509f,0x5083,0x5067,0x504c,0x5030
+ .word 0x5015,0x4ff9,0x4fdd,0x4fc2,0x4fa6,0x4f8b,0x4f6f,0x4f54
+ .word 0x4f38,0x4f1d,0x4f02,0x4ee6,0x4ecb,0x4eb0,0x4e94,0x4e79
+ .word 0x4e5e,0x4e42,0x4e27,0x4e0c,0x4df0,0x4dd5,0x4dba,0x4d9f
+ .word 0x4d84,0x4d69,0x4d4d,0x4d32,0x4d17,0x4cfc,0x4ce1,0x4cc6
+ .word 0x4cab,0x4c90,0x4c75,0x4c5a,0x4c3f,0x4c24,0x4c09,0x4bee
+ .word 0x4bd3,0x4bb9,0x4b9e,0x4b83,0x4b68,0x4b4d,0x4b32,0x4b18
+ .word 0x4afd,0x4ae2,0x4ac7,0x4aad,0x4a92,0x4a77,0x4a5d,0x4a42
+ .word 0x4a27,0x4a0d,0x49f2,0x49d8,0x49bd,0x49a3,0x4988,0x496e
+ .word 0x4953,0x4939,0x491e,0x4904,0x48e9,0x48cf,0x48b5,0x489a
+ .word 0x4880,0x4865,0x484b,0x4831,0x4817,0x47fc,0x47e2,0x47c8
+ .word 0x47ae,0x4793,0x4779,0x475f,0x4745,0x472b,0x4711,0x46f6
+ .word 0x46dc,0x46c2,0x46a8,0x468e,0x4674,0x465a,0x4640,0x4626
+ .word 0x460c,0x45f2,0x45d8,0x45be,0x45a5,0x458b,0x4571,0x4557
+ .word 0x453d,0x4523,0x4509,0x44f0,0x44d6,0x44bc,0x44a2,0x4489
+ .word 0x446f,0x4455,0x443c,0x4422,0x4408,0x43ef,0x43d5,0x43bc
+ .word 0x43a2,0x4388,0x436f,0x4355,0x433c,0x4322,0x4309,0x42ef
+ .word 0x42d6,0x42bc,0x42a3,0x428a,0x4270,0x4257,0x423d,0x4224
+ .word 0x420b,0x41f2,0x41d8,0x41bf,0x41a6,0x418c,0x4173,0x415a
+ .word 0x4141,0x4128,0x410e,0x40f5,0x40dc,0x40c3,0x40aa,0x4091
+ .word 0x4078,0x405f,0x4046,0x402d,0x4014,0x3ffb,0x3fe2,0x3fc9
+ .word 0x3fb0,0x3f97,0x3f7e,0x3f65,0x3f4c,0x3f33,0x3f1a,0x3f01
+ .word 0x3ee8,0x3ed0,0x3eb7,0x3e9e,0x3e85,0x3e6c,0x3e54,0x3e3b
+ .word 0x3e22,0x3e0a,0x3df1,0x3dd8,0x3dc0,0x3da7,0x3d8e,0x3d76
+ .word 0x3d5d,0x3d45,0x3d2c,0x3d13,0x3cfb,0x3ce2,0x3cca,0x3cb1
+ .word 0x3c99,0x3c80,0x3c68,0x3c50,0x3c37,0x3c1f,0x3c06,0x3bee
+ .word 0x3bd6,0x3bbd,0x3ba5,0x3b8d,0x3b74,0x3b5c,0x3b44,0x3b2b
+ .word 0x3b13,0x3afb,0x3ae3,0x3acb,0x3ab2,0x3a9a,0x3a82,0x3a6a
+ .word 0x3a52,0x3a3a,0x3a22,0x3a09,0x39f1,0x39d9,0x39c1,0x39a9
+ .word 0x3991,0x3979,0x3961,0x3949,0x3931,0x3919,0x3901,0x38ea
+ .word 0x38d2,0x38ba,0x38a2,0x388a,0x3872,0x385a,0x3843,0x382b
+ .word 0x3813,0x37fb,0x37e3,0x37cc,0x37b4,0x379c,0x3785,0x376d
+ .word 0x3755,0x373e,0x3726,0x370e,0x36f7,0x36df,0x36c8,0x36b0
+ .word 0x3698,0x3681,0x3669,0x3652,0x363a,0x3623,0x360b,0x35f4
+ .word 0x35dc,0x35c5,0x35ae,0x3596,0x357f,0x3567,0x3550,0x3539
+ .word 0x3521,0x350a,0x34f3,0x34db,0x34c4,0x34ad,0x3496,0x347e
+ .word 0x3467,0x3450,0x3439,0x3422,0x340a,0x33f3,0x33dc,0x33c5
+ .word 0x33ae,0x3397,0x3380,0x3368,0x3351,0x333a,0x3323,0x330c
+ .word 0x32f5,0x32de,0x32c7,0x32b0,0x3299,0x3282,0x326c,0x3255
+ .word 0x323e,0x3227,0x3210,0x31f9,0x31e2,0x31cb,0x31b5,0x319e
+ .word 0x3187,0x3170,0x3159,0x3143,0x312c,0x3115,0x30fe,0x30e8
+ .word 0x30d1,0x30ba,0x30a4,0x308d,0x3076,0x3060,0x3049,0x3033
+ .word 0x301c,0x3005,0x2fef,0x2fd8,0x2fc2,0x2fab,0x2f95,0x2f7e
+ .word 0x2f68,0x2f51,0x2f3b,0x2f24,0x2f0e,0x2ef8,0x2ee1,0x2ecb
+ .word 0x2eb4,0x2e9e,0x2e88,0x2e71,0x2e5b,0x2e45,0x2e2e,0x2e18
+ .word 0x2e02,0x2dec,0x2dd5,0x2dbf,0x2da9,0x2d93,0x2d7c,0x2d66
+ .word 0x2d50,0x2d3a,0x2d24,0x2d0e,0x2cf8,0x2ce1,0x2ccb,0x2cb5
+ .word 0x2c9f,0x2c89,0x2c73,0x2c5d,0x2c47,0x2c31,0x2c1b,0x2c05
+ .word 0x2bef,0x2bd9,0x2bc3,0x2bad,0x2b97,0x2b81,0x2b6c,0x2b56
+ .word 0x2b40,0x2b2a,0x2b14,0x2afe,0x2ae8,0x2ad3,0x2abd,0x2aa7
+ .word 0x2a91,0x2a7c,0x2a66,0x2a50,0x2a3a,0x2a25,0x2a0f,0x29f9
+ .word 0x29e4,0x29ce,0x29b8,0x29a3,0x298d,0x2977,0x2962,0x294c
+ .word 0x2937,0x2921,0x290c,0x28f6,0x28e0,0x28cb,0x28b5,0x28a0
+ .word 0x288b,0x2875,0x2860,0x284a,0x2835,0x281f,0x280a,0x27f5
+ .word 0x27df,0x27ca,0x27b4,0x279f,0x278a,0x2774,0x275f,0x274a
+ .word 0x2735,0x271f,0x270a,0x26f5,0x26e0,0x26ca,0x26b5,0x26a0
+ .word 0x268b,0x2676,0x2660,0x264b,0x2636,0x2621,0x260c,0x25f7
+ .word 0x25e2,0x25cd,0x25b8,0x25a2,0x258d,0x2578,0x2563,0x254e
+ .word 0x2539,0x2524,0x250f,0x24fa,0x24e5,0x24d1,0x24bc,0x24a7
+ .word 0x2492,0x247d,0x2468,0x2453,0x243e,0x2429,0x2415,0x2400
+ .word 0x23eb,0x23d6,0x23c1,0x23ad,0x2398,0x2383,0x236e,0x235a
+ .word 0x2345,0x2330,0x231c,0x2307,0x22f2,0x22dd,0x22c9,0x22b4
+ .word 0x22a0,0x228b,0x2276,0x2262,0x224d,0x2239,0x2224,0x2210
+ .word 0x21fb,0x21e6,0x21d2,0x21bd,0x21a9,0x2194,0x2180,0x216c
+ .word 0x2157,0x2143,0x212e,0x211a,0x2105,0x20f1,0x20dd,0x20c8
+ .word 0x20b4,0x20a0,0x208b,0x2077,0x2063,0x204e,0x203a,0x2026
+ .word 0x2012,0x1ffd,0x1fe9,0x1fd5,0x1fc1,0x1fac,0x1f98,0x1f84
+ .word 0x1f70,0x1f5c,0x1f47,0x1f33,0x1f1f,0x1f0b,0x1ef7,0x1ee3
+ .word 0x1ecf,0x1ebb,0x1ea7,0x1e93,0x1e7f,0x1e6a,0x1e56,0x1e42
+ .word 0x1e2e,0x1e1a,0x1e06,0x1df3,0x1ddf,0x1dcb,0x1db7,0x1da3
+ .word 0x1d8f,0x1d7b,0x1d67,0x1d53,0x1d3f,0x1d2b,0x1d18,0x1d04
+ .word 0x1cf0,0x1cdc,0x1cc8,0x1cb5,0x1ca1,0x1c8d,0x1c79,0x1c65
+ .word 0x1c52,0x1c3e,0x1c2a,0x1c17,0x1c03,0x1bef,0x1bdb,0x1bc8
+ .word 0x1bb4,0x1ba0,0x1b8d,0x1b79,0x1b66,0x1b52,0x1b3e,0x1b2b
+ .word 0x1b17,0x1b04,0x1af0,0x1add,0x1ac9,0x1ab6,0x1aa2,0x1a8f
+ .word 0x1a7b,0x1a68,0x1a54,0x1a41,0x1a2d,0x1a1a,0x1a06,0x19f3
+ .word 0x19e0,0x19cc,0x19b9,0x19a5,0x1992,0x197f,0x196b,0x1958
+ .word 0x1945,0x1931,0x191e,0x190b,0x18f8,0x18e4,0x18d1,0x18be
+ .word 0x18ab,0x1897,0x1884,0x1871,0x185e,0x184b,0x1837,0x1824
+ .word 0x1811,0x17fe,0x17eb,0x17d8,0x17c4,0x17b1,0x179e,0x178b
+ .word 0x1778,0x1765,0x1752,0x173f,0x172c,0x1719,0x1706,0x16f3
+ .word 0x16e0,0x16cd,0x16ba,0x16a7,0x1694,0x1681,0x166e,0x165b
+ .word 0x1648,0x1635,0x1623,0x1610,0x15fd,0x15ea,0x15d7,0x15c4
+ .word 0x15b1,0x159f,0x158c,0x1579,0x1566,0x1553,0x1541,0x152e
+ .word 0x151b,0x1508,0x14f6,0x14e3,0x14d0,0x14bd,0x14ab,0x1498
+ .word 0x1485,0x1473,0x1460,0x144d,0x143b,0x1428,0x1416,0x1403
+ .word 0x13f0,0x13de,0x13cb,0x13b9,0x13a6,0x1394,0x1381,0x136f
+ .word 0x135c,0x1349,0x1337,0x1325,0x1312,0x1300,0x12ed,0x12db
+ .word 0x12c8,0x12b6,0x12a3,0x1291,0x127f,0x126c,0x125a,0x1247
+ .word 0x1235,0x1223,0x1210,0x11fe,0x11ec,0x11d9,0x11c7,0x11b5
+ .word 0x11a3,0x1190,0x117e,0x116c,0x1159,0x1147,0x1135,0x1123
+ .word 0x1111,0x10fe,0x10ec,0x10da,0x10c8,0x10b6,0x10a4,0x1091
+ .word 0x107f,0x106d,0x105b,0x1049,0x1037,0x1025,0x1013,0x1001
+ .word 0x0fef,0x0fdc,0x0fca,0x0fb8,0x0fa6,0x0f94,0x0f82,0x0f70
+ .word 0x0f5e,0x0f4c,0x0f3a,0x0f28,0x0f17,0x0f05,0x0ef3,0x0ee1
+ .word 0x0ecf,0x0ebd,0x0eab,0x0e99,0x0e87,0x0e75,0x0e64,0x0e52
+ .word 0x0e40,0x0e2e,0x0e1c,0x0e0a,0x0df9,0x0de7,0x0dd5,0x0dc3
+ .word 0x0db2,0x0da0,0x0d8e,0x0d7c,0x0d6b,0x0d59,0x0d47,0x0d35
+ .word 0x0d24,0x0d12,0x0d00,0x0cef,0x0cdd,0x0ccb,0x0cba,0x0ca8
+ .word 0x0c97,0x0c85,0x0c73,0x0c62,0x0c50,0x0c3f,0x0c2d,0x0c1c
+ .word 0x0c0a,0x0bf8,0x0be7,0x0bd5,0x0bc4,0x0bb2,0x0ba1,0x0b8f
+ .word 0x0b7e,0x0b6c,0x0b5b,0x0b4a,0x0b38,0x0b27,0x0b15,0x0b04
+ .word 0x0af2,0x0ae1,0x0ad0,0x0abe,0x0aad,0x0a9c,0x0a8a,0x0a79
+ .word 0x0a68,0x0a56,0x0a45,0x0a34,0x0a22,0x0a11,0x0a00,0x09ee
+ .word 0x09dd,0x09cc,0x09bb,0x09a9,0x0998,0x0987,0x0976,0x0965
+ .word 0x0953,0x0942,0x0931,0x0920,0x090f,0x08fe,0x08ec,0x08db
+ .word 0x08ca,0x08b9,0x08a8,0x0897,0x0886,0x0875,0x0864,0x0853
+ .word 0x0842,0x0831,0x081f,0x080e,0x07fd,0x07ec,0x07db,0x07ca
+ .word 0x07b9,0x07a8,0x0798,0x0787,0x0776,0x0765,0x0754,0x0743
+ .word 0x0732,0x0721,0x0710,0x06ff,0x06ee,0x06dd,0x06cd,0x06bc
+ .word 0x06ab,0x069a,0x0689,0x0678,0x0668,0x0657,0x0646,0x0635
+ .word 0x0624,0x0614,0x0603,0x05f2,0x05e1,0x05d1,0x05c0,0x05af
+ .word 0x059e,0x058e,0x057d,0x056c,0x055c,0x054b,0x053a,0x052a
+ .word 0x0519,0x0508,0x04f8,0x04e7,0x04d6,0x04c6,0x04b5,0x04a5
+ .word 0x0494,0x0484,0x0473,0x0462,0x0452,0x0441,0x0431,0x0420
+ .word 0x0410,0x03ff,0x03ef,0x03de,0x03ce,0x03bd,0x03ad,0x039c
+ .word 0x038c,0x037b,0x036b,0x035b,0x034a,0x033a,0x0329,0x0319
+ .word 0x0309,0x02f8,0x02e8,0x02d7,0x02c7,0x02b7,0x02a6,0x0296
+ .word 0x0286,0x0275,0x0265,0x0255,0x0245,0x0234,0x0224,0x0214
+ .word 0x0204,0x01f3,0x01e3,0x01d3,0x01c3,0x01b2,0x01a2,0x0192
+ .word 0x0182,0x0172,0x0161,0x0151,0x0141,0x0131,0x0121,0x0111
+ .word 0x0101,0x00f0,0x00e0,0x00d0,0x00c0,0x00b0,0x00a0,0x0090
+ .word 0x0080,0x0070,0x0060,0x0050,0x0040,0x0030,0x0020,0x0010
DATAEND()
ASM_END()
diff --git a/gmp/mpn/alpha/lshift.asm b/gmp/mpn/alpha/lshift.asm
index c62a856aea..eb5b2a0b68 100644
--- a/gmp/mpn/alpha/lshift.asm
+++ b/gmp/mpn/alpha/lshift.asm
@@ -1,39 +1,28 @@
dnl Alpha mpn_lshift -- Shift a number left.
-dnl Copyright 1994, 1995, 2000, 2003, 2009 Free Software Foundation, Inc.
+dnl Copyright 1994, 1995, 2000, 2002, 2003 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C EV4: ?
-C EV5: 3.25
-C EV6: 1.75
+C EV4: 4.75
+C EV5: 4
+C EV6: 2
C INPUT PARAMETERS
C rp r16
@@ -46,137 +35,63 @@ ASM_START()
PROLOGUE(mpn_lshift)
s8addq r18,r17,r17 C make r17 point at end of s1
ldq r4,-8(r17) C load first limb
- subq r31,r19,r20
+ subq r17,8,r17
+ subq r31,r19,r7
s8addq r18,r16,r16 C make r16 point at end of RES
subq r18,1,r18
- and r18,4-1,r28 C number of limbs in first loop
- srl r4,r20,r0 C compute function result
+ and r18,4-1,r20 C number of limbs in first loop
+ srl r4,r7,r0 C compute function result
- beq r28,L(L0)
- subq r18,r28,r18
+ beq r20,$L0
+ subq r18,r20,r18
ALIGN(8)
-L(top0):
- ldq r3,-16(r17)
+$Loop0: ldq r3,-8(r17)
subq r16,8,r16
- sll r4,r19,r5
subq r17,8,r17
- subq r28,1,r28
- srl r3,r20,r6
+ subq r20,1,r20
+ sll r4,r19,r5
+ srl r3,r7,r6
bis r3,r3,r4
bis r5,r6,r8
stq r8,0(r16)
- bne r28,L(top0)
+ bne r20,$Loop0
-L(L0): sll r4,r19,r24
- beq r18,L(end)
-C warm up phase 1
- ldq r1,-16(r17)
- subq r18,4,r18
- ldq r2,-24(r17)
- ldq r3,-32(r17)
- ldq r4,-40(r17)
-C warm up phase 2
- srl r1,r20,r7
- sll r1,r19,r21
- srl r2,r20,r8
- beq r18,L(end1)
- ldq r1,-48(r17)
- sll r2,r19,r22
- ldq r2,-56(r17)
- srl r3,r20,r5
- bis r7,r24,r7
- sll r3,r19,r23
- bis r8,r21,r8
- srl r4,r20,r6
- ldq r3,-64(r17)
- sll r4,r19,r24
- ldq r4,-72(r17)
- subq r18,4,r18
- beq r18,L(end2)
- ALIGN(16)
-C main loop
-L(top): stq r7,-8(r16)
- bis r5,r22,r5
- stq r8,-16(r16)
- bis r6,r23,r6
-
- srl r1,r20,r7
- subq r18,4,r18
- sll r1,r19,r21
- unop C ldq r31,-96(r17)
-
- srl r2,r20,r8
- ldq r1,-80(r17)
- sll r2,r19,r22
- ldq r2,-88(r17)
-
- stq r5,-24(r16)
- bis r7,r24,r7
- stq r6,-32(r16)
- bis r8,r21,r8
-
- srl r3,r20,r5
- unop C ldq r31,-96(r17)
- sll r3,r19,r23
+$L0: beq r18,$Lend
+
+ ALIGN(8)
+$Loop: ldq r3,-8(r17)
subq r16,32,r16
+ subq r18,4,r18
+ sll r4,r19,r5
+ srl r3,r7,r6
+
+ ldq r4,-16(r17)
+ sll r3,r19,r1
+ bis r5,r6,r8
+ stq r8,24(r16)
+ srl r4,r7,r2
+
+ ldq r3,-24(r17)
+ sll r4,r19,r5
+ bis r1,r2,r8
+ stq r8,16(r16)
+ srl r3,r7,r6
- srl r4,r20,r6
- ldq r3,-96(r17)
- sll r4,r19,r24
- ldq r4,-104(r17)
+ ldq r4,-32(r17)
+ sll r3,r19,r1
+ bis r5,r6,r8
+ stq r8,8(r16)
+ srl r4,r7,r2
subq r17,32,r17
- bne r18,L(top)
-C cool down phase 2/1
-L(end2):
- stq r7,-8(r16)
- bis r5,r22,r5
- stq r8,-16(r16)
- bis r6,r23,r6
- srl r1,r20,r7
- sll r1,r19,r21
- srl r2,r20,r8
- sll r2,r19,r22
- stq r5,-24(r16)
- bis r7,r24,r7
- stq r6,-32(r16)
- bis r8,r21,r8
- srl r3,r20,r5
- sll r3,r19,r23
- srl r4,r20,r6
- sll r4,r19,r24
-C cool down phase 2/2
- stq r7,-40(r16)
- bis r5,r22,r5
- stq r8,-48(r16)
- bis r6,r23,r6
- stq r5,-56(r16)
- stq r6,-64(r16)
-C cool down phase 2/3
- stq r24,-72(r16)
- ret r31,(r26),1
+ bis r1,r2,r8
+ stq r8,0(r16)
-C cool down phase 1/1
-L(end1):
- sll r2,r19,r22
- srl r3,r20,r5
- bis r7,r24,r7
- sll r3,r19,r23
- bis r8,r21,r8
- srl r4,r20,r6
- sll r4,r19,r24
-C cool down phase 1/2
- stq r7,-8(r16)
- bis r5,r22,r5
- stq r8,-16(r16)
- bis r6,r23,r6
- stq r5,-24(r16)
- stq r6,-32(r16)
- stq r24,-40(r16)
- ret r31,(r26),1
+ bgt r18,$Loop
-L(end): stq r24,-8(r16)
+$Lend: sll r4,r19,r8
+ stq r8,-8(r16)
ret r31,(r26),1
EPILOGUE(mpn_lshift)
ASM_END()
diff --git a/gmp/mpn/alpha/mod_34lsub1.asm b/gmp/mpn/alpha/mod_34lsub1.asm
index 1b03b637d8..e5c1d221f9 100644
--- a/gmp/mpn/alpha/mod_34lsub1.asm
+++ b/gmp/mpn/alpha/mod_34lsub1.asm
@@ -3,30 +3,19 @@ dnl Alpha mpn_mod_34lsub1.
dnl Copyright 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/mode1o.asm b/gmp/mpn/alpha/mode1o.asm
index 96dccc73ee..0611cd8acb 100644
--- a/gmp/mpn/alpha/mode1o.asm
+++ b/gmp/mpn/alpha/mode1o.asm
@@ -1,32 +1,21 @@
dnl Alpha mpn_modexact_1c_odd -- mpn exact remainder
dnl Copyright 2003, 2004 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/mul_1.asm b/gmp/mpn/alpha/mul_1.asm
index a7cdbcf8eb..30b17021ba 100644
--- a/gmp/mpn/alpha/mul_1.asm
+++ b/gmp/mpn/alpha/mul_1.asm
@@ -4,30 +4,19 @@ dnl the result in a second limb vector.
dnl Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/rshift.asm b/gmp/mpn/alpha/rshift.asm
index 6e1e214558..ccedff8071 100644
--- a/gmp/mpn/alpha/rshift.asm
+++ b/gmp/mpn/alpha/rshift.asm
@@ -1,39 +1,28 @@
dnl Alpha mpn_rshift -- Shift a number right.
-dnl Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.
+dnl Copyright 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C EV4: ?
-C EV5: 3.25
-C EV6: 1.75
+C EV4: 4.75
+C EV5: 3.75
+C EV6: 2
C INPUT PARAMETERS
C rp r16
@@ -45,136 +34,62 @@ C cnt r19
ASM_START()
PROLOGUE(mpn_rshift)
+C Shift the r18-limb vector at r17 right by r19 bits and store the result at r16.
ldq r4,0(r17) C load first limb
- subq r31,r19,r20
+ addq r17,8,r17 C r17 now points at the limb after the one held in r4
+ subq r31,r19,r7 C r7 = 0 - cnt, used as the left-shift count; NOTE(review): equals 64-cnt only if sll reads just the low 6 count bits - confirm
subq r18,1,r18
- and r18,4-1,r28 C number of limbs in first loop
- sll r4,r20,r0 C compute function result
+ and r18,4-1,r20 C number of limbs in first loop
+ sll r4,r7,r0 C compute function result
- beq r28,L(L0)
- subq r18,r28,r18
+ beq r20,$L0 C no leftover limbs: skip the one-limb loop
+ subq r18,r20,r18 C r18 = limbs left for the 4-way loop, a multiple of 4
ALIGN(8)
-L(top0):
- ldq r3,8(r17)
+$Loop0: ldq r3,0(r17) C r3 = limb following the pending one in r4
addq r16,8,r16
- srl r4,r19,r5
addq r17,8,r17
- subq r28,1,r28
- sll r3,r20,r6
+ subq r20,1,r20
+ srl r4,r19,r5 C low part: pending limb shifted right by cnt
+ sll r3,r7,r6 C high part: bits shifted in from the following limb
bis r3,r3,r4
bis r5,r6,r8
stq r8,-8(r16)
- bne r28,L(top0)
+ bne r20,$Loop0
-L(L0): srl r4,r19,r24
- beq r18,L(end)
-C warm up phase 1
- ldq r1,8(r17)
- subq r18,4,r18
- ldq r2,16(r17)
- ldq r3,24(r17)
- ldq r4,32(r17)
-C warm up phase 2
- sll r1,r20,r7
- srl r1,r19,r21
- sll r2,r20,r8
- beq r18,L(end1)
- ldq r1,40(r17)
- srl r2,r19,r22
- ldq r2,48(r17)
- sll r3,r20,r5
- bis r7,r24,r7
- srl r3,r19,r23
- bis r8,r21,r8
- sll r4,r20,r6
- ldq r3,56(r17)
- srl r4,r19,r24
- ldq r4,64(r17)
- subq r18,4,r18
- beq r18,L(end2)
- ALIGN(16)
-C main loop
-L(top): stq r7,0(r16)
- bis r5,r22,r5
- stq r8,8(r16)
- bis r6,r23,r6
-
- sll r1,r20,r7
- subq r18,4,r18
- srl r1,r19,r21
- unop C ldq r31,-96(r17)
-
- sll r2,r20,r8
- ldq r1,72(r17)
- srl r2,r19,r22
- ldq r2,80(r17)
-
- stq r5,16(r16)
- bis r7,r24,r7
- stq r6,24(r16)
- bis r8,r21,r8
-
- sll r3,r20,r5
- unop C ldq r31,-96(r17)
- srl r3,r19,r23
+$L0: beq r18,$Lend C only the last limb remains
+C Main loop: four result limbs per pass; r4 holds the pending limb on entry and on exit.
+ ALIGN(8)
+$Loop: ldq r3,0(r17) C r3 = limb following the pending one in r4
addq r16,32,r16
+ subq r18,4,r18
+ srl r4,r19,r5
+ sll r3,r7,r6
+
+ ldq r4,8(r17)
+ srl r3,r19,r1
+ bis r5,r6,r8
+ stq r8,-32(r16) C result limb 0 of this pass; r16 was already advanced by 32
+ sll r4,r7,r2
+
+ ldq r3,16(r17)
+ srl r4,r19,r5
+ bis r1,r2,r8
+ stq r8,-24(r16) C result limb 1
+ sll r3,r7,r6
- sll r4,r20,r6
- ldq r3,88(r17)
- srl r4,r19,r24
- ldq r4,96(r17)
+ ldq r4,24(r17) C r4 = pending limb for the next pass or for the tail
+ srl r3,r19,r1
+ bis r5,r6,r8
+ stq r8,-16(r16) C result limb 2
+ sll r4,r7,r2
addq r17,32,r17
- bne r18,L(top)
-C cool down phase 2/1
-L(end2):
- stq r7,0(r16)
- bis r5,r22,r5
- stq r8,8(r16)
- bis r6,r23,r6
- sll r1,r20,r7
- srl r1,r19,r21
- sll r2,r20,r8
- srl r2,r19,r22
- stq r5,16(r16)
- bis r7,r24,r7
- stq r6,24(r16)
- bis r8,r21,r8
- sll r3,r20,r5
- srl r3,r19,r23
- sll r4,r20,r6
- srl r4,r19,r24
-C cool down phase 2/2
- stq r7,32(r16)
- bis r5,r22,r5
- stq r8,40(r16)
- bis r6,r23,r6
- stq r5,48(r16)
- stq r6,56(r16)
-C cool down phase 2/3
- stq r24,64(r16)
- ret r31,(r26),1
+ bis r1,r2,r8
+ stq r8,-8(r16) C result limb 3
-C cool down phase 1/1
-L(end1):
- srl r2,r19,r22
- sll r3,r20,r5
- bis r7,r24,r7
- srl r3,r19,r23
- bis r8,r21,r8
- sll r4,r20,r6
- srl r4,r19,r24
-C cool down phase 1/2
- stq r7,0(r16)
- bis r5,r22,r5
- stq r8,8(r16)
- bis r6,r23,r6
- stq r5,16(r16)
- stq r6,24(r16)
- stq r24,32(r16)
- ret r31,(r26),1
+ bgt r18,$Loop C loop while limbs remain for another pass
-L(end): stq r24,0(r16)
+$Lend: srl r4,r19,r8 C last limb: no following limb, so zeros are shifted in
+ stq r8,0(r16)
ret r31,(r26),1
EPILOGUE(mpn_rshift)
ASM_END()
diff --git a/gmp/mpn/alpha/sec_tabselect.asm b/gmp/mpn/alpha/sec_tabselect.asm
deleted file mode 100644
index 679b16926e..0000000000
--- a/gmp/mpn/alpha/sec_tabselect.asm
+++ /dev/null
@@ -1,137 +0,0 @@
-dnl Alpha mpn_sec_tabselect.
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2011-2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C EV4: ?
-C EV5: 2.25
-C EV6: 1.64
-
-define(`rp', `r16')
-define(`tp', `r17')
-define(`n', `r18')
-define(`nents', `r19')
-define(`which', `r20')
-
-define(`i', `r21')
-define(`j', `r22')
-define(`stride', `r23')
-define(`mask', `r24')
-define(`k', `r25')
-
-
-ASM_START()
-PROLOGUE(mpn_sec_tabselect)
- subq n, 4, j C outer loop induction variable
-
- blt j, L(outer_end)
-L(outer_top):
- mov tp, r8
- lda r0, 0(r31)
- lda r1, 0(r31)
- lda r2, 0(r31)
- lda r3, 0(r31)
- subq j, 4, j C outer loop induction variable
- subq nents, which, k
- mov nents, i
-
- ALIGN(16)
-L(top): ldq r4, 0(tp)
- ldq r5, 8(tp)
- cmpeq k, i, mask
- subq i, 1, i
- subq r31, mask, mask
- ldq r6, 16(tp)
- ldq r7, 24(tp)
- and r4, mask, r4
- and r5, mask, r5
- or r0, r4, r0
- or r1, r5, r1
- and r6, mask, r6
- and r7, mask, r7
- or r2, r6, r2
- or r3, r7, r3
- s8addq n, tp, tp
- bne i, L(top)
-
- stq r0, 0(rp)
- stq r1, 8(rp)
- stq r2, 16(rp)
- stq r3, 24(rp)
- addq r8, 32, tp
- addq rp, 32, rp
- bge j, L(outer_top)
-L(outer_end):
-
- and n, 2, r0
- beq r0, L(b0x)
-L(b1x): mov tp, r8
- lda r0, 0(r31)
- lda r1, 0(r31)
- subq nents, which, k
- mov nents, i
- ALIGN(16)
-L(tp2): ldq r4, 0(tp)
- ldq r5, 8(tp)
- cmpeq k, i, mask
- subq i, 1, i
- subq r31, mask, mask
- and r4, mask, r4
- and r5, mask, r5
- or r0, r4, r0
- or r1, r5, r1
- s8addq n, tp, tp
- bne i, L(tp2)
- stq r0, 0(rp)
- stq r1, 8(rp)
- addq r8, 16, tp
- addq rp, 16, rp
-
-L(b0x): and n, 1, r0
- beq r0, L(b00)
-L(b01): lda r0, 0(r31)
- subq nents, which, k
- mov nents, i
- ALIGN(16)
-L(tp1): ldq r4, 0(tp)
- cmpeq k, i, mask
- subq i, 1, i
- subq r31, mask, mask
- and r4, mask, r4
- or r0, r4, r0
- s8addq n, tp, tp
- bne i, L(tp1)
- stq r0, 0(rp)
-
-L(b00): ret r31, (r26), 1
-EPILOGUE()
diff --git a/gmp/mpn/alpha/sqr_diag_addlsh1.asm b/gmp/mpn/alpha/sqr_diag_addlsh1.asm
deleted file mode 100644
index ee219ef7e8..0000000000
--- a/gmp/mpn/alpha/sqr_diag_addlsh1.asm
+++ /dev/null
@@ -1,93 +0,0 @@
-dnl Alpha mpn_sqr_diag_addlsh1.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C EV4: ?
-C EV5: 10.2
-C EV6: 4.5
-
-C Ideally, one-way code could run at 9 c/l (limited by mulq+umulh) on ev5 and
-C about 3.75 c/l on ev6. Two-way code could run at about 3.25 c/l on ev6.
-
-C Algorithm: We allow ourselves to propagate carry to a product high word
-C without worrying for carry out, since (B-1)^2 = B^2-2B+1 has a high word of
-C B-2, i.e, will not spill. We propagate carry similarly to a product low word
-C since the problem value B-1 is a quadratic non-residue mod B, but our
-C products are squares.
-
-define(`rp', `r16')
-define(`tp', `r17')
-define(`up', `r18')
-define(`n', `r19')
-
-ASM_START()
-PROLOGUE(mpn_sqr_diag_addlsh1)
- ldq r0, 0(up)
- bis r31, r31, r21
- bis r31, r31, r3
- mulq r0, r0, r7
- stq r7, 0(rp)
- umulh r0, r0, r6
- lda n, -1(n)
-
- ALIGN(16)
-L(top): ldq r0, 8(up)
- lda up, 8(up)
- ldq r8, 0(tp)
- ldq r20, 8(tp)
- mulq r0, r0, r7
- lda tp, 16(tp)
- sll r8, 1, r23
- srl r8, 63, r22
- or r21, r23, r23
- sll r20, 1, r24
- addq r3, r6, r6 C cannot carry per comment above
- or r22, r24, r24
- addq r23, r6, r21
- umulh r0, r0, r6
- cmpult r21, r23, r1
- addq r1, r7, r7 C cannot carry per comment above
- stq r21, 8(rp)
- addq r24, r7, r22
- stq r22, 16(rp)
- lda n, -1(n)
- cmpult r22, r7, r3
- srl r20, 63, r21
- lda rp, 16(rp)
- bne n, L(top)
-
- addq r3, r6, r6 C cannot carry per comment above
- addq r21, r6, r21
- stq r21, 8(rp)
- ret r31, (r26), 1
-EPILOGUE()
-ASM_END()
diff --git a/gmp/mpn/alpha/sqr_diagonal.asm b/gmp/mpn/alpha/sqr_diagonal.asm
new file mode 100644
index 0000000000..2aa7f2e597
--- /dev/null
+++ b/gmp/mpn/alpha/sqr_diagonal.asm
@@ -0,0 +1,65 @@
+dnl Alpha mpn_sqr_diagonal.
+
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C EV4: 42
+C EV5: 18
+C EV6: 3.45
+
+C INPUT PARAMETERS
+C rp r16
+C up r17
+C n r18
+
+C Store the two-limb square of each of the n limbs at up into rp, low limb first.
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+ ldq r2,0(r17) C r2 = s1_limb
+ lda r18,-2(r18) C size -= 2
+ mulq r2,r2,r3 C r3 = prod_low
+ umulh r2,r2,r4 C r4 = prod_high
+ blt r18,$Lend1 C jump if size was < 2 (a single limb)
+ ldq r2,8(r17) C r2 = second s1_limb
+ beq r18,$Lend2 C jump if size was == 2
+C Pipelined loop: store the previous square while the next one is computed.
+ ALIGN(8)
+$Loop: stq r3,0(r16) C store low limb of the previous square
+ mulq r2,r2,r3 C r3 = prod_low
+ lda r18,-1(r18) C size--
+ stq r4,8(r16) C store high limb of the previous square
+ umulh r2,r2,r4 C r4 = prod_high
+ ldq r2,16(r17) C r2 = next s1_limb
+ lda r17,8(r17) C s1_ptr++
+ lda r16,16(r16) C res_ptr += 2 limbs (16 bytes)
+ bne r18,$Loop
+C Tail for the last two limbs: one square is pending in r3/r4 and the last limb is in r2.
+$Lend2: stq r3,0(r16)
+ mulq r2,r2,r3 C r3 = prod_low
+ stq r4,8(r16)
+ umulh r2,r2,r4 C r4 = prod_high
+ stq r3,16(r16)
+ stq r4,24(r16)
+ ret r31,(r26),1
+$Lend1: stq r3,0(r16) C single limb: its square is already in r3/r4
+ stq r4,8(r16)
+ ret r31,(r26),1
+EPILOGUE(mpn_sqr_diagonal)
+ASM_END()
diff --git a/gmp/mpn/alpha/sub_n.asm b/gmp/mpn/alpha/sub_n.asm
index 1bb72263f8..842a4f0b54 100644
--- a/gmp/mpn/alpha/sub_n.asm
+++ b/gmp/mpn/alpha/sub_n.asm
@@ -1,164 +1,117 @@
-dnl Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
-dnl and store difference in a third limb vector.
+dnl Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl store difference in a third limb vector.
-dnl Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
+dnl Copyright 1995, 2000, 2002, 2005 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
-C EV4: ?
-C EV5: 4.75
-C EV6: 3
+C EV4: 7.75
+C EV5: 5.75
+C EV6: 4
-dnl INPUT PARAMETERS
-dnl res_ptr r16
-dnl s1_ptr r17
-dnl s2_ptr r18
-dnl size r19
+C INPUT PARAMETERS
+C rp r16
+C up r17
+C vp r18
+C n r19
ASM_START()
-PROLOGUE(mpn_sub_nc)
- bis r31,r20,r25
- br L(com)
-EPILOGUE()
PROLOGUE(mpn_sub_n)
- bis r31,r31,r25 C clear cy
-L(com): subq r19,4,r19 C decr loop cnt
- blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop
-C Start software pipeline for 1st loop
- ldq r0,0(r18)
- ldq r4,0(r17)
- ldq r1,8(r18)
- ldq r5,8(r17)
- addq r17,32,r17 C update s1_ptr
- subq r4,r0,r28 C 1st main subtract
- ldq r2,16(r18)
- subq r28,r25,r20 C 1st carry subtract
- ldq r3,24(r18)
- cmpult r4,r0,r8 C compute cy from last subtract
- ldq r6,-16(r17)
- cmpult r28,r25,r25 C compute cy from last subtract
- ldq r7,-8(r17)
- bis r8,r25,r25 C combine cy from the two subtracts
- subq r19,4,r19 C decr loop cnt
- subq r5,r1,r28 C 2nd main subtract
- addq r18,32,r18 C update s2_ptr
- subq r28,r25,r21 C 2nd carry subtract
- cmpult r5,r1,r8 C compute cy from last subtract
- blt r19,$Lend1 C if less than 4 limbs remain, jump
-C 1st loop handles groups of 4 limbs in a software pipeline
- ALIGN(16)
-$Loop: cmpult r28,r25,r25 C compute cy from last subtract
- ldq r0,0(r18)
- bis r8,r25,r25 C combine cy from the two subtracts
- ldq r1,8(r18)
- subq r6,r2,r28 C 3rd main subtract
- ldq r4,0(r17)
- subq r28,r25,r22 C 3rd carry subtract
- ldq r5,8(r17)
- cmpult r6,r2,r8 C compute cy from last subtract
- cmpult r28,r25,r25 C compute cy from last subtract
- stq r20,0(r16)
- bis r8,r25,r25 C combine cy from the two subtracts
- stq r21,8(r16)
- subq r7,r3,r28 C 4th main subtract
- subq r28,r25,r23 C 4th carry subtract
- cmpult r7,r3,r8 C compute cy from last subtract
- cmpult r28,r25,r25 C compute cy from last subtract
- addq r17,32,r17 C update s1_ptr
- bis r8,r25,r25 C combine cy from the two subtracts
- addq r16,32,r16 C update res_ptr
- subq r4,r0,r28 C 1st main subtract
- ldq r2,16(r18)
- subq r28,r25,r20 C 1st carry subtract
- ldq r3,24(r18)
- cmpult r4,r0,r8 C compute cy from last subtract
- ldq r6,-16(r17)
- cmpult r28,r25,r25 C compute cy from last subtract
- ldq r7,-8(r17)
- bis r8,r25,r25 C combine cy from the two subtracts
- subq r19,4,r19 C decr loop cnt
- stq r22,-16(r16)
- subq r5,r1,r28 C 2nd main subtract
- stq r23,-8(r16)
- subq r28,r25,r21 C 2nd carry subtract
- addq r18,32,r18 C update s2_ptr
- cmpult r5,r1,r8 C compute cy from last subtract
- bge r19,$Loop
-C Finish software pipeline for 1st loop
-$Lend1: cmpult r28,r25,r25 C compute cy from last subtract
- bis r8,r25,r25 C combine cy from the two subtracts
- subq r6,r2,r28 C cy add
- subq r28,r25,r22 C 3rd main subtract
- cmpult r6,r2,r8 C compute cy from last subtract
- cmpult r28,r25,r25 C compute cy from last subtract
- stq r20,0(r16)
- bis r8,r25,r25 C combine cy from the two subtracts
- stq r21,8(r16)
- subq r7,r3,r28 C cy add
- subq r28,r25,r23 C 4th main subtract
- cmpult r7,r3,r8 C compute cy from last subtract
- cmpult r28,r25,r25 C compute cy from last subtract
- bis r8,r25,r25 C combine cy from the two subtracts
- addq r16,32,r16 C update res_ptr
- stq r22,-16(r16)
- stq r23,-8(r16)
-$Lend2: addq r19,4,r19 C restore loop cnt
- beq r19,$Lret
-C Start software pipeline for 2nd loop
- ldq r0,0(r18)
- ldq r4,0(r17)
+C Subtract the n limbs at vp from the n limbs at up, store the difference at rp and leave the final borrow in r0.
+ ldq r3,0(r17) C r3 = first up limb
+ ldq r4,0(r18) C r4 = first vp limb
+
subq r19,1,r19
- beq r19,$Lend0
-C 2nd loop handles remaining 1-3 limbs
- ALIGN(16)
-$Loop0: subq r4,r0,r28 C main subtract
- cmpult r4,r0,r8 C compute cy from last subtract
- ldq r0,8(r18)
- ldq r4,8(r17)
- subq r28,r25,r20 C carry subtract
- addq r18,8,r18
+ and r19,4-1,r2 C number of limbs in first loop
+ bis r31,r31,r0 C r0 = 0: no borrow into the first limb
+ beq r2,$L0 C skip first loop when n-1 is a multiple of 4
+
+ subq r19,r2,r19 C r19 = limbs left for the 4-way loop, a multiple of 4
+
+$Loop0: subq r2,1,r2
+ ldq r5,8(r17) C preload the next up limb
+ addq r4,r0,r4 C r4 = vp limb + borrow in
+ ldq r6,8(r18) C preload the next vp limb
+ cmpult r4,r0,r1 C r1 = 1 if that addition wrapped
+ subq r3,r4,r4 C r4 = up limb - (vp limb + borrow in)
+ cmpult r3,r4,r0 C r0 = 1 if the subtraction borrowed
+ stq r4,0(r16)
+ bis r0,r1,r0 C borrow out = either of the two conditions
+
addq r17,8,r17
- stq r20,0(r16)
- cmpult r28,r25,r25 C compute cy from last subtract
- subq r19,1,r19 C decr loop cnt
- bis r8,r25,r25 C combine cy from the two subtracts
+ addq r18,8,r18
+ bis r5,r5,r3 C preloaded limbs become the pending pair in r3/r4
+ bis r6,r6,r4
addq r16,8,r16
- bne r19,$Loop0
-$Lend0: subq r4,r0,r28 C main subtract
- subq r28,r25,r20 C carry subtract
- cmpult r4,r0,r8 C compute cy from last subtract
- cmpult r28,r25,r25 C compute cy from last subtract
- stq r20,0(r16)
- bis r8,r25,r25 C combine cy from the two subtracts
-
-$Lret: bis r25,r31,r0 C return cy
+ bne r2,$Loop0
+
+$L0: beq r19,$Lend C only the final pending limb remains
+C Main loop: four limbs per pass, each step as in the first loop, alternating the r3/r4 and r5/r6 pairs.
+ ALIGN(8)
+$Loop: subq r19,4,r19
+C limb 0 of this pass: operands in r3/r4, next pair preloaded into r5/r6
+ ldq r5,8(r17)
+ addq r4,r0,r4
+ ldq r6,8(r18)
+ cmpult r4,r0,r1
+ subq r3,r4,r4
+ cmpult r3,r4,r0
+ stq r4,0(r16)
+ bis r0,r1,r0
+C limb 1: operands in r5/r6
+ ldq r3,16(r17)
+ addq r6,r0,r6
+ ldq r4,16(r18)
+ cmpult r6,r0,r1
+ subq r5,r6,r6
+ cmpult r5,r6,r0
+ stq r6,8(r16)
+ bis r0,r1,r0
+C limb 2: operands in r3/r4
+ ldq r5,24(r17)
+ addq r4,r0,r4
+ ldq r6,24(r18)
+ cmpult r4,r0,r1
+ subq r3,r4,r4
+ cmpult r3,r4,r0
+ stq r4,16(r16)
+ bis r0,r1,r0
+C limb 3: operands in r5/r6; r3/r4 are reloaded with the pair pending after this pass
+ ldq r3,32(r17)
+ addq r6,r0,r6
+ ldq r4,32(r18)
+ cmpult r6,r0,r1
+ subq r5,r6,r6
+ cmpult r5,r6,r0
+ stq r6,24(r16)
+ bis r0,r1,r0
+
+ addq r17,32,r17
+ addq r18,32,r18
+ addq r16,32,r16
+ bne r19,$Loop
+C Final limb: its operands are always pending in r3/r4 here.
+$Lend: addq r4,r0,r4
+ cmpult r4,r0,r1
+ subq r3,r4,r4
+ cmpult r3,r4,r0
+ stq r4,0(r16)
+ bis r0,r1,r0 C r0 = borrow out of the whole subtraction
ret r31,(r26),1
-EPILOGUE()
+EPILOGUE(mpn_sub_n)
ASM_END()
diff --git a/gmp/mpn/alpha/submul_1.asm b/gmp/mpn/alpha/submul_1.asm
index 2b63b52fa4..554ccf51b6 100644
--- a/gmp/mpn/alpha/submul_1.asm
+++ b/gmp/mpn/alpha/submul_1.asm
@@ -4,30 +4,19 @@ dnl the result from a second limb vector.
dnl Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/umul.asm b/gmp/mpn/alpha/umul.asm
index 039081ed48..7fa3f008f1 100644
--- a/gmp/mpn/alpha/umul.asm
+++ b/gmp/mpn/alpha/umul.asm
@@ -3,30 +3,19 @@ dnl mpn_umul_ppmm -- 1x1->2 limb multiplication
dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
diff --git a/gmp/mpn/alpha/unicos.m4 b/gmp/mpn/alpha/unicos.m4
index e05cf5cca6..f1f41c18e4 100644
--- a/gmp/mpn/alpha/unicos.m4
+++ b/gmp/mpn/alpha/unicos.m4
@@ -3,33 +3,22 @@ divert(-1)
dnl m4 macros for alpha assembler on unicos.
-dnl Copyright 2000, 2002-2004, 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
+dnl Copyright 2000, 2002, 2003, 2004 Free Software Foundation, Inc.
dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
dnl Note that none of the standard GMP_ASM_ autoconf tests are done for
@@ -86,9 +75,8 @@ m4_assert_numargs(1)
` .extern $1')
define(`DATASTART',
-m4_assert_numargs_range(1,2)
+m4_assert_numargs(1)
` .psect $1@crud,data
- ALIGN(ifelse($#,1,2,$2))
$1:')
define(`DATAEND',