diff options
Diffstat (limited to 'gmp/mpn/alpha')
55 files changed, 2127 insertions, 2806 deletions
diff --git a/gmp/mpn/alpha/README b/gmp/mpn/alpha/README index 09c2f04047..3578c53b85 100644 --- a/gmp/mpn/alpha/README +++ b/gmp/mpn/alpha/README @@ -1,30 +1,20 @@ -Copyright 1996, 1997, 1999-2005 Free Software Foundation, Inc. +Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software +Foundation, Inc. This file is part of the GNU MP Library. -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +The GNU MP Library is free software; you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. +You should have received a copy of the GNU Lesser General Public License along +with the GNU MP Library. If not, see http://www.gnu.org/licenses/. @@ -46,7 +36,7 @@ Cray T3 code is very very different... them to "$6" or "$f6" where necessary. 
"0x" introduces a hex constant in gas and DEC as, but on Unicos "^X" is -required. The X() macro accommodates this difference. +required. The X() macro accomodates this difference. "cvttqc" is required by DEC as, "cvttq/c" is required by Unicos, and gas will accept either. We use cvttqc and have an m4 define expand to cvttq/c where @@ -70,7 +60,7 @@ RELEVANT OPTIMIZATION ISSUES EV4 1. This chip has very limited store bandwidth. The on-chip L1 cache is write- - through, and a cache line is transferred from the store buffer to the off- + through, and a cache line is transfered from the store buffer to the off- chip L2 in as much 15 cycles on most systems. This delay hurts mpn_add_n, mpn_sub_n, mpn_lshift, and mpn_rshift. diff --git a/gmp/mpn/alpha/add_n.asm b/gmp/mpn/alpha/add_n.asm index bc572a57a9..77d4cad2ef 100644 --- a/gmp/mpn/alpha/add_n.asm +++ b/gmp/mpn/alpha/add_n.asm @@ -1,164 +1,117 @@ dnl Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and dnl store sum in a third limb vector. -dnl Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc. +dnl Copyright 1995, 2000, 2002, 2005 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. 
+ dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/limb -C EV4: ? -C EV5: 4.75 -C EV6: 3 +C EV4: 7.75 +C EV5: 5.75 +C EV6: 4 -dnl INPUT PARAMETERS -dnl res_ptr r16 -dnl s1_ptr r17 -dnl s2_ptr r18 -dnl size r19 +C INPUT PARAMETERS +C rp r16 +C up r17 +C vp r18 +C n r19 ASM_START() -PROLOGUE(mpn_add_nc) - bis r20,r31,r25 - br L(com) -EPILOGUE() PROLOGUE(mpn_add_n) - bis r31,r31,r25 C clear cy -L(com): subq r19,4,r19 C decr loop cnt - blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop -C Start software pipeline for 1st loop - ldq r0,0(r18) - ldq r4,0(r17) - ldq r1,8(r18) - ldq r5,8(r17) - addq r17,32,r17 C update s1_ptr - addq r0,r4,r28 C 1st main add - ldq r2,16(r18) - addq r25,r28,r20 C 1st carry add - ldq r3,24(r18) - cmpult r28,r4,r8 C compute cy from last add - ldq r6,-16(r17) - cmpult r20,r28,r25 C compute cy from last add - ldq r7,-8(r17) - bis r8,r25,r25 C combine cy from the two adds - subq r19,4,r19 C decr loop cnt - addq r1,r5,r28 C 2nd main add - addq r18,32,r18 C update s2_ptr - addq r28,r25,r21 C 2nd carry add - cmpult r28,r5,r8 C compute cy from last add - blt r19,$Lend1 C if less than 4 limbs remain, jump -C 1st loop handles groups of 4 limbs in a software pipeline - ALIGN(16) -$Loop: cmpult r21,r28,r25 C compute cy from last add - ldq r0,0(r18) - bis 
r8,r25,r25 C combine cy from the two adds - ldq r1,8(r18) - addq r2,r6,r28 C 3rd main add - ldq r4,0(r17) - addq r28,r25,r22 C 3rd carry add - ldq r5,8(r17) - cmpult r28,r6,r8 C compute cy from last add - cmpult r22,r28,r25 C compute cy from last add - stq r20,0(r16) - bis r8,r25,r25 C combine cy from the two adds - stq r21,8(r16) - addq r3,r7,r28 C 4th main add - addq r28,r25,r23 C 4th carry add - cmpult r28,r7,r8 C compute cy from last add - cmpult r23,r28,r25 C compute cy from last add - addq r17,32,r17 C update s1_ptr - bis r8,r25,r25 C combine cy from the two adds - addq r16,32,r16 C update res_ptr - addq r0,r4,r28 C 1st main add - ldq r2,16(r18) - addq r25,r28,r20 C 1st carry add - ldq r3,24(r18) - cmpult r28,r4,r8 C compute cy from last add - ldq r6,-16(r17) - cmpult r20,r28,r25 C compute cy from last add - ldq r7,-8(r17) - bis r8,r25,r25 C combine cy from the two adds - subq r19,4,r19 C decr loop cnt - stq r22,-16(r16) - addq r1,r5,r28 C 2nd main add - stq r23,-8(r16) - addq r25,r28,r21 C 2nd carry add - addq r18,32,r18 C update s2_ptr - cmpult r28,r5,r8 C compute cy from last add - bge r19,$Loop -C Finish software pipeline for 1st loop -$Lend1: cmpult r21,r28,r25 C compute cy from last add - bis r8,r25,r25 C combine cy from the two adds - addq r2,r6,r28 C 3rd main add - addq r28,r25,r22 C 3rd carry add - cmpult r28,r6,r8 C compute cy from last add - cmpult r22,r28,r25 C compute cy from last add - stq r20,0(r16) - bis r8,r25,r25 C combine cy from the two adds - stq r21,8(r16) - addq r3,r7,r28 C 4th main add - addq r28,r25,r23 C 4th carry add - cmpult r28,r7,r8 C compute cy from last add - cmpult r23,r28,r25 C compute cy from last add - bis r8,r25,r25 C combine cy from the two adds - addq r16,32,r16 C update res_ptr - stq r22,-16(r16) - stq r23,-8(r16) -$Lend2: addq r19,4,r19 C restore loop cnt - beq r19,$Lret -C Start software pipeline for 2nd loop - ldq r0,0(r18) - ldq r4,0(r17) + ldq r3,0(r17) + ldq r4,0(r18) + subq r19,1,r19 - beq r19,$Lend0 -C 2nd loop 
handles remaining 1-3 limbs - ALIGN(16) -$Loop0: addq r0,r4,r28 C main add - ldq r0,8(r18) - cmpult r28,r4,r8 C compute cy from last add - ldq r4,8(r17) - addq r28,r25,r20 C carry add - addq r18,8,r18 + and r19,4-1,r2 C number of limbs in first loop + bis r31,r31,r0 + beq r2,$L0 C if multiple of 4 limbs, skip first loop + + subq r19,r2,r19 + +$Loop0: subq r2,1,r2 + ldq r5,8(r17) + addq r4,r0,r4 + ldq r6,8(r18) + cmpult r4,r0,r1 + addq r3,r4,r4 + cmpult r4,r3,r0 + stq r4,0(r16) + bis r0,r1,r0 + addq r17,8,r17 - stq r20,0(r16) - cmpult r20,r28,r25 C compute cy from last add - subq r19,1,r19 C decr loop cnt - bis r8,r25,r25 C combine cy from the two adds + addq r18,8,r18 + bis r5,r5,r3 + bis r6,r6,r4 addq r16,8,r16 - bne r19,$Loop0 -$Lend0: addq r0,r4,r28 C main add - addq r28,r25,r20 C carry add - cmpult r28,r4,r8 C compute cy from last add - cmpult r20,r28,r25 C compute cy from last add - stq r20,0(r16) - bis r8,r25,r25 C combine cy from the two adds - -$Lret: bis r25,r31,r0 C return cy + bne r2,$Loop0 + +$L0: beq r19,$Lend + + ALIGN(8) +$Loop: subq r19,4,r19 + + ldq r5,8(r17) + addq r4,r0,r4 + ldq r6,8(r18) + cmpult r4,r0,r1 + addq r3,r4,r4 + cmpult r4,r3,r0 + stq r4,0(r16) + bis r0,r1,r0 + + ldq r3,16(r17) + addq r6,r0,r6 + ldq r4,16(r18) + cmpult r6,r0,r1 + addq r5,r6,r6 + cmpult r6,r5,r0 + stq r6,8(r16) + bis r0,r1,r0 + + ldq r5,24(r17) + addq r4,r0,r4 + ldq r6,24(r18) + cmpult r4,r0,r1 + addq r3,r4,r4 + cmpult r4,r3,r0 + stq r4,16(r16) + bis r0,r1,r0 + + ldq r3,32(r17) + addq r6,r0,r6 + ldq r4,32(r18) + cmpult r6,r0,r1 + addq r5,r6,r6 + cmpult r6,r5,r0 + stq r6,24(r16) + bis r0,r1,r0 + + addq r17,32,r17 + addq r18,32,r18 + addq r16,32,r16 + bne r19,$Loop + +$Lend: addq r4,r0,r4 + cmpult r4,r0,r1 + addq r3,r4,r4 + cmpult r4,r3,r0 + stq r4,0(r16) + bis r0,r1,r0 ret r31,(r26),1 -EPILOGUE() +EPILOGUE(mpn_add_n) ASM_END() diff --git a/gmp/mpn/alpha/addmul_1.asm b/gmp/mpn/alpha/addmul_1.asm index c4e6834b61..22c41a5c74 100644 --- a/gmp/mpn/alpha/addmul_1.asm +++ 
b/gmp/mpn/alpha/addmul_1.asm @@ -4,30 +4,19 @@ dnl result to a second limb vector. dnl Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/alpha-defs.m4 b/gmp/mpn/alpha/alpha-defs.m4 index af34c9294c..b2f9a242a8 100644 --- a/gmp/mpn/alpha/alpha-defs.m4 +++ b/gmp/mpn/alpha/alpha-defs.m4 @@ -3,32 +3,21 @@ divert(-1) dnl m4 macros for Alpha assembler. dnl Copyright 2003, 2004 Free Software Foundation, Inc. 
- -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
dnl Usage: ASSERT([reg] [,code]) diff --git a/gmp/mpn/alpha/aorslsh1_n.asm b/gmp/mpn/alpha/aorslsh1_n.asm index 9525e669db..3694f78761 100644 --- a/gmp/mpn/alpha/aorslsh1_n.asm +++ b/gmp/mpn/alpha/aorslsh1_n.asm @@ -1,40 +1,36 @@ dnl Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1). -dnl Copyright 2003, 2013 Free Software Foundation, Inc. +dnl Copyright 2003 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/limb -C EV4: ? 
+C EV4: 12.5 C EV5: 6.25 -C EV6: 4.5 +C EV6: 4.375 (i.e., worse than separate mpn_lshift and mpn_add_n at 3.875) +C TODO +C * Write special version for ev6, as this is a slowdown for 100 < n < 2200 +C compared to separate mpn_lshift and mpn_add_n. +C * Use addq instead of sll for left shift, and similarly cmplt instead of srl +C for right shift. + +dnl INPUT PARAMETERS define(`rp',`r16') define(`up',`r17') define(`vp',`r18') @@ -42,8 +38,12 @@ define(`n', `r19') define(`u0', `r8') define(`u1', `r1') +define(`u2', `r2') +define(`u3', `r3') define(`v0', `r4') define(`v1', `r5') +define(`v2', `r6') +define(`v3', `r7') define(`cy0', `r0') define(`cy1', `r20') @@ -67,98 +67,168 @@ MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n) ASM_START() PROLOGUE(func) - and n, 2, cy0 - blbs n, L(bx1) -L(bx0): ldq v1, 0(vp) + lda n, -4(n) + bis r31, r31, cy1 + and n, 3, r1 + beq r1, $Lb00 + cmpeq r1, 1, r2 + bne r2, $Lb01 + cmpeq r1, 2, r2 + bne r2, $Lb10 +$Lb11: C n = 3, 7, 11, ... + ldq v0, 0(vp) + ldq u0, 0(up) + ldq v1, 8(vp) + ldq u1, 8(up) + ldq v2, 16(vp) + ldq u2, 16(up) + lda vp, 24(vp) + lda up, 24(up) + bge n, $Loop + br r31, $Lcj3 +$Lb10: C n = 2, 6, 10, ... + bis r31, r31, cy0 + ldq v1, 0(vp) ldq u1, 0(up) - nop - bne cy0, L(b10) - -L(b00): lda vp, 48(vp) - lda up, -16(up) + ldq v2, 8(vp) + ldq u2, 8(up) lda rp, -8(rp) - br r31, L(lo0) - -L(b10): lda vp, 32(vp) + blt n, $Lcj2 + ldq v3, 16(vp) + ldq u3, 16(up) + lda vp, 48(vp) + lda up, 16(up) + br r31, $LL10 +$Lb01: C n = 1, 5, 9, ... + ldq v2, 0(vp) + ldq u2, 0(up) + lda rp, -16(rp) + blt n, $Lcj1 + ldq v3, 8(vp) + ldq u3, 8(up) + ldq v0, 16(vp) + ldq u0, 16(up) + lda vp, 40(vp) + lda up, 8(up) + lda rp, 32(rp) + br r31, $LL01 +$Lb00: C n = 4, 8, 12, ... 
+ bis r31, r31, cy0 + ldq v3, 0(vp) + ldq u3, 0(up) + ldq v0, 8(vp) + ldq u0, 8(up) + ldq v1, 16(vp) + ldq u1, 16(up) + lda vp, 32(vp) lda rp, 8(rp) - lda cy0, 0(r31) - br r31, L(lo2) - -L(bx1): ldq v0, 0(vp) - ldq u0, 0(up) - lda cy1, 0(r31) - beq cy0, L(b01) - -L(b11): lda vp, 40(vp) - lda up, -24(up) - lda rp, 16(rp) - br r31, L(lo3) - -L(b01): lda n, -4(n) - ble n, L(end) - lda vp, 24(vp) - lda up, -8(up) - + br r31, $LL00x ALIGN(16) -L(top): addq v0, v0, sl C left shift vlimb - ldq v1, -16(vp) +C 0 +$Loop: sll v0, 1, sl C left shift vlimb + ldq v3, 0(vp) +C 1 ADDSUB u0, sl, ps C ulimb + (vlimb << 1) - cmplt v0, r31, cy0 C carry out #1 - ldq u1, 16(up) + ldq u3, 0(up) +C 2 ADDSUB ps, cy1, rr C consume carry from previous operation + srl v0, 63, cy0 C carry out #1 +C 3 CARRY( ps, u0, cy) C carry out #2 stq rr, 0(rp) +C 4 addq cy, cy0, cy0 C combine carry out #1 and #2 CARRY( rr, ps, cy) C carry out #3 +C 5 addq cy, cy0, cy0 C final carry out lda vp, 32(vp) C bookkeeping -L(lo0): addq v1, v1, sl - ldq v0, -40(vp) +C 6 +$LL10: sll v1, 1, sl + ldq v0, -24(vp) +C 7 ADDSUB u1, sl, ps - cmplt v1, r31, cy1 - ldq u0, 24(up) + ldq u0, 8(up) +C 8 ADDSUB ps, cy0, rr + srl v1, 63, cy1 +C 9 CARRY( ps, u1, cy) stq rr, 8(rp) +C 10 addq cy, cy1, cy1 CARRY( rr, ps, cy) +C 11 addq cy, cy1, cy1 lda rp, 32(rp) C bookkeeping -L(lo3): addq v0, v0, sl - ldq v1, -32(vp) - ADDSUB u0, sl, ps - cmplt v0, r31, cy0 - ldq u1, 32(up) +C 12 +$LL01: sll v2, 1, sl + ldq v1, -16(vp) +C 13 + ADDSUB u2, sl, ps + ldq u1, 16(up) +C 14 ADDSUB ps, cy1, rr - CARRY( ps, u0, cy) + srl v2, 63, cy0 +C 15 + CARRY( ps, u2, cy) stq rr, -16(rp) +C 16 addq cy, cy0, cy0 CARRY( rr, ps, cy) +C 17 addq cy, cy0, cy0 - lda up, 32(up) C bookkeeping -L(lo2): addq v1, v1, sl - ldq v0, -24(vp) - ADDSUB u1, sl, ps - cmplt v1, r31, cy1 - ldq u0, 8(up) +$LL00x: lda up, 32(up) C bookkeeping +C 18 + sll v3, 1, sl + ldq v2, -8(vp) +C 19 + ADDSUB u3, sl, ps + ldq u2, -8(up) +C 20 ADDSUB ps, cy0, rr - CARRY( ps, u1, cy) + srl v3, 
63, cy1 +C 21 + CARRY( ps, u3, cy) stq rr, -8(rp) +C 22 addq cy, cy1, cy1 CARRY( rr, ps, cy) +C 23 addq cy, cy1, cy1 lda n, -4(n) C bookkeeping - bgt n, L(top) +C 24 + bge n, $Loop -L(end): addq v0, v0, sl +$Lcj3: sll v0, 1, sl ADDSUB u0, sl, ps ADDSUB ps, cy1, rr - cmplt v0, r31, cy0 + srl v0, 63, cy0 CARRY( ps, u0, cy) stq rr, 0(rp) addq cy, cy0, cy0 CARRY( rr, ps, cy) - addq cy, cy0, r0 + addq cy, cy0, cy0 + +$Lcj2: sll v1, 1, sl + ADDSUB u1, sl, ps + ADDSUB ps, cy0, rr + srl v1, 63, cy1 + CARRY( ps, u1, cy) + stq rr, 8(rp) + addq cy, cy1, cy1 + CARRY( rr, ps, cy) + addq cy, cy1, cy1 + +$Lcj1: sll v2, 1, sl + ADDSUB u2, sl, ps + ADDSUB ps, cy1, rr + srl v2, 63, cy0 + CARRY( ps, u2, cy) + stq rr, 16(rp) + addq cy, cy0, cy0 + CARRY( rr, ps, cy) + addq cy, cy0, cy0 + ret r31,(r26),1 EPILOGUE() ASM_END() diff --git a/gmp/mpn/alpha/aorslsh2_n.asm b/gmp/mpn/alpha/aorslsh2_n.asm deleted file mode 100644 index bdee1d6d02..0000000000 --- a/gmp/mpn/alpha/aorslsh2_n.asm +++ /dev/null @@ -1,167 +0,0 @@ -dnl Alpha mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2). - -dnl Copyright 2003, 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
-dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C EV4: ? -C EV5: 6 -C EV6: 3.75 - -C TODO -C * Tune to reach 3.5 c/l on ev6 and 5.75 c/l on ev5. - -define(`rp',`r16') -define(`up',`r17') -define(`vp',`r18') -define(`n', `r19') - -define(`u0', `r8') -define(`u1', `r1') -define(`v0', `r4') -define(`v1', `r5') - -define(`cy0', `r0') -define(`cy1', `r20') -define(`cy', `r22') -define(`rr', `r24') -define(`ps', `r25') -define(`sl', `r28') - -ifdef(`OPERATION_addlsh2_n',` - define(ADDSUB, addq) - define(CARRY, `cmpult $1,$2,$3') - define(func, mpn_addlsh2_n) -') -ifdef(`OPERATION_sublsh2_n',` - define(ADDSUB, subq) - define(CARRY, `cmpult $2,$1,$3') - define(func, mpn_sublsh2_n) -') - -MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n) - -ASM_START() -PROLOGUE(func) - and n, 2, cy0 - blbs n, L(bx1) -L(bx0): ldq v1, 0(vp) - ldq u1, 0(up) - bis r31, r31, r2 - bne cy0, L(b10) - -L(b00): lda vp, 48(vp) - lda up, -16(up) - lda rp, -8(rp) - s4addq v1, r31, sl - br r31, L(lo0) - -L(b10): lda vp, 32(vp) - lda rp, 8(rp) - lda cy0, 0(r31) - br r31, L(lo2) - -L(bx1): ldq v0, 0(vp) - ldq u0, 0(up) - lda cy1, 0(r31) - bis r31, r31, r3 - nop - beq cy0, L(b01) - -L(b11): lda vp, 40(vp) - lda up, -24(up) - lda rp, 16(rp) - br r31, L(lo3) - -L(b01): lda n, -4(n) - ble n, L(end) - lda vp, 24(vp) - lda up, -8(up) - - ALIGN(16) -L(top): s4addq v0, r3, sl C combined vlimb - ldq v1, -16(vp) - ADDSUB u0, sl, ps C ulimb + (vlimb << 1) - ldq u1, 16(up) - srl v0, 62, r2 C high v bits - ADDSUB ps, cy1, rr C consume carry from previous operation - CARRY( ps, u0, cy0) C carry out #2 - stq rr, 0(rp) - CARRY( rr, ps, cy) C carry out #3 - lda vp, 32(vp) C bookkeeping - addq cy, cy0, cy0 C final carry out - s4addq v1, r2, sl -L(lo0): ldq v0, -40(vp) - ADDSUB u1, sl, ps - ldq u0, 24(up) - srl v1, 62, r3 - ADDSUB 
ps, cy0, rr - CARRY( ps, u1, cy1) - stq rr, 8(rp) - CARRY( rr, ps, cy) - lda rp, 32(rp) C bookkeeping - addq cy, cy1, cy1 -L(lo3): s4addq v0, r3, sl - ldq v1, -32(vp) - ADDSUB u0, sl, ps - ldq u1, 32(up) - srl v0, 62, r2 - ADDSUB ps, cy1, rr - CARRY( ps, u0, cy0) - stq rr, -16(rp) - CARRY( rr, ps, cy) - lda up, 32(up) C bookkeeping - addq cy, cy0, cy0 -L(lo2): s4addq v1, r2, sl - ldq v0, -24(vp) - ADDSUB u1, sl, ps - ldq u0, 8(up) - srl v1, 62, r3 - ADDSUB ps, cy0, rr - CARRY( ps, u1, cy1) - stq rr, -8(rp) - CARRY( rr, ps, cy) - lda n, -4(n) C bookkeeping - addq cy, cy1, cy1 - bgt n, L(top) - -L(end): s4addq v0, r3, sl - ADDSUB u0, sl, ps - srl v0, 62, r2 - ADDSUB ps, cy1, rr - CARRY( ps, u0, cy0) - stq rr, 0(rp) - CARRY( rr, ps, cy) - addq cy, cy0, cy0 - addq cy0, r2, r0 - - ret r31,(r26),1 -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/alpha/bdiv_dbm1c.asm b/gmp/mpn/alpha/bdiv_dbm1c.asm index 472966ca98..e5f11dbf48 100644 --- a/gmp/mpn/alpha/bdiv_dbm1c.asm +++ b/gmp/mpn/alpha/bdiv_dbm1c.asm @@ -3,30 +3,19 @@ dnl Alpha mpn_bdiv_dbm1c. dnl Copyright 2008 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. 
+ dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/cntlz.asm b/gmp/mpn/alpha/cntlz.asm index 25af19b131..2bfd923e5e 100644 --- a/gmp/mpn/alpha/cntlz.asm +++ b/gmp/mpn/alpha/cntlz.asm @@ -3,30 +3,19 @@ dnl Alpha auxiliary for longlong.h's count_leading_zeros dnl Copyright 1997, 2000, 2002 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. 
+ dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/copyd.asm b/gmp/mpn/alpha/copyd.asm index b41b5366cc..ba8fa1c633 100644 --- a/gmp/mpn/alpha/copyd.asm +++ b/gmp/mpn/alpha/copyd.asm @@ -3,30 +3,19 @@ dnl Alpha mpn_copyd -- copy, decrementing. dnl Copyright 2002, 2003 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/copyi.asm b/gmp/mpn/alpha/copyi.asm index f7e2ad6f6a..425804127e 100644 --- a/gmp/mpn/alpha/copyi.asm +++ b/gmp/mpn/alpha/copyi.asm @@ -3,30 +3,19 @@ dnl Alpha mpn_copyi -- copy, incrementing. dnl Copyright 2002, 2003 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
-dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/default.m4 b/gmp/mpn/alpha/default.m4 index 8fe7c4e122..e7aae2eeea 100644 --- a/gmp/mpn/alpha/default.m4 +++ b/gmp/mpn/alpha/default.m4 @@ -3,33 +3,22 @@ divert(-1) dnl m4 macros for alpha assembler (everywhere except unicos). -dnl Copyright 2000, 2002-2004, 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. +dnl Copyright 2000, 2002, 2003, 2004 Free Software Foundation, Inc. dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. dnl Usage: ASM_START() @@ -64,9 +53,8 @@ ifelse(`$2',noalign,,` ALIGN(16)') .globl $1 .ent $1 $1: - .frame r30,0,r26,0 -ifelse(`$2',gp,` ldgp r29, 0(r27) -`$'$1..ng:') +ifelse(`$2',gp,` ldgp r29,0(r27)') + .frame r30,0,r26 .prologue ifelse(`$2',gp,1,0)') define(`EPILOGUE_cpu', @@ -102,13 +90,12 @@ forloop(i,0,31,`defreg(`r'i,$i)') forloop(i,0,31,`deflit(`f'i,``$f''i)') -dnl Usage: DATASTART(name,align) or DATASTART(name) +dnl Usage: DATASTART(name) dnl DATAEND() define(`DATASTART', -m4_assert_numargs_range(1,2) -` RODATA - ALIGN(ifelse($#,1,2,$2)) +m4_assert_numargs(1) +` DATA $1:') define(`DATAEND', m4_assert_numargs(0) @@ -117,7 +104,7 @@ m4_assert_numargs(0) dnl Load a symbolic address into a register define(`LEA', m4_assert_numargs(2) -`lda $1, $2') +`lda $1, $2') dnl Usage: ASM_END() define(`ASM_END', diff --git a/gmp/mpn/alpha/dive_1.c b/gmp/mpn/alpha/dive_1.c index 88b82db2f7..a915c58a9e 100644 --- a/gmp/mpn/alpha/dive_1.c +++ b/gmp/mpn/alpha/dive_1.c @@ -4,33 +4,22 @@ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU MP RELEASES. -Copyright 2000-2003 Free Software Foundation, Inc. +Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc. This file is part of the GNU MP Library. 
The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "gmp.h" #include "gmp-impl.h" diff --git a/gmp/mpn/alpha/ev5/diveby3.asm b/gmp/mpn/alpha/diveby3.asm index 3758188e02..e2d1c6beee 100644 --- a/gmp/mpn/alpha/ev5/diveby3.asm +++ b/gmp/mpn/alpha/diveby3.asm @@ -1,42 +1,32 @@ dnl Alpha mpn_divexact_by3c -- mpn division by 3, expecting no remainder. -dnl Copyright 2004, 2005, 2009 Free Software Foundation, Inc. +dnl Copyright 2004, 2005 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. 
-dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/limb C EV4: 22 C EV5: 11.5 -C EV6: 6.3 Note that mpn_bdiv_dbm1c is faster +C EV6: 6.3 C TODO -C * Remove the unops, they benefit just ev6, which no longer uses this file. +C * Trim this to 6.0 c/l for ev6. +C * Write special ev5 version, should reach 9 c/l, and could be smaller. C * Try prefetch for destination, using lds. C * Improve feed-in code, by moving initial mulq earlier; make initial load C to u0/u0 to save some copying. 
@@ -50,7 +40,7 @@ define(`cy', `r19') ASM_START() -DATASTART(L(LC),8) +DATASTART(L(LC)) .quad 0xAAAAAAAAAAAAAAAB .quad 0x5555555555555555 .quad 0xAAAAAAAAAAAAAAAA diff --git a/gmp/mpn/alpha/divrem_2.asm b/gmp/mpn/alpha/divrem_2.asm index 046b246a95..b68468bca0 100644 --- a/gmp/mpn/alpha/divrem_2.asm +++ b/gmp/mpn/alpha/divrem_2.asm @@ -1,32 +1,21 @@ dnl Alpha mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number. -dnl Copyright 2007, 2008, 2013 Free Software Foundation, Inc. +dnl Copyright 2007, 2008 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. 
If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -52,7 +41,8 @@ define(`un_param', `r19') define(`dp', `r20') ASM_START() -PROLOGUE(mpn_divrem_2,gp) +PROLOGUE(mpn_divrem_2) + ldgp r29, 0(r27) lda r30, -80(r30) stq r26, 0(r30) stq r9, 8(r30) @@ -90,7 +80,7 @@ L(L8): stq r3, 72(r30) blt r19, L(L10) bis r31, r12, r16 jsr r26, mpn_invert_limb - LDGP( r29, 0(r26)) + ldgp r29, 0(r26) mulq r0, r12, r4 C t0 = LO(di * d1) umulh r0, r10, r2 C s1 = HI(di * d0) addq r4, r10, r4 C t0 += d0 diff --git a/gmp/mpn/alpha/ev5/add_n.asm b/gmp/mpn/alpha/ev5/add_n.asm new file mode 100644 index 0000000000..626e713ccb --- /dev/null +++ b/gmp/mpn/alpha/ev5/add_n.asm @@ -0,0 +1,146 @@ +dnl Alpha EV5 mpn_add_n -- Add two limb vectors of the same length > 0 and +dnl store sum in a third limb vector. + +dnl Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C EV4: ? 
+C EV5: 4.75 +C EV6: 3 + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl s2_ptr r18 +dnl size r19 + +ASM_START() +PROLOGUE(mpn_add_n) + bis r31,r31,r25 C clear cy + subq r19,4,r19 C decr loop cnt + blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop +C Start software pipeline for 1st loop + ldq r0,0(r18) + ldq r4,0(r17) + ldq r1,8(r18) + ldq r5,8(r17) + addq r17,32,r17 C update s1_ptr + ldq r2,16(r18) + addq r0,r4,r20 C 1st main add + ldq r3,24(r18) + subq r19,4,r19 C decr loop cnt + ldq r6,-16(r17) + cmpult r20,r0,r25 C compute cy from last add + ldq r7,-8(r17) + addq r1,r5,r28 C 2nd main add + addq r18,32,r18 C update s2_ptr + addq r28,r25,r21 C 2nd carry add + cmpult r28,r5,r8 C compute cy from last add + blt r19,$Lend1 C if less than 4 limbs remain, jump +C 1st loop handles groups of 4 limbs in a software pipeline + ALIGN(16) +$Loop: cmpult r21,r28,r25 C compute cy from last add + ldq r0,0(r18) + bis r8,r25,r25 C combine cy from the two adds + ldq r1,8(r18) + addq r2,r6,r28 C 3rd main add + ldq r4,0(r17) + addq r28,r25,r22 C 3rd carry add + ldq r5,8(r17) + cmpult r28,r6,r8 C compute cy from last add + cmpult r22,r28,r25 C compute cy from last add + stq r20,0(r16) + bis r8,r25,r25 C combine cy from the two adds + stq r21,8(r16) + addq r3,r7,r28 C 4th main add + addq r28,r25,r23 C 4th carry add + cmpult r28,r7,r8 C compute cy from last add + cmpult r23,r28,r25 C compute cy from last add + addq r17,32,r17 C update s1_ptr + bis r8,r25,r25 C combine cy from the two adds + addq r16,32,r16 C update res_ptr + addq r0,r4,r28 C 1st main add + ldq r2,16(r18) + addq r25,r28,r20 C 1st carry add + ldq r3,24(r18) + cmpult r28,r4,r8 C compute cy from last add + ldq r6,-16(r17) + cmpult r20,r28,r25 C compute cy from last add + ldq r7,-8(r17) + bis r8,r25,r25 C combine cy from the two adds + subq r19,4,r19 C decr loop cnt + stq r22,-16(r16) + addq r1,r5,r28 C 2nd main add + stq r23,-8(r16) + addq r25,r28,r21 C 2nd carry add + addq r18,32,r18 C update s2_ptr + cmpult 
r28,r5,r8 C compute cy from last add + bge r19,$Loop +C Finish software pipeline for 1st loop +$Lend1: cmpult r21,r28,r25 C compute cy from last add + bis r8,r25,r25 C combine cy from the two adds + addq r2,r6,r28 C 3rd main add + addq r28,r25,r22 C 3rd carry add + cmpult r28,r6,r8 C compute cy from last add + cmpult r22,r28,r25 C compute cy from last add + stq r20,0(r16) + bis r8,r25,r25 C combine cy from the two adds + stq r21,8(r16) + addq r3,r7,r28 C 4th main add + addq r28,r25,r23 C 4th carry add + cmpult r28,r7,r8 C compute cy from last add + cmpult r23,r28,r25 C compute cy from last add + bis r8,r25,r25 C combine cy from the two adds + addq r16,32,r16 C update res_ptr + stq r22,-16(r16) + stq r23,-8(r16) +$Lend2: addq r19,4,r19 C restore loop cnt + beq r19,$Lret +C Start software pipeline for 2nd loop + ldq r0,0(r18) + ldq r4,0(r17) + subq r19,1,r19 + beq r19,$Lend0 +C 2nd loop handles remaining 1-3 limbs + ALIGN(16) +$Loop0: addq r0,r4,r28 C main add + ldq r0,8(r18) + cmpult r28,r4,r8 C compute cy from last add + ldq r4,8(r17) + addq r28,r25,r20 C carry add + addq r18,8,r18 + addq r17,8,r17 + stq r20,0(r16) + cmpult r20,r28,r25 C compute cy from last add + subq r19,1,r19 C decr loop cnt + bis r8,r25,r25 C combine cy from the two adds + addq r16,8,r16 + bne r19,$Loop0 +$Lend0: addq r0,r4,r28 C main add + addq r28,r25,r20 C carry add + cmpult r28,r4,r8 C compute cy from last add + cmpult r20,r28,r25 C compute cy from last add + stq r20,0(r16) + bis r8,r25,r25 C combine cy from the two adds + +$Lret: bis r25,r31,r0 C return cy + ret r31,(r26),1 +EPILOGUE(mpn_add_n) +ASM_END() diff --git a/gmp/mpn/alpha/com.asm b/gmp/mpn/alpha/ev5/com_n.asm index f084ab5e96..979e711eb8 100644 --- a/gmp/mpn/alpha/com.asm +++ b/gmp/mpn/alpha/ev5/com_n.asm @@ -1,32 +1,21 @@ -dnl Alpha mpn_com -- mpn one's complement. +dnl Alpha EV5 mpn_com_n -- mpn one's complement. dnl Copyright 2003 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') @@ -37,7 +26,7 @@ C EV5: 2.0 C EV6: 1.5 -C mp_limb_t mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size); +C mp_limb_t mpn_com_n (mp_ptr dst, mp_srcptr src, mp_size_t size); C C For ev5 the main loop is 7 cycles plus 1 taken branch bubble, for a total C 2.0 c/l. In general, a pattern like this unrolled to N limbs per loop @@ -71,7 +60,7 @@ FLOAT64(L(dat), 2.0) ALIGN(16) -PROLOGUE(mpn_com,gp) +PROLOGUE(mpn_com_n,gp) C r16 dst C r17 src diff --git a/gmp/mpn/alpha/ev5/gmp-mparam.h b/gmp/mpn/alpha/ev5/gmp-mparam.h index b560c20afe..cbedd4f173 100644 --- a/gmp/mpn/alpha/ev5/gmp-mparam.h +++ b/gmp/mpn/alpha/ev5/gmp-mparam.h @@ -1,187 +1,81 @@ /* Alpha EV5 gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 1999-2002, 2004, 2005, 2008-2010, 2014 Free -Software Foundation, Inc. +Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2008, 2009 +Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. 
+ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 -#define GMP_LIMB_BITS 64 -#define GMP_LIMB_BYTES 8 /* 600 MHz 21164A */ -/* FFT tuning limit = 5000000 */ -/* Generated by tuneup.c, 2014-03-12, gcc 4.5 */ - -#define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */ -#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */ -#define MOD_1_1P_METHOD 2 -#define MOD_1_NORM_THRESHOLD 0 /* always */ -#define MOD_1_UNNORM_THRESHOLD 0 /* always */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD 3 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 6 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 22 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */ -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15 -#define USE_PREINV_DIVREM_1 1 /* preinv always */ -#define DIV_QR_1N_PI1_METHOD 1 -#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always */ -#define BMOD_1_TO_MOD_1_THRESHOLD 76 - -#define MUL_TOOM22_THRESHOLD 14 -#define MUL_TOOM33_THRESHOLD 50 -#define MUL_TOOM44_THRESHOLD 118 -#define MUL_TOOM6H_THRESHOLD 157 -#define MUL_TOOM8H_THRESHOLD 236 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 77 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 81 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 56 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 70 - -#define SQR_BASECASE_THRESHOLD 0 /* always */ -#define SQR_TOOM2_THRESHOLD 22 -#define SQR_TOOM3_THRESHOLD 73 -#define SQR_TOOM4_THRESHOLD 178 -#define SQR_TOOM6_THRESHOLD 0 /* always */ -#define SQR_TOOM8_THRESHOLD 260 - -#define MULMID_TOOM42_THRESHOLD 
18 - -#define MULMOD_BNM1_THRESHOLD 9 -#define SQRMOD_BNM1_THRESHOLD 12 - -#define MUL_FFT_MODF_THRESHOLD 284 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 284, 5}, { 11, 6}, { 6, 5}, { 13, 6}, \ - { 7, 5}, { 15, 6}, { 13, 7}, { 7, 6}, \ - { 15, 7}, { 8, 6}, { 17, 7}, { 13, 8}, \ - { 7, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \ - { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \ - { 19, 9}, { 11, 8}, { 25,10}, { 7, 9}, \ - { 15, 8}, { 33, 9}, { 19, 8}, { 39, 9}, \ - { 23, 8}, { 47,10}, { 15, 9}, { 39,10}, \ - { 23, 9}, { 47,11}, { 15,10}, { 31, 9}, \ - { 67,10}, { 39, 9}, { 79,10}, { 47, 9}, \ - { 95,10}, { 55,11}, { 31,10}, { 63, 8}, \ - { 255, 7}, { 511,10}, { 71, 9}, { 143, 8}, \ - { 287, 7}, { 575, 9}, { 159, 8}, { 319,11}, \ - { 47,12}, { 31,11}, { 63, 9}, { 255, 8}, \ - { 511,10}, { 143, 9}, { 287,11}, { 79,10}, \ - { 159, 9}, { 319,10}, { 175, 9}, { 351, 8}, \ - { 703,10}, { 191, 9}, { 383,10}, { 207, 9}, \ - { 415,12}, { 63,10}, { 255,11}, { 143,10}, \ - { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \ - { 639,11}, { 175,12}, { 95,11}, { 191,10}, \ - { 383,11}, { 207,10}, { 415,11}, { 223,13}, \ - { 63,11}, { 287,10}, { 575,12}, { 159,11}, \ - { 319,10}, { 639,11}, { 351,12}, { 191,11}, \ - { 415,12}, { 223,11}, { 447,10}, { 895,11}, \ - { 479,12}, { 287,11}, { 575,12}, { 351,13}, \ - { 191,12}, { 479,13}, { 255,12}, { 575,13}, \ - { 319,12}, { 703,13}, { 383,12}, { 831,13}, \ - { 447,14}, { 255,13}, { 8192,14}, { 16384,15}, \ - { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \ - { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \ - {8388608,24} } -#define MUL_FFT_TABLE3_SIZE 121 -#define MUL_FFT_THRESHOLD 4224 - -#define SQR_FFT_MODF_THRESHOLD 240 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 240, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \ - { 14, 5}, { 29, 7}, { 9, 6}, { 19, 7}, \ - { 13, 6}, { 27, 8}, { 7, 7}, { 21, 8}, \ - { 11, 7}, { 29, 8}, { 19, 9}, { 11, 8}, \ - { 27,10}, { 7, 9}, { 15, 8}, { 33, 9}, \ - { 19, 8}, { 39, 9}, { 23, 8}, { 47,10}, \ - { 15, 9}, { 39,10}, 
{ 23, 9}, { 47,11}, \ - { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \ - { 79,10}, { 47,11}, { 31,10}, { 63, 9}, \ - { 127, 8}, { 255,10}, { 71, 9}, { 143, 8}, \ - { 287,10}, { 79,11}, { 47,12}, { 31,11}, \ - { 63,10}, { 127, 9}, { 255,10}, { 143, 9}, \ - { 287,11}, { 79,10}, { 159, 9}, { 319,10}, \ - { 175,11}, { 95,10}, { 191, 9}, { 383,10}, \ - { 207, 9}, { 415,11}, { 111,10}, { 223,12}, \ - { 63,11}, { 175,12}, { 95,11}, { 207,13}, \ - { 63,12}, { 127,11}, { 287,12}, { 159,11}, \ - { 351,12}, { 191,11}, { 415,12}, { 223,11}, \ - { 447,13}, { 127,12}, { 351,13}, { 191,12}, \ - { 383,11}, { 767,12}, { 415,11}, { 831,12}, \ - { 447,14}, { 127,13}, { 255,12}, { 511,11}, \ - { 1087,12}, { 575,13}, { 319,12}, { 703,13}, \ - { 383,12}, { 831,13}, { 447,14}, { 255,13}, \ - { 511,12}, { 1023,13}, { 8192,14}, { 16384,15}, \ - { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \ - { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \ - {8388608,24} } -#define SQR_FFT_TABLE3_SIZE 105 -#define SQR_FFT_THRESHOLD 3968 - -#define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 45 -#define MULLO_MUL_N_THRESHOLD 8397 - -#define DC_DIV_QR_THRESHOLD 47 -#define DC_DIVAPPR_Q_THRESHOLD 168 -#define DC_BDIV_QR_THRESHOLD 47 -#define DC_BDIV_Q_THRESHOLD 110 - -#define INV_MULMOD_BNM1_THRESHOLD 26 -#define INV_NEWTON_THRESHOLD 189 -#define INV_APPR_THRESHOLD 181 - -#define BINV_NEWTON_THRESHOLD 196 -#define REDC_1_TO_REDC_N_THRESHOLD 51 - -#define MU_DIV_QR_THRESHOLD 1558 -#define MU_DIVAPPR_Q_THRESHOLD 1558 -#define MUPI_DIV_QR_THRESHOLD 90 -#define MU_BDIV_QR_THRESHOLD 855 -#define MU_BDIV_Q_THRESHOLD 1078 - -#define POWM_SEC_TABLE 1,16,90,452,1221 - -#define MATRIX22_STRASSEN_THRESHOLD 11 -#define HGCD_THRESHOLD 99 -#define HGCD_APPR_THRESHOLD 103 -#define HGCD_REDUCE_THRESHOLD 2899 -#define GCD_DC_THRESHOLD 283 -#define GCDEXT_DC_THRESHOLD 201 -#define JACOBI_BASE_METHOD 3 - -#define GET_STR_DC_THRESHOLD 13 -#define GET_STR_PRECOMPUTE_THRESHOLD 28 -#define 
SET_STR_DC_THRESHOLD 426 -#define SET_STR_PRECOMPUTE_THRESHOLD 1505 - -#define FAC_DSC_THRESHOLD 1404 -#define FAC_ODD_THRESHOLD 0 /* always */ + +/* Generated by tuneup.c, 2009-01-15, gcc 3.4 */ + +#define MUL_KARATSUBA_THRESHOLD 14 +#define MUL_TOOM3_THRESHOLD 74 +#define MUL_TOOM44_THRESHOLD 118 + +#define SQR_BASECASE_THRESHOLD 4 +#define SQR_KARATSUBA_THRESHOLD 28 +#define SQR_TOOM3_THRESHOLD 77 +#define SQR_TOOM4_THRESHOLD 136 + +#define MULLOW_BASECASE_THRESHOLD 0 /* always */ +#define MULLOW_DC_THRESHOLD 44 +#define MULLOW_MUL_N_THRESHOLD 246 + +#define DIV_SB_PREINV_THRESHOLD 0 /* preinv always */ +#define DIV_DC_THRESHOLD 53 +#define POWM_THRESHOLD 85 + +#define MATRIX22_STRASSEN_THRESHOLD 17 +#define HGCD_THRESHOLD 104 +#define GCD_DC_THRESHOLD 321 +#define GCDEXT_DC_THRESHOLD 298 +#define JACOBI_BASE_METHOD 3 + +#define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */ +#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */ +#define MOD_1_NORM_THRESHOLD 0 /* always */ +#define MOD_1_UNNORM_THRESHOLD 0 /* always */ +#define MOD_1_1_THRESHOLD 13 +#define MOD_1_2_THRESHOLD 14 +#define MOD_1_4_THRESHOLD 16 +#define USE_PREINV_DIVREM_1 1 /* preinv always */ +#define USE_PREINV_MOD_1 1 /* preinv always */ +#define DIVEXACT_1_THRESHOLD 0 /* always */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always */ + +#define GET_STR_DC_THRESHOLD 20 +#define GET_STR_PRECOMPUTE_THRESHOLD 32 +#define SET_STR_DC_THRESHOLD 532 +#define SET_STR_PRECOMPUTE_THRESHOLD 1501 + +#define MUL_FFT_TABLE { 240, 480, 1344, 1792, 5120, 20480, 81920, 196608, 0 } +#define MUL_FFT_MODF_THRESHOLD 240 +#define MUL_FFT_THRESHOLD 1920 + +#define SQR_FFT_TABLE { 240, 480, 1216, 1792, 5120, 12288, 81920, 196608, 0 } +#define SQR_FFT_MODF_THRESHOLD 208 +#define SQR_FFT_THRESHOLD 1408 + +/* These tables need to be updated. 
*/ + +#define MUL_FFT_TABLE2 {{1, 4}, {177, 5}, {193, 4}, {209, 5}, {353, 6}, {385, 5}, {417, 6}, {833, 7}, {897, 6}, {961, 7}, {1025, 6}, {1089, 7}, {1665, 8}, {1793, 7}, {2177, 8}, {2305, 7}, {2433, 8}, {2817, 7}, {2945, 8}, {3329, 9}, {3457, 8}, {4865, 9}, {5633, 8}, {6401, 10}, {7169, 9}, {11777, 10}, {12801, 9}, {13825, 10}, {15361, 9}, {19969, 10}, {23553, 9}, {24065, 11}, {30721, 10}, {48129, 11}, {63489, 10}, {72705, 11}, {96257, 12}, {126977, 11}, {194561, 12}, {258049, 11}, {325633, 12}, {389121, 13}, {516097, 12}, {MP_SIZE_T_MAX,0}} + +#define SQR_FFT_TABLE2 {{1, 4}, {177, 5}, {193, 4}, {209, 5}, {353, 6}, {385, 5}, {417, 6}, {961, 7}, {1025, 6}, {1089, 7}, {1153, 6}, {1217, 7}, {1665, 8}, {1793, 7}, {2177, 8}, {2305, 7}, {2561, 8}, {2817, 7}, {2945, 8}, {3329, 9}, {3585, 8}, {5377, 9}, {5633, 8}, {6401, 9}, {6657, 10}, {6913, 9}, {11777, 10}, {13313, 9}, {13825, 10}, {15361, 9}, {18945, 10}, {19457, 9}, {19969, 10}, {23553, 9}, {24065, 11}, {30721, 10}, {48129, 11}, {53249, 10}, {56321, 11}, {63489, 10}, {72705, 11}, {73729, 10}, {79873, 11}, {96257, 12}, {126977, 11}, {194561, 12}, {258049, 11}, {325633, 12}, {389121, 13}, {516097, 12}, {1699841, 13}, {1708033, 12}, {1732609, 13}, {1748993, 12}, {1757185, 13}, {1773569, 12}, {1777665, 13}, {1781761, 12}, {1789953, 13}, {1806337, 12}, {1818625, 13}, {1822721, 12}, {1826817, 13}, {1830913, 12}, {1961985, 13}, {MP_SIZE_T_MAX,0}} diff --git a/gmp/mpn/alpha/ev5/lshift.asm b/gmp/mpn/alpha/ev5/lshift.asm new file mode 100644 index 0000000000..04385d3484 --- /dev/null +++ b/gmp/mpn/alpha/ev5/lshift.asm @@ -0,0 +1,171 @@ +dnl Alpha EV5 mpn_lshift -- Shift a number left. + +dnl Copyright 1994, 1995, 2000, 2003 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. 
+ +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C EV4: ? +C EV5: 3.25 +C EV6: 1.75 + +C INPUT PARAMETERS +C rp r16 +C up r17 +C n r18 +C cnt r19 + + +ASM_START() +PROLOGUE(mpn_lshift) + s8addq r18,r17,r17 C make r17 point at end of s1 + ldq r4,-8(r17) C load first limb + subq r31,r19,r20 + s8addq r18,r16,r16 C make r16 point at end of RES + subq r18,1,r18 + and r18,4-1,r28 C number of limbs in first loop + srl r4,r20,r0 C compute function result + + beq r28,$L0 + subq r18,r28,r18 + + ALIGN(8) +$Loop0: ldq r3,-16(r17) + subq r16,8,r16 + sll r4,r19,r5 + subq r17,8,r17 + subq r28,1,r28 + srl r3,r20,r6 + bis r3,r3,r4 + bis r5,r6,r8 + stq r8,0(r16) + bne r28,$Loop0 + +$L0: sll r4,r19,r24 + beq r18,$Lend +C warm up phase 1 + ldq r1,-16(r17) + subq r18,4,r18 + ldq r2,-24(r17) + ldq r3,-32(r17) + ldq r4,-40(r17) + beq r18,$Lend1 +C warm up phase 2 + srl r1,r20,r7 + sll r1,r19,r21 + srl r2,r20,r8 + ldq r1,-48(r17) + sll r2,r19,r22 + ldq r2,-56(r17) + srl r3,r20,r5 + bis r7,r24,r7 + sll r3,r19,r23 + bis r8,r21,r8 + srl r4,r20,r6 + ldq r3,-64(r17) + sll r4,r19,r24 + ldq r4,-72(r17) + subq r18,4,r18 + beq r18,$Lend2 + ALIGN(16) +C main loop +$Loop: stq r7,-8(r16) + bis r5,r22,r5 + stq r8,-16(r16) + bis r6,r23,r6 + + srl r1,r20,r7 + subq r18,4,r18 + sll r1,r19,r21 + unop C ldq r31,-96(r17) + + 
srl r2,r20,r8 + ldq r1,-80(r17) + sll r2,r19,r22 + ldq r2,-88(r17) + + stq r5,-24(r16) + bis r7,r24,r7 + stq r6,-32(r16) + bis r8,r21,r8 + + srl r3,r20,r5 + unop C ldq r31,-96(r17) + sll r3,r19,r23 + subq r16,32,r16 + + srl r4,r20,r6 + ldq r3,-96(r17) + sll r4,r19,r24 + ldq r4,-104(r17) + + subq r17,32,r17 + bne r18,$Loop +C cool down phase 2/1 +$Lend2: stq r7,-8(r16) + bis r5,r22,r5 + stq r8,-16(r16) + bis r6,r23,r6 + srl r1,r20,r7 + sll r1,r19,r21 + srl r2,r20,r8 + sll r2,r19,r22 + stq r5,-24(r16) + bis r7,r24,r7 + stq r6,-32(r16) + bis r8,r21,r8 + srl r3,r20,r5 + sll r3,r19,r23 + srl r4,r20,r6 + sll r4,r19,r24 +C cool down phase 2/2 + stq r7,-40(r16) + bis r5,r22,r5 + stq r8,-48(r16) + bis r6,r23,r6 + stq r5,-56(r16) + stq r6,-64(r16) +C cool down phase 2/3 + stq r24,-72(r16) + ret r31,(r26),1 + +C cool down phase 1/1 +$Lend1: srl r1,r20,r7 + sll r1,r19,r21 + srl r2,r20,r8 + sll r2,r19,r22 + srl r3,r20,r5 + bis r7,r24,r7 + sll r3,r19,r23 + bis r8,r21,r8 + srl r4,r20,r6 + sll r4,r19,r24 +C cool down phase 1/2 + stq r7,-8(r16) + bis r5,r22,r5 + stq r8,-16(r16) + bis r6,r23,r6 + stq r5,-24(r16) + stq r6,-32(r16) + stq r24,-40(r16) + ret r31,(r26),1 + +$Lend: stq r24,-8(r16) + ret r31,(r26),1 +EPILOGUE(mpn_lshift) +ASM_END() diff --git a/gmp/mpn/alpha/ev5/rshift.asm b/gmp/mpn/alpha/ev5/rshift.asm new file mode 100644 index 0000000000..0244da35a5 --- /dev/null +++ b/gmp/mpn/alpha/ev5/rshift.asm @@ -0,0 +1,169 @@ +dnl Alpha EV5 mpn_rshift -- Shift a number right. + +dnl Copyright 1994, 1995, 2000 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. 
+ +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C EV4: ? +C EV5: 3.25 +C EV6: 1.75 + +C INPUT PARAMETERS +C rp r16 +C up r17 +C n r18 +C cnt r19 + + +ASM_START() +PROLOGUE(mpn_rshift) + ldq r4,0(r17) C load first limb + subq r31,r19,r20 C r20 = -cnt, the complementary left-shift count (shifts use only the low 6 bits) + subq r18,1,r18 C n-1: the top limb is finished separately at the end + and r18,4-1,r28 C number of limbs in first loop + sll r4,r20,r0 C compute function result + + beq r28,$L0 C first loop not needed when (n-1) is a multiple of 4 + subq r18,r28,r18 C r18 = limbs left for the 4-way unrolled code + + ALIGN(8) +$Loop0: ldq r3,8(r17) C next higher limb + addq r16,8,r16 + srl r4,r19,r5 C low part from current limb + addq r17,8,r17 + subq r28,1,r28 + sll r3,r20,r6 C high part from next limb + bis r3,r3,r4 C r4 = r3, current limb for the next iteration + bis r5,r6,r8 C merge the two parts + stq r8,-8(r16) C rp was already advanced by 8 + bne r28,$Loop0 + +$L0: srl r4,r19,r24 C r24 = low part of the next result limb + beq r18,$Lend C nothing left: r24 is the top result limb +C warm up phase 1 + ldq r1,8(r17) + subq r18,4,r18 + ldq r2,16(r17) + ldq r3,24(r17) + ldq r4,32(r17) + beq r18,$Lend1 +C warm up phase 2 + sll r1,r20,r7 + srl r1,r19,r21 + sll r2,r20,r8 + ldq r1,40(r17) + srl r2,r19,r22 + ldq r2,48(r17) + sll r3,r20,r5 + bis r7,r24,r7 + srl r3,r19,r23 + bis r8,r21,r8 + sll r4,r20,r6 + ldq r3,56(r17) + srl r4,r19,r24 + ldq r4,64(r17) + subq r18,4,r18 + beq r18,$Lend2 + ALIGN(16) +C main loop +$Loop: stq r7,0(r16) + bis r5,r22,r5 + stq r8,8(r16) + bis r6,r23,r6 + + sll r1,r20,r7 + subq r18,4,r18 + srl r1,r19,r21 + unop C ldq r31,-96(r17) + + sll r2,r20,r8 + ldq r1,72(r17) + srl r2,r19,r22 + ldq r2,80(r17) + + stq r5,16(r16) + bis r7,r24,r7 + stq r6,24(r16) + bis r8,r21,r8 + + sll r3,r20,r5 + unop C ldq r31,-96(r17) + srl r3,r19,r23 + addq r16,32,r16 + + sll r4,r20,r6 + ldq r3,88(r17) + srl r4,r19,r24 + ldq r4,96(r17) + + addq r17,32,r17 + bne r18,$Loop +C cool down phase 2/1 +$Lend2: stq r7,0(r16) + bis r5,r22,r5 + stq
r8,8(r16) + bis r6,r23,r6 + sll r1,r20,r7 + srl r1,r19,r21 + sll r2,r20,r8 + srl r2,r19,r22 + stq r5,16(r16) + bis r7,r24,r7 + stq r6,24(r16) + bis r8,r21,r8 + sll r3,r20,r5 + srl r3,r19,r23 + sll r4,r20,r6 + srl r4,r19,r24 +C cool down phase 2/2 + stq r7,32(r16) + bis r5,r22,r5 + stq r8,40(r16) + bis r6,r23,r6 + stq r5,48(r16) + stq r6,56(r16) +C cool down phase 2/3 + stq r24,64(r16) + ret r31,(r26),1 + +C cool down phase 1/1 +$Lend1: sll r1,r20,r7 + srl r1,r19,r21 + sll r2,r20,r8 + srl r2,r19,r22 + sll r3,r20,r5 + bis r7,r24,r7 + srl r3,r19,r23 + bis r8,r21,r8 + sll r4,r20,r6 + srl r4,r19,r24 +C cool down phase 1/2 + stq r7,0(r16) + bis r5,r22,r5 + stq r8,8(r16) + bis r6,r23,r6 + stq r5,16(r16) + stq r6,24(r16) + stq r24,32(r16) + ret r31,(r26),1 + +$Lend: stq r24,0(r16) C top result limb: only the srl part, no higher limb to merge + ret r31,(r26),1 +EPILOGUE(mpn_rshift) +ASM_END() diff --git a/gmp/mpn/alpha/ev5/sub_n.asm b/gmp/mpn/alpha/ev5/sub_n.asm new file mode 100644 index 0000000000..2c25fad400 --- /dev/null +++ b/gmp/mpn/alpha/ev5/sub_n.asm @@ -0,0 +1,146 @@ +dnl Alpha EV5 mpn_sub_n -- Subtract two limb vectors of the same length > 0 +dnl and store difference in a third limb vector. + +dnl Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+ +include(`../config.m4') + +C cycles/limb +C EV4: ? +C EV5: 4.75 +C EV6: 3 + +dnl INPUT PARAMETERS +dnl res_ptr r16 +dnl s1_ptr r17 +dnl s2_ptr r18 +dnl size r19 + +ASM_START() +PROLOGUE(mpn_sub_n) + bis r31,r31,r25 C clear cy + subq r19,4,r19 C decr loop cnt + blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop +C Start software pipeline for 1st loop + ldq r0,0(r18) + ldq r4,0(r17) + ldq r1,8(r18) + ldq r5,8(r17) + addq r17,32,r17 C update s1_ptr + ldq r2,16(r18) + subq r4,r0,r20 C 1st main subtract + ldq r3,24(r18) + subq r19,4,r19 C decr loop cnt + ldq r6,-16(r17) C 3rd s1 limb (s1_ptr was already advanced by 32) + cmpult r4,r0,r25 C compute cy from last subtract + ldq r7,-8(r17) C 4th s1 limb (s1_ptr was already advanced by 32) + subq r5,r1,r28 C 2nd main subtract + addq r18,32,r18 C update s2_ptr + subq r28,r25,r21 C 2nd carry subtract + cmpult r5,r1,r8 C compute cy from last subtract + blt r19,$Lend1 C if less than 4 limbs remain, jump +C 1st loop handles groups of 4 limbs in a software pipeline + ALIGN(16) +$Loop: cmpult r28,r25,r25 C compute cy from last subtract + ldq r0,0(r18) + bis r8,r25,r25 C combine cy from the two subtracts + ldq r1,8(r18) + subq r6,r2,r28 C 3rd main subtract + ldq r4,0(r17) + subq r28,r25,r22 C 3rd carry subtract + ldq r5,8(r17) + cmpult r6,r2,r8 C compute cy from last subtract + cmpult r28,r25,r25 C compute cy from last subtract + stq r20,0(r16) + bis r8,r25,r25 C combine cy from the two subtracts + stq r21,8(r16) + subq r7,r3,r28 C 4th main subtract + subq r28,r25,r23 C 4th carry subtract + cmpult r7,r3,r8 C compute cy from last subtract + cmpult r28,r25,r25 C compute cy from last subtract + addq r17,32,r17 C update s1_ptr + bis r8,r25,r25 C combine cy from the two subtracts + addq r16,32,r16 C update res_ptr + subq r4,r0,r28 C 1st main subtract + ldq r2,16(r18) + subq r28,r25,r20 C 1st carry subtract + ldq r3,24(r18) + cmpult r4,r0,r8 C compute cy from last subtract + ldq r6,-16(r17) + cmpult r28,r25,r25 C compute cy from last subtract + ldq r7,-8(r17) + bis r8,r25,r25 C combine cy from the two subtracts + subq r19,4,r19 C
decr loop cnt + stq r22,-16(r16) + subq r5,r1,r28 C 2nd main subtract + stq r23,-8(r16) + subq r28,r25,r21 C 2nd carry subtract + addq r18,32,r18 C update s2_ptr + cmpult r5,r1,r8 C compute cy from last subtract + bge r19,$Loop +C Finish software pipeline for 1st loop +$Lend1: cmpult r28,r25,r25 C compute cy from last subtract + bis r8,r25,r25 C combine cy from the two subtracts + subq r6,r2,r28 C 3rd main subtract + subq r28,r25,r22 C 3rd carry subtract + cmpult r6,r2,r8 C compute cy from last subtract + cmpult r28,r25,r25 C compute cy from last subtract + stq r20,0(r16) + bis r8,r25,r25 C combine cy from the two subtracts + stq r21,8(r16) + subq r7,r3,r28 C 4th main subtract + subq r28,r25,r23 C 4th carry subtract + cmpult r7,r3,r8 C compute cy from last subtract + cmpult r28,r25,r25 C compute cy from last subtract + bis r8,r25,r25 C combine cy from the two subtracts + addq r16,32,r16 C update res_ptr + stq r22,-16(r16) + stq r23,-8(r16) +$Lend2: addq r19,4,r19 C restore loop cnt + beq r19,$Lret C no leftover limbs: just return cy +C Start software pipeline for 2nd loop + ldq r0,0(r18) + ldq r4,0(r17) + subq r19,1,r19 + beq r19,$Lend0 C only one limb left: skip the loop body +C 2nd loop handles remaining 1-3 limbs + ALIGN(16) +$Loop0: subq r4,r0,r28 C main subtract + cmpult r4,r0,r8 C compute cy from last subtract + ldq r0,8(r18) + ldq r4,8(r17) + subq r28,r25,r20 C carry subtract + addq r18,8,r18 + addq r17,8,r17 + stq r20,0(r16) + cmpult r28,r25,r25 C compute cy from last subtract + subq r19,1,r19 C decr loop cnt + bis r8,r25,r25 C combine cy from the two subtracts + addq r16,8,r16 + bne r19,$Loop0 +$Lend0: subq r4,r0,r28 C main subtract + subq r28,r25,r20 C carry subtract + cmpult r4,r0,r8 C compute cy from last subtract + cmpult r28,r25,r25 C compute cy from last subtract + stq r20,0(r16) + bis r8,r25,r25 C combine cy from the two subtracts + +$Lret: bis r25,r31,r0 C return cy (the final borrow) in r0 + ret r31,(r26),1 +EPILOGUE(mpn_sub_n) +ASM_END() diff --git a/gmp/mpn/alpha/ev6/add_n.asm b/gmp/mpn/alpha/ev6/add_n.asm index 9261f31b8a..114af73aa0 100644 ---
a/gmp/mpn/alpha/ev6/add_n.asm +++ b/gmp/mpn/alpha/ev6/add_n.asm @@ -4,30 +4,19 @@ dnl store sum in a third limb vector. dnl Copyright 2000, 2003, 2005 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/ev6/aorslsh1_n.asm b/gmp/mpn/alpha/ev6/aorslsh1_n.asm deleted file mode 100644 index cb966ce021..0000000000 --- a/gmp/mpn/alpha/ev6/aorslsh1_n.asm +++ /dev/null @@ -1,172 +0,0 @@ -dnl Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1). 
- -dnl Copyright 2003, 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C EV4: ? -C EV5: 7 -C EV6: 4 - -C TODO -C * Tune to reach 3.75 c/l on ev6. 
- -define(`rp',`r16') -define(`up',`r17') -define(`vp',`r18') -define(`n', `r19') - -define(`u0', `r8') -define(`u1', `r1') -define(`v0', `r4') -define(`v1', `r5') - -define(`cy0', `r0') -define(`cy1', `r20') -define(`cy', `r22') -define(`rr', `r24') -define(`ps', `r25') -define(`sl', `r28') - -ifdef(`OPERATION_addlsh1_n',` - define(ADDSUB, addq) - define(CARRY, `cmpult $1,$2,$3') - define(func, mpn_addlsh1_n) -') -ifdef(`OPERATION_sublsh1_n',` - define(ADDSUB, subq) - define(CARRY, `cmpult $2,$1,$3') - define(func, mpn_sublsh1_n) -') - -MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n) - -ASM_START() -PROLOGUE(func) - and n, 2, cy0 - blbs n, L(bx1) -L(bx0): ldq v1, 0(vp) - ldq u1, 0(up) - lda r2, 0(r31) - bne cy0, L(b10) - -L(b00): lda vp, 48(vp) - lda up, -16(up) - lda rp, -8(rp) - lda cy0, 0(r31) - br r31, L(lo0) - -L(b10): lda vp, 32(vp) - lda rp, 8(rp) - lda cy0, 0(r31) - br r31, L(lo2) - -L(bx1): ldq v0, 0(vp) - ldq u0, 0(up) - lda r3, 0(r31) - beq cy0, L(b01) - -L(b11): lda vp, 40(vp) - lda up, -24(up) - lda rp, 16(rp) - lda cy1, 0(r31) - br r31, L(lo3) - -L(b01): lda n, -4(n) - lda cy1, 0(r31) - ble n, L(end) - lda vp, 24(vp) - lda up, -8(up) - - ALIGN(16) -L(top): addq v0, v0, r6 - ldq v1, -16(vp) - addq r6, r3, sl C combined vlimb - ldq u1, 16(up) - ADDSUB u0, sl, ps C ulimb + (vlimb << 1) - cmplt v0, r31, r2 C high v bits - ADDSUB ps, cy1, rr C consume carry from previous operation - CARRY( ps, u0, cy0) C carry out #2 - stq rr, 0(rp) - CARRY( rr, ps, cy) C carry out #3 - lda vp, 32(vp) C bookkeeping - addq cy, cy0, cy0 C final carry out -L(lo0): addq v1, v1, r7 - ldq v0, -40(vp) - addq r7, r2, sl - ldq u0, 24(up) - ADDSUB u1, sl, ps - cmplt v1, r31, r3 - ADDSUB ps, cy0, rr - CARRY( ps, u1, cy1) - stq rr, 8(rp) - CARRY( rr, ps, cy) - lda rp, 32(rp) C bookkeeping - addq cy, cy1, cy1 -L(lo3): addq v0, v0, r6 - ldq v1, -32(vp) - addq r6, r3, sl - ldq u1, 32(up) - ADDSUB u0, sl, ps - cmplt v0, r31, r2 - ADDSUB ps, cy1, rr - CARRY( ps, u0, cy0) - stq rr, -16(rp) - 
CARRY( rr, ps, cy) - lda up, 32(up) C bookkeeping - addq cy, cy0, cy0 -L(lo2): addq v1, v1, r7 - ldq v0, -24(vp) - addq r7, r2, sl - ldq u0, 8(up) - ADDSUB u1, sl, ps - cmplt v1, r31, r3 - ADDSUB ps, cy0, rr - CARRY( ps, u1, cy1) - stq rr, -8(rp) - CARRY( rr, ps, cy) - lda n, -4(n) C bookkeeping - addq cy, cy1, cy1 - bgt n, L(top) - -L(end): addq v0, v0, r6 - addq r6, r3, sl - ADDSUB u0, sl, ps - cmplt v0, r31, r2 - ADDSUB ps, cy1, rr - CARRY( ps, u0, cy0) - stq rr, 0(rp) - CARRY( rr, ps, cy) - addq cy, cy0, cy0 - addq cy0, r2, r0 - - ret r31,(r26),1 -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/alpha/ev6/aorsmul_1.asm b/gmp/mpn/alpha/ev6/aorsmul_1.asm index 0e68e6e7ad..eda092b2d5 100644 --- a/gmp/mpn/alpha/ev6/aorsmul_1.asm +++ b/gmp/mpn/alpha/ev6/aorsmul_1.asm @@ -1,32 +1,21 @@ dnl Alpha ev6 mpn_addmul_1 and mpn_submul_1. -dnl Copyright 2000, 2003-2005, 2008 Free Software Foundation, Inc. +dnl Copyright 2000, 2003, 2004, 2005, 2008 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/ev6/gmp-mparam.h b/gmp/mpn/alpha/ev6/gmp-mparam.h index e51d6b0d15..a01e977433 100644 --- a/gmp/mpn/alpha/ev6/gmp-mparam.h +++ b/gmp/mpn/alpha/ev6/gmp-mparam.h @@ -1,209 +1,76 @@ /* gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 1999-2002, 2004, 2005, 2008-2010, 2014 Free -Software Foundation, Inc. +Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2008, 2009 +Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ -#define GMP_LIMB_BITS 64 -#define GMP_LIMB_BYTES 8 +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 #define DIVEXACT_BY3_METHOD 0 /* override ../diveby3.asm */ -/* 500 MHz 21164 (agnesi.math.su.se) */ -/* FFT tuning limit = 20000000 */ -/* Generated by tuneup.c, 2014-03-14, gcc 3.3 */ - -#define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */ -#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */ -#define MOD_1_1P_METHOD 2 -#define MOD_1_NORM_THRESHOLD 0 /* always */ -#define MOD_1_UNNORM_THRESHOLD 0 /* always */ -#define MOD_1N_TO_MOD_1_1_THRESHOLD 4 -#define MOD_1U_TO_MOD_1_1_THRESHOLD 2 -#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10 -#define MOD_1_2_TO_MOD_1_4_THRESHOLD 21 -#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 7 -#define USE_PREINV_DIVREM_1 1 /* preinv always */ -#define DIV_QR_1N_PI1_METHOD 2 -#define DIV_QR_1_NORM_THRESHOLD 5 -#define DIV_QR_1_UNNORM_THRESHOLD 1 -#define DIV_QR_2_PI2_THRESHOLD 8 -#define DIVEXACT_1_THRESHOLD 0 /* always */ -#define BMOD_1_TO_MOD_1_THRESHOLD 20 - -#define MUL_TOOM22_THRESHOLD 32 -#define MUL_TOOM33_THRESHOLD 117 -#define MUL_TOOM44_THRESHOLD 124 -#define MUL_TOOM6H_THRESHOLD 230 -#define MUL_TOOM8H_THRESHOLD 357 - -#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97 -#define MUL_TOOM32_TO_TOOM53_THRESHOLD 107 -#define MUL_TOOM42_TO_TOOM53_THRESHOLD 88 -#define MUL_TOOM42_TO_TOOM63_THRESHOLD 105 -#define MUL_TOOM43_TO_TOOM54_THRESHOLD 136 - -#define SQR_BASECASE_THRESHOLD 0 /* always */ -#define SQR_TOOM2_THRESHOLD 59 -#define SQR_TOOM3_THRESHOLD 123 -#define SQR_TOOM4_THRESHOLD 163 -#define SQR_TOOM6_THRESHOLD 333 -#define SQR_TOOM8_THRESHOLD 0 
/* always */ - -#define MULMID_TOOM42_THRESHOLD 52 - -#define MULMOD_BNM1_THRESHOLD 19 -#define SQRMOD_BNM1_THRESHOLD 5 - -#define MUL_FFT_MODF_THRESHOLD 468 /* k = 5 */ -#define MUL_FFT_TABLE3 \ - { { 468, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \ - { 11, 5}, { 23, 6}, { 19, 7}, { 10, 6}, \ - { 24, 7}, { 13, 6}, { 27, 7}, { 14, 6}, \ - { 29, 7}, { 17, 6}, { 35, 7}, { 29, 8}, \ - { 15, 7}, { 32, 8}, { 17, 7}, { 35, 8}, \ - { 19, 7}, { 39, 8}, { 29, 9}, { 15, 8}, \ - { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \ - { 51, 9}, { 27, 8}, { 55, 9}, { 35, 8}, \ - { 71, 9}, { 39,10}, { 23, 9}, { 55,10}, \ - { 31, 9}, { 67,10}, { 39, 9}, { 79,10}, \ - { 47, 9}, { 95,10}, { 55,11}, { 31,10}, \ - { 79,11}, { 47,10}, { 103,12}, { 31,11}, \ - { 63,10}, { 135,11}, { 79,10}, { 167,11}, \ - { 95,10}, { 199,11}, { 111,12}, { 63,11}, \ - { 143,10}, { 287, 9}, { 575,11}, { 159,10}, \ - { 319,12}, { 95,11}, { 191,10}, { 383,11}, \ - { 207,13}, { 63,12}, { 127,11}, { 255,10}, \ - { 511,11}, { 271,10}, { 543,11}, { 287,10}, \ - { 575,12}, { 159,11}, { 319,10}, { 639,11}, \ - { 335,10}, { 671,11}, { 351,10}, { 703,12}, \ - { 191,11}, { 383,10}, { 767,11}, { 415,12}, \ - { 223,11}, { 447,13}, { 127,12}, { 255,11}, \ - { 543,12}, { 287,11}, { 575,10}, { 1151,11}, \ - { 607,12}, { 319,11}, { 671,12}, { 351,11}, \ - { 703,13}, { 191,12}, { 383,11}, { 767,12}, \ - { 415,11}, { 831,12}, { 447,14}, { 127,13}, \ - { 255,12}, { 575,11}, { 1151,12}, { 607,13}, \ - { 319,12}, { 735,13}, { 383,12}, { 767,11}, \ - { 1535,12}, { 831,13}, { 447,12}, { 959,14}, \ - { 255,13}, { 511,12}, { 1087,13}, { 575,12}, \ - { 1215,13}, { 639,12}, { 1343,13}, { 703,12}, \ - { 1407,14}, { 383,13}, { 767,12}, { 1535,13}, \ - { 831,12}, { 1663,13}, { 959,15}, { 255,14}, \ - { 511,13}, { 1215,14}, { 639,13}, { 1407,14}, \ - { 767,13}, { 1663,14}, { 895,13}, { 1855,15}, \ - { 511,14}, { 16384,15}, { 32768,16}, { 65536,17}, \ - { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \ - {2097152,22}, {4194304,23}, {8388608,24} } 
-#define MUL_FFT_TABLE3_SIZE 151 -#define MUL_FFT_THRESHOLD 5760 - -#define SQR_FFT_MODF_THRESHOLD 412 /* k = 5 */ -#define SQR_FFT_TABLE3 \ - { { 412, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \ - { 11, 5}, { 23, 6}, { 12, 5}, { 25, 6}, \ - { 27, 7}, { 14, 6}, { 29, 7}, { 28, 8}, \ - { 15, 7}, { 31, 8}, { 17, 7}, { 36, 8}, \ - { 19, 7}, { 39, 8}, { 29, 9}, { 15, 8}, \ - { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \ - { 49, 9}, { 27,10}, { 15, 9}, { 39,10}, \ - { 23, 9}, { 51,11}, { 15,10}, { 31, 9}, \ - { 67,10}, { 39, 9}, { 79,10}, { 47, 9}, \ - { 95,10}, { 55,11}, { 31,10}, { 79,11}, \ - { 47,10}, { 95,12}, { 31,11}, { 63,10}, \ - { 127, 9}, { 255,11}, { 79,10}, { 159, 9}, \ - { 319,10}, { 167,11}, { 95,10}, { 191, 9}, \ - { 383,11}, { 111,12}, { 63,11}, { 127,10}, \ - { 271,11}, { 143,10}, { 287, 9}, { 575,10}, \ - { 303,11}, { 159,10}, { 319,12}, { 95,11}, \ - { 191,10}, { 383,11}, { 207,13}, { 63,12}, \ - { 127,11}, { 255,10}, { 511,11}, { 271,10}, \ - { 543,11}, { 287,10}, { 575,11}, { 303,12}, \ - { 159,11}, { 319,10}, { 639,11}, { 335,10}, \ - { 671,11}, { 351,10}, { 703,11}, { 367,12}, \ - { 191,11}, { 383,10}, { 767,11}, { 415,12}, \ - { 223,11}, { 447,13}, { 127,12}, { 255,11}, \ - { 543,12}, { 287,11}, { 575,10}, { 1151,11}, \ - { 607,12}, { 319,11}, { 639,10}, { 1279,11}, \ - { 671,12}, { 351,11}, { 703,13}, { 191,12}, \ - { 383,11}, { 767,12}, { 415,11}, { 831,12}, \ - { 447,11}, { 895,12}, { 479,14}, { 127,13}, \ - { 255,12}, { 575,11}, { 1151,12}, { 607,13}, \ - { 319,12}, { 703,11}, { 1407,12}, { 735,13}, \ - { 383,12}, { 831,13}, { 447,12}, { 959,14}, \ - { 255,13}, { 511,12}, { 1087,13}, { 575,12}, \ - { 1151,13}, { 639,12}, { 1279,13}, { 703,12}, \ - { 1407,14}, { 383,13}, { 767,12}, { 1535,13}, \ - { 831,12}, { 1663,13}, { 959,15}, { 255,14}, \ - { 511,13}, { 1215,14}, { 639,13}, { 1407,14}, \ - { 767,13}, { 1663,14}, { 895,13}, { 1791,15}, \ - { 511,14}, { 16384,15}, { 32768,16}, { 65536,17}, \ - { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \ 
- {2097152,22}, {4194304,23}, {8388608,24} } -#define SQR_FFT_TABLE3_SIZE 159 -#define SQR_FFT_THRESHOLD 5056 - -#define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 100 -#define MULLO_MUL_N_THRESHOLD 11355 - -#define DC_DIV_QR_THRESHOLD 124 -#define DC_DIVAPPR_Q_THRESHOLD 438 -#define DC_BDIV_QR_THRESHOLD 153 -#define DC_BDIV_Q_THRESHOLD 318 - -#define INV_MULMOD_BNM1_THRESHOLD 62 -#define INV_NEWTON_THRESHOLD 384 -#define INV_APPR_THRESHOLD 402 - -#define BINV_NEWTON_THRESHOLD 381 -#define REDC_1_TO_REDC_N_THRESHOLD 110 - -#define MU_DIV_QR_THRESHOLD 1752 -#define MU_DIVAPPR_Q_THRESHOLD 1895 -#define MUPI_DIV_QR_THRESHOLD 174 -#define MU_BDIV_QR_THRESHOLD 1387 -#define MU_BDIV_Q_THRESHOLD 1787 - -#define POWM_SEC_TABLE 1,13,66,82,579 - -#define MATRIX22_STRASSEN_THRESHOLD 15 -#define HGCD_THRESHOLD 318 -#define HGCD_APPR_THRESHOLD 363 -#define HGCD_REDUCE_THRESHOLD 2384 -#define GCD_DC_THRESHOLD 2504 -#define GCDEXT_DC_THRESHOLD 671 -#define JACOBI_BASE_METHOD 3 - -#define GET_STR_DC_THRESHOLD 14 -#define GET_STR_PRECOMPUTE_THRESHOLD 25 -#define SET_STR_DC_THRESHOLD 3754 -#define SET_STR_PRECOMPUTE_THRESHOLD 8097 - -#define FAC_DSC_THRESHOLD 951 -#define FAC_ODD_THRESHOLD 24 +/* 500 MHz 21164 */ + +/* Generated by tuneup.c, 2009-01-12, gcc 3.3 */ + +#define MUL_KARATSUBA_THRESHOLD 31 +#define MUL_TOOM3_THRESHOLD 101 +#define MUL_TOOM44_THRESHOLD 168 + +#define SQR_BASECASE_THRESHOLD 6 +#define SQR_KARATSUBA_THRESHOLD 60 +#define SQR_TOOM3_THRESHOLD 102 +#define SQR_TOOM4_THRESHOLD 172 + +#define MULLOW_BASECASE_THRESHOLD 0 /* always */ +#define MULLOW_DC_THRESHOLD 102 +#define MULLOW_MUL_N_THRESHOLD 399 + +#define DIV_SB_PREINV_THRESHOLD 0 /* preinv always */ +#define DIV_DC_THRESHOLD 134 +#define POWM_THRESHOLD 257 + +#define MATRIX22_STRASSEN_THRESHOLD 19 +#define HGCD_THRESHOLD 303 +#define GCD_DC_THRESHOLD 1258 +#define GCDEXT_DC_THRESHOLD 807 +#define JACOBI_BASE_METHOD 3 + +#define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */ 
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */ +#define MOD_1_NORM_THRESHOLD 0 /* always */ +#define MOD_1_UNNORM_THRESHOLD 0 /* always */ +#define MOD_1_1_THRESHOLD 13 +#define MOD_1_2_THRESHOLD 14 +#define MOD_1_4_THRESHOLD 40 +#define USE_PREINV_DIVREM_1 1 /* preinv always */ +#define USE_PREINV_MOD_1 1 /* preinv always */ +#define DIVEXACT_1_THRESHOLD 0 /* always */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always */ + +#define GET_STR_DC_THRESHOLD 16 +#define GET_STR_PRECOMPUTE_THRESHOLD 23 +#define SET_STR_DC_THRESHOLD 4615 +#define SET_STR_PRECOMPUTE_THRESHOLD 8178 + +#define MUL_FFT_TABLE { 432, 864, 1856, 3840, 11264, 28672, 81920, 327680, 0 } +#define MUL_FFT_MODF_THRESHOLD 448 +#define MUL_FFT_THRESHOLD 4992 + +#define SQR_FFT_TABLE { 432, 864, 1728, 3840, 9216, 20480, 81920, 327680, 786432, 0 } +#define SQR_FFT_MODF_THRESHOLD 344 +#define SQR_FFT_THRESHOLD 3712 diff --git a/gmp/mpn/alpha/ev6/mod_1_4.asm b/gmp/mpn/alpha/ev6/mod_1_4.asm deleted file mode 100644 index 836de07c0f..0000000000 --- a/gmp/mpn/alpha/ev6/mod_1_4.asm +++ /dev/null @@ -1,337 +0,0 @@ -dnl Alpha mpn_mod_1s_4p - -dnl Contributed to the GNU project by Torbjorn Granlund. - -dnl Copyright 2009, 2010 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. 
-dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C TODO: -C * Optimise. 2.75 c/l should be possible. -C * Write a proper mpn_mod_1s_4p_cps. The code below was compiler generated. -C * Optimise feed-in code, starting the sw pipeline in switch code. -C * Shorten software pipeline. The mul instructions are scheduled too far -C from their users. Fixing this will allow us to use fewer registers. -C * If we cannot reduce register usage, write perhaps small-n basecase. -C * Does this work for PIC? - -C cycles/limb -C EV4: ? -C EV5: 23 -C EV6: 3 - -define(`ap', `r16') -define(`n', `r17') -define(`pl', `r24') -define(`ph', `r25') -define(`rl', `r6') -define(`rh', `r7') -define(`B1modb', `r1') -define(`B2modb', `r2') -define(`B3modb', `r3') -define(`B4modb', `r4') -define(`B5modb', `r5') - -ASM_START() -PROLOGUE(mpn_mod_1s_4p) - lda r30, -64(r30) - stq r9, 8(r30) - ldq B1modb, 16(r19) - stq r10, 16(r30) - ldq B2modb, 24(r19) - stq r11, 24(r30) - ldq B3modb, 32(r19) - stq r12, 32(r30) - ldq B4modb, 40(r19) - stq r13, 40(r30) - ldq B5modb, 48(r19) - s8addq n, ap, ap C point ap at vector end - - and n, 3, r0 - lda n, -4(n) - beq r0, L(b0) - lda r6, -2(r0) - blt r6, L(b1) - beq r6, L(b2) - -L(b3): ldq r21, -16(ap) - ldq r22, -8(ap) - ldq r20, -24(ap) - mulq r21, B1modb, r8 - umulh r21, B1modb, r12 - mulq r22, B2modb, r9 - umulh r22, B2modb, r13 - addq r8, r20, pl - cmpult pl, r8, r0 - addq r0, r12, ph - addq r9, pl, rl - cmpult rl, r9, r0 - addq r13, ph, ph - addq r0, ph, rh - lda ap, -56(ap) - br L(com) - -L(b0): ldq r21, -24(ap) - ldq 
r22, -16(ap) - ldq r23, -8(ap) - ldq r20, -32(ap) - mulq r21, B1modb, r8 - umulh r21, B1modb, r12 - mulq r22, B2modb, r9 - umulh r22, B2modb, r13 - mulq r23, B3modb, r10 - umulh r23, B3modb, r27 - addq r8, r20, pl - cmpult pl, r8, r0 - addq r0, r12, ph - addq r9, pl, pl - cmpult pl, r9, r0 - addq r13, ph, ph - addq r0, ph, ph - addq r10, pl, rl - cmpult rl, r10, r0 - addq r27, ph, ph - addq r0, ph, rh - lda ap, -64(ap) - br L(com) - -L(b1): bis r31, r31, rh - ldq rl, -8(ap) - lda ap, -40(ap) - br L(com) - -L(b2): ldq rh, -8(ap) - ldq rl, -16(ap) - lda ap, -48(ap) - -L(com): ble n, L(ed3) - ldq r21, 8(ap) - ldq r22, 16(ap) - ldq r23, 24(ap) - ldq r20, 0(ap) - lda n, -4(n) - lda ap, -32(ap) - mulq r21, B1modb, r8 - umulh r21, B1modb, r12 - mulq r22, B2modb, r9 - umulh r22, B2modb, r13 - mulq r23, B3modb, r10 - umulh r23, B3modb, r27 - mulq rl, B4modb, r11 - umulh rl, B4modb, r28 - ble n, L(ed2) - - ALIGN(16) -L(top): ldq r21, 8(ap) - mulq rh, B5modb, rl - addq r8, r20, pl - ldq r22, 16(ap) - cmpult pl, r8, r0 - umulh rh, B5modb, rh - ldq r23, 24(ap) - addq r0, r12, ph - addq r9, pl, pl - mulq r21, B1modb, r8 - cmpult pl, r9, r0 - addq r13, ph, ph - umulh r21, B1modb, r12 - lda ap, -32(ap) - addq r0, ph, ph - addq r10, pl, pl - mulq r22, B2modb, r9 - cmpult pl, r10, r0 - addq r27, ph, ph - addq r11, pl, pl - umulh r22, B2modb, r13 - addq r0, ph, ph - cmpult pl, r11, r0 - addq r28, ph, ph - mulq r23, B3modb, r10 - ldq r20, 32(ap) - addq pl, rl, rl - umulh r23, B3modb, r27 - addq r0, ph, ph - cmpult rl, pl, r0 - mulq rl, B4modb, r11 - addq ph, rh, rh - umulh rl, B4modb, r28 - addq r0, rh, rh - lda n, -4(n) - bgt n, L(top) - -L(ed2): mulq rh, B5modb, rl - addq r8, r20, pl - umulh rh, B5modb, rh - cmpult pl, r8, r0 - addq r0, r12, ph - addq r9, pl, pl - cmpult pl, r9, r0 - addq r13, ph, ph - addq r0, ph, ph - addq r10, pl, pl - cmpult pl, r10, r0 - addq r27, ph, ph - addq r11, pl, pl - addq r0, ph, ph - cmpult pl, r11, r0 - addq r28, ph, ph - addq pl, rl, rl - addq r0, 
ph, ph - cmpult rl, pl, r0 - addq ph, rh, rh - addq r0, rh, rh - -L(ed3): mulq rh, B1modb, r8 - umulh rh, B1modb, rh - addq r8, rl, rl - cmpult rl, r8, r0 - addq r0, rh, rh - - ldq r24, 8(r19) C cnt - sll rh, r24, rh - subq r31, r24, r25 - srl rl, r25, r2 - sll rl, r24, rl - or r2, rh, rh - - ldq r23, 0(r19) C bi - mulq rh, r23, r8 - umulh rh, r23, r9 - addq rh, 1, r7 - addq r8, rl, r8 C ql - cmpult r8, rl, r0 - addq r9, r7, r9 - addq r0, r9, r9 C qh - mulq r9, r18, r21 C qh * b - subq rl, r21, rl - cmpult r8, rl, r0 C rl > ql - negq r0, r0 - and r0, r18, r0 - addq rl, r0, rl - cmpule r18, rl, r0 C rl >= b - negq r0, r0 - and r0, r18, r0 - subq rl, r0, rl - - srl rl, r24, r0 - - ldq r9, 8(r30) - ldq r10, 16(r30) - ldq r11, 24(r30) - ldq r12, 32(r30) - ldq r13, 40(r30) - lda r30, 64(r30) - ret r31, (r26), 1 -EPILOGUE() - -PROLOGUE(mpn_mod_1s_4p_cps,gp) - lda r30, -32(r30) - stq r26, 0(r30) - stq r9, 8(r30) - stq r10, 16(r30) - stq r11, 24(r30) - mov r16, r11 - LEA( r4, __clz_tab) - lda r10, 65(r31) - cmpbge r31, r17, r1 - srl r1, 1, r1 - xor r1, 127, r1 - addq r1, r4, r1 - ldq_u r2, 0(r1) - extbl r2, r1, r2 - s8subq r2, 7, r2 - srl r17, r2, r3 - subq r10, r2, r10 - addq r3, r4, r3 - ldq_u r1, 0(r3) - extbl r1, r3, r1 - subq r10, r1, r10 - sll r17, r10, r9 - mov r9, r16 - jsr r26, mpn_invert_limb - ldah r29, 0(r26) - subq r31, r10, r2 - lda r1, 1(r31) - sll r1, r10, r1 - subq r31, r9, r3 - srl r0, r2, r2 - ldq r26, 0(r30) - bis r2, r1, r2 - lda r29, 0(r29) - stq r0, 0(r11) - stq r10, 8(r11) - mulq r2, r3, r2 - srl r2, r10, r3 - umulh r2, r0, r1 - stq r3, 16(r11) - mulq r2, r0, r3 - ornot r31, r1, r1 - subq r1, r2, r1 - mulq r1, r9, r1 - addq r1, r9, r2 - cmpule r1, r3, r3 - cmoveq r3, r2, r1 - srl r1, r10, r3 - umulh r1, r0, r2 - stq r3, 24(r11) - mulq r1, r0, r3 - ornot r31, r2, r2 - subq r2, r1, r2 - mulq r2, r9, r2 - addq r2, r9, r1 - cmpule r2, r3, r3 - cmoveq r3, r1, r2 - srl r2, r10, r1 - umulh r2, r0, r3 - stq r1, 32(r11) - mulq r2, r0, r1 - ornot r31, r3, r3 
- subq r3, r2, r3 - mulq r3, r9, r3 - addq r3, r9, r2 - cmpule r3, r1, r1 - cmoveq r1, r2, r3 - srl r3, r10, r2 - umulh r3, r0, r1 - stq r2, 40(r11) - mulq r3, r0, r0 - ornot r31, r1, r1 - subq r1, r3, r1 - mulq r1, r9, r1 - addq r1, r9, r9 - cmpule r1, r0, r0 - cmoveq r0, r9, r1 - ldq r9, 8(r30) - srl r1, r10, r1 - ldq r10, 16(r30) - stq r1, 48(r11) - ldq r11, 24(r30) - lda r30, 32(r30) - ret r31, (r26), 1 -EPILOGUE() diff --git a/gmp/mpn/alpha/ev6/mul_1.asm b/gmp/mpn/alpha/ev6/mul_1.asm index 8ee19cd429..841f5083cb 100644 --- a/gmp/mpn/alpha/ev6/mul_1.asm +++ b/gmp/mpn/alpha/ev6/mul_1.asm @@ -4,30 +4,19 @@ dnl result in a second limb vector. dnl Copyright 2000, 2001, 2005 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -60,7 +49,7 @@ C r20,r29,r13-r15 scramble C C We're doing 7 of the 8 carry propagations with a br fixup code and 1 with a C put-the-carry-into-hi. The idea is that these branches are very rarely -C taken, and since a non-taken branch consumes no resources, that is better +C taken, and since a non-taken branch consumes no resources, that is better C than an addq. C C Software pipeline: a load in cycle #09, feeds a mul in cycle #16, feeds an @@ -137,7 +126,7 @@ $L_9_or_more: mulq r2,r19,r3 C r3 = prod_low umulh r2,r19,r21 C r21 = prod_high beq r20,$Le1b C jump if size was == 1 - bis r31, r31, r0 C FIXME: shouldn't need this + bis r31, r31, r0 C FIXME: shouldn't need this ldq r2,0(r17) C r2 = s1_limb lda r17,8(r17) C s1_ptr++ lda r20,-1(r20) C size-- diff --git a/gmp/mpn/alpha/ev6/nails/README b/gmp/mpn/alpha/ev6/nails/README index b214ac50ad..8b3b357a77 100644 --- a/gmp/mpn/alpha/ev6/nails/README +++ b/gmp/mpn/alpha/ev6/nails/README @@ -2,29 +2,18 @@ Copyright 2002, 2005 Free Software Foundation, Inc. This file is part of the GNU MP Library. -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. 
+The GNU MP Library is free software; you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by the +Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. +You should have received a copy of the GNU Lesser General Public License along +with the GNU MP Library. If not, see http://www.gnu.org/licenses/. diff --git a/gmp/mpn/alpha/ev6/nails/addmul_1.asm b/gmp/mpn/alpha/ev6/nails/addmul_1.asm index 711d4e66e5..149195c6f4 100644 --- a/gmp/mpn/alpha/ev6/nails/addmul_1.asm +++ b/gmp/mpn/alpha/ev6/nails/addmul_1.asm @@ -1,32 +1,21 @@ dnl Alpha ev6 nails mpn_addmul_1. dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -36,7 +25,7 @@ C EV5: 18 C EV6: 4 C TODO -C * Reroll loop for 3.75 c/l with current 4-way unrolling. +C * Reroll loop for 3.75 c/l with current 4-way unrolling. C * The loop is overscheduled wrt loads and wrt multiplies, in particular C umulh. C * Use FP loop count and multiple exit points, that would simplify feed-in lp0 diff --git a/gmp/mpn/alpha/ev6/nails/addmul_2.asm b/gmp/mpn/alpha/ev6/nails/addmul_2.asm index 6ff6b3ad6b..9edaed8b3a 100644 --- a/gmp/mpn/alpha/ev6/nails/addmul_2.asm +++ b/gmp/mpn/alpha/ev6/nails/addmul_2.asm @@ -1,32 +1,21 @@ dnl Alpha ev6 nails mpn_addmul_2. dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') diff --git a/gmp/mpn/alpha/ev6/nails/addmul_3.asm b/gmp/mpn/alpha/ev6/nails/addmul_3.asm index a1ffb680ec..1d89769e13 100644 --- a/gmp/mpn/alpha/ev6/nails/addmul_3.asm +++ b/gmp/mpn/alpha/ev6/nails/addmul_3.asm @@ -1,32 +1,21 @@ dnl Alpha ev6 nails mpn_addmul_3. dnl Copyright 2002, 2006 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
+dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/ev6/nails/addmul_4.asm b/gmp/mpn/alpha/ev6/nails/addmul_4.asm index 77e02a4316..f19b0232df 100644 --- a/gmp/mpn/alpha/ev6/nails/addmul_4.asm +++ b/gmp/mpn/alpha/ev6/nails/addmul_4.asm @@ -1,32 +1,21 @@ dnl Alpha ev6 nails mpn_addmul_4. dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/ev6/nails/aors_n.asm b/gmp/mpn/alpha/ev6/nails/aors_n.asm index f6586773f5..4958e81ed9 100644 --- a/gmp/mpn/alpha/ev6/nails/aors_n.asm +++ b/gmp/mpn/alpha/ev6/nails/aors_n.asm @@ -1,32 +1,21 @@ dnl Alpha ev6 nails mpn_add_n and mpn_sub_n. dnl Copyright 2002, 2006 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
+dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. dnl Runs at 2.5 cycles/limb. It would be possible to reach 2.0 cycles/limb diff --git a/gmp/mpn/alpha/ev6/nails/gmp-mparam.h b/gmp/mpn/alpha/ev6/nails/gmp-mparam.h index 7949fe8df8..1bc93b52c6 100644 --- a/gmp/mpn/alpha/ev6/nails/gmp-mparam.h +++ b/gmp/mpn/alpha/ev6/nails/gmp-mparam.h @@ -1,43 +1,33 @@ /* gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 1999-2004 Free Software Foundation, Inc. +Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004 Free Software +Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. 
The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. */ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ -#define GMP_LIMB_BITS 64 -#define GMP_LIMB_BYTES 8 +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 /* Generated by tuneup.c, 2004-02-07, gcc 3.3 */ -#define MUL_TOOM22_THRESHOLD 40 -#define MUL_TOOM33_THRESHOLD 236 +#define MUL_KARATSUBA_THRESHOLD 40 +#define MUL_TOOM3_THRESHOLD 236 #define SQR_BASECASE_THRESHOLD 7 /* karatsuba */ -#define SQR_TOOM2_THRESHOLD 0 /* never sqr_basecase */ +#define SQR_KARATSUBA_THRESHOLD 0 /* never sqr_basecase */ #define SQR_TOOM3_THRESHOLD 120 #define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX /* no preinv with nails */ diff --git a/gmp/mpn/alpha/ev6/nails/mul_1.asm b/gmp/mpn/alpha/ev6/nails/mul_1.asm index da2ee3d099..cac3776ba0 100644 --- a/gmp/mpn/alpha/ev6/nails/mul_1.asm +++ b/gmp/mpn/alpha/ev6/nails/mul_1.asm @@ -1,32 +1,21 @@ dnl Alpha ev6 nails mpn_mul_1. dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -36,10 +25,10 @@ C EV5: 18 C EV6: 3.25 C TODO -C * Reroll loop for 3.0 c/l with current 4-way unrolling. +C * Reroll loop for 3.0 c/l with current 4-way unrolling. C * The loop is overscheduled wrt loads and wrt multiplies, in particular C umulh. -C * Use FP loop count and multiple exit points, that would simplify feed-in lp0 +C * Use FP loop count and multiple exit points, that would simplify feed-in lp0 C and would work since the loop structure is really regular. 
C INPUT PARAMETERS diff --git a/gmp/mpn/alpha/ev6/nails/submul_1.asm b/gmp/mpn/alpha/ev6/nails/submul_1.asm index f473a59ba8..4242517a4a 100644 --- a/gmp/mpn/alpha/ev6/nails/submul_1.asm +++ b/gmp/mpn/alpha/ev6/nails/submul_1.asm @@ -1,32 +1,21 @@ dnl Alpha ev6 nails mpn_submul_1. dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
+dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') @@ -36,10 +25,10 @@ C EV5: 18 C EV6: 4 C TODO -C * Reroll loop for 3.75 c/l with current 4-way unrolling. +C * Reroll loop for 3.75 c/l with current 4-way unrolling. C * The loop is overscheduled wrt loads and wrt multiplies, in particular C umulh. -C * Use FP loop count and multiple exit points, that would simplify feed-in lp0 +C * Use FP loop count and multiple exit points, that would simplify feed-in lp0 C and would work since the loop structure is really regular. C INPUT PARAMETERS diff --git a/gmp/mpn/alpha/ev6/slot.pl b/gmp/mpn/alpha/ev6/slot.pl index a4c8a36882..17967e79a2 100755..100644 --- a/gmp/mpn/alpha/ev6/slot.pl +++ b/gmp/mpn/alpha/ev6/slot.pl @@ -1,32 +1,21 @@ #!/usr/bin/perl -w -# Copyright 2000, 2001, 2003-2005, 2011 Free Software Foundation, Inc. +# Copyright 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. # -# This file is part of the GNU MP Library. +# This file is part of the GNU MP Library. # -# The GNU MP Library is free software; you can redistribute it and/or modify -# it under the terms of either: +# The GNU MP Library is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; either version 3 of the License, or (at +# your option) any later version. # -# * the GNU Lesser General Public License as published by the Free -# Software Foundation; either version 3 of the License, or (at your -# option) any later version. +# The GNU MP Library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. 
# -# or -# -# * the GNU General Public License as published by the Free Software -# Foundation; either version 2 of the License, or (at your option) any -# later version. -# -# or both in parallel, as here. -# -# The GNU MP Library is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# for more details. -# -# You should have received copies of the GNU General Public License and the -# GNU Lesser General Public License along with the GNU MP Library. If not, -# see https://www.gnu.org/licenses/. +# You should have received a copy of the GNU Lesser General Public License +# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. # Usage: slot.pl [filename.o]... @@ -51,12 +40,9 @@ my %optable = ( 'addq' => 'E', 'and' => 'E', - 'andnot' => 'E', 'beq' => 'U', 'bge' => 'U', 'bgt' => 'U', - 'bic' => 'E', - 'bis' => 'E', 'blt' => 'U', 'bne' => 'U', 'br' => 'L', @@ -85,7 +71,6 @@ my %optable = 'ldt' => 'L', 'ret' => 'L', 'mov' => 'E', - 'mull' => 'U', 'mulq' => 'U', 'negq' => 'E', 'nop' => 'E', diff --git a/gmp/mpn/alpha/ev6/sqr_diagonal.asm b/gmp/mpn/alpha/ev6/sqr_diagonal.asm new file mode 100644 index 0000000000..58d086e624 --- /dev/null +++ b/gmp/mpn/alpha/ev6/sqr_diagonal.asm @@ -0,0 +1,115 @@ +dnl Alpha mpn_sqr_diagonal. + +dnl Copyright 2001, 2002, 2006 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. 
+ +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C EV4: ? +C EV5: ? +C EV6: 2.3 + +C INPUT PARAMETERS +C rp r16 result; two limbs (low, then high) are stored per source limb +C up r17 source limbs +C n r18 limb count; up[0] is loaded unconditionally, so n >= 1 is presumed + + +ASM_START() +PROLOGUE(mpn_sqr_diagonal) + lda r18, -2(r18) C n -= 2 + ldq r0, 0(r17) C r0 = up[0] + mulq r0, r0, r4 C r4 = low limb of up[0]^2 + umulh r0, r0, r20 C r20 = high limb of up[0]^2 + blt r18, L(ex1) C n was 1 + ldq r1, 8(r17) C r1 = up[1] + mulq r1, r1, r5 C r5 = low limb of up[1]^2 + umulh r1, r1, r21 C r21 = high limb of up[1]^2 + beq r18, L(ex2) C n was 2 + lda r18, -2(r18) C n -= 2 + ldq r0, 16(r17) C r0 = up[2], not yet squared + blt r18, L(ex3) C n was 3 + ldq r1, 24(r17) C r1 = up[3], not yet squared + beq r18, L(ex4) C n was 4 + + ALIGN(16) +L(top): lda r18, -2(r18) C n -= 2 + stq r4, 0(r16) C store the pending r4/r20 square + mulq r0, r0, r4 C then square the limb waiting in r0 + stq r20, 8(r16) + umulh r0, r0, r20 + ldq r0, 32(r17) C next limb for the r0 stream + blt r18, L(x) C only that one limb was left + stq r5, 16(r16) C store the pending r5/r21 square + mulq r1, r1, r5 C then square the limb waiting in r1 + stq r21, 24(r16) + umulh r1, r1, r21 + ldq r1, 40(r17) C next limb for the r1 stream + lda r16, 32(r16) C rp += 4 + lda r17, 16(r17) C up += 2 + bne r18, L(top) C n == 0 falls through to the ex4 drain + + ALIGN(16) +L(ex4): stq r4, 0(r16) C drain: r4/r20 and r5/r21 are finished squares, r0 and r1 still unsquared + mulq r0, r0, r4 + stq r20, 8(r16) + umulh r0, r0, r20 + stq r5, 16(r16) + mulq r1, r1, r5 + stq r21, 24(r16) + umulh r1, r1, r21 + stq r4, 32(r16) + stq r20, 40(r16) + stq r5, 48(r16) + stq r21, 56(r16) + ret r31, (r26), 1 + ALIGN(16) +L(x): stq r5, 16(r16) C rp was not advanced on this path; r1 and r0 each hold one unsquared limb + mulq r1, r1, r5 + stq r21, 24(r16) + umulh r1, r1, r21 + stq r4, 32(r16) + mulq r0, r0, r4 C square of the final limb + stq r20, 40(r16) + umulh r0, r0, r20 + stq r5, 48(r16) + stq r21, 56(r16) + stq r4, 64(r16) + stq r20, 72(r16) + ret r31, (r26), 1 +L(ex1): stq r4, 0(r16) C n == 1: rp[0..1] = up[0]^2 + stq r20, 8(r16) + ret r31, (r26), 1 + ALIGN(16) +L(ex2): stq r4, 0(r16) C n == 2: rp[0..3] = up[0]^2, up[1]^2 + stq r20, 8(r16) + stq r5, 16(r16) + stq r21, 24(r16) + ret r31, (r26), 1 + ALIGN(16) +L(ex3): stq r4, 0(r16) C n == 3: up[2] in r0 still to be squared + mulq r0, r0, r4 + stq r20, 8(r16) + umulh r0, r0, r20 + stq r5, 
16(r16) C r5/r21 = up[1]^2, computed before the branch here + stq r21, 24(r16) + stq r4, 32(r16) C r4/r20 = up[2]^2 + stq r20, 40(r16) + ret r31, (r26), 1 +EPILOGUE() +ASM_END() diff --git a/gmp/mpn/alpha/ev6/sub_n.asm b/gmp/mpn/alpha/ev6/sub_n.asm index a35ba40d34..f23ad44a15 100644 --- a/gmp/mpn/alpha/ev6/sub_n.asm +++ b/gmp/mpn/alpha/ev6/sub_n.asm @@ -4,30 +4,19 @@ dnl and store difference in a third limb vector. dnl Copyright 2000, 2003, 2005 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 
include(`../config.m4') diff --git a/gmp/mpn/alpha/ev67/gcd_1.asm b/gmp/mpn/alpha/ev67/gcd_1.asm index 55fa7d3673..2e6f0a5e22 100644 --- a/gmp/mpn/alpha/ev67/gcd_1.asm +++ b/gmp/mpn/alpha/ev67/gcd_1.asm @@ -4,29 +4,18 @@ dnl Copyright 2003, 2004 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. 
If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/ev67/hamdist.asm b/gmp/mpn/alpha/ev67/hamdist.asm index 4b13e9f14f..a72d95e90b 100644 --- a/gmp/mpn/alpha/ev67/hamdist.asm +++ b/gmp/mpn/alpha/ev67/hamdist.asm @@ -4,29 +4,18 @@ dnl Copyright 2003, 2005 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
+dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/ev67/popcount.asm b/gmp/mpn/alpha/ev67/popcount.asm index 049c1cd239..6ed79cf158 100644 --- a/gmp/mpn/alpha/ev67/popcount.asm +++ b/gmp/mpn/alpha/ev67/popcount.asm @@ -4,29 +4,18 @@ dnl Copyright 2003, 2005 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. 
If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/gmp-mparam.h b/gmp/mpn/alpha/gmp-mparam.h index b850bd24b5..6b6d7bd9c8 100644 --- a/gmp/mpn/alpha/gmp-mparam.h +++ b/gmp/mpn/alpha/gmp-mparam.h @@ -1,54 +1,43 @@ /* Alpha EV4 gmp-mparam.h -- Compiler/machine parameter header file. -Copyright 1991, 1993, 1994, 1999-2002, 2004, 2005, 2009 Free Software -Foundation, Inc. +Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2009 +Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - -or - - * the GNU General Public License as published by the Free Software - Foundation; either version 2 of the License, or (at your option) any - later version. - -or both in parallel, as here. +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 3 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. -You should have received copies of the GNU General Public License and the -GNU Lesser General Public License along with the GNU MP Library. If not, -see https://www.gnu.org/licenses/. 
*/ +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ -#define GMP_LIMB_BITS 64 -#define GMP_LIMB_BYTES 8 +#define BITS_PER_MP_LIMB 64 +#define BYTES_PER_MP_LIMB 8 /* 175MHz 21064 */ /* Generated by tuneup.c, 2009-01-15, gcc 3.2 */ -#define MUL_TOOM22_THRESHOLD 12 -#define MUL_TOOM33_THRESHOLD 69 +#define MUL_KARATSUBA_THRESHOLD 12 +#define MUL_TOOM3_THRESHOLD 69 #define MUL_TOOM44_THRESHOLD 88 #define SQR_BASECASE_THRESHOLD 4 -#define SQR_TOOM2_THRESHOLD 20 +#define SQR_KARATSUBA_THRESHOLD 20 #define SQR_TOOM3_THRESHOLD 62 #define SQR_TOOM4_THRESHOLD 155 -#define MULLO_BASECASE_THRESHOLD 0 /* always */ -#define MULLO_DC_THRESHOLD 40 -#define MULLO_MUL_N_THRESHOLD 202 +#define MULLOW_BASECASE_THRESHOLD 0 /* always */ +#define MULLOW_DC_THRESHOLD 40 +#define MULLOW_MUL_N_THRESHOLD 202 #define DIV_SB_PREINV_THRESHOLD 0 /* preinv always */ #define DIV_DC_THRESHOLD 38 diff --git a/gmp/mpn/alpha/invert_limb.asm b/gmp/mpn/alpha/invert_limb.asm index afc010f58c..99f51a30d5 100644 --- a/gmp/mpn/alpha/invert_limb.asm +++ b/gmp/mpn/alpha/invert_limb.asm @@ -1,95 +1,342 @@ dnl Alpha mpn_invert_limb -- Invert a normalized limb. -dnl Copyright 1996, 2000-2003, 2007, 2011, 2013 Free Software Foundation, Inc. - +dnl Copyright 1996, 2000, 2001, 2002, 2003, 2007 Free Software Foundation, +dnl Inc. +dnl dnl This file is part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. 
+dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. dnl dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/limb -C EV4: ? -C EV5: 137/140 (with BWX/without BWX) -C EV6: 71/72 (with BWX/without BWX) +C EV4: ~175 +C EV5: ~111-126 +C EV6: ~52-76 -C This was compiler generated, with minimal manual edits. Surely several -C cycles could be cut with some thought. +C This is based on ideas of Peter L. Montgomery. 
ASM_START() + +FLOAT64($C36,9223372036854775808.0) C 2^63 + PROLOGUE(mpn_invert_limb,gp) - LEA( r2, approx_tab) - srl r16, 54, r1 - srl r16, 24, r4 - and r16, 1, r5 - bic r1, 1, r7 - lda r4, 1(r4) - srl r16, 1, r3 - addq r7, r2, r1 -ifelse(bwx_available_p,1,` - ldwu r0, -512(r1) -',` - ldq_u r0, -512(r1) - extwl r0, r7, r0 -') - addq r3, r5, r3 - mull r0, r0, r1 - sll r0, 11, r0 - mulq r1, r4, r1 - srl r1, 40, r1 - subq r0, r1, r0 - lda r0, -1(r0) - mulq r0, r0, r2 - sll r0, 60, r1 - sll r0, 13, r0 - mulq r2, r4, r2 - subq r1, r2, r1 - srl r1, 47, r1 - addq r0, r1, r0 - mulq r0, r3, r3 - srl r0, 1, r1 - cmoveq r5, 0, r1 - subq r1, r3, r1 - umulh r1, r0, r3 - sll r0, 31, r0 - srl r3, 1, r1 - addq r0, r1, r0 - mulq r0, r16, r2 - umulh r0, r16, r3 - addq r2, r16, r1 - addq r3, r16, r16 - cmpult r1, r2, r1 - addq r16, r1, r3 - subq r0, r3, r0 - ret r31, (r26), 1 -EPILOGUE() -DATASTART(approx_tab,8) -forloop(i,256,512-1,dnl -` .word eval(0x7fd00/i) -')dnl - SIZE(approx_tab, 512) - TYPE(approx_tab, object) + lda r30,-16(r30) + addq r16,r16,r1 + bne r1,$73 + lda r0,-1 + br r31,$Lend +$73: + srl r16,1,r1 + stq r1,0(r30) + ldt f11,0(r30) + cvtqt f11,f1 + LEA(r1,$C36) + ldt f10,0(r1) C f10 = 2^63 + divt f10,f1,f10 C f10 = 2^63 / (u / 2) + LEA(r2,$invtab-4096) + srl r16,52,r1 C extract high 12 bits + addq r1,r1,r1 C align ...0000bbbbbbbb0 + addq r1,r2,r1 C compute array offset + ldq_u r2,0(r1) C load quadword containing our 16 bits +bigend(`addq r1,1,r1') + extwl r2,r1,r2 C extract desired 16 bits + sll r2,48,r0 + umulh r16,r0,r1 + addq r16,r1,r3 + stq r3,0(r30) + ldt f11,0(r30) + cvtqt f11,f1 + mult f1,f10,f1 + cvttqc f1,f1 + stt f1,0(r30) + ldq r4,0(r30) + subq r0,r4,r0 + umulh r16,r0,r1 + mulq r16,r0,r2 + addq r16,r1,r3 + bge r3,$Loop2 +$Loop1: addq r2,r16,r2 + cmpult r2,r16,r1 + addq r3,r1,r3 + addq r0,1,r0 + blt r3,$Loop1 +$Loop2: cmpult r2,r16,r1 + subq r0,1,r0 + subq r3,r1,r3 + subq r2,r16,r2 + bge r3,$Loop2 +$Lend: + lda r30,16(r30) + ret r31,(r26),1 
+EPILOGUE(mpn_invert_limb) +DATASTART($invtab) + .word 0xffff,0xffc0,0xff80,0xff40,0xff00,0xfec0,0xfe81,0xfe41 + .word 0xfe01,0xfdc2,0xfd83,0xfd43,0xfd04,0xfcc5,0xfc86,0xfc46 + .word 0xfc07,0xfbc8,0xfb8a,0xfb4b,0xfb0c,0xfacd,0xfa8e,0xfa50 + .word 0xfa11,0xf9d3,0xf994,0xf956,0xf918,0xf8d9,0xf89b,0xf85d + .word 0xf81f,0xf7e1,0xf7a3,0xf765,0xf727,0xf6ea,0xf6ac,0xf66e + .word 0xf631,0xf5f3,0xf5b6,0xf578,0xf53b,0xf4fd,0xf4c0,0xf483 + .word 0xf446,0xf409,0xf3cc,0xf38f,0xf352,0xf315,0xf2d8,0xf29c + .word 0xf25f,0xf222,0xf1e6,0xf1a9,0xf16d,0xf130,0xf0f4,0xf0b8 + .word 0xf07c,0xf03f,0xf003,0xefc7,0xef8b,0xef4f,0xef14,0xeed8 + .word 0xee9c,0xee60,0xee25,0xede9,0xedae,0xed72,0xed37,0xecfb + .word 0xecc0,0xec85,0xec4a,0xec0e,0xebd3,0xeb98,0xeb5d,0xeb22 + .word 0xeae8,0xeaad,0xea72,0xea37,0xe9fd,0xe9c2,0xe988,0xe94d + .word 0xe913,0xe8d8,0xe89e,0xe864,0xe829,0xe7ef,0xe7b5,0xe77b + .word 0xe741,0xe707,0xe6cd,0xe694,0xe65a,0xe620,0xe5e6,0xe5ad + .word 0xe573,0xe53a,0xe500,0xe4c7,0xe48d,0xe454,0xe41b,0xe3e2 + .word 0xe3a9,0xe370,0xe336,0xe2fd,0xe2c5,0xe28c,0xe253,0xe21a + .word 0xe1e1,0xe1a9,0xe170,0xe138,0xe0ff,0xe0c7,0xe08e,0xe056 + .word 0xe01e,0xdfe5,0xdfad,0xdf75,0xdf3d,0xdf05,0xdecd,0xde95 + .word 0xde5d,0xde25,0xdded,0xddb6,0xdd7e,0xdd46,0xdd0f,0xdcd7 + .word 0xdca0,0xdc68,0xdc31,0xdbf9,0xdbc2,0xdb8b,0xdb54,0xdb1d + .word 0xdae6,0xdaae,0xda78,0xda41,0xda0a,0xd9d3,0xd99c,0xd965 + .word 0xd92f,0xd8f8,0xd8c1,0xd88b,0xd854,0xd81e,0xd7e8,0xd7b1 + .word 0xd77b,0xd745,0xd70e,0xd6d8,0xd6a2,0xd66c,0xd636,0xd600 + .word 0xd5ca,0xd594,0xd55f,0xd529,0xd4f3,0xd4bd,0xd488,0xd452 + .word 0xd41d,0xd3e7,0xd3b2,0xd37c,0xd347,0xd312,0xd2dd,0xd2a7 + .word 0xd272,0xd23d,0xd208,0xd1d3,0xd19e,0xd169,0xd134,0xd100 + .word 0xd0cb,0xd096,0xd061,0xd02d,0xcff8,0xcfc4,0xcf8f,0xcf5b + .word 0xcf26,0xcef2,0xcebe,0xce89,0xce55,0xce21,0xcded,0xcdb9 + .word 0xcd85,0xcd51,0xcd1d,0xcce9,0xccb5,0xcc81,0xcc4e,0xcc1a + .word 0xcbe6,0xcbb3,0xcb7f,0xcb4c,0xcb18,0xcae5,0xcab1,0xca7e + .word 
0xca4b,0xca17,0xc9e4,0xc9b1,0xc97e,0xc94b,0xc918,0xc8e5 + .word 0xc8b2,0xc87f,0xc84c,0xc819,0xc7e7,0xc7b4,0xc781,0xc74f + .word 0xc71c,0xc6e9,0xc6b7,0xc684,0xc652,0xc620,0xc5ed,0xc5bb + .word 0xc589,0xc557,0xc524,0xc4f2,0xc4c0,0xc48e,0xc45c,0xc42a + .word 0xc3f8,0xc3c7,0xc395,0xc363,0xc331,0xc300,0xc2ce,0xc29c + .word 0xc26b,0xc239,0xc208,0xc1d6,0xc1a5,0xc174,0xc142,0xc111 + .word 0xc0e0,0xc0af,0xc07e,0xc04d,0xc01c,0xbfeb,0xbfba,0xbf89 + .word 0xbf58,0xbf27,0xbef6,0xbec5,0xbe95,0xbe64,0xbe33,0xbe03 + .word 0xbdd2,0xbda2,0xbd71,0xbd41,0xbd10,0xbce0,0xbcb0,0xbc80 + .word 0xbc4f,0xbc1f,0xbbef,0xbbbf,0xbb8f,0xbb5f,0xbb2f,0xbaff + .word 0xbacf,0xba9f,0xba6f,0xba40,0xba10,0xb9e0,0xb9b1,0xb981 + .word 0xb951,0xb922,0xb8f2,0xb8c3,0xb894,0xb864,0xb835,0xb806 + .word 0xb7d6,0xb7a7,0xb778,0xb749,0xb71a,0xb6eb,0xb6bc,0xb68d + .word 0xb65e,0xb62f,0xb600,0xb5d1,0xb5a2,0xb574,0xb545,0xb516 + .word 0xb4e8,0xb4b9,0xb48a,0xb45c,0xb42e,0xb3ff,0xb3d1,0xb3a2 + .word 0xb374,0xb346,0xb318,0xb2e9,0xb2bb,0xb28d,0xb25f,0xb231 + .word 0xb203,0xb1d5,0xb1a7,0xb179,0xb14b,0xb11d,0xb0f0,0xb0c2 + .word 0xb094,0xb067,0xb039,0xb00b,0xafde,0xafb0,0xaf83,0xaf55 + .word 0xaf28,0xaefb,0xaecd,0xaea0,0xae73,0xae45,0xae18,0xadeb + .word 0xadbe,0xad91,0xad64,0xad37,0xad0a,0xacdd,0xacb0,0xac83 + .word 0xac57,0xac2a,0xabfd,0xabd0,0xaba4,0xab77,0xab4a,0xab1e + .word 0xaaf1,0xaac5,0xaa98,0xaa6c,0xaa40,0xaa13,0xa9e7,0xa9bb + .word 0xa98e,0xa962,0xa936,0xa90a,0xa8de,0xa8b2,0xa886,0xa85a + .word 0xa82e,0xa802,0xa7d6,0xa7aa,0xa77e,0xa753,0xa727,0xa6fb + .word 0xa6d0,0xa6a4,0xa678,0xa64d,0xa621,0xa5f6,0xa5ca,0xa59f + .word 0xa574,0xa548,0xa51d,0xa4f2,0xa4c6,0xa49b,0xa470,0xa445 + .word 0xa41a,0xa3ef,0xa3c4,0xa399,0xa36e,0xa343,0xa318,0xa2ed + .word 0xa2c2,0xa297,0xa26d,0xa242,0xa217,0xa1ed,0xa1c2,0xa197 + .word 0xa16d,0xa142,0xa118,0xa0ed,0xa0c3,0xa098,0xa06e,0xa044 + .word 0xa01a,0x9fef,0x9fc5,0x9f9b,0x9f71,0x9f47,0x9f1c,0x9ef2 + .word 0x9ec8,0x9e9e,0x9e74,0x9e4b,0x9e21,0x9df7,0x9dcd,0x9da3 + .word 
0x9d79,0x9d50,0x9d26,0x9cfc,0x9cd3,0x9ca9,0x9c80,0x9c56 + .word 0x9c2d,0x9c03,0x9bda,0x9bb0,0x9b87,0x9b5e,0x9b34,0x9b0b + .word 0x9ae2,0x9ab9,0x9a8f,0x9a66,0x9a3d,0x9a14,0x99eb,0x99c2 + .word 0x9999,0x9970,0x9947,0x991e,0x98f6,0x98cd,0x98a4,0x987b + .word 0x9852,0x982a,0x9801,0x97d8,0x97b0,0x9787,0x975f,0x9736 + .word 0x970e,0x96e5,0x96bd,0x9695,0x966c,0x9644,0x961c,0x95f3 + .word 0x95cb,0x95a3,0x957b,0x9553,0x952b,0x9503,0x94db,0x94b3 + .word 0x948b,0x9463,0x943b,0x9413,0x93eb,0x93c3,0x939b,0x9374 + .word 0x934c,0x9324,0x92fd,0x92d5,0x92ad,0x9286,0x925e,0x9237 + .word 0x920f,0x91e8,0x91c0,0x9199,0x9172,0x914a,0x9123,0x90fc + .word 0x90d4,0x90ad,0x9086,0x905f,0x9038,0x9011,0x8fea,0x8fc3 + .word 0x8f9c,0x8f75,0x8f4e,0x8f27,0x8f00,0x8ed9,0x8eb2,0x8e8b + .word 0x8e65,0x8e3e,0x8e17,0x8df1,0x8dca,0x8da3,0x8d7d,0x8d56 + .word 0x8d30,0x8d09,0x8ce3,0x8cbc,0x8c96,0x8c6f,0x8c49,0x8c23 + .word 0x8bfc,0x8bd6,0x8bb0,0x8b8a,0x8b64,0x8b3d,0x8b17,0x8af1 + .word 0x8acb,0x8aa5,0x8a7f,0x8a59,0x8a33,0x8a0d,0x89e7,0x89c1 + .word 0x899c,0x8976,0x8950,0x892a,0x8904,0x88df,0x88b9,0x8893 + .word 0x886e,0x8848,0x8823,0x87fd,0x87d8,0x87b2,0x878d,0x8767 + .word 0x8742,0x871d,0x86f7,0x86d2,0x86ad,0x8687,0x8662,0x863d + .word 0x8618,0x85f3,0x85ce,0x85a9,0x8583,0x855e,0x8539,0x8514 + .word 0x84f0,0x84cb,0x84a6,0x8481,0x845c,0x8437,0x8412,0x83ee + .word 0x83c9,0x83a4,0x8380,0x835b,0x8336,0x8312,0x82ed,0x82c9 + .word 0x82a4,0x8280,0x825b,0x8237,0x8212,0x81ee,0x81ca,0x81a5 + .word 0x8181,0x815d,0x8138,0x8114,0x80f0,0x80cc,0x80a8,0x8084 + .word 0x8060,0x803c,0x8018,0x7ff4,0x7fd0,0x7fac,0x7f88,0x7f64 + .word 0x7f40,0x7f1c,0x7ef8,0x7ed4,0x7eb1,0x7e8d,0x7e69,0x7e45 + .word 0x7e22,0x7dfe,0x7ddb,0x7db7,0x7d93,0x7d70,0x7d4c,0x7d29 + .word 0x7d05,0x7ce2,0x7cbf,0x7c9b,0x7c78,0x7c55,0x7c31,0x7c0e + .word 0x7beb,0x7bc7,0x7ba4,0x7b81,0x7b5e,0x7b3b,0x7b18,0x7af5 + .word 0x7ad2,0x7aaf,0x7a8c,0x7a69,0x7a46,0x7a23,0x7a00,0x79dd + .word 0x79ba,0x7997,0x7975,0x7952,0x792f,0x790c,0x78ea,0x78c7 + .word 
0x78a4,0x7882,0x785f,0x783c,0x781a,0x77f7,0x77d5,0x77b2 + .word 0x7790,0x776e,0x774b,0x7729,0x7706,0x76e4,0x76c2,0x76a0 + .word 0x767d,0x765b,0x7639,0x7617,0x75f5,0x75d2,0x75b0,0x758e + .word 0x756c,0x754a,0x7528,0x7506,0x74e4,0x74c2,0x74a0,0x747e + .word 0x745d,0x743b,0x7419,0x73f7,0x73d5,0x73b4,0x7392,0x7370 + .word 0x734f,0x732d,0x730b,0x72ea,0x72c8,0x72a7,0x7285,0x7264 + .word 0x7242,0x7221,0x71ff,0x71de,0x71bc,0x719b,0x717a,0x7158 + .word 0x7137,0x7116,0x70f5,0x70d3,0x70b2,0x7091,0x7070,0x704f + .word 0x702e,0x700c,0x6feb,0x6fca,0x6fa9,0x6f88,0x6f67,0x6f46 + .word 0x6f26,0x6f05,0x6ee4,0x6ec3,0x6ea2,0x6e81,0x6e60,0x6e40 + .word 0x6e1f,0x6dfe,0x6dde,0x6dbd,0x6d9c,0x6d7c,0x6d5b,0x6d3a + .word 0x6d1a,0x6cf9,0x6cd9,0x6cb8,0x6c98,0x6c77,0x6c57,0x6c37 + .word 0x6c16,0x6bf6,0x6bd6,0x6bb5,0x6b95,0x6b75,0x6b54,0x6b34 + .word 0x6b14,0x6af4,0x6ad4,0x6ab4,0x6a94,0x6a73,0x6a53,0x6a33 + .word 0x6a13,0x69f3,0x69d3,0x69b3,0x6993,0x6974,0x6954,0x6934 + .word 0x6914,0x68f4,0x68d4,0x68b5,0x6895,0x6875,0x6855,0x6836 + .word 0x6816,0x67f6,0x67d7,0x67b7,0x6798,0x6778,0x6758,0x6739 + .word 0x6719,0x66fa,0x66db,0x66bb,0x669c,0x667c,0x665d,0x663e + .word 0x661e,0x65ff,0x65e0,0x65c0,0x65a1,0x6582,0x6563,0x6544 + .word 0x6524,0x6505,0x64e6,0x64c7,0x64a8,0x6489,0x646a,0x644b + .word 0x642c,0x640d,0x63ee,0x63cf,0x63b0,0x6391,0x6373,0x6354 + .word 0x6335,0x6316,0x62f7,0x62d9,0x62ba,0x629b,0x627c,0x625e + .word 0x623f,0x6221,0x6202,0x61e3,0x61c5,0x61a6,0x6188,0x6169 + .word 0x614b,0x612c,0x610e,0x60ef,0x60d1,0x60b3,0x6094,0x6076 + .word 0x6058,0x6039,0x601b,0x5ffd,0x5fdf,0x5fc0,0x5fa2,0x5f84 + .word 0x5f66,0x5f48,0x5f2a,0x5f0b,0x5eed,0x5ecf,0x5eb1,0x5e93 + .word 0x5e75,0x5e57,0x5e39,0x5e1b,0x5dfd,0x5de0,0x5dc2,0x5da4 + .word 0x5d86,0x5d68,0x5d4a,0x5d2d,0x5d0f,0x5cf1,0x5cd3,0x5cb6 + .word 0x5c98,0x5c7a,0x5c5d,0x5c3f,0x5c21,0x5c04,0x5be6,0x5bc9 + .word 0x5bab,0x5b8e,0x5b70,0x5b53,0x5b35,0x5b18,0x5afb,0x5add + .word 0x5ac0,0x5aa2,0x5a85,0x5a68,0x5a4b,0x5a2d,0x5a10,0x59f3 + .word 
0x59d6,0x59b8,0x599b,0x597e,0x5961,0x5944,0x5927,0x590a + .word 0x58ed,0x58d0,0x58b3,0x5896,0x5879,0x585c,0x583f,0x5822 + .word 0x5805,0x57e8,0x57cb,0x57ae,0x5791,0x5775,0x5758,0x573b + .word 0x571e,0x5702,0x56e5,0x56c8,0x56ac,0x568f,0x5672,0x5656 + .word 0x5639,0x561c,0x5600,0x55e3,0x55c7,0x55aa,0x558e,0x5571 + .word 0x5555,0x5538,0x551c,0x5500,0x54e3,0x54c7,0x54aa,0x548e + .word 0x5472,0x5456,0x5439,0x541d,0x5401,0x53e5,0x53c8,0x53ac + .word 0x5390,0x5374,0x5358,0x533c,0x5320,0x5304,0x52e8,0x52cb + .word 0x52af,0x5293,0x5277,0x525c,0x5240,0x5224,0x5208,0x51ec + .word 0x51d0,0x51b4,0x5198,0x517c,0x5161,0x5145,0x5129,0x510d + .word 0x50f2,0x50d6,0x50ba,0x509f,0x5083,0x5067,0x504c,0x5030 + .word 0x5015,0x4ff9,0x4fdd,0x4fc2,0x4fa6,0x4f8b,0x4f6f,0x4f54 + .word 0x4f38,0x4f1d,0x4f02,0x4ee6,0x4ecb,0x4eb0,0x4e94,0x4e79 + .word 0x4e5e,0x4e42,0x4e27,0x4e0c,0x4df0,0x4dd5,0x4dba,0x4d9f + .word 0x4d84,0x4d69,0x4d4d,0x4d32,0x4d17,0x4cfc,0x4ce1,0x4cc6 + .word 0x4cab,0x4c90,0x4c75,0x4c5a,0x4c3f,0x4c24,0x4c09,0x4bee + .word 0x4bd3,0x4bb9,0x4b9e,0x4b83,0x4b68,0x4b4d,0x4b32,0x4b18 + .word 0x4afd,0x4ae2,0x4ac7,0x4aad,0x4a92,0x4a77,0x4a5d,0x4a42 + .word 0x4a27,0x4a0d,0x49f2,0x49d8,0x49bd,0x49a3,0x4988,0x496e + .word 0x4953,0x4939,0x491e,0x4904,0x48e9,0x48cf,0x48b5,0x489a + .word 0x4880,0x4865,0x484b,0x4831,0x4817,0x47fc,0x47e2,0x47c8 + .word 0x47ae,0x4793,0x4779,0x475f,0x4745,0x472b,0x4711,0x46f6 + .word 0x46dc,0x46c2,0x46a8,0x468e,0x4674,0x465a,0x4640,0x4626 + .word 0x460c,0x45f2,0x45d8,0x45be,0x45a5,0x458b,0x4571,0x4557 + .word 0x453d,0x4523,0x4509,0x44f0,0x44d6,0x44bc,0x44a2,0x4489 + .word 0x446f,0x4455,0x443c,0x4422,0x4408,0x43ef,0x43d5,0x43bc + .word 0x43a2,0x4388,0x436f,0x4355,0x433c,0x4322,0x4309,0x42ef + .word 0x42d6,0x42bc,0x42a3,0x428a,0x4270,0x4257,0x423d,0x4224 + .word 0x420b,0x41f2,0x41d8,0x41bf,0x41a6,0x418c,0x4173,0x415a + .word 0x4141,0x4128,0x410e,0x40f5,0x40dc,0x40c3,0x40aa,0x4091 + .word 0x4078,0x405f,0x4046,0x402d,0x4014,0x3ffb,0x3fe2,0x3fc9 + .word 
0x3fb0,0x3f97,0x3f7e,0x3f65,0x3f4c,0x3f33,0x3f1a,0x3f01 + .word 0x3ee8,0x3ed0,0x3eb7,0x3e9e,0x3e85,0x3e6c,0x3e54,0x3e3b + .word 0x3e22,0x3e0a,0x3df1,0x3dd8,0x3dc0,0x3da7,0x3d8e,0x3d76 + .word 0x3d5d,0x3d45,0x3d2c,0x3d13,0x3cfb,0x3ce2,0x3cca,0x3cb1 + .word 0x3c99,0x3c80,0x3c68,0x3c50,0x3c37,0x3c1f,0x3c06,0x3bee + .word 0x3bd6,0x3bbd,0x3ba5,0x3b8d,0x3b74,0x3b5c,0x3b44,0x3b2b + .word 0x3b13,0x3afb,0x3ae3,0x3acb,0x3ab2,0x3a9a,0x3a82,0x3a6a + .word 0x3a52,0x3a3a,0x3a22,0x3a09,0x39f1,0x39d9,0x39c1,0x39a9 + .word 0x3991,0x3979,0x3961,0x3949,0x3931,0x3919,0x3901,0x38ea + .word 0x38d2,0x38ba,0x38a2,0x388a,0x3872,0x385a,0x3843,0x382b + .word 0x3813,0x37fb,0x37e3,0x37cc,0x37b4,0x379c,0x3785,0x376d + .word 0x3755,0x373e,0x3726,0x370e,0x36f7,0x36df,0x36c8,0x36b0 + .word 0x3698,0x3681,0x3669,0x3652,0x363a,0x3623,0x360b,0x35f4 + .word 0x35dc,0x35c5,0x35ae,0x3596,0x357f,0x3567,0x3550,0x3539 + .word 0x3521,0x350a,0x34f3,0x34db,0x34c4,0x34ad,0x3496,0x347e + .word 0x3467,0x3450,0x3439,0x3422,0x340a,0x33f3,0x33dc,0x33c5 + .word 0x33ae,0x3397,0x3380,0x3368,0x3351,0x333a,0x3323,0x330c + .word 0x32f5,0x32de,0x32c7,0x32b0,0x3299,0x3282,0x326c,0x3255 + .word 0x323e,0x3227,0x3210,0x31f9,0x31e2,0x31cb,0x31b5,0x319e + .word 0x3187,0x3170,0x3159,0x3143,0x312c,0x3115,0x30fe,0x30e8 + .word 0x30d1,0x30ba,0x30a4,0x308d,0x3076,0x3060,0x3049,0x3033 + .word 0x301c,0x3005,0x2fef,0x2fd8,0x2fc2,0x2fab,0x2f95,0x2f7e + .word 0x2f68,0x2f51,0x2f3b,0x2f24,0x2f0e,0x2ef8,0x2ee1,0x2ecb + .word 0x2eb4,0x2e9e,0x2e88,0x2e71,0x2e5b,0x2e45,0x2e2e,0x2e18 + .word 0x2e02,0x2dec,0x2dd5,0x2dbf,0x2da9,0x2d93,0x2d7c,0x2d66 + .word 0x2d50,0x2d3a,0x2d24,0x2d0e,0x2cf8,0x2ce1,0x2ccb,0x2cb5 + .word 0x2c9f,0x2c89,0x2c73,0x2c5d,0x2c47,0x2c31,0x2c1b,0x2c05 + .word 0x2bef,0x2bd9,0x2bc3,0x2bad,0x2b97,0x2b81,0x2b6c,0x2b56 + .word 0x2b40,0x2b2a,0x2b14,0x2afe,0x2ae8,0x2ad3,0x2abd,0x2aa7 + .word 0x2a91,0x2a7c,0x2a66,0x2a50,0x2a3a,0x2a25,0x2a0f,0x29f9 + .word 0x29e4,0x29ce,0x29b8,0x29a3,0x298d,0x2977,0x2962,0x294c + .word 
0x2937,0x2921,0x290c,0x28f6,0x28e0,0x28cb,0x28b5,0x28a0 + .word 0x288b,0x2875,0x2860,0x284a,0x2835,0x281f,0x280a,0x27f5 + .word 0x27df,0x27ca,0x27b4,0x279f,0x278a,0x2774,0x275f,0x274a + .word 0x2735,0x271f,0x270a,0x26f5,0x26e0,0x26ca,0x26b5,0x26a0 + .word 0x268b,0x2676,0x2660,0x264b,0x2636,0x2621,0x260c,0x25f7 + .word 0x25e2,0x25cd,0x25b8,0x25a2,0x258d,0x2578,0x2563,0x254e + .word 0x2539,0x2524,0x250f,0x24fa,0x24e5,0x24d1,0x24bc,0x24a7 + .word 0x2492,0x247d,0x2468,0x2453,0x243e,0x2429,0x2415,0x2400 + .word 0x23eb,0x23d6,0x23c1,0x23ad,0x2398,0x2383,0x236e,0x235a + .word 0x2345,0x2330,0x231c,0x2307,0x22f2,0x22dd,0x22c9,0x22b4 + .word 0x22a0,0x228b,0x2276,0x2262,0x224d,0x2239,0x2224,0x2210 + .word 0x21fb,0x21e6,0x21d2,0x21bd,0x21a9,0x2194,0x2180,0x216c + .word 0x2157,0x2143,0x212e,0x211a,0x2105,0x20f1,0x20dd,0x20c8 + .word 0x20b4,0x20a0,0x208b,0x2077,0x2063,0x204e,0x203a,0x2026 + .word 0x2012,0x1ffd,0x1fe9,0x1fd5,0x1fc1,0x1fac,0x1f98,0x1f84 + .word 0x1f70,0x1f5c,0x1f47,0x1f33,0x1f1f,0x1f0b,0x1ef7,0x1ee3 + .word 0x1ecf,0x1ebb,0x1ea7,0x1e93,0x1e7f,0x1e6a,0x1e56,0x1e42 + .word 0x1e2e,0x1e1a,0x1e06,0x1df3,0x1ddf,0x1dcb,0x1db7,0x1da3 + .word 0x1d8f,0x1d7b,0x1d67,0x1d53,0x1d3f,0x1d2b,0x1d18,0x1d04 + .word 0x1cf0,0x1cdc,0x1cc8,0x1cb5,0x1ca1,0x1c8d,0x1c79,0x1c65 + .word 0x1c52,0x1c3e,0x1c2a,0x1c17,0x1c03,0x1bef,0x1bdb,0x1bc8 + .word 0x1bb4,0x1ba0,0x1b8d,0x1b79,0x1b66,0x1b52,0x1b3e,0x1b2b + .word 0x1b17,0x1b04,0x1af0,0x1add,0x1ac9,0x1ab6,0x1aa2,0x1a8f + .word 0x1a7b,0x1a68,0x1a54,0x1a41,0x1a2d,0x1a1a,0x1a06,0x19f3 + .word 0x19e0,0x19cc,0x19b9,0x19a5,0x1992,0x197f,0x196b,0x1958 + .word 0x1945,0x1931,0x191e,0x190b,0x18f8,0x18e4,0x18d1,0x18be + .word 0x18ab,0x1897,0x1884,0x1871,0x185e,0x184b,0x1837,0x1824 + .word 0x1811,0x17fe,0x17eb,0x17d8,0x17c4,0x17b1,0x179e,0x178b + .word 0x1778,0x1765,0x1752,0x173f,0x172c,0x1719,0x1706,0x16f3 + .word 0x16e0,0x16cd,0x16ba,0x16a7,0x1694,0x1681,0x166e,0x165b + .word 0x1648,0x1635,0x1623,0x1610,0x15fd,0x15ea,0x15d7,0x15c4 + .word 
0x15b1,0x159f,0x158c,0x1579,0x1566,0x1553,0x1541,0x152e + .word 0x151b,0x1508,0x14f6,0x14e3,0x14d0,0x14bd,0x14ab,0x1498 + .word 0x1485,0x1473,0x1460,0x144d,0x143b,0x1428,0x1416,0x1403 + .word 0x13f0,0x13de,0x13cb,0x13b9,0x13a6,0x1394,0x1381,0x136f + .word 0x135c,0x1349,0x1337,0x1325,0x1312,0x1300,0x12ed,0x12db + .word 0x12c8,0x12b6,0x12a3,0x1291,0x127f,0x126c,0x125a,0x1247 + .word 0x1235,0x1223,0x1210,0x11fe,0x11ec,0x11d9,0x11c7,0x11b5 + .word 0x11a3,0x1190,0x117e,0x116c,0x1159,0x1147,0x1135,0x1123 + .word 0x1111,0x10fe,0x10ec,0x10da,0x10c8,0x10b6,0x10a4,0x1091 + .word 0x107f,0x106d,0x105b,0x1049,0x1037,0x1025,0x1013,0x1001 + .word 0x0fef,0x0fdc,0x0fca,0x0fb8,0x0fa6,0x0f94,0x0f82,0x0f70 + .word 0x0f5e,0x0f4c,0x0f3a,0x0f28,0x0f17,0x0f05,0x0ef3,0x0ee1 + .word 0x0ecf,0x0ebd,0x0eab,0x0e99,0x0e87,0x0e75,0x0e64,0x0e52 + .word 0x0e40,0x0e2e,0x0e1c,0x0e0a,0x0df9,0x0de7,0x0dd5,0x0dc3 + .word 0x0db2,0x0da0,0x0d8e,0x0d7c,0x0d6b,0x0d59,0x0d47,0x0d35 + .word 0x0d24,0x0d12,0x0d00,0x0cef,0x0cdd,0x0ccb,0x0cba,0x0ca8 + .word 0x0c97,0x0c85,0x0c73,0x0c62,0x0c50,0x0c3f,0x0c2d,0x0c1c + .word 0x0c0a,0x0bf8,0x0be7,0x0bd5,0x0bc4,0x0bb2,0x0ba1,0x0b8f + .word 0x0b7e,0x0b6c,0x0b5b,0x0b4a,0x0b38,0x0b27,0x0b15,0x0b04 + .word 0x0af2,0x0ae1,0x0ad0,0x0abe,0x0aad,0x0a9c,0x0a8a,0x0a79 + .word 0x0a68,0x0a56,0x0a45,0x0a34,0x0a22,0x0a11,0x0a00,0x09ee + .word 0x09dd,0x09cc,0x09bb,0x09a9,0x0998,0x0987,0x0976,0x0965 + .word 0x0953,0x0942,0x0931,0x0920,0x090f,0x08fe,0x08ec,0x08db + .word 0x08ca,0x08b9,0x08a8,0x0897,0x0886,0x0875,0x0864,0x0853 + .word 0x0842,0x0831,0x081f,0x080e,0x07fd,0x07ec,0x07db,0x07ca + .word 0x07b9,0x07a8,0x0798,0x0787,0x0776,0x0765,0x0754,0x0743 + .word 0x0732,0x0721,0x0710,0x06ff,0x06ee,0x06dd,0x06cd,0x06bc + .word 0x06ab,0x069a,0x0689,0x0678,0x0668,0x0657,0x0646,0x0635 + .word 0x0624,0x0614,0x0603,0x05f2,0x05e1,0x05d1,0x05c0,0x05af + .word 0x059e,0x058e,0x057d,0x056c,0x055c,0x054b,0x053a,0x052a + .word 0x0519,0x0508,0x04f8,0x04e7,0x04d6,0x04c6,0x04b5,0x04a5 + .word 
0x0494,0x0484,0x0473,0x0462,0x0452,0x0441,0x0431,0x0420 + .word 0x0410,0x03ff,0x03ef,0x03de,0x03ce,0x03bd,0x03ad,0x039c + .word 0x038c,0x037b,0x036b,0x035b,0x034a,0x033a,0x0329,0x0319 + .word 0x0309,0x02f8,0x02e8,0x02d7,0x02c7,0x02b7,0x02a6,0x0296 + .word 0x0286,0x0275,0x0265,0x0255,0x0245,0x0234,0x0224,0x0214 + .word 0x0204,0x01f3,0x01e3,0x01d3,0x01c3,0x01b2,0x01a2,0x0192 + .word 0x0182,0x0172,0x0161,0x0151,0x0141,0x0131,0x0121,0x0111 + .word 0x0101,0x00f0,0x00e0,0x00d0,0x00c0,0x00b0,0x00a0,0x0090 + .word 0x0080,0x0070,0x0060,0x0050,0x0040,0x0030,0x0020,0x0010 DATAEND() ASM_END() diff --git a/gmp/mpn/alpha/lshift.asm b/gmp/mpn/alpha/lshift.asm index c62a856aea..eb5b2a0b68 100644 --- a/gmp/mpn/alpha/lshift.asm +++ b/gmp/mpn/alpha/lshift.asm @@ -1,39 +1,28 @@ dnl Alpha mpn_lshift -- Shift a number left. -dnl Copyright 1994, 1995, 2000, 2003, 2009 Free Software Foundation, Inc. +dnl Copyright 1994, 1995, 2000, 2002, 2003 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. 
-dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/limb -C EV4: ? -C EV5: 3.25 -C EV6: 1.75 +C EV4: 4.75 +C EV5: 4 +C EV6: 2 C INPUT PARAMETERS C rp r16 @@ -46,137 +35,63 @@ ASM_START() PROLOGUE(mpn_lshift) s8addq r18,r17,r17 C make r17 point at end of s1 ldq r4,-8(r17) C load first limb - subq r31,r19,r20 + subq r17,8,r17 + subq r31,r19,r7 s8addq r18,r16,r16 C make r16 point at end of RES subq r18,1,r18 - and r18,4-1,r28 C number of limbs in first loop - srl r4,r20,r0 C compute function result + and r18,4-1,r20 C number of limbs in first loop + srl r4,r7,r0 C compute function result - beq r28,L(L0) - subq r18,r28,r18 + beq r20,$L0 + subq r18,r20,r18 ALIGN(8) -L(top0): - ldq r3,-16(r17) +$Loop0: ldq r3,-8(r17) subq r16,8,r16 - sll r4,r19,r5 subq r17,8,r17 - subq r28,1,r28 - srl r3,r20,r6 + subq r20,1,r20 + sll r4,r19,r5 + srl r3,r7,r6 bis r3,r3,r4 bis r5,r6,r8 stq r8,0(r16) - bne r28,L(top0) + bne r20,$Loop0 -L(L0): sll r4,r19,r24 - beq r18,L(end) -C warm up phase 1 - ldq r1,-16(r17) - subq r18,4,r18 - ldq r2,-24(r17) - ldq r3,-32(r17) - ldq r4,-40(r17) -C warm up phase 2 - srl r1,r20,r7 - sll r1,r19,r21 - srl r2,r20,r8 - beq r18,L(end1) - ldq r1,-48(r17) - sll r2,r19,r22 - ldq r2,-56(r17) - srl r3,r20,r5 - bis r7,r24,r7 - sll r3,r19,r23 - bis r8,r21,r8 - srl r4,r20,r6 - ldq r3,-64(r17) - sll r4,r19,r24 - ldq r4,-72(r17) - subq r18,4,r18 - beq r18,L(end2) - ALIGN(16) -C main loop -L(top): stq r7,-8(r16) - bis r5,r22,r5 - stq r8,-16(r16) - bis r6,r23,r6 - - srl r1,r20,r7 - subq r18,4,r18 - sll r1,r19,r21 - unop C ldq 
r31,-96(r17) - - srl r2,r20,r8 - ldq r1,-80(r17) - sll r2,r19,r22 - ldq r2,-88(r17) - - stq r5,-24(r16) - bis r7,r24,r7 - stq r6,-32(r16) - bis r8,r21,r8 - - srl r3,r20,r5 - unop C ldq r31,-96(r17) - sll r3,r19,r23 +$L0: beq r18,$Lend + + ALIGN(8) +$Loop: ldq r3,-8(r17) subq r16,32,r16 + subq r18,4,r18 + sll r4,r19,r5 + srl r3,r7,r6 + + ldq r4,-16(r17) + sll r3,r19,r1 + bis r5,r6,r8 + stq r8,24(r16) + srl r4,r7,r2 + + ldq r3,-24(r17) + sll r4,r19,r5 + bis r1,r2,r8 + stq r8,16(r16) + srl r3,r7,r6 - srl r4,r20,r6 - ldq r3,-96(r17) - sll r4,r19,r24 - ldq r4,-104(r17) + ldq r4,-32(r17) + sll r3,r19,r1 + bis r5,r6,r8 + stq r8,8(r16) + srl r4,r7,r2 subq r17,32,r17 - bne r18,L(top) -C cool down phase 2/1 -L(end2): - stq r7,-8(r16) - bis r5,r22,r5 - stq r8,-16(r16) - bis r6,r23,r6 - srl r1,r20,r7 - sll r1,r19,r21 - srl r2,r20,r8 - sll r2,r19,r22 - stq r5,-24(r16) - bis r7,r24,r7 - stq r6,-32(r16) - bis r8,r21,r8 - srl r3,r20,r5 - sll r3,r19,r23 - srl r4,r20,r6 - sll r4,r19,r24 -C cool down phase 2/2 - stq r7,-40(r16) - bis r5,r22,r5 - stq r8,-48(r16) - bis r6,r23,r6 - stq r5,-56(r16) - stq r6,-64(r16) -C cool down phase 2/3 - stq r24,-72(r16) - ret r31,(r26),1 + bis r1,r2,r8 + stq r8,0(r16) -C cool down phase 1/1 -L(end1): - sll r2,r19,r22 - srl r3,r20,r5 - bis r7,r24,r7 - sll r3,r19,r23 - bis r8,r21,r8 - srl r4,r20,r6 - sll r4,r19,r24 -C cool down phase 1/2 - stq r7,-8(r16) - bis r5,r22,r5 - stq r8,-16(r16) - bis r6,r23,r6 - stq r5,-24(r16) - stq r6,-32(r16) - stq r24,-40(r16) - ret r31,(r26),1 + bgt r18,$Loop -L(end): stq r24,-8(r16) +$Lend: sll r4,r19,r8 + stq r8,-8(r16) ret r31,(r26),1 EPILOGUE(mpn_lshift) ASM_END() diff --git a/gmp/mpn/alpha/mod_34lsub1.asm b/gmp/mpn/alpha/mod_34lsub1.asm index 1b03b637d8..e5c1d221f9 100644 --- a/gmp/mpn/alpha/mod_34lsub1.asm +++ b/gmp/mpn/alpha/mod_34lsub1.asm @@ -3,30 +3,19 @@ dnl Alpha mpn_mod_34lsub1. dnl Copyright 2002 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. 
-dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/mode1o.asm b/gmp/mpn/alpha/mode1o.asm index 96dccc73ee..0611cd8acb 100644 --- a/gmp/mpn/alpha/mode1o.asm +++ b/gmp/mpn/alpha/mode1o.asm @@ -1,32 +1,21 @@ dnl Alpha mpn_modexact_1c_odd -- mpn exact remainder dnl Copyright 2003, 2004 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/mul_1.asm b/gmp/mpn/alpha/mul_1.asm index a7cdbcf8eb..30b17021ba 100644 --- a/gmp/mpn/alpha/mul_1.asm +++ b/gmp/mpn/alpha/mul_1.asm @@ -4,30 +4,19 @@ dnl the result in a second limb vector. 
dnl Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/rshift.asm b/gmp/mpn/alpha/rshift.asm index 6e1e214558..ccedff8071 100644 --- a/gmp/mpn/alpha/rshift.asm +++ b/gmp/mpn/alpha/rshift.asm @@ -1,39 +1,28 @@ dnl Alpha mpn_rshift -- Shift a number right. -dnl Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc. +dnl Copyright 1994, 1995, 2000, 2002 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. 
-dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/limb -C EV4: ? 
-C EV5: 3.25 -C EV6: 1.75 +C EV4: 4.75 +C EV5: 3.75 +C EV6: 2 C INPUT PARAMETERS C rp r16 @@ -45,136 +34,62 @@ C cnt r19 ASM_START() PROLOGUE(mpn_rshift) ldq r4,0(r17) C load first limb - subq r31,r19,r20 + addq r17,8,r17 + subq r31,r19,r7 subq r18,1,r18 - and r18,4-1,r28 C number of limbs in first loop - sll r4,r20,r0 C compute function result + and r18,4-1,r20 C number of limbs in first loop + sll r4,r7,r0 C compute function result - beq r28,L(L0) - subq r18,r28,r18 + beq r20,$L0 + subq r18,r20,r18 ALIGN(8) -L(top0): - ldq r3,8(r17) +$Loop0: ldq r3,0(r17) addq r16,8,r16 - srl r4,r19,r5 addq r17,8,r17 - subq r28,1,r28 - sll r3,r20,r6 + subq r20,1,r20 + srl r4,r19,r5 + sll r3,r7,r6 bis r3,r3,r4 bis r5,r6,r8 stq r8,-8(r16) - bne r28,L(top0) + bne r20,$Loop0 -L(L0): srl r4,r19,r24 - beq r18,L(end) -C warm up phase 1 - ldq r1,8(r17) - subq r18,4,r18 - ldq r2,16(r17) - ldq r3,24(r17) - ldq r4,32(r17) -C warm up phase 2 - sll r1,r20,r7 - srl r1,r19,r21 - sll r2,r20,r8 - beq r18,L(end1) - ldq r1,40(r17) - srl r2,r19,r22 - ldq r2,48(r17) - sll r3,r20,r5 - bis r7,r24,r7 - srl r3,r19,r23 - bis r8,r21,r8 - sll r4,r20,r6 - ldq r3,56(r17) - srl r4,r19,r24 - ldq r4,64(r17) - subq r18,4,r18 - beq r18,L(end2) - ALIGN(16) -C main loop -L(top): stq r7,0(r16) - bis r5,r22,r5 - stq r8,8(r16) - bis r6,r23,r6 - - sll r1,r20,r7 - subq r18,4,r18 - srl r1,r19,r21 - unop C ldq r31,-96(r17) - - sll r2,r20,r8 - ldq r1,72(r17) - srl r2,r19,r22 - ldq r2,80(r17) - - stq r5,16(r16) - bis r7,r24,r7 - stq r6,24(r16) - bis r8,r21,r8 - - sll r3,r20,r5 - unop C ldq r31,-96(r17) - srl r3,r19,r23 +$L0: beq r18,$Lend + + ALIGN(8) +$Loop: ldq r3,0(r17) addq r16,32,r16 + subq r18,4,r18 + srl r4,r19,r5 + sll r3,r7,r6 + + ldq r4,8(r17) + srl r3,r19,r1 + bis r5,r6,r8 + stq r8,-32(r16) + sll r4,r7,r2 + + ldq r3,16(r17) + srl r4,r19,r5 + bis r1,r2,r8 + stq r8,-24(r16) + sll r3,r7,r6 - sll r4,r20,r6 - ldq r3,88(r17) - srl r4,r19,r24 - ldq r4,96(r17) + ldq r4,24(r17) + srl r3,r19,r1 + bis r5,r6,r8 + stq 
r8,-16(r16) + sll r4,r7,r2 addq r17,32,r17 - bne r18,L(top) -C cool down phase 2/1 -L(end2): - stq r7,0(r16) - bis r5,r22,r5 - stq r8,8(r16) - bis r6,r23,r6 - sll r1,r20,r7 - srl r1,r19,r21 - sll r2,r20,r8 - srl r2,r19,r22 - stq r5,16(r16) - bis r7,r24,r7 - stq r6,24(r16) - bis r8,r21,r8 - sll r3,r20,r5 - srl r3,r19,r23 - sll r4,r20,r6 - srl r4,r19,r24 -C cool down phase 2/2 - stq r7,32(r16) - bis r5,r22,r5 - stq r8,40(r16) - bis r6,r23,r6 - stq r5,48(r16) - stq r6,56(r16) -C cool down phase 2/3 - stq r24,64(r16) - ret r31,(r26),1 + bis r1,r2,r8 + stq r8,-8(r16) -C cool down phase 1/1 -L(end1): - srl r2,r19,r22 - sll r3,r20,r5 - bis r7,r24,r7 - srl r3,r19,r23 - bis r8,r21,r8 - sll r4,r20,r6 - srl r4,r19,r24 -C cool down phase 1/2 - stq r7,0(r16) - bis r5,r22,r5 - stq r8,8(r16) - bis r6,r23,r6 - stq r5,16(r16) - stq r6,24(r16) - stq r24,32(r16) - ret r31,(r26),1 + bgt r18,$Loop -L(end): stq r24,0(r16) +$Lend: srl r4,r19,r8 + stq r8,0(r16) ret r31,(r26),1 EPILOGUE(mpn_rshift) ASM_END() diff --git a/gmp/mpn/alpha/sec_tabselect.asm b/gmp/mpn/alpha/sec_tabselect.asm deleted file mode 100644 index 679b16926e..0000000000 --- a/gmp/mpn/alpha/sec_tabselect.asm +++ /dev/null @@ -1,137 +0,0 @@ -dnl Alpha mpn_sec_tabselect. - -dnl Contributed to the GNU project by Torbjörn Granlund. - -dnl Copyright 2011-2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. 
-dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C EV4: ? -C EV5: 2.25 -C EV6: 1.64 - -define(`rp', `r16') -define(`tp', `r17') -define(`n', `r18') -define(`nents', `r19') -define(`which', `r20') - -define(`i', `r21') -define(`j', `r22') -define(`stride', `r23') -define(`mask', `r24') -define(`k', `r25') - - -ASM_START() -PROLOGUE(mpn_sec_tabselect) - subq n, 4, j C outer loop induction variable - - blt j, L(outer_end) -L(outer_top): - mov tp, r8 - lda r0, 0(r31) - lda r1, 0(r31) - lda r2, 0(r31) - lda r3, 0(r31) - subq j, 4, j C outer loop induction variable - subq nents, which, k - mov nents, i - - ALIGN(16) -L(top): ldq r4, 0(tp) - ldq r5, 8(tp) - cmpeq k, i, mask - subq i, 1, i - subq r31, mask, mask - ldq r6, 16(tp) - ldq r7, 24(tp) - and r4, mask, r4 - and r5, mask, r5 - or r0, r4, r0 - or r1, r5, r1 - and r6, mask, r6 - and r7, mask, r7 - or r2, r6, r2 - or r3, r7, r3 - s8addq n, tp, tp - bne i, L(top) - - stq r0, 0(rp) - stq r1, 8(rp) - stq r2, 16(rp) - stq r3, 24(rp) - addq r8, 32, tp - addq rp, 32, rp - bge j, L(outer_top) -L(outer_end): - - and n, 2, r0 - beq r0, L(b0x) -L(b1x): mov tp, r8 - lda r0, 0(r31) - lda r1, 0(r31) - subq nents, which, k - mov nents, i - ALIGN(16) -L(tp2): ldq r4, 0(tp) - ldq r5, 8(tp) - cmpeq k, i, mask - subq i, 1, i - subq r31, mask, mask - and r4, mask, r4 - and r5, mask, r5 - or r0, r4, r0 - or r1, r5, r1 - s8addq n, tp, tp - bne i, L(tp2) - stq r0, 0(rp) - stq r1, 8(rp) - addq r8, 16, tp - addq rp, 16, rp - -L(b0x): and n, 1, r0 - beq r0, L(b00) -L(b01): lda r0, 0(r31) - 
subq nents, which, k - mov nents, i - ALIGN(16) -L(tp1): ldq r4, 0(tp) - cmpeq k, i, mask - subq i, 1, i - subq r31, mask, mask - and r4, mask, r4 - or r0, r4, r0 - s8addq n, tp, tp - bne i, L(tp1) - stq r0, 0(rp) - -L(b00): ret r31, (r26), 1 -EPILOGUE() diff --git a/gmp/mpn/alpha/sqr_diag_addlsh1.asm b/gmp/mpn/alpha/sqr_diag_addlsh1.asm deleted file mode 100644 index ee219ef7e8..0000000000 --- a/gmp/mpn/alpha/sqr_diag_addlsh1.asm +++ /dev/null @@ -1,93 +0,0 @@ -dnl Alpha mpn_sqr_diag_addlsh1. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C EV4: ? -C EV5: 10.2 -C EV6: 4.5 - -C Ideally, one-way code could run at 9 c/l (limited by mulq+umulh) on ev5 and -C about 3.75 c/l on ev6. Two-way code could run at about 3.25 c/l on ev6. 
- -C Algorithm: We allow ourselves to propagate carry to a product high word -C without worrying for carry out, since (B-1)^2 = B^2-2B+1 has a high word of -C B-2, i.e, will not spill. We propagate carry similarly to a product low word -C since the problem value B-1 is a quadratic non-residue mod B, but our -C products are squares. - -define(`rp', `r16') -define(`tp', `r17') -define(`up', `r18') -define(`n', `r19') - -ASM_START() -PROLOGUE(mpn_sqr_diag_addlsh1) - ldq r0, 0(up) - bis r31, r31, r21 - bis r31, r31, r3 - mulq r0, r0, r7 - stq r7, 0(rp) - umulh r0, r0, r6 - lda n, -1(n) - - ALIGN(16) -L(top): ldq r0, 8(up) - lda up, 8(up) - ldq r8, 0(tp) - ldq r20, 8(tp) - mulq r0, r0, r7 - lda tp, 16(tp) - sll r8, 1, r23 - srl r8, 63, r22 - or r21, r23, r23 - sll r20, 1, r24 - addq r3, r6, r6 C cannot carry per comment above - or r22, r24, r24 - addq r23, r6, r21 - umulh r0, r0, r6 - cmpult r21, r23, r1 - addq r1, r7, r7 C cannot carry per comment above - stq r21, 8(rp) - addq r24, r7, r22 - stq r22, 16(rp) - lda n, -1(n) - cmpult r22, r7, r3 - srl r20, 63, r21 - lda rp, 16(rp) - bne n, L(top) - - addq r3, r6, r6 C cannot carry per comment above - addq r21, r6, r21 - stq r21, 8(rp) - ret r31, (r26), 1 -EPILOGUE() -ASM_END() diff --git a/gmp/mpn/alpha/sqr_diagonal.asm b/gmp/mpn/alpha/sqr_diagonal.asm new file mode 100644 index 0000000000..2aa7f2e597 --- /dev/null +++ b/gmp/mpn/alpha/sqr_diagonal.asm @@ -0,0 +1,65 @@ +dnl Alpha mpn_sqr_diagonal. + +dnl Copyright 2001, 2002 Free Software Foundation, Inc. + +dnl This file is part of the GNU MP Library. + +dnl The GNU MP Library is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. 
+ +dnl The GNU MP Library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. + +include(`../config.m4') + +C cycles/limb +C EV4: 42 +C EV5: 18 +C EV6: 3.45 + +C INPUT PARAMETERS +C rp r16 +C up r17 +C n r18 + + +ASM_START() +PROLOGUE(mpn_sqr_diagonal) + ldq r2,0(r17) C r2 = s1_limb + lda r18,-2(r18) C size -= 2 + mulq r2,r2,r3 C r3 = prod_low + umulh r2,r2,r4 C r4 = prod_high + blt r18,$Lend1 C jump if size was == 1 + ldq r2,8(r17) C r2 = s1_limb + beq r18,$Lend2 C jump if size was == 2 + + ALIGN(8) +$Loop: stq r3,0(r16) + mulq r2,r2,r3 C r3 = prod_low + lda r18,-1(r18) C size-- + stq r4,8(r16) + umulh r2,r2,r4 C r4 = cy_limb + ldq r2,16(r17) C r2 = s1_limb + lda r17,8(r17) C s1_ptr++ + lda r16,16(r16) C res_ptr++ + bne r18,$Loop + +$Lend2: stq r3,0(r16) + mulq r2,r2,r3 C r3 = prod_low + stq r4,8(r16) + umulh r2,r2,r4 C r4 = cy_limb + stq r3,16(r16) + stq r4,24(r16) + ret r31,(r26),1 +$Lend1: stq r3,0(r16) + stq r4,8(r16) + ret r31,(r26),1 +EPILOGUE(mpn_sqr_diagonal) +ASM_END() diff --git a/gmp/mpn/alpha/sub_n.asm b/gmp/mpn/alpha/sub_n.asm index 1bb72263f8..842a4f0b54 100644 --- a/gmp/mpn/alpha/sub_n.asm +++ b/gmp/mpn/alpha/sub_n.asm @@ -1,164 +1,117 @@ -dnl Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0 -dnl and store difference in a third limb vector. +dnl Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0 and +dnl store difference in a third limb vector. -dnl Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc. +dnl Copyright 1995, 2000, 2002, 2005 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. 
-dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') C cycles/limb -C EV4: ? 
-C EV5: 4.75 -C EV6: 3 +C EV4: 7.75 +C EV5: 5.75 +C EV6: 4 -dnl INPUT PARAMETERS -dnl res_ptr r16 -dnl s1_ptr r17 -dnl s2_ptr r18 -dnl size r19 +C INPUT PARAMETERS +C rp r16 +C up r17 +C vp r18 +C n r19 ASM_START() -PROLOGUE(mpn_sub_nc) - bis r31,r20,r25 - br L(com) -EPILOGUE() PROLOGUE(mpn_sub_n) - bis r31,r31,r25 C clear cy -L(com): subq r19,4,r19 C decr loop cnt - blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop -C Start software pipeline for 1st loop - ldq r0,0(r18) - ldq r4,0(r17) - ldq r1,8(r18) - ldq r5,8(r17) - addq r17,32,r17 C update s1_ptr - subq r4,r0,r28 C 1st main subtract - ldq r2,16(r18) - subq r28,r25,r20 C 1st carry subtract - ldq r3,24(r18) - cmpult r4,r0,r8 C compute cy from last subtract - ldq r6,-16(r17) - cmpult r28,r25,r25 C compute cy from last subtract - ldq r7,-8(r17) - bis r8,r25,r25 C combine cy from the two subtracts - subq r19,4,r19 C decr loop cnt - subq r5,r1,r28 C 2nd main subtract - addq r18,32,r18 C update s2_ptr - subq r28,r25,r21 C 2nd carry subtract - cmpult r5,r1,r8 C compute cy from last subtract - blt r19,$Lend1 C if less than 4 limbs remain, jump -C 1st loop handles groups of 4 limbs in a software pipeline - ALIGN(16) -$Loop: cmpult r28,r25,r25 C compute cy from last subtract - ldq r0,0(r18) - bis r8,r25,r25 C combine cy from the two subtracts - ldq r1,8(r18) - subq r6,r2,r28 C 3rd main subtract - ldq r4,0(r17) - subq r28,r25,r22 C 3rd carry subtract - ldq r5,8(r17) - cmpult r6,r2,r8 C compute cy from last subtract - cmpult r28,r25,r25 C compute cy from last subtract - stq r20,0(r16) - bis r8,r25,r25 C combine cy from the two subtracts - stq r21,8(r16) - subq r7,r3,r28 C 4th main subtract - subq r28,r25,r23 C 4th carry subtract - cmpult r7,r3,r8 C compute cy from last subtract - cmpult r28,r25,r25 C compute cy from last subtract - addq r17,32,r17 C update s1_ptr - bis r8,r25,r25 C combine cy from the two subtracts - addq r16,32,r16 C update res_ptr - subq r4,r0,r28 C 1st main subtract - ldq r2,16(r18) - subq 
r28,r25,r20 C 1st carry subtract - ldq r3,24(r18) - cmpult r4,r0,r8 C compute cy from last subtract - ldq r6,-16(r17) - cmpult r28,r25,r25 C compute cy from last subtract - ldq r7,-8(r17) - bis r8,r25,r25 C combine cy from the two subtracts - subq r19,4,r19 C decr loop cnt - stq r22,-16(r16) - subq r5,r1,r28 C 2nd main subtract - stq r23,-8(r16) - subq r28,r25,r21 C 2nd carry subtract - addq r18,32,r18 C update s2_ptr - cmpult r5,r1,r8 C compute cy from last subtract - bge r19,$Loop -C Finish software pipeline for 1st loop -$Lend1: cmpult r28,r25,r25 C compute cy from last subtract - bis r8,r25,r25 C combine cy from the two subtracts - subq r6,r2,r28 C cy add - subq r28,r25,r22 C 3rd main subtract - cmpult r6,r2,r8 C compute cy from last subtract - cmpult r28,r25,r25 C compute cy from last subtract - stq r20,0(r16) - bis r8,r25,r25 C combine cy from the two subtracts - stq r21,8(r16) - subq r7,r3,r28 C cy add - subq r28,r25,r23 C 4th main subtract - cmpult r7,r3,r8 C compute cy from last subtract - cmpult r28,r25,r25 C compute cy from last subtract - bis r8,r25,r25 C combine cy from the two subtracts - addq r16,32,r16 C update res_ptr - stq r22,-16(r16) - stq r23,-8(r16) -$Lend2: addq r19,4,r19 C restore loop cnt - beq r19,$Lret -C Start software pipeline for 2nd loop - ldq r0,0(r18) - ldq r4,0(r17) + ldq r3,0(r17) + ldq r4,0(r18) + subq r19,1,r19 - beq r19,$Lend0 -C 2nd loop handles remaining 1-3 limbs - ALIGN(16) -$Loop0: subq r4,r0,r28 C main subtract - cmpult r4,r0,r8 C compute cy from last subtract - ldq r0,8(r18) - ldq r4,8(r17) - subq r28,r25,r20 C carry subtract - addq r18,8,r18 + and r19,4-1,r2 C number of limbs in first loop + bis r31,r31,r0 + beq r2,$L0 C if multiple of 4 limbs, skip first loop + + subq r19,r2,r19 + +$Loop0: subq r2,1,r2 + ldq r5,8(r17) + addq r4,r0,r4 + ldq r6,8(r18) + cmpult r4,r0,r1 + subq r3,r4,r4 + cmpult r3,r4,r0 + stq r4,0(r16) + bis r0,r1,r0 + addq r17,8,r17 - stq r20,0(r16) - cmpult r28,r25,r25 C compute cy from last subtract - 
subq r19,1,r19 C decr loop cnt - bis r8,r25,r25 C combine cy from the two subtracts + addq r18,8,r18 + bis r5,r5,r3 + bis r6,r6,r4 addq r16,8,r16 - bne r19,$Loop0 -$Lend0: subq r4,r0,r28 C main subtract - subq r28,r25,r20 C carry subtract - cmpult r4,r0,r8 C compute cy from last subtract - cmpult r28,r25,r25 C compute cy from last subtract - stq r20,0(r16) - bis r8,r25,r25 C combine cy from the two subtracts - -$Lret: bis r25,r31,r0 C return cy + bne r2,$Loop0 + +$L0: beq r19,$Lend + + ALIGN(8) +$Loop: subq r19,4,r19 + + ldq r5,8(r17) + addq r4,r0,r4 + ldq r6,8(r18) + cmpult r4,r0,r1 + subq r3,r4,r4 + cmpult r3,r4,r0 + stq r4,0(r16) + bis r0,r1,r0 + + ldq r3,16(r17) + addq r6,r0,r6 + ldq r4,16(r18) + cmpult r6,r0,r1 + subq r5,r6,r6 + cmpult r5,r6,r0 + stq r6,8(r16) + bis r0,r1,r0 + + ldq r5,24(r17) + addq r4,r0,r4 + ldq r6,24(r18) + cmpult r4,r0,r1 + subq r3,r4,r4 + cmpult r3,r4,r0 + stq r4,16(r16) + bis r0,r1,r0 + + ldq r3,32(r17) + addq r6,r0,r6 + ldq r4,32(r18) + cmpult r6,r0,r1 + subq r5,r6,r6 + cmpult r5,r6,r0 + stq r6,24(r16) + bis r0,r1,r0 + + addq r17,32,r17 + addq r18,32,r18 + addq r16,32,r16 + bne r19,$Loop + +$Lend: addq r4,r0,r4 + cmpult r4,r0,r1 + subq r3,r4,r4 + cmpult r3,r4,r0 + stq r4,0(r16) + bis r0,r1,r0 ret r31,(r26),1 -EPILOGUE() +EPILOGUE(mpn_sub_n) ASM_END() diff --git a/gmp/mpn/alpha/submul_1.asm b/gmp/mpn/alpha/submul_1.asm index 2b63b52fa4..554ccf51b6 100644 --- a/gmp/mpn/alpha/submul_1.asm +++ b/gmp/mpn/alpha/submul_1.asm @@ -4,30 +4,19 @@ dnl the result from a second limb vector. dnl Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. 
-dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/umul.asm b/gmp/mpn/alpha/umul.asm index 039081ed48..7fa3f008f1 100644 --- a/gmp/mpn/alpha/umul.asm +++ b/gmp/mpn/alpha/umul.asm @@ -3,30 +3,19 @@ dnl mpn_umul_ppmm -- 1x1->2 limb multiplication dnl Copyright 1999, 2000, 2002 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. 
-dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') diff --git a/gmp/mpn/alpha/unicos.m4 b/gmp/mpn/alpha/unicos.m4 index e05cf5cca6..f1f41c18e4 100644 --- a/gmp/mpn/alpha/unicos.m4 +++ b/gmp/mpn/alpha/unicos.m4 @@ -3,33 +3,22 @@ divert(-1) dnl m4 macros for alpha assembler on unicos. -dnl Copyright 2000, 2002-2004, 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. +dnl Copyright 2000, 2002, 2003, 2004 Free Software Foundation, Inc. dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. 
+dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. dnl Note that none of the standard GMP_ASM_ autoconf tests are done for @@ -86,9 +75,8 @@ m4_assert_numargs(1) ` .extern $1') define(`DATASTART', -m4_assert_numargs_range(1,2) +m4_assert_numargs(1) ` .psect $1@crud,data - ALIGN(ifelse($#,1,2,$2)) $1:') define(`DATAEND', |