diff options
Diffstat (limited to 'gmp/mpn/sparc64/ultrasparct3')
20 files changed, 0 insertions, 2739 deletions
diff --git a/gmp/mpn/sparc64/ultrasparct3/add_n.asm b/gmp/mpn/sparc64/ultrasparct3/add_n.asm deleted file mode 100644 index 0170746895..0000000000 --- a/gmp/mpn/sparc64/ultrasparct3/add_n.asm +++ /dev/null @@ -1,126 +0,0 @@ -dnl SPARC v9 mpn_add_n for T3/T4. - -dnl Contributed to the GNU project by David Miller. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
- -include(`../config.m4') - -C cycles/limb -C UltraSPARC T3: 8 -C UltraSPARC T4: 3 - -C INPUT PARAMETERS -define(`rp', `%i0') -define(`up', `%i1') -define(`vp', `%i2') -define(`n', `%i3') -define(`cy', `%i4') - -define(`u0_off', `%l2') -define(`u1_off', `%l3') -define(`loop_n', `%l6') -define(`tmp', `%l7') - -ASM_START() - REGISTER(%g2,#scratch) - REGISTER(%g3,#scratch) -PROLOGUE(mpn_add_nc) - save %sp, -176, %sp - b,a L(ent) -EPILOGUE() -PROLOGUE(mpn_add_n) - save %sp, -176, %sp - - mov 0, cy -L(ent): - subcc n, 1, n - be L(final_one) - cmp %g0, cy - - ldx [up + 0], %o4 - sllx n, 3, tmp - - ldx [vp + 0], %o5 - add up, tmp, u0_off - - ldx [up + 8], %g5 - neg tmp, loop_n - - ldx [vp + 8], %g1 - add u0_off, 8, u1_off - - sub loop_n, -(2 * 8), loop_n - - brgez,pn loop_n, L(loop_tail) - add vp, (2 * 8), vp - - b,a L(top) - ALIGN(16) -L(top): - addxccc(%o4, %o5, tmp) - ldx [vp + 0], %o5 - - add rp, (2 * 8), rp - ldx [loop_n + u0_off], %o4 - - add vp, (2 * 8), vp - stx tmp, [rp - 16] - - addxccc(%g1, %g5, tmp) - ldx [vp - 8], %g1 - - ldx [loop_n + u1_off], %g5 - sub loop_n, -(2 * 8), loop_n - - brlz loop_n, L(top) - stx tmp, [rp - 8] - -L(loop_tail): - addxccc(%o4, %o5, %g3) - add loop_n, u0_off, up - - addxccc(%g1, %g5, %g5) - stx %g3, [rp + 0] - - brgz,pt loop_n, L(done) - stx %g5, [rp + 8] - - add rp, (2 * 8), rp -L(final_one): - ldx [up+0], %o4 - ldx [vp+0], %o5 - addxccc(%o4, %o5, %g3) - stx %g3, [rp+0] - -L(done): - addxc(%g0, %g0, %i0) - ret - restore -EPILOGUE() diff --git a/gmp/mpn/sparc64/ultrasparct3/addmul_1.asm b/gmp/mpn/sparc64/ultrasparct3/addmul_1.asm deleted file mode 100644 index 939811e1ce..0000000000 --- a/gmp/mpn/sparc64/ultrasparct3/addmul_1.asm +++ /dev/null @@ -1,182 +0,0 @@ -dnl SPARC v9 mpn_addmul_1 for T3/T4/T5. - -dnl Contributed to the GNU project by David Miller and Torbjörn Granlund. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C UltraSPARC T3: 26 -C UltraSPARC T4: 4.5 - -C INPUT PARAMETERS -define(`rp', `%i0') -define(`up', `%i1') -define(`n', `%i2') -define(`v0', `%i3') - -define(`u0', `%l0') -define(`u1', `%l1') -define(`u2', `%l2') -define(`u3', `%l3') -define(`r0', `%l4') -define(`r1', `%l5') -define(`r2', `%l6') -define(`r3', `%l7') - -ASM_START() - REGISTER(%g2,#scratch) - REGISTER(%g3,#scratch) -PROLOGUE(mpn_addmul_1) - save %sp, -176, %sp - ldx [up+0], %g1 - - and n, 3, %g3 - brz %g3, L(b0) - addcc %g0, %g0, %g5 C clear carry limb, flag - cmp %g3, 2 - bcs %xcc, L(b01) - nop - be %xcc, L(b10) - ldx [up+8], %g5 - -L(b11): ldx [up+16], u3 - mulx %g1, v0, %o2 - umulxhi(%g1, v0, %o3) - ldx [rp+0], r1 - mulx %g5, v0, %o4 - ldx [rp+8], r2 - umulxhi(%g5, v0, %o5) - ldx [rp+16], r3 - mulx u3, v0, %g4 - umulxhi(u3, v0, %g5) - addcc %o3, %o4, %o4 - addxccc(%o5, %g4, %g4) - addxc( %g0, %g5, %g5) - addcc r1, %o2, r1 - stx r1, [rp+0] - addxccc(r2, %o4, r2) - stx r2, [rp+8] - 
addxccc(r3, %g4, r3) - stx r3, [rp+16] - add n, -3, n - add up, 24, up - brz n, L(xit) - add rp, 24, rp - b L(com) - nop - -L(b10): mulx %g1, v0, %o4 - ldx [rp+0], r2 - umulxhi(%g1, v0, %o5) - ldx [rp+8], r3 - mulx %g5, v0, %g4 - umulxhi(%g5, v0, %g5) - addcc %o5, %g4, %g4 - addxc( %g0, %g5, %g5) - addcc r2, %o4, r2 - stx r2, [rp+0] - addxccc(r3, %g4, r3) - stx r3, [rp+8] - add n, -2, n - add up, 16, up - brz n, L(xit) - add rp, 16, rp - b L(com) - nop - -L(b01): ldx [rp+0], r3 - mulx %g1, v0, %g4 - umulxhi(%g1, v0, %g5) - addcc r3, %g4, r3 - stx r3, [rp+0] - add n, -1, n - add up, 8, up - brz n, L(xit) - add rp, 8, rp - -L(com): ldx [up+0], %g1 -L(b0): ldx [up+8], u1 - ldx [up+16], u2 - ldx [up+24], u3 - mulx %g1, v0, %o0 - umulxhi(%g1, v0, %o1) - b L(lo0) - nop - - ALIGN(16) -L(top): ldx [up+0], u0 - addxc( %g0, %g5, %g5) C propagate carry into carry limb - ldx [up+8], u1 - addcc r0, %o0, r0 - ldx [up+16], u2 - addxccc(r1, %o2, r1) - ldx [up+24], u3 - addxccc(r2, %o4, r2) - stx r0, [rp-32] - addxccc(r3, %g4, r3) - stx r1, [rp-24] - mulx u0, v0, %o0 - stx r2, [rp-16] - umulxhi(u0, v0, %o1) - stx r3, [rp-8] -L(lo0): mulx u1, v0, %o2 - ldx [rp+0], r0 - umulxhi(u1, v0, %o3) - ldx [rp+8], r1 - mulx u2, v0, %o4 - ldx [rp+16], r2 - umulxhi(u2, v0, %o5) - ldx [rp+24], r3 - mulx u3, v0, %g4 - addxccc(%g5, %o0, %o0) - umulxhi(u3, v0, %g5) - add up, 32, up - addxccc(%o1, %o2, %o2) - add rp, 32, rp - addxccc(%o3, %o4, %o4) - add n, -4, n - addxccc(%o5, %g4, %g4) - brgz n, L(top) - nop - - addxc( %g0, %g5, %g5) - addcc r0, %o0, r0 - stx r0, [rp-32] - addxccc(r1, %o2, r1) - stx r1, [rp-24] - addxccc(r2, %o4, r2) - stx r2, [rp-16] - addxccc(r3, %g4, r3) - stx r3, [rp-8] -L(xit): addxc( %g0, %g5, %i0) - ret - restore -EPILOGUE() diff --git a/gmp/mpn/sparc64/ultrasparct3/aormul_2.asm b/gmp/mpn/sparc64/ultrasparct3/aormul_2.asm deleted file mode 100644 index ccc6a4408d..0000000000 --- a/gmp/mpn/sparc64/ultrasparct3/aormul_2.asm +++ /dev/null @@ -1,228 +0,0 @@ -dnl SPARC v9 
mpn_mul_2 and mpn_addmul_2 for T3/T4/T5. - -dnl Contributed to the GNU project by Torbjörn Granlund. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - - -C cycles/limb cycles/limb -C mul_2 addmul_2 -C UltraSPARC T3: 22.5 23.5 -C UltraSPARC T4: 3.25 3.75 - - -C The code is reasonably scheduled but also relies on OoO. There was hope that -C this could run at around 3.0 and 3.5 c/l respectively, on T4. Two cycles per -C iteration needs to be removed. -C -C We could almost use 2-way unrolling, but currently the wN registers live too -C long. By changing add x,w1,w1 to add x,w1,w0, i.e. migrate the values down- -C wards, 2-way unrolling should become possible. With n-indexed addressing it -C should run no slower. -C -C The rp loads to g1/g3 are very much over-scheduled. Presumably, they could -C be postponed a full way, and then just one register could be used. 
- -C INPUT PARAMETERS -define(`rp', `%i0') -define(`up', `%i1') -define(`n', `%i2') -define(`vp', `%i3') - -define(`v0', `%o0') -define(`v1', `%o1') - -define(`w0', `%o2') -define(`w1', `%o3') -define(`w2', `%o4') -define(`w3', `%o5') - -ifdef(`OPERATION_mul_2',` - define(`AM2', `') - define(`ADDX', `addcc`'$1') - define(`func', `mpn_mul_2') -') -ifdef(`OPERATION_addmul_2',` - define(`AM2', `$1') - define(`ADDX', `addxccc($1,$2,$3)') - define(`func', `mpn_addmul_2') -') - - -MULFUNC_PROLOGUE(mpn_mul_2 mpn_addmul_2) - -ASM_START() - REGISTER(%g2,#scratch) - REGISTER(%g3,#scratch) -PROLOGUE(func) - save %sp, -176, %sp - - ldx [vp+0], v0 C load v0 - and n, 3, %g5 - ldx [vp+8], v1 C load v1 - add n, -6, n - ldx [up+0], %g4 - brz %g5, L(b0) - cmp %g5, 2 - bcs L(b1) - nop - be L(b2) - nop - -L(b3): -AM2(` ldx [rp+0], %g1') - mulx %g4, v0, w2 - umulxhi(%g4, v0, w3) - ldx [up+8], %i5 - mulx %g4, v1, %l3 - umulxhi(%g4, v1, %l7) -AM2(` ldx [rp+8], %g3') - add up, -8, up - add rp, -8, rp - b L(lo3) - mov 0, w0 - -L(b2): -AM2(` ldx [rp+0], %g3') - mulx %g4, v0, w3 - umulxhi(%g4, v0, w0) - ldx [up+8], %i4 - mulx %g4, v1, %l1 - umulxhi(%g4, v1, %l5) -AM2(` ldx [rp+8], %g1') - add rp, 16, rp - brlz n, L(end) - mov 0, w1 - ba L(top) - add up, 16, up - -L(b1): -AM2(` ldx [rp+0], %g1') - mulx %g4, v0, w0 - umulxhi(%g4, v0, w1) - ldx [up+8], %i5 - mulx %g4, v1, %l3 - umulxhi(%g4, v1, %l7) -AM2(` ldx [rp+8], %g3') - add up, 8, up - add rp, 8, rp - b L(lo1) - mov 0, w2 - -L(b0): -AM2(` ldx [rp+0], %g3') - mulx %g4, v0, w1 - umulxhi(%g4, v0, w2) - ldx [up+8], %i4 - mulx %g4, v1, %l1 - umulxhi(%g4, v1, %l5) -AM2(` ldx [rp+8], %g1') - b L(lo0) - mov 0, w3 - - ALIGN(16) C cycle -L(top): mulx %i4, v0, %l2 C 0->5 - umulxhi(%i4, v0, %l6) C 0->5 - ldx [up+0], %i5 C 1->6 -AM2(` addcc w3, %g3, w3') C 1 - stx w3, [rp-16] C 2 - ADDX(` %l1, w0, w0') C 2 - addxccc(%l5, w1, w1) C 3 - mulx %i4, v1, %l3 C 3->9 - umulxhi(%i4, v1, %l7) C 4->9 -AM2(` ldx [rp+0], %g3') C 4 - addcc %l2, w0, w0 C 5 - 
addxccc(%l6, w1, w1) C 5 - addxc( %g0, %g0, w2) C 6 -L(lo1): mulx %i5, v0, %l0 C 6 - umulxhi(%i5, v0, %l4) C 7 - ldx [up+8], %i4 C 7 -AM2(` addcc w0, %g1, w0') C 8 - stx w0, [rp-8] C 8 - ADDX(` %l3, w1, w1') C 9 - addxccc(%l7, w2, w2) C 9 - mulx %i5, v1, %l1 C 10 - umulxhi(%i5, v1, %l5) C 10 -AM2(` ldx [rp+8], %g1') C 11 - addcc %l0, w1, w1 C 11 - addxccc(%l4, w2, w2) C 12 - addxc( %g0, %g0, w3) C 12 -L(lo0): mulx %i4, v0, %l2 C 13 - umulxhi(%i4, v0, %l6) C 13 - ldx [up+16], %i5 C 14 -AM2(` addcc w1, %g3, w1') C 14 - stx w1, [rp+0] C 15 - ADDX(` %l1, w2, w2') C 15 - addxccc(%l5, w3, w3) C 16 - mulx %i4, v1, %l3 C 16 - umulxhi(%i4, v1, %l7) C 17 -AM2(` ldx [rp+16], %g3') C 17 - addcc %l2, w2, w2 C 18 - addxccc(%l6, w3, w3) C 18 - addxc( %g0, %g0, w0) C 19 -L(lo3): mulx %i5, v0, %l0 C 19 - umulxhi(%i5, v0, %l4) C 20 - ldx [up+24], %i4 C 20 -AM2(` addcc w2, %g1, w2') C 21 - stx w2, [rp+8] C 21 - ADDX(` %l3, w3, w3') C 22 - addxccc(%l7, w0, w0) C 22 - mulx %i5, v1, %l1 C 23 - umulxhi(%i5, v1, %l5) C 23 -AM2(` ldx [rp+24], %g1') C 24 - addcc %l0, w3, w3 C 24 - addxccc(%l4, w0, w0) C 25 - addxc( %g0, %g0, w1) C 25 - add up, 32, up - add rp, 32, rp - brgz n, L(top) - add n, -4, n - -L(end): mulx %i4, v0, %l2 - umulxhi(%i4, v0, %l6) -AM2(` addcc w3, %g3, w3') - stx w3, [rp-16] - ADDX(` %l1, w0, w0') - addxccc(%l5, w1, w1) - mulx %i4, v1, %l3 - umulxhi(%i4, v1, %l7) - addcc %l2, w0, w0 - addxccc(%l6, w1, w1) - addxc( %g0, %g0, w2) -AM2(` addcc w0, %g1, w0') - stx w0, [rp-8] - ADDX(` %l3, w1, w1') - stx w1, [rp+0] - addxc(%l7, w2, %i0) - - ret - restore -EPILOGUE() diff --git a/gmp/mpn/sparc64/ultrasparct3/aormul_4.asm b/gmp/mpn/sparc64/ultrasparct3/aormul_4.asm deleted file mode 100644 index 845f6d6d69..0000000000 --- a/gmp/mpn/sparc64/ultrasparct3/aormul_4.asm +++ /dev/null @@ -1,219 +0,0 @@ -dnl SPARC v9 mpn_mul_4 and mpn_addmul_4 for T3/T4/T5. - -dnl Contributed to the GNU project by Torbjörn Granlund. - -dnl Copyright 2013 Free Software Foundation, Inc. 
- -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - - -C cycles/limb cycles/limb -C mul_4 addmul_4 -C UltraSPARC T3: 21.5 22.0 -C UltraSPARC T4: 2.625 2.75 - - -C The code is well-scheduled and relies on OoO very little. There is hope that -C this will run at around 2.5 and 2.75 c/l respectively, on T4. 
- -define(`rp', `%i0') -define(`up', `%i1') -define(`n', `%i2') -define(`vp', `%i3') - -define(`v0', `%g1') -define(`v1', `%o7') -define(`v2', `%g2') -define(`v3', `%i3') - -define(`w0', `%o0') -define(`w1', `%o1') -define(`w2', `%o2') -define(`w3', `%o3') -define(`w4', `%o4') - -define(`r0', `%o5') - -define(`u0', `%i4') -define(`u1', `%i5') - -define(`rp0', `rp') -define(`rp1', `%g3') -define(`rp2', `%g4') -define(`up0', `up') -define(`up1', `%g5') - -ifdef(`OPERATION_mul_4',` - define(`AM4', `') - define(`ADDX', `addcc`'$1') - define(`func', `mpn_mul_4') -') -ifdef(`OPERATION_addmul_4',` - define(`AM4', `$1') - define(`ADDX', `addxccc($1,$2,$3)') - define(`func', `mpn_addmul_4') -') - - -MULFUNC_PROLOGUE(mpn_mul_4 mpn_addmul_4) - -ASM_START() - REGISTER(%g2,#scratch) - REGISTER(%g3,#scratch) -PROLOGUE(func) - save %sp, -176, %sp - - ldx [up + 0], u1 C load up[0] early - andcc n, 1, %g0 C is n odd? - ldx [vp + 0], v0 - sllx n, 3, n - ldx [vp + 8], v1 - add n, -28, n - ldx [vp + 16], v2 - add rp, -16, rp - ldx [vp + 24], v3 - add up, n, up0 - add rp, n, rp0 - add up0, 8, up1 - add rp0, 8, rp1 - add rp0, 16, rp2 - mulx u1, v0, %l0 - mov 0, w0 - mulx u1, v1, %l1 - mov 0, w1 - mulx u1, v2, %l2 - mov 0, w2 - mulx u1, v3, %l3 - mov 0, w3 - - be L(evn) - neg n, n - -L(odd): mov u1, u0 - ldx [up1 + n], u1 -AM4(` ldx [rp2 + n], r0') - umulxhi(u0, v0, %l4) - umulxhi(u0, v1, %l5) - umulxhi(u0, v2, %l6) - umulxhi(u0, v3, %l7) - b L(mid) - add n, 8, n - -L(evn): ldx [up1 + n], u0 -AM4(` ldx [rp2 + n], r0') - umulxhi(u1, v0, %l4) - umulxhi(u1, v1, %l5) - umulxhi(u1, v2, %l6) - umulxhi(u1, v3, %l7) - add n, 16, n - - ALIGN(16) -L(top): addcc %l0, w0, w0 - mulx u0, v0, %l0 C w 0 - addxccc(%l1, w1, w1) - mulx u0, v1, %l1 C w 1 - addxccc(%l2, w2, w2) - mulx u0, v2, %l2 C w 2 - addxccc(%l3, w3, w3) - mulx u0, v3, %l3 C w 3 - ldx [up0 + n], u1 - addxc( %g0, %g0, w4) -AM4(` addcc r0, w0, w0') - stx w0, [rp0 + n] - ADDX(` %l4, w1, w0') - umulxhi(u0, v0, %l4) C w 1 -AM4(` ldx [rp1 + 
n], r0') - addxccc(%l5, w2, w1) - umulxhi(u0, v1, %l5) C w 2 - addxccc(%l6, w3, w2) - umulxhi(u0, v2, %l6) C w 3 - addxc( %l7, w4, w3) - umulxhi(u0, v3, %l7) C w 4 -L(mid): addcc %l0, w0, w0 - mulx u1, v0, %l0 C w 1 - addxccc(%l1, w1, w1) - mulx u1, v1, %l1 C w 2 - addxccc(%l2, w2, w2) - mulx u1, v2, %l2 C w 3 - addxccc(%l3, w3, w3) - mulx u1, v3, %l3 C w 4 - ldx [up1 + n], u0 - addxc( %g0, %g0, w4) -AM4(` addcc r0, w0, w0') - stx w0, [rp1 + n] - ADDX(` %l4, w1, w0') - umulxhi(u1, v0, %l4) C w 2 -AM4(` ldx [rp2 + n], r0') - addxccc(%l5, w2, w1) - umulxhi(u1, v1, %l5) C w 3 - addxccc(%l6, w3, w2) - umulxhi(u1, v2, %l6) C w 4 - addxc( %l7, w4, w3) - umulxhi(u1, v3, %l7) C w 5 - brlz n, L(top) - add n, 16, n - -L(end): addcc %l0, w0, w0 - mulx u0, v0, %l0 - addxccc(%l1, w1, w1) - mulx u0, v1, %l1 - addxccc(%l2, w2, w2) - mulx u0, v2, %l2 - addxccc(%l3, w3, w3) - mulx u0, v3, %l3 - addxc( %g0, %g0, w4) -AM4(` addcc r0, w0, w0') - stx w0, [rp0 + n] - ADDX(` %l4, w1, w0') - umulxhi(u0, v0, %l4) -AM4(` ldx [rp1 + n], r0') - addxccc(%l5, w2, w1) - umulxhi(u0, v1, %l5) - addxccc(%l6, w3, w2) - umulxhi(u0, v2, %l6) - addxc( %l7, w4, w3) - umulxhi(u0, v3, %l7) - addcc %l0, w0, w0 - addxccc(%l1, w1, w1) - addxccc(%l2, w2, w2) - addxccc(%l3, w3, w3) - addxc( %g0, %g0, w4) -AM4(` addcc r0, w0, w0') - stx w0, [rp1 + n] - ADDX(` %l4, w1, w0') - addxccc(%l5, w2, w1) - addxccc(%l6, w3, w2) - stx w0, [rp2 + n] - add n, 16, n - stx w1, [rp1 + n] - stx w2, [rp2 + n] - addxc( %l7, w4, %i0) - ret - restore -EPILOGUE() diff --git a/gmp/mpn/sparc64/ultrasparct3/aorslsh_n.asm b/gmp/mpn/sparc64/ultrasparct3/aorslsh_n.asm deleted file mode 100644 index 1014b1ba23..0000000000 --- a/gmp/mpn/sparc64/ultrasparct3/aorslsh_n.asm +++ /dev/null @@ -1,147 +0,0 @@ -dnl SPARC v9 mpn_addlsh_n and mpn_sublsh_n for T3/T4/T5. - -dnl Contributed to the GNU project by Torbjörn Granlund. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. 
-dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C UltraSPARC T3: 11 -C UltraSPARC T4: 4 - -C For sublsh_n we combine the two shifted limbs using xnor, using the identity -C (a xor not b) = (not (a xor b)) which equals (not (a or b)) when (a and b) = -C 0 as it is in our usage. This gives us the ones complement for free. -C Unfortunately, the same trick will not work for rsblsh_n, which will instead -C require a separate negation. -C -C FIXME: Add rsblsh_n to this file. 
- -define(`rp', `%i0') -define(`up', `%i1') -define(`vp', `%i2') -define(`n', `%i3') -define(`cnt',`%i4') - -define(`tnc',`%o5') - -ifdef(`OPERATION_addlsh_n',` - define(`INITCY', `subcc %g0, 0, %g0') - define(`MERGE', `or') - define(`func', `mpn_addlsh_n') -') -ifdef(`OPERATION_sublsh_n',` - define(`INITCY', `subcc %g0, 1, %g0') - define(`MERGE', `xnor') - define(`func', `mpn_sublsh_n') -') - -define(`rp0', `rp') -define(`rp1', `%o2') -define(`up0', `up') -define(`up1', `%o3') -define(`vp0', `vp') -define(`vp1', `%o4') - -MULFUNC_PROLOGUE(mpn_addlsh_n mpn_sublsh_n) -ASM_START() - REGISTER(%g2,#scratch) - REGISTER(%g3,#scratch) -PROLOGUE(func) - save %sp, -176, %sp - mov 64, tnc - sub tnc, cnt, tnc - - andcc n, 1, %g0 - sllx n, 3, n - add n, -16, n - add up, n, up0 - add vp, n, vp0 - add rp, n, rp0 - add up0, 8, up1 - add vp0, 8, vp1 - add rp0, -8, rp1 - add rp0, -16, rp0 - neg n, n - be L(evn) - INITCY - -L(odd): ldx [vp0 + n], %l1 - mov 0, %l2 - ldx [up0 + n], %l5 - sllx %l1, cnt, %g3 - brgez n, L(wd1) - add n, 8, n - ldx [vp0 + n], %l0 - b L(lo1) - sllx %l1, cnt, %g3 - -L(evn): ldx [vp0 + n], %l0 - mov 0, %l3 - ldx [up0 + n], %l4 - ldx [vp1 + n], %l1 - b L(lo0) - sllx %l0, cnt, %g1 - -L(top): addxccc(%l6, %l4, %o0) - ldx [vp0 + n], %l0 - sllx %l1, cnt, %g3 - stx %o0, [rp0 + n] -L(lo1): srlx %l1, tnc, %l3 - MERGE %l2, %g3, %l7 - ldx [up0 + n], %l4 - addxccc(%l7, %l5, %o1) - ldx [vp1 + n], %l1 - sllx %l0, cnt, %g1 - stx %o1, [rp1 + n] -L(lo0): srlx %l0, tnc, %l2 - MERGE %l3, %g1, %l6 - ldx [up1 + n], %l5 - brlz,pt n, L(top) - add n, 16, n - - addxccc(%l6, %l4, %o0) - sllx %l1, cnt, %g3 - stx %o0, [rp0 + n] -L(wd1): srlx %l1, tnc, %l3 - MERGE %l2, %g3, %l7 - addxccc(%l7, %l5, %o1) - stx %o1, [rp1 + n] - -ifdef(`OPERATION_addlsh_n', -` addxc( %l3, %g0, %i0)') -ifdef(`OPERATION_sublsh_n', -` addxc( %g0, %g0, %g1) - add %g1, -1, %g1 - sub %l3, %g1, %i0') - - ret - restore -EPILOGUE() diff --git a/gmp/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm 
b/gmp/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm deleted file mode 100644 index 550860d368..0000000000 --- a/gmp/mpn/sparc64/ultrasparct3/bdiv_dbm1c.asm +++ /dev/null @@ -1,147 +0,0 @@ -dnl SPARC T3/T4/T5 mpn_bdiv_dbm1c. - -dnl Contributed to the GNU project by Torbjörn Granlund. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
- -include(`../config.m4') - -C cycles/limb -C UltraSPARC T3: 25 -C UltraSPARC T4/T5: 4 - -C INPUT PARAMETERS -define(`qp', `%i0') -define(`ap', `%i1') -define(`n', `%i2') -define(`bd', `%i3') -define(`h', `%i4') - -define(`plo0',`%g4') define(`plo1',`%g5') -define(`phi0',`%l0') define(`phi1',`%l1') -define(`a0', `%g1') define(`a1', `%g3') - -ASM_START() - REGISTER(%g2,#scratch) - REGISTER(%g3,#scratch) -PROLOGUE(mpn_bdiv_dbm1c) - save %sp, -176, %sp - - and n, 3, %g5 - ldx [ap + 0], %g2 - add n, -5, n - brz %g5, L(b0) - cmp %g5, 2 - bcs %xcc, L(b1) - nop - be %xcc, L(b2) - nop - -L(b3): ldx [ap + 8], a0 - mulx bd, %g2, plo1 - umulxhi(bd, %g2, phi1) - ldx [ap + 16], a1 - add qp, -24, qp - b L(lo3) - add ap, -8, ap - -L(b2): ldx [ap + 8], a1 - mulx bd, %g2, plo0 - umulxhi(bd, %g2, phi0) - brlz,pt n, L(wd2) - nop -L(gt2): ldx [ap + 16], a0 - add ap, 16, ap - b L(lo2) - add n, -1, n - -L(b1): mulx bd, %g2, plo1 - umulxhi(bd, %g2, phi1) - brlz,pn n, L(wd1) - add qp, -8, qp -L(gt1): ldx [ap + 8], a0 - ldx [ap + 16], a1 - b L(lo1) - add ap, 8, ap - -L(b0): ldx [ap + 8], a1 - mulx bd, %g2, plo0 - umulxhi(bd, %g2, phi0) - ldx [ap + 16], a0 - b L(lo0) - add qp, -16, qp - -L(top): ldx [ap + 0], a0 - sub h, phi1, h -L(lo2): mulx bd, a1, plo1 - umulxhi(bd, a1, phi1) - subcc h, plo0, h - addxc( phi0, %g0, phi0) - stx h, [qp + 0] - ldx [ap + 8], a1 - sub h, phi0, h -L(lo1): mulx bd, a0, plo0 - umulxhi(bd, a0, phi0) - subcc h, plo1, h - addxc( phi1, %g0, phi1) - stx h, [qp + 8] - ldx [ap + 16], a0 - sub h, phi1, h -L(lo0): mulx bd, a1, plo1 - umulxhi(bd, a1, phi1) - subcc h, plo0, h - addxc( phi0, %g0, phi0) - stx h, [qp + 16] - ldx [ap + 24], a1 - sub h, phi0, h -L(lo3): mulx bd, a0, plo0 - umulxhi(bd, a0, phi0) - subcc h, plo1, h - addxc( phi1, %g0, phi1) - stx h, [qp + 24] - add ap, 32, ap - add qp, 32, qp - brgz,pt n, L(top) - add n, -4, n - -L(end): sub h, phi1, h -L(wd2): mulx bd, a1, plo1 - umulxhi(bd, a1, phi1) - subcc h, plo0, h - addxc( phi0, %g0, phi0) - stx h, [qp + 
0] - sub h, phi0, h -L(wd1): subcc h, plo1, h - addxc( phi1, %g0, phi1) - stx h, [qp + 8] - sub h, phi1, %i0 - - ret - restore -EPILOGUE() diff --git a/gmp/mpn/sparc64/ultrasparct3/cnd_aors_n.asm b/gmp/mpn/sparc64/ultrasparct3/cnd_aors_n.asm deleted file mode 100644 index f10ee72c1f..0000000000 --- a/gmp/mpn/sparc64/ultrasparct3/cnd_aors_n.asm +++ /dev/null @@ -1,143 +0,0 @@ -dnl SPARC v9 mpn_cnd_add_n and mpn_cnd_sub_n for T3/T4/T5. - -dnl Contributed to the GNU project by David Miller and Torbjörn Granlund. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C UltraSPARC T3: 8.5 -C UltraSPARC T4: 3 - -C We use a double-pointer trick to allow indexed addressing. Its setup -C cost might be a problem in these functions, since we don't expect huge n -C arguments. 
-C -C For sub we need ~(a & mask) = (~a | ~mask) but by complementing mask we can -C instead do ~(a & ~mask) = (~a | mask), allowing us to use the orn insn. - -C INPUT PARAMETERS -define(`cnd', `%i0') -define(`rp', `%i1') -define(`up', `%i2') -define(`vp', `%i3') -define(`n', `%i4') - -define(`mask', `cnd') -define(`up0', `%l0') define(`up1', `%l1') -define(`vp0', `%l2') define(`vp1', `%l3') -define(`rp0', `%g4') define(`rp1', `%g5') -define(`u0', `%l4') define(`u1', `%l5') -define(`v0', `%l6') define(`v1', `%l7') -define(`x0', `%g1') define(`x1', `%g3') -define(`w0', `%g1') define(`w1', `%g3') - -ifdef(`OPERATION_cnd_add_n',` - define(`LOGOP', `and $1, $2, $3') - define(`MAKEMASK',`cmp %g0, $1 - subc %g0, %g0, $2') - define(`INITCY', `addcc %g0, 0, %g0') - define(`RETVAL', `addxc( %g0, %g0, %i0)') - define(`func', `mpn_cnd_add_n') -') -ifdef(`OPERATION_cnd_sub_n',` - define(`LOGOP', `orn $2, $1, $3') - define(`MAKEMASK',`cmp $1, 1 - subc %g0, %g0, $2') - define(`INITCY', `subcc %g0, 1, %g0') - define(`RETVAL', `addxc( %g0, %g0, %i0) - xor %i0, 1, %i0') - define(`func', `mpn_cnd_sub_n') -') - -MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n) - -ASM_START() - REGISTER(%g2,#scratch) - REGISTER(%g3,#scratch) -PROLOGUE(func) - save %sp, -176, %sp - - MAKEMASK(cnd,mask) - - andcc n, 1, %g0 - sllx n, 3, n - add n, -16, n - add vp, n, vp0 - add up, n, up0 - add rp, n, rp0 - neg n, n - be L(evn) - INITCY - -L(odd): ldx [vp0 + n], v1 - ldx [up0 + n], u1 - LOGOP( v1, mask, x1) - addxccc(u1, x1, w1) - stx w1, [rp0 + n] - add n, 8, n - brgz n, L(rtn) - nop - -L(evn): add vp0, 8, vp1 - add up0, 8, up1 - add rp0, -24, rp1 - ldx [vp0 + n], v0 - ldx [vp1 + n], v1 - ldx [up0 + n], u0 - ldx [up1 + n], u1 - add n, 16, n - brgz n, L(end) - add rp0, -16, rp0 - -L(top): LOGOP( v0, mask, x0) - ldx [vp0 + n], v0 - LOGOP( v1, mask, x1) - ldx [vp1 + n], v1 - addxccc(u0, x0, w0) - ldx [up0 + n], u0 - addxccc(u1, x1, w1) - ldx [up1 + n], u1 - stx w0, [rp0 + n] - add n, 16, n - brlez n, L(top) - 
stx w1, [rp1 + n] - -L(end): LOGOP( v0, mask, x0) - LOGOP( v1, mask, x1) - addxccc(u0, x0, w0) - addxccc(u1, x1, w1) - stx w0, [rp0 + n] - stx w1, [rp1 + 32] - -L(rtn): RETVAL - ret - restore -EPILOGUE() diff --git a/gmp/mpn/sparc64/ultrasparct3/dive_1.asm b/gmp/mpn/sparc64/ultrasparct3/dive_1.asm deleted file mode 100644 index d7dbdf953c..0000000000 --- a/gmp/mpn/sparc64/ultrasparct3/dive_1.asm +++ /dev/null @@ -1,129 +0,0 @@ -dnl SPARC T3/T4/T5 mpn_divexact_1. - -dnl Contributed to the GNU project by Torbjörn Granlund. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
- -include(`../config.m4') - -C cycles/limb -C UltraSPARC T3: 31 -C UltraSPARC T4/T5: 20-26 hits 20 early, then sharply drops - -C INPUT PARAMETERS -define(`qp', `%i0') -define(`ap', `%i1') -define(`n', `%i2') -define(`d', `%i3') - -define(`dinv',`%o4') - -ASM_START() - REGISTER(%g2,#scratch) - REGISTER(%g3,#scratch) -PROLOGUE(mpn_divexact_1) - save %sp, -176, %sp - cmp n, 1 - bne,pt %xcc, L(gt1) - ldx [ap], %o5 - udivx %o5, d, %g1 - stx %g1, [qp] - return %i7+8 - nop - -L(gt1): add d, -1, %g1 - andn %g1, d, %g1 - popc %g1, %i4 C i4 = count_trailing_zeros(d) - - srlx d, %i4, d - srlx d, 1, %g1 - and %g1, 127, %g1 - - LEA64(binvert_limb_table, g2, g4) - ldub [%g2+%g1], %g1 - add %g1, %g1, %g2 - mulx %g1, %g1, %g1 - mulx %g1, d, %g1 - sub %g2, %g1, %g2 - add %g2, %g2, %g1 - mulx %g2, %g2, %g2 - mulx %g2, d, %g2 - sub %g1, %g2, %g1 - add %g1, %g1, %o7 - mulx %g1, %g1, %g1 - mulx %g1, d, %g1 - add n, -2, n - brz,pt %i4, L(norm) - sub %o7, %g1, dinv - -L(unnorm): - mov 0, %g4 - sub %g0, %i4, %o2 - srlx %o5, %i4, %o5 -L(top_unnorm): - ldx [ap+8], %g3 - add ap, 8, ap - sllx %g3, %o2, %g5 - or %g5, %o5, %g5 - srlx %g3, %i4, %o5 - subcc %g5, %g4, %g4 - mulx %g4, dinv, %g1 - stx %g1, [qp] - add qp, 8, qp - umulxhi(d, %g1, %g1) - addxc( %g1, %g0, %g4) - brgz,pt n, L(top_unnorm) - add n, -1, n - - sub %o5, %g4, %g4 - mulx %g4, dinv, %g1 - stx %g1, [qp] - return %i7+8 - nop - -L(norm): - mulx dinv, %o5, %g1 - stx %g1, [qp] - add qp, 8, qp - addcc %g0, 0, %g4 -L(top_norm): - umulxhi(d, %g1, %g1) - ldx [ap+8], %g5 - add ap, 8, ap - addxc( %g1, %g0, %g1) - subcc %g5, %g1, %g1 - mulx %g1, dinv, %g1 - stx %g1, [qp] - add qp, 8, qp - brgz,pt n, L(top_norm) - add n, -1, n - - return %i7+8 - nop -EPILOGUE() diff --git a/gmp/mpn/sparc64/ultrasparct3/hamdist.asm b/gmp/mpn/sparc64/ultrasparct3/hamdist.asm deleted file mode 100644 index 20ed8bf15b..0000000000 --- a/gmp/mpn/sparc64/ultrasparct3/hamdist.asm +++ /dev/null @@ -1,78 +0,0 @@ -dnl SPARC v9 mpn_hamdist for T3/T4. 
- -dnl Contributed to the GNU project by David Miller. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC T3: 18
-C UltraSPARC T4: 3.5
-
-C INPUT PARAMETERS
-define(`up', `%o0')	C first operand, only read
-define(`vp', `%o1')	C second operand, only read
-define(`n', `%o2')	C limb count
-define(`pcnt', `%o5')	C running bit count (a local, not an argument)
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_hamdist)	C returns the number of bit positions in which the n limbs at up and vp differ; leaf routine, no register window
-	subcc	n, 1, n
-	be	L(final_one)	C n == 1: only the single-limb tail runs (be tests icc, the low 32 bits of n - 1)
-	clr	pcnt	C delay slot: the count starts at zero on both paths
-L(top):	C two limbs per iteration
-	ldx	[up + 0], %g1
-	ldx	[vp + 0], %g2
-	ldx	[up + 8], %o4
-	ldx	[vp + 8], %g3
-	sub	n, 2, n
-	xor	%g1, %g2, %g1	C differing bits of the first pair
-	add	up, 16, up
-	popc	%g1, %g2
-	add	vp, 16, vp
-	xor	%o4, %g3, %o4	C differing bits of the second pair
-	add	pcnt, %g2, pcnt
-	popc	%o4, %g3
-	brgz	n, L(top)
-	add	pcnt, %g3, pcnt	C delay slot: runs on exit from the loop as well
-	brlz,pt	n, L(done)	C n < 0: the limb count was even, nothing is left
-	nop
-L(final_one):	C exactly one limb remains
-	ldx	[up + 0], %g1
-	ldx	[vp + 0], %g2
-	xor	%g1,%g2, %g1
-	popc	%g1, %g2
-	add	pcnt, %g2, pcnt
-L(done):
-	retl
-	mov	pcnt, %o0	C delay slot: return value
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/invert_limb.asm b/gmp/mpn/sparc64/ultrasparct3/invert_limb.asm
deleted file mode 100644
index 4da49cf030..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/invert_limb.asm
+++ /dev/null
@@ -1,92 +0,0 @@
-dnl SPARC T3/T4/T5 mpn_invert_limb.
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC T3: ?
-C UltraSPARC T4/T5: ?
-
-C INPUT PARAMETERS
-define(`d', `%o0')	C the limb to invert; also receives the result
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_invert_limb)	C leaf routine: refines a 16-bit table estimate in stages and returns the result in %o0
-	srlx	d, 54, %g1
-	LEA64(approx_tab, g2, g3)	C g2 is used as the table address below
-	and	%g1, 0x1fe, %g1	C byte offset 2 * (bits 62..55 of d); bit 63 is dropped, so d presumably has it set - TODO confirm
-	srlx	d, 24, %g4
-	lduh	[%g2+%g1], %g3	C v0 = table estimate
-	add	%g4, 1, %g4	C d40 = (d >> 24) + 1
-	sllx	%g3, 11, %g2
-	add	%g2, -1, %g2
-	mulx	%g3, %g3, %g3
-	mulx	%g3, %g4, %g3
-	srlx	%g3, 40, %g3
-	sub	%g2, %g3, %g2	C v1 = (v0 << 11) - 1 - ((v0 * v0 * d40) >> 40)
-	sllx	%g2, 60, %g1
-	mulx	%g2, %g2, %g3
-	mulx	%g3, %g4, %g4
-	sub	%g1, %g4, %g1
-	srlx	%g1, 47, %g1
-	sllx	%g2, 13, %g2
-	add	%g1, %g2, %g1	C v2 = (v1 << 13) + (((v1 << 60) - v1 * v1 * d40) >> 47)
-	and	d, 1, %g2	C d0 = low bit of d
-	srlx	%g1, 1, %g4
-	sub	%g0, %g2, %g3	C all ones when d0 is set, else zero
-	and	%g4, %g3, %g3	C (v2 >> 1) when d is odd, else 0
-	srlx	d, 1, %g4
-	add	%g4, %g2, %g2	C d63 = (d >> 1) + d0
-	mulx	%g1, %g2, %g2
-	sub	%g3, %g2, %g2	C e = ((v2 >> 1) & -d0) - v2 * d63
-	umulxhi(%g1, %g2, %g2)
-	srlx	%g2, 1, %g2
-	sllx	%g1, 31, %g1
-	add	%g2, %g1, %g1	C v3 = (v2 << 31) + (high(v2 * e) >> 1)
-	mulx	%g1, d, %g3	C low half of v3 * d
-	umulxhi(d, %g1, %g4)	C high half of v3 * d
-	addcc	%g3, d, %g0	C only the carry of low + d is wanted
-	addxc(	%g4, d, %o0)	C high + d + carry
-	jmp	%o7+8
-	sub	%g1, %o0, %o0	C delay slot: result = v3 - (high + d + carry)
-EPILOGUE()
-
-	RODATA
-	ALIGN(2)
-	TYPE(	approx_tab, object)
-	SIZE(	approx_tab, 512)
-approx_tab:	C 256 halfwords, 0x7fd00/i for i = 256 .. 511
-forloop(i,256,512-1,dnl
-`	.half	eval(0x7fd00/i)
-')dnl
diff --git a/gmp/mpn/sparc64/ultrasparct3/missing.asm b/gmp/mpn/sparc64/ultrasparct3/missing.asm
deleted file mode 100644
index c79032dd38..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/missing.asm
+++ /dev/null
@@ -1,77 +0,0 @@
-dnl SPARC v9-2011 simulation support.
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-ASM_START()
-PROLOGUE(__gmpn_umulh)	C software stand-in for umulxhi: two operands arrive in stack slots, the high product half replaces the first one
-	save	%sp, -176, %sp
-	ldx	[%sp+2047+176+256], %o0	C operand a; the +176 undoes the save above
-	ldx	[%sp+2047+176+256+8], %o1	C operand b
-	rd	%ccr, %o4	C keep the caller condition codes, addcc below changes them
-	srl	%o0, 0, %l4	C al = low 32 bits of a
-	srl	%o1, 0, %l1	C bl = low 32 bits of b
-	srlx	%o1, 32, %o1	C bh
-	mulx	%o1, %l4, %l2	C bh * al
-	srlx	%o0, 32, %o0	C ah
-	mulx	%o0, %l1, %l3	C ah * bl
-	mulx	%l1, %l4, %l1	C al * bl
-	srlx	%l1, 32, %l1
-	add	%l2, %l1, %l2	C bh * al + (al * bl >> 32)
-	addcc	%l2, %l3, %l2	C middle sum; its carry is worth 2^32 in the high half
-	mulx	%o1, %o0, %o1	C ah * bh
-	mov	0, %l1
-	movcs	%xcc, 1, %l1
-	sllx	%l1, 32, %l1
-	add	%o1, %l1, %o1
-	srlx	%l2, 32, %o0
-	add	%o1, %o0, %o0	C high 64 bits of a * b
-	stx	%o0, [%sp+2047+176+256]	C result overwrites the first operand slot
-	wr	%o4, 0, %ccr	C restore the caller condition codes
-	ret
-	restore
-EPILOGUE()
-
-PROLOGUE(__gmpn_lzcnt)	C software stand-in for lzcnt: the operand arrives in a stack slot and the count replaces it
-	save	%sp, -176, %sp
-	ldx	[%sp+2047+176+256], %o0
-	brz,a	%o0, 2f	C zero operand: the count is 64
-	mov	64, %o1	C annulled delay slot, runs only when the branch is taken
-	brlz	%o0, 2f	C top bit already set: the count is 0
-	mov	0, %o1	C delay slot, runs on both paths
-1:	sllx	%o0, 1, %o0
-	brgz	%o0, 1b	C loop until the top bit is set
-	add	%o1, 1, %o1	C delay slot: one more leading zero, counted on exit too
-2:	stx	%o1, [%sp+2047+176+256]	C label moved onto the store: the early exits used to skip it and hand the operand back as the count
-	ret
-	restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/missing.m4 b/gmp/mpn/sparc64/ultrasparct3/missing.m4
deleted file mode 100644
index e5d6d8e98e..0000000000
---
a/gmp/mpn/sparc64/ultrasparct3/missing.m4 +++ /dev/null @@ -1,88 +0,0 @@ -dnl SPARC v9-2011 simulation support. - -dnl Contributed to the GNU project by Torbjörn Granlund. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
-
-
-dnl Usage addxccc(r1,r2,r3, t1) (t1 is accepted but never referenced by the body)
-dnl 64-bit add with carry-in and carry-out; the incoming xcc carry is re-created with addcc (-1 + 1) so that addccc can consume it
-dnl FIXME: Register g2 must not be destination (nor a source operand: g2 is zeroed before r1 and r2 are read)
-
-define(`addxccc',`dnl
-	add	%sp, -512, %sp
-	stx	%g2, [%sp+2047+256+16]
-	mov	0, %g2
-	movcs	%xcc, -1, %g2
-	addcc	%g2, 1, %g0
-	addccc	$1, $2, $3
-	ldx	[%sp+2047+256+16], %g2
-	sub	%sp, -512, %sp
-')
-
-
-dnl Usage addxc(r1,r2,r3, t1,t2) (t1 and t2 are not referenced by the body)
-dnl 64-bit add with carry-in; uses plain add so the flags stay as they were, and expands the numeric local label 1
-
-define(`addxc',`dnl
-	bcc	%xcc, 1f
-	add	$1, $2, $3
-	add	$3, 1, $3
-1:
-')
-
-
-dnl Usage umulxhi(r1,r2,r3)
-dnl 64-bit multiply returning upper 64 bits
-dnl Calls __gmpn_umulh using a non-standard calling convention: operands go to the stack slots at sp+2047+256 and +8, the result comes back in the first slot, o7 is saved at +16
-
-define(`umulxhi',`dnl
-	add	%sp, -512, %sp
-	stx	$1, [%sp+2047+256]
-	stx	$2, [%sp+2047+256+8]
-	stx	%o7, [%sp+2047+256+16]
-	call	__gmpn_umulh
-	nop
-	ldx	[%sp+2047+256+16], %o7
-	ldx	[%sp+2047+256], $3
-	sub	%sp, -512, %sp
-')
-dnl Usage lzcnt(r1,r2)
-dnl Plain count leading zeros
-dnl Calls __gmpn_lzcnt using a non-standard calling convention: the operand is stored from the call delay slot and the count is read back from the same stack slot
-
-define(`lzcnt',`dnl
-	add	%sp, -512, %sp
-	stx	%o7, [%sp+2047+256+16]
-	call	__gmpn_lzcnt
-	stx	$1, [%sp+2047+256]
-	ldx	[%sp+2047+256+16], %o7
-	ldx	[%sp+2047+256], $2
-	sub	%sp, -512, %sp
-')
diff --git a/gmp/mpn/sparc64/ultrasparct3/mod_1_4.asm b/gmp/mpn/sparc64/ultrasparct3/mod_1_4.asm
deleted file mode 100644
index 08facbd1cc..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/mod_1_4.asm
+++ /dev/null
@@ -1,233 +0,0 @@
-dnl SPARC T3/T4/T5 mpn_mod_1s_4p.
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC T3: 30
-C UltraSPARC T4/T5: 4
-
-C INPUT PARAMETERS (both routines open a register window, so the code below addresses the arguments as %i0-%i3 and does not use these names)
-define(`ap', `%o0')
-define(`n', `%o1')
-define(`d', `%o2')
-define(`cps', `%o3')
-
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_mod_1s_4p)	C folds four limbs per iteration with the constants stored by mpn_mod_1s_4p_cps, then reduces the two-limb value and returns it in %o0
-	save	%sp, -176, %sp
-	ldx	[%i3+16], %o4	C the five folding constants, cps words 2..6
-	ldx	[%i3+24], %o3
-	ldx	[%i3+32], %o2
-	ldx	[%i3+40], %o1
-	ldx	[%i3+48], %o0
-
-	and	%i1, 3, %g3	C n mod 4 selects the lead-in
-	sllx	%i1, 3, %g1
-	add	%i0, %g1, %i0	C i0 = one past the last limb; limbs are consumed from the top down
-	brz	%g3, L(b00)
-	cmp	%g3, 2
-	bcs	%xcc, L(b01)
-	nop
-	be	%xcc, L(b10)
-	nop
-
-L(b11):	ldx	[%i0-16], %g2	C n mod 4 == 3: fold the top three limbs into g2:g1
-	mulx	%g2, %o4, %g5
-	umulxhi(%g2, %o4, %g3)
-	ldx	[%i0-24], %g4
-	addcc	%g5, %g4, %g5
-	addxc(	%g3, %g0, %g4)
-	ldx	[%i0-8], %g2
-	mulx	%g2, %o3, %g1
-	umulxhi(%g2, %o3, %g3)
-	addcc	%g1, %g5, %g1
-	addxc(	%g3, %g4, %g2)
-	ba,pt	%xcc, .L8
-	add	%i0, -32, %i0
-
-L(b00):	ldx	[%i0-24], %g3	C n mod 4 == 0: fold the top four limbs into g2:g1
-	mulx	%g3, %o4, %g2
-	umulxhi(%g3, %o4, %g5)
-	ldx	[%i0-32], %g4
-	addcc	%g2, %g4, %g2
-	addxc(	%g5, %g0, %g3)
-	ldx	[%i0-16], %g4
-	mulx	%g4, %o3, %g5
-	umulxhi(%g4, %o3, %i5)
-	addcc	%g2, %g5, %g5
-	addxc(	%g3, %i5, %g4)
-	ldx	[%i0-8], %g2
-	mulx	%g2, %o2, %g1
-	umulxhi(%g2, %o2, %g3)
-	addcc	%g1, %g5, %g1
-	addxc(	%g3, %g4, %g2)
-	ba,pt	%xcc, .L8
-	add	%i0, -40, %i0
-
-L(b01):	ldx	[%i0-8], %g1	C n mod 4 == 1: the top limb alone, high half zero
-	mov	0, %g2
-	ba,pt	%xcc, .L8
-	add	%i0, -16, %i0
-
-L(b10):	ldx	[%i0-8], %g2	C n mod 4 == 2: the top two limbs as they are
-	ldx	[%i0-16], %g1
-	add	%i0, -24, %i0
-
-.L8:	add	%i1, -5, %g3	C g3 >= 0 while at least four more limbs remain
-	brlz,pn	%g3, L(end)
-	nop
-
-L(top):	ldx	[%i0-16], %i4	C new g2:g1 = four more limbs + g1 * o1 + g2 * o0, limbs weighted by o4, o3, o2
-	mulx	%i4, %o4, %o5
-	umulxhi(%i4, %o4, %i1)	C i1 (n) is free here, the count lives in g3
-	ldx	[%i0-24], %i5
-	addcc	%o5, %i5, %o5
-	addxc(	%i1, %g0, %i4)
-	ldx	[%i0-8], %i5
-	mulx	%i5, %o3, %o7	C o7 is free scratch after the save
-	umulxhi(%i5, %o3, %i1)
-	addcc	%o5, %o7, %o7
-	addxc(	%i4, %i1, %i5)
-	ldx	[%i0+0], %g4
-	mulx	%g4, %o2, %i1
-	umulxhi(%g4, %o2, %i4)
-	addcc	%o7, %i1, %i1
-	addxc(	%i5, %i4, %g4)
-	mulx	%g1, %o1, %i5
-	umulxhi(%g1, %o1, %i4)
-	addcc	%i1, %i5, %i5
-	addxc(	%g4, %i4, %g5)
-	mulx	%g2, %o0, %g1
-	umulxhi(%g2, %o0, %g4)
-	addcc	%g1, %i5, %g1
-	addxc(	%g4, %g5, %g2)
-	add	%g3, -4, %g3
-	brgez,pt %g3, L(top)
-	add	%i0, -32, %i0
-
-L(end):	mulx	%g2, %o4, %g5	C fold the high limb once more: g2:g5 = g1 + g2 * o4
-	umulxhi(%g2, %o4, %g3)
-	addcc	%g1, %g5, %g5
-	addxc(	%g3, %g0, %g2)
-	ldx	[%i3+8], %i0	C cps word 1: the shift count stored by the cps routine
-	ldx	[%i3], %g4	C cps word 0: the value returned by mpn_invert_limb
-	sub	%g0, %i0, %i5
-	srlx	%g5, %i5, %i5
-	sllx	%g2, %i0, %g2
-	or	%i5, %g2, %g1	C high limb of the value shifted left by the count
-	mulx	%g1, %g4, %l7
-	umulxhi(%g1, %g4, %g3)
-	sllx	%g5, %i0, %g2	C low limb of the shifted value
-	add	%g1, 1, %g1
-	addcc	%l7, %g2, %g5
-	addxc(	%g3, %g1, %g1)	C quotient estimate
-	mulx	%g1, %i2, %g1
-	sub	%g2, %g1, %g2	C candidate remainder
-	cmp	%g2, %g5
-	add	%i2, %g2, %g1
-	movlu	%xcc, %g2, %g1	C first conditional adjustment by d
-	subcc	%g1, %i2, %g2
-	movgeu	%xcc, %g2, %g1	C second conditional adjustment by d
-	return	%i7+8
-	srlx	%g1, %o0, %o0	C delay slot runs in the restored window: this %o0 is the %i0 loaded with the shift count above
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1s_4p_cps)	C fills seven words at %i0 from the divisor in %i1: inverse, shift count and the five folding constants
-	save	%sp, -176, %sp
-	lzcnt(	%i1, %i5)	C i5 = leading zero count of the divisor
-	sllx	%i1, %i5, %i1	C shift the divisor up so that its top bit is set
-	call	mpn_invert_limb, 0
-	mov	%i1, %o0	C delay slot: argument
-	stx	%o0, [%i0]	C word 0 = value returned by mpn_invert_limb
-	sra	%i5, 0, %g1
-	stx	%g1, [%i0+8]	C word 1 = shift count
-	sub	%g0, %i5, %g2
-	srlx	%o0, %g2, %g2
-	mov	1, %g1
-	sllx	%g1, %i5, %g1
-	or	%g2, %g1, %g2
-	sub	%g0, %i1, %g1
-	mulx	%g2, %g1, %g2
-	srlx	%g2, %i5, %g1
-	stx	%g1, [%i0+16]	C word 2; presumably B mod d, words 3..6 the next powers - TODO confirm against the C version
-
-	umulxhi(%o0, %g2, %g3)	C the same step repeats four times: next = previous * B mod d
-	add	%g2, %g3, %g3
-	xnor	%g0, %g3, %g3
-	mulx	%g3, %i1, %g3
-	mulx	%g2, %o0, %g2
-	cmp	%g2, %g3
-	add	%i1, %g3, %g1
-	movgeu	%xcc, %g3, %g1
-	srlx	%g1, %i5, %g2
-	stx	%g2, [%i0+24]	C word 3
-
-	umulxhi(%o0, %g1, %g3)
-	add	%g1, %g3, %g3
-	xnor	%g0, %g3, %g3
-	mulx	%g3, %i1, %g3
-	mulx	%g1, %o0, %g1
-	cmp	%g1, %g3
-	add	%i1, %g3, %g2
-	movgeu	%xcc, %g3, %g2
-	srlx	%g2, %i5, %g1
-	stx	%g1, [%i0+32]	C word 4
-
-	umulxhi(%o0, %g2, %g3)
-	add	%g2, %g3, %g3
-	xnor	%g0, %g3, %g3
-	mulx	%g3, %i1, %g3
-	mulx	%g2, %o0, %g2
-	cmp	%g2, %g3
-	add	%i1, %g3, %g1
-	movgeu	%xcc, %g3, %g1
-	srlx	%g1, %i5, %g2
-	stx	%g2, [%i0+40]	C word 5
-
-	umulxhi(%o0, %g1, %g2)
-	add	%g1, %g2, %g2
-	xnor	%g0, %g2, %g2
-	mulx	%g2, %i1, %g2
-	mulx	%g1, %o0, %o0
-	cmp	%o0, %g2
-	add	%i1, %g2, %g3
-	movgeu	%xcc, %g2, %g3
-	srlx	%g3, %i5, %i5
-	stx	%i5, [%i0+48]	C word 6
-
-	return	%i7+8
-	nop
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/mod_34lsub1.asm b/gmp/mpn/sparc64/ultrasparct3/mod_34lsub1.asm
deleted file mode 100644
index 874428069e..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/mod_34lsub1.asm
+++ /dev/null
@@ -1,117 +0,0 @@
-dnl SPARC v9 mpn_mod_34lsub1 for T3/T4/T5.
-
-dnl Copyright 2005, 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC T1: -
-C UltraSPARC T3: 5
-C UltraSPARC T4: 1.57
-
-C This is based on the powerpc64/mode64 code.
-
-C INPUT PARAMETERS
-define(`up', `%i0')	C source limbs
-define(`n', `%i1')	C limb count
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_mod_34lsub1)	C sums the limbs into three accumulators and folds them at 48-bit boundaries; the returned sum is congruent to the operand modulo 2^48 - 1 but not fully reduced
-	save	%sp, -176, %sp
-
-	mov	0, %g1	C accumulator for limbs 0, 3, 6, ...
-	mov	0, %g3	C accumulator for limbs 1, 4, 7, ...
-	mov	0, %g4	C accumulator for limbs 2, 5, 8, ...
-	addcc	%g0, 0, %g5	C carry collector; also clears the carry flag
-
-	add	n, -3, n
-	brlz	n, L(lt3)	C fewer than three limbs
-	nop
-
-	add	n, -3, n
-	ldx	[up+0], %l5
-	ldx	[up+8], %l6
-	ldx	[up+16], %l7
-	brlz	n, L(end)	C fewer than six limbs: skip the loop
-	add	up, 24, up
-
-	ALIGN(16)
-L(top):	addxccc(%g1, %l5, %g1)	C the carry out of g4 wraps around into g1 here
-	ldx	[up+0], %l5
-	addxccc(%g3, %l6, %g3)
-	ldx	[up+8], %l6
-	addxccc(%g4, %l7, %g4)
-	ldx	[up+16], %l7
-	add	n, -3, n
-	brgez	n, L(top)
-	add	up, 24, up
-
-L(end):	addxccc(	%g1, %l5, %g1)	C add the three limbs already loaded
-	addxccc(%g3, %l6, %g3)
-	addxccc(%g4, %l7, %g4)
-	addxc(	%g5, %g0, %g5)	C park the last carry in g5
-
-L(lt3):	cmp	n, -2	C n + 3 limbs remain: 0, 1 or 2
-	blt	L(2)	C none left
-	nop
-
-	ldx	[up+0], %l5
-	mov	0, %l6
-	beq	L(1)	C exactly one left, decided by the cmp above
-	addcc	%g1, %l5, %g1	C delay slot: runs on both paths and starts a fresh carry chain
-
-	ldx	[up+8], %l6	C second leftover limb
-L(1):	addxccc(%g3, %l6, %g3)
-	addxccc(%g4, %g0, %g4)
-	addxc(	%g5, %g0, %g5)
-
-L(2):	sllx	%g1, 16, %l0
-	srlx	%l0, 16, %l0	C %l0 = %g1 mod 2^48
-	srlx	%g1, 48, %l3	C %l3 = %g1 div 2^48
-	srl	%g3, 0, %g1
-	sllx	%g1, 16, %l4	C %l4 = (%g3 mod 2^32) << 16
-	srlx	%g3, 32, %l5	C %l5 = %g3 div 2^32
-	sethi	%hi(0xffff0000), %g1
-	andn	%g4, %g1, %g1	C clears bits 31..16; the bits above 31 fall off in the shift below
-	sllx	%g1, 32, %l6	C %l6 = (%g4 mod 2^16) << 32
-	srlx	%g4, 16, %l7	C %l7 = %g4 div 2^16
-
-	add	%l0, %l3, %l0
-	add	%l4, %l5, %l4
-	add	%l6, %l7, %l6
-
-	add	%l0, %l4, %l0
-	add	%l6, %g5, %l6	C the collected carries have weight 2^192, which is 1 modulo 2^48 - 1
-
-	add	%l0, %l6, %i0	C return value
-	ret
-	restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/mode1o.asm b/gmp/mpn/sparc64/ultrasparct3/mode1o.asm
deleted file mode 100644
index 494e1d3f4f..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/mode1o.asm
+++ /dev/null
@@ -1,82 +0,0 @@
-dnl SPARC T3/T4/T5 mpn_modexact_1c_odd.
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC T3: 30
-C UltraSPARC T4/T5: 26
-
-C INPUT PARAMETERS
-define(`ap', `%o0')	C source limbs
-define(`n', `%o1')	C limb count
-define(`d', `%o2')	C divisor; the table lookup ignores bit 0, so presumably odd as the routine name says
-define(`cy', `%o3')	C carry-in, then the running carry
-
-define(`dinv',`%o5')	C seeded from binvert_limb_table and refined below; presumably the inverse of d modulo 2^64
-define(`a0', `%g1')	C current source limb
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_modexact_1c_odd)	C leaf routine: runs the carry recurrence c = high(d * ((a - c) * dinv)) + borrow over all limbs and returns the final c
-	srlx	d, 1, %g1
-	and	%g1, 127, %g1	C table index = bits 7..1 of d
-
-	LEA64(binvert_limb_table, g2, g4)
-	ldub	[%g2+%g1], %g1	C 8-bit seed x
-	add	%g1, %g1, %g2	C three rounds of x = 2*x - x*x*d follow
-	mulx	%g1, %g1, %g1
-	mulx	%g1, d, %g1
-	sub	%g2, %g1, %g2	C end of round 1
-	add	%g2, %g2, %g1
-	mulx	%g2, %g2, %g2
-	mulx	%g2, d, %g2
-	sub	%g1, %g2, %g1	C end of round 2
-	add	%g1, %g1, %o5
-	mulx	%g1, %g1, %g1
-	mulx	%g1, d, %g1
-	sub	%o5, %g1, dinv	C end of round 3
-	add	n, -1, n	C the loop body runs once more than the counter value
-
-L(top):	ldx	[ap], a0
-	add	ap, 8, ap
-	subcc	a0, cy, %g3	C the borrow lands in the carry flag
-	mulx	%g3, dinv, %g5	C q = (a0 - cy) * dinv
-	umulxhi(d, %g5, %g5)	C high half of q * d
-	addxc(	%g5, %g0, cy)	C next carry = high half + borrow
-	brnz,pt	n, L(top)
-	add	n, -1, n
-
-	retl
-	mov	cy, %o0	C delay slot: return value
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/mul_1.asm b/gmp/mpn/sparc64/ultrasparct3/mul_1.asm
deleted file mode 100644
index
af05d627bc..0000000000 --- a/gmp/mpn/sparc64/ultrasparct3/mul_1.asm +++ /dev/null @@ -1,174 +0,0 @@ -dnl SPARC v9 mpn_mul_1 for T3/T4/T5. - -dnl Contributed to the GNU project by David Miller and Torbjörn Granlund. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. 
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC T3: 23
-C UltraSPARC T4: 3
-
-C INPUT PARAMETERS
-define(`rp', `%i0')	C destination limbs; also receives the return value
-define(`up', `%i1')	C source limbs
-define(`n', `%i2')	C limb count
-define(`v0', `%i3')	C multiplier limb
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_mul_1)	C stores the low n limbs of {up,n} * v0 at rp and returns the top limb; 4-way unrolled, entered according to n mod 4
-	save	%sp, -176, %sp
-
-	and	n, 3, %g5
-	add	n, -4, n	C n > 0 from here on means the unrolled loop still has work
-	brz	%g5, L(b0)
-	cmp	%g5, 2
-	bcs	%xcc, L(b1)
-	nop
-	be	%xcc, L(b2)
-	nop
-
-L(b3):	addcc	%g0, %g0, %i5	C n mod 4 == 3; zero the incoming high limb and clear the carry flag
-	ldx	[up+0], %l0
-	ldx	[up+8], %l1
-	ldx	[up+16], %l2
-	mulx	%l0, v0, %o0
-	umulxhi(%l0, v0, %o1)
-	brgz	n, L(gt3)
-	add	rp, -8, rp	C delay slot: bias rp for the store offsets at the entry point
-	mulx	%l1, v0, %o2
-	umulxhi(%l1, v0, %o3)
-	b	L(wd3)	C n == 3: go straight to the wind-down
-	nop
-L(gt3):	ldx	[up+24], %l3
-	mulx	%l1, v0, %o2
-	umulxhi(%l1, v0, %o3)
-	add	up, 24, up
-	b	L(lo3)
-	add	n, -3, n
-
-L(b2):	addcc	%g0, %g0, %o1	C n mod 4 == 2
-	ldx	[up+0], %l1
-	ldx	[up+8], %l2
-	brgz	n, L(gt2)
-	add	rp, -16, rp
-	mulx	%l1, v0, %o2
-	umulxhi(%l1, v0, %o3)
-	mulx	%l2, v0, %o4
-	umulxhi(%l2, v0, %o5)
-	b	L(wd2)	C n == 2
-	nop
-L(gt2):	ldx	[up+16], %l3
-	mulx	%l1, v0, %o2
-	umulxhi(%l1, v0, %o3)
-	ldx	[up+24], %l0
-	mulx	%l2, v0, %o4
-	umulxhi(%l2, v0, %o5)
-	add	up, 16, up
-	b	L(lo2)
-	add	n, -2, n
-
-L(b1):	addcc	%g0, %g0, %o3	C n mod 4 == 1
-	ldx	[up+0], %l2
-	brgz	n, L(gt1)
-	nop
-	mulx	%l2, v0, %o4	C n == 1: one product, low half stored, high half returned
-	stx	%o4, [rp+0]
-	umulxhi(%l2, v0, %i0)
-	ret
-	restore
-L(gt1):	ldx	[up+8], %l3
-	ldx	[up+16], %l0
-	mulx	%l2, v0, %o4
-	umulxhi(%l2, v0, %o5)
-	ldx	[up+24], %l1
-	mulx	%l3, v0, %i4
-	umulxhi(%l3, v0, %i5)
-	add	rp, -24, rp
-	add	up, 8, up
-	b	L(lo1)
-	add	n, -1, n
-
-L(b0):	addcc	%g0, %g0, %o5	C n mod 4 == 0
-	ldx	[up+0], %l3
-	ldx	[up+8], %l0
-	ldx	[up+16], %l1
-	mulx	%l3, v0, %i4
-	umulxhi(%l3, v0, %i5)
-	ldx	[up+24], %l2
-	mulx	%l0, v0, %o0
-	umulxhi(%l0, v0, %o1)
-	b	L(lo0)
-	nop
-
-	ALIGN(16)
-L(top):	ldx	[up+0], %l3	C 0
-	addxccc(%i4, %o5, %i4)	C 0
-	mulx	%l1, v0, %o2	C 1
-	stx	%i4, [rp+0]	C 1
-	umulxhi(%l1, v0, %o3)	C 2
-L(lo3):	ldx	[up+8], %l0	C 2
-	addxccc(%o0, %i5, %o0)	C 3
-	mulx	%l2, v0, %o4	C 3
-	stx	%o0, [rp+8]	C 4
-	umulxhi(%l2, v0, %o5)	C 4
-L(lo2):	ldx	[up+16], %l1	C 5
-	addxccc(%o2, %o1, %o2)	C 5
-	mulx	%l3, v0, %i4	C 6
-	stx	%o2, [rp+16]	C 6
-	umulxhi(%l3, v0, %i5)	C 7
-L(lo1):	ldx	[up+24], %l2	C 7
-	addxccc(%o4, %o3, %o4)	C 8
-	mulx	%l0, v0, %o0	C 8
-	stx	%o4, [rp+24]	C 9
-	umulxhi(%l0, v0, %o1)	C 9
-	add	rp, 32, rp	C 10
-L(lo0):	add	up, 32, up	C 10
-	brgz	n, L(top)	C 11
-	add	n, -4, n	C 11
-
-L(end):	addxccc(%i4, %o5, %i4)	C wind-down: finish the products already in flight, no more loads
-	mulx	%l1, v0, %o2
-	stx	%i4, [rp+0]
-	umulxhi(%l1, v0, %o3)
-	addxccc(%o0, %i5, %o0)
-L(wd3):	mulx	%l2, v0, %o4
-	stx	%o0, [rp+8]
-	umulxhi(%l2, v0, %o5)
-	addxccc(%o2, %o1, %o2)
-L(wd2):	stx	%o2, [rp+16]
-	addxccc(%o4, %o3, %o4)
-	stx	%o4, [rp+24]
-	addxc(	%g0, %o5, %i0)	C return value = last high half + final carry
-	ret
-	restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/popcount.asm b/gmp/mpn/sparc64/ultrasparct3/popcount.asm
deleted file mode 100644
index de80f3c809..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/popcount.asm
+++ /dev/null
@@ -1,70 +0,0 @@
-dnl SPARC v9 mpn_popcount for T3/T4.
-
-dnl Contributed to the GNU project by David Miller.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC T3: 15
-C UltraSPARC T4: 2.5
-
-C INPUT PARAMETERS
-define(`up', `%o0')	C source limbs, only read
-define(`n', `%o1')	C limb count
-define(`pcnt', `%o5')	C running bit count (a local, not an argument)
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_popcount)	C returns the number of set bits in the n limbs at up; leaf routine, no register window
-	subcc	n, 1, n
-	be	L(final_one)	C n == 1: only the single-limb tail runs (be tests icc, the low 32 bits of n - 1)
-	clr	pcnt	C delay slot: the count starts at zero on both paths
-L(top):	C two limbs per iteration
-	ldx	[up + 0], %g1
-	sub	n, 2, n
-	ldx	[up + 8], %o4
-	add	up, 16, up
-	popc	%g1, %g2
-	popc	%o4, %g3
-	add	pcnt, %g2, pcnt
-	brgz	n, L(top)
-	add	pcnt, %g3, pcnt	C delay slot: runs on exit from the loop as well
-	brlz,pt	n, L(done)	C n < 0: the limb count was even, nothing is left
-	nop
-L(final_one):	C exactly one limb remains
-	ldx	[up + 0], %g1
-	popc	%g1, %g2
-	add	pcnt, %g2, pcnt
-L(done):
-	retl
-	mov	pcnt, %o0	C delay slot: return value
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm b/gmp/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm
deleted file mode 100644
index 216ddc0ba1..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/sqr_diag_addlsh1.asm
+++ /dev/null
@@ -1,93 +0,0 @@
-dnl SPARC v9 mpn_sqr_diag_addlsh1 for T3/T4/T5.
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC T3: ?
-C UltraSPARC T4: >= 4.5
-
-
-define(`rp', `%i0')	C destination limbs
-define(`tp', `%i1')	C limbs that get doubled and added in
-define(`up', `%i2')	C limbs that get squared
-define(`n', `%i3')	C limb count of up; the code assumes at least two - TODO confirm with callers
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_sqr_diag_addlsh1)	C writes to rp the squares of the up limbs laid end to end, plus twice the tp limbs starting one limb up
-	save	%sp, -176, %sp
-
-	ldx	[up+0], %g1
-	mulx	%g1, %g1, %o0
-	umulxhi(%g1, %g1, %g2)	C g2 = high half of the first square
-	stx	%o0, [rp+0]	C its low half goes out unchanged
-
-	ldx	[up+8], %g1
-	ldx	[tp+0], %g4
-	ldx	[tp+8], %g5
-	mulx	%g1, %g1, %o0	C low half of the next square
-	orcc	%g0, %g0, %o5	C o5 = 0 and the carry flag is cleared
-	b	L(dm)
-	add	n, -2, n
-
-	ALIGN(16)
-L(top):	ldx	[up+8], %g1
-	addcc	%g4, %o2, %o2	C doubled tp limb + pending high half
-	addxccc(%g5, %o0, %g3)	C doubled tp limb + low half of the current square
-	ldx	[tp+16], %g4
-	ldx	[tp+24], %g5
-	mulx	%g1, %g1, %o0
-	stx	%o2, [rp+8]
-	stx	%g3, [rp+16]
-	add	rp, 16, rp
-	add	tp, 16, tp
-L(dm):	add	%g2, %o5, %o2	C previous high half + bit shifted out of the last doubling
-	umulxhi(%g1, %g1, %g2)
-	addxccc(%g4, %g4, %g4)	C double two tp limbs; the carry still pending from above enters at the bottom
-	addxccc(%g5, %g5, %g5)
-	add	up, 8, up
-	addxc(	%g0, %g0, %o5)	C o5 = bit shifted out of the doubling
-	brnz	n, L(top)
-	add	n, -1, n
-
-	addcc	%o2, %g4, %g4	C final two sums and the top limb
-	addxccc(%o0, %g5, %g5)
-	stx	%g4, [rp+8]
-	stx	%g5, [rp+16]
-	addxc(	%o5, %g2, %g2)
-	stx	%g2, [rp+24]
-
-	ret
-	restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/sub_n.asm b/gmp/mpn/sparc64/ultrasparct3/sub_n.asm
deleted file mode 100644
index 0e4bc939e3..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/sub_n.asm
+++ /dev/null
@@ -1,144 +0,0 @@
-dnl SPARC v9 mpn_sub_n for T3/T4.
-
-dnl Contributed to the GNU project by David Miller.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC T3: 8
-C UltraSPARC T4: 3
-
-C INPUT PARAMETERS
-define(`rp', `%i0')	C destination limbs; also receives the return value
-define(`up', `%i1')	C minuend limbs
-define(`vp', `%i2')	C subtrahend limbs
-define(`n', `%i3')	C limb count
-define(`cy', `%i4')	C borrow-in for mpn_sub_nc, stored inverted (see below)
-
-define(`u0_off', `%l0')	C the *_off registers are end-relative bases indexed by the negative loop_n
-define(`u1_off', `%l1')
-define(`v0_off', `%l2')
-define(`v1_off', `%l3')
-define(`r0_off', `%l4')
-define(`r1_off', `%l5')
-define(`loop_n', `%l6')	C negative byte offset that counts up towards zero
-define(`tmp', `%l7')
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_sub_nc)	C like mpn_sub_n below but with a borrow-in in cy
-	save	%sp, -176, %sp
-	ba,pt	%xcc, L(ent)
-	xor	cy, 1, cy	C delay slot: invert, because the subtraction is done as u + ~v + carry
-EPILOGUE()
-PROLOGUE(mpn_sub_n)	C stores {up,n} - {vp,n} at rp and returns the borrow-out, 0 or 1
-	save	%sp, -176, %sp
-	mov	1, cy	C no borrow-in means carry-in 1 for u + ~v + carry
-L(ent):
-	subcc	n, 1, n
-	be	L(final_one)	C n == 1; decided on the subcc flags, before the delay slot changes them
-	cmp	%g0, cy	C delay slot: carry flag = (cy != 0), the seed of the chain
-
-	ldx	[up + 0], %o4
-	sllx	n, 3, tmp	C byte offset of the last limb
-
-	ldx	[vp + 0], %o5
-	add	up, tmp, u0_off
-
-	ldx	[up + 8], %g5
-	add	vp, tmp, v0_off
-
-	ldx	[vp + 8], %g1
-	add	rp, tmp, r0_off
-
-	neg	tmp, loop_n
-	add	u0_off, 8, u1_off
-
-	add	v0_off, 8, v1_off
-	sub	loop_n, -(2 * 8), loop_n	C two limbs are already loaded
-
-	sub	r0_off, 16, r0_off	C stores lag two limbs behind the loads
-	brgez,pn loop_n, L(loop_tail)	C two or three limbs in total: no loop iterations
-	sub	r0_off, 8, r1_off	C delay slot: r1 is used after loop_n has advanced, hence the net -8
-
-	b,a	L(top)
-	ALIGN(16)
-L(top):
-	xnor	%o5, 0, tmp	C ~v
-	ldx	[loop_n + v0_off], %o5
-
-	addxccc(%o4, tmp, %g3)	C u + ~v + carry
-	ldx	[loop_n + u0_off], %o4
-
-	xnor	%g1, 0, %g1
-	stx	%g3, [loop_n + r0_off]
-
-	addxccc(%g5, %g1, tmp)
-	ldx	[loop_n + v1_off], %g1
-
-	ldx	[loop_n + u1_off], %g5
-	sub	loop_n, -(2 * 8), loop_n
-
-	brlz	loop_n, L(top)
-	stx	tmp, [loop_n + r1_off]
-
-L(loop_tail):	C finish the two limbs already loaded
-	xnor	%o5, 0, tmp
-	xnor	%g1, 0, %g1
-
-	addxccc(%o4, tmp, %g3)
-	add	loop_n, u0_off, up	C turn the bases back into plain pointers for the tail
-
-	addxccc(%g5, %g1, %g5)
-	add	loop_n, r0_off, rp
-
-	stx	%g3, [rp + 0]
-	add	loop_n, v0_off, vp
-
-	brgz,pt	loop_n, L(done)	C loop_n > 0: the limb count was even, nothing is left
-	stx	%g5, [rp + 8]
-
-	add	rp, (2 * 8), rp
-
-L(final_one):	C exactly one limb remains
-	ldx	[up+0], %o4
-	ldx	[vp+0], %o5
-	xnor	%o5, %g0, %o5
-	addxccc(%o4, %o5, %g3)
-	stx	%g3, [rp+0]
-
-L(done):
-	clr	%i0
-	movcc	%xcc, 1, %i0	C borrow-out is the inverse of the final carry
-	ret
-	restore
-EPILOGUE()
diff --git a/gmp/mpn/sparc64/ultrasparct3/submul_1.asm b/gmp/mpn/sparc64/ultrasparct3/submul_1.asm
deleted file mode 100644
index 5635d1bdbd..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/submul_1.asm
+++ /dev/null
@@ -1,170 +0,0 @@
-dnl SPARC v9 mpn_submul_1 for T3/T4/T5.
-
-dnl Contributed to the GNU project by David Miller and Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C		   cycles/limb
-C UltraSPARC T3:	 26
-C UltraSPARC T4:	 4.5
-
-C INPUT PARAMETERS
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`n', `%i2')
-define(`v0', `%i3')
-
-C mpn_submul_1 (rp, up, n, v0)
-C
-C Multiply the limbs at up by the single limb v0, subtract the product from
-C the limbs at rp in place, and return the final carry limb (high product
-C part plus the last borrow) in %i0.
-C
-C Register use:
-C   %g3		carry limb passed on to the next limb position
-C   %g1		amount subtracted from the current rp limb
-C   %g4		result limb about to be stored
-C   %l0..%l7	products as low:high pairs %l0:%l1, %l2:%l3, %l4:%l5, %l6:%l7
-C   %o0, %o1	up limbs loaded ahead of use
-C   %o2, %o3	rp limbs loaded ahead of use
-C
-C Per limb: addxccc forms low product + carry limb + previous borrow, addxc
-C folds the resulting carry into the high product to give the next carry
-C limb, and subcc subtracts from the rp limb, leaving the borrow in the
-C carry flag for the next addxccc.
-C
-C The loop is unrolled four ways.  n mod 4 selects one of the feed-in blocks
-C L(b00)/L(b01)/L(b10)/L(b11), which bias up and rp and then enter the loop
-C at L(lo0)/L(lo1)/L(lo2)/L(top), or go straight to the wind-down code when
-C n < 4.
-C
-C NOTE(review): umulxhi, addxccc and addxc are macros defined outside this
-C file; the comments assume they are the 64x64 high-half multiply and the
-C add-with-64-bit-carry operations their names suggest.  n = 0 is not
-C treated specially (it takes the L(b00) path), so the code presumably
-C requires n >= 1 -- confirm against callers.
-
-ASM_START()
-	REGISTER(%g2,#scratch)
-	REGISTER(%g3,#scratch)
-PROLOGUE(mpn_submul_1)
-	save	%sp, -176, %sp
-	ldx	[up+0], %g1		C first up limb, needed on every path
-
-	and	n, 3, %g5		C %g5 = n mod 4
-	add	n, -4, n		C n < 0 from here on means no full loop pass
-	brz	%g5, L(b00)
-	 cmp	%g5, 2			C delay slot: flags for the two tests below
-	bcs	%xcc, L(b01)		C n mod 4 == 1
-	 nop
-	bne	%xcc, L(b11)		C n mod 4 == 3
-	 ldx	[up+8], %g4		C delay slot: second limb, for b10 and b11
-
-L(b10):	add	up, 16, up
-	addcc	%g0, 0, %g3		C carry limb = 0 and carry flag cleared
-	mulx	%g1, v0, %l4
-	umulxhi(%g1, v0, %l5)
-	ldx	[rp+0], %o2
-	mulx	%g4, v0, %l6
-	umulxhi(%g4, v0, %l7)
-	brlz	n, L(wd2)		C only two limbs in total
-	 nop
-L(gt2):	ldx	[up+0], %o0
-	b	L(lo2)
-	 nop
-
-L(b00):	add	rp, -16, rp
-	addcc	%g0, 0, %g3		C carry limb = 0 and carry flag cleared
-	ldx	[up+8], %o1
-	mulx	%g1, v0, %l0
-	umulxhi(%g1, v0, %l1)
-	ldx	[up+16], %o0
-	ldx	[rp+16], %o2
-	mulx	%o1, v0, %l2
-	umulxhi(%o1, v0, %l3)
-	b	L(lo0)
-	 nop
-
-L(b01):	add	up, 8, up
-	add	rp, -8, rp
-	addcc	%g0, 0, %g3		C carry limb = 0 and carry flag cleared
-	ldx	[rp+8], %o3
-	mulx	%g1, v0, %l6
-	umulxhi(%g1, v0, %l7)
-	brlz	n, L(wd1)		C only one limb in total
-	 nop
-	ldx	[up+0], %o0
-	ldx	[up+8], %o1
-	mulx	%o0, v0, %l0
-	umulxhi(%o0, v0, %l1)
-	b	L(lo1)
-	 nop
-
-L(b11):	add	up, 24, up
-	add	rp, 8, rp
-	addcc	%g0, 0, %g3		C carry limb = 0 and carry flag cleared
-	mulx	%g1, v0, %l2
-	umulxhi(%g1, v0, %l3)
-	ldx	[up-8], %o1
-	ldx	[rp-8], %o3
-	mulx	%g4, v0, %l4
-	umulxhi(%g4, v0, %l5)
-	brlz	n, L(end)		C only three limbs in total
-	 nop
-
-C Main loop, four limbs per pass.  The carry flag stays live from each subcc
-C to the following addxccc, including across the loop branch; none of the
-C instructions in between write the condition codes.
-	ALIGN(16)
-L(top):	ldx	[up+0], %o0
-	addxccc(%g3, %l2, %g1)
-	ldx	[rp+0], %o2
-	addxc(	%g0, %l3, %g3)
-	mulx	%o1, v0, %l6
-	subcc	%o3, %g1, %g4
-	umulxhi(%o1, v0, %l7)
-	stx	%g4, [rp-8]
-L(lo2):	ldx	[up+8], %o1
-	addxccc(%g3, %l4, %g1)
-	ldx	[rp+8], %o3
-	addxc(	%g0, %l5, %g3)
-	mulx	%o0, v0, %l0
-	subcc	%o2, %g1, %g4
-	umulxhi(%o0, v0, %l1)
-	stx	%g4, [rp+0]
-L(lo1):	ldx	[up+16], %o0
-	addxccc(%g3, %l6, %g1)
-	ldx	[rp+16], %o2
-	addxc(	%g0, %l7, %g3)
-	mulx	%o1, v0, %l2
-	subcc	%o3, %g1, %g4
-	umulxhi(%o1, v0, %l3)
-	stx	%g4, [rp+8]
-L(lo0):	ldx	[up+24], %o1
-	addxccc(%g3, %l0, %g1)
-	ldx	[rp+24], %o3
-	addxc(	%g0, %l1, %g3)
-	mulx	%o0, v0, %l4
-	subcc	%o2, %g1, %g4
-	umulxhi(%o0, v0, %l5)
-	stx	%g4, [rp+16]
-	add	n, -4, n
-	add	up, 32, up
-	brgez	n, L(top)
-	 add	rp, 32, rp		C delay slot: runs on loop exit as well
-
-C Wind-down: finish the three, two or one limbs still in flight.  No further
-C up limbs are loaded from here on.
-L(end):	addxccc(%g3, %l2, %g1)
-	ldx	[rp+0], %o2
-	addxc(	%g0, %l3, %g3)
-	mulx	%o1, v0, %l6
-	subcc	%o3, %g1, %g4
-	umulxhi(%o1, v0, %l7)
-	stx	%g4, [rp-8]
-L(wd2):	addxccc(%g3, %l4, %g1)
-	ldx	[rp+8], %o3
-	addxc(	%g0, %l5, %g3)
-	subcc	%o2, %g1, %g4
-	stx	%g4, [rp+0]
-L(wd1):	addxccc(%g3, %l6, %g1)
-	addxc(	%g0, %l7, %g3)
-	subcc	%o3, %g1, %g4
-	stx	%g4, [rp+8]
-	addxc(	%g0, %g3, %i0)		C return carry limb + final borrow
-	ret
-	 restore
-EPILOGUE() |