summaryrefslogtreecommitdiff
path: root/gmp/mpn/sparc64/ultrasparct3/aormul_4.asm
diff options
context:
space:
mode:
Diffstat (limited to 'gmp/mpn/sparc64/ultrasparct3/aormul_4.asm')
-rw-r--r--	gmp/mpn/sparc64/ultrasparct3/aormul_4.asm	219
1 files changed, 0 insertions, 219 deletions
diff --git a/gmp/mpn/sparc64/ultrasparct3/aormul_4.asm b/gmp/mpn/sparc64/ultrasparct3/aormul_4.asm
deleted file mode 100644
index 845f6d6d69..0000000000
--- a/gmp/mpn/sparc64/ultrasparct3/aormul_4.asm
+++ /dev/null
@@ -1,219 +0,0 @@
-dnl SPARC v9 mpn_mul_4 and mpn_addmul_4 for T3/T4/T5.
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-
-C cycles/limb cycles/limb
-C mul_4 addmul_4
-C UltraSPARC T3: 21.5 22.0
-C UltraSPARC T4: 2.625 2.75
-
-
-C The code is well-scheduled and relies on OoO very little. There is hope that
-C this will run at around 2.5 and 2.75 c/l respectively, on T4.
-
-define(`rp', `%i0')
-define(`up', `%i1')
-define(`n', `%i2')
-define(`vp', `%i3')
-
-define(`v0', `%g1')
-define(`v1', `%o7')
-define(`v2', `%g2')
-define(`v3', `%i3')
-
-define(`w0', `%o0')
-define(`w1', `%o1')
-define(`w2', `%o2')
-define(`w3', `%o3')
-define(`w4', `%o4')
-
-define(`r0', `%o5')
-
-define(`u0', `%i4')
-define(`u1', `%i5')
-
-define(`rp0', `rp')
-define(`rp1', `%g3')
-define(`rp2', `%g4')
-define(`up0', `up')
-define(`up1', `%g5')
-
-ifdef(`OPERATION_mul_4',`
- define(`AM4', `')
- define(`ADDX', `addcc`'$1')
- define(`func', `mpn_mul_4')
-')
-ifdef(`OPERATION_addmul_4',`
- define(`AM4', `$1')
- define(`ADDX', `addxccc($1,$2,$3)')
- define(`func', `mpn_addmul_4')
-')
-
-
-MULFUNC_PROLOGUE(mpn_mul_4 mpn_addmul_4)
-
-ASM_START()
- REGISTER(%g2,#scratch)
- REGISTER(%g3,#scratch)
-PROLOGUE(func)
- save %sp, -176, %sp
-
- ldx [up + 0], u1 C load up[0] early
- andcc n, 1, %g0 C is n odd?
- ldx [vp + 0], v0
- sllx n, 3, n
- ldx [vp + 8], v1
- add n, -28, n
- ldx [vp + 16], v2
- add rp, -16, rp
- ldx [vp + 24], v3
- add up, n, up0
- add rp, n, rp0
- add up0, 8, up1
- add rp0, 8, rp1
- add rp0, 16, rp2
- mulx u1, v0, %l0
- mov 0, w0
- mulx u1, v1, %l1
- mov 0, w1
- mulx u1, v2, %l2
- mov 0, w2
- mulx u1, v3, %l3
- mov 0, w3
-
- be L(evn)
- neg n, n
-
-L(odd): mov u1, u0
- ldx [up1 + n], u1
-AM4(` ldx [rp2 + n], r0')
- umulxhi(u0, v0, %l4)
- umulxhi(u0, v1, %l5)
- umulxhi(u0, v2, %l6)
- umulxhi(u0, v3, %l7)
- b L(mid)
- add n, 8, n
-
-L(evn): ldx [up1 + n], u0
-AM4(` ldx [rp2 + n], r0')
- umulxhi(u1, v0, %l4)
- umulxhi(u1, v1, %l5)
- umulxhi(u1, v2, %l6)
- umulxhi(u1, v3, %l7)
- add n, 16, n
-
- ALIGN(16)
-L(top): addcc %l0, w0, w0
- mulx u0, v0, %l0 C w 0
- addxccc(%l1, w1, w1)
- mulx u0, v1, %l1 C w 1
- addxccc(%l2, w2, w2)
- mulx u0, v2, %l2 C w 2
- addxccc(%l3, w3, w3)
- mulx u0, v3, %l3 C w 3
- ldx [up0 + n], u1
- addxc( %g0, %g0, w4)
-AM4(` addcc r0, w0, w0')
- stx w0, [rp0 + n]
- ADDX(` %l4, w1, w0')
- umulxhi(u0, v0, %l4) C w 1
-AM4(` ldx [rp1 + n], r0')
- addxccc(%l5, w2, w1)
- umulxhi(u0, v1, %l5) C w 2
- addxccc(%l6, w3, w2)
- umulxhi(u0, v2, %l6) C w 3
- addxc( %l7, w4, w3)
- umulxhi(u0, v3, %l7) C w 4
-L(mid): addcc %l0, w0, w0
- mulx u1, v0, %l0 C w 1
- addxccc(%l1, w1, w1)
- mulx u1, v1, %l1 C w 2
- addxccc(%l2, w2, w2)
- mulx u1, v2, %l2 C w 3
- addxccc(%l3, w3, w3)
- mulx u1, v3, %l3 C w 4
- ldx [up1 + n], u0
- addxc( %g0, %g0, w4)
-AM4(` addcc r0, w0, w0')
- stx w0, [rp1 + n]
- ADDX(` %l4, w1, w0')
- umulxhi(u1, v0, %l4) C w 2
-AM4(` ldx [rp2 + n], r0')
- addxccc(%l5, w2, w1)
- umulxhi(u1, v1, %l5) C w 3
- addxccc(%l6, w3, w2)
- umulxhi(u1, v2, %l6) C w 4
- addxc( %l7, w4, w3)
- umulxhi(u1, v3, %l7) C w 5
- brlz n, L(top)
- add n, 16, n
-
-L(end): addcc %l0, w0, w0
- mulx u0, v0, %l0
- addxccc(%l1, w1, w1)
- mulx u0, v1, %l1
- addxccc(%l2, w2, w2)
- mulx u0, v2, %l2
- addxccc(%l3, w3, w3)
- mulx u0, v3, %l3
- addxc( %g0, %g0, w4)
-AM4(` addcc r0, w0, w0')
- stx w0, [rp0 + n]
- ADDX(` %l4, w1, w0')
- umulxhi(u0, v0, %l4)
-AM4(` ldx [rp1 + n], r0')
- addxccc(%l5, w2, w1)
- umulxhi(u0, v1, %l5)
- addxccc(%l6, w3, w2)
- umulxhi(u0, v2, %l6)
- addxc( %l7, w4, w3)
- umulxhi(u0, v3, %l7)
- addcc %l0, w0, w0
- addxccc(%l1, w1, w1)
- addxccc(%l2, w2, w2)
- addxccc(%l3, w3, w3)
- addxc( %g0, %g0, w4)
-AM4(` addcc r0, w0, w0')
- stx w0, [rp1 + n]
- ADDX(` %l4, w1, w0')
- addxccc(%l5, w2, w1)
- addxccc(%l6, w3, w2)
- stx w0, [rp2 + n]
- add n, 16, n
- stx w1, [rp1 + n]
- stx w2, [rp2 + n]
- addxc( %l7, w4, %i0)
- ret
- restore
-EPILOGUE()