author	Adhemerval Zanella <adhemerval.zanella@linaro.org>	2019-11-13 12:32:17 +0000
committer	Adhemerval Zanella <adhemerval.zanella@linaro.org>	2019-11-27 09:37:57 -0300
commit	5d9b7b9fa734c5381e0295c85c0e40520d9f6063 (patch)
tree	d8f050c206fdbe03167d31f8ff3916e2d52d8dc1 /sysdeps/sparc/sparc32/mul_1.S
parent	bfdb731438206b0f70fe7afa890681155c30b419 (diff)
download	glibc-5d9b7b9fa734c5381e0295c85c0e40520d9f6063.tar.gz
Remove 32 bit sparc v7 support
The patch is straightforward:

- The sparc32 v8 implementations are moved to be the generic ones.
- A configure test is added to check for either __sparc_v8__ or
  __sparc_v9__.
- The triple names are simplified, and sparc now implies sparcv8.

The idea is to keep support for sparcv8 architectures that do support
CAS instructions, such as LEON3/LEON4.

Checked on sparcv9-linux-gnu and sparc64-linux-gnu.

Tested-by: Andreas Larsson <andreas@gaisler.com>
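The configure probe itself lies outside this file's diff. A minimal
sketch in C of the kind of compile-time check the message describes
(illustrative only; the actual glibc configure fragment may differ):

    /* Fails to compile unless the compiler already targets SPARC v8
       or v9, i.e. a CPU with the hardware integer multiply (umul)
       that the rewritten mul_1.S below relies on.  */
    #if !defined (__sparc_v8__) && !defined (__sparc_v9__)
    # error "compiler does not target at least sparcv8"
    #endif
    int probe_sparcv8;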
Diffstat (limited to 'sysdeps/sparc/sparc32/mul_1.S')
-rw-r--r--	sysdeps/sparc/sparc32/mul_1.S	| 244
1 file changed, 74 insertions(+), 170 deletions(-)
diff --git a/sysdeps/sparc/sparc32/mul_1.S b/sysdeps/sparc/sparc32/mul_1.S
index acce57ec12..86619c71d6 100644
--- a/sysdeps/sparc/sparc32/mul_1.S
+++ b/sysdeps/sparc/sparc32/mul_1.S
@@ -1,198 +1,102 @@
-! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
-! the result in a second limb vector.
-!
+! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+! store the product in a second limb vector.
+
! Copyright (C) 1992-2019 Free Software Foundation, Inc.
-!
+
! This file is part of the GNU MP Library.
-!
+
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.
-!
+
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
! License for more details.
-!
+
! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not,
! see <https://www.gnu.org/licenses/>.
! INPUT PARAMETERS
-! RES_PTR o0
-! S1_PTR o1
-! SIZE o2
-! S2_LIMB o3
-
-! ADD CODE FOR SMALL MULTIPLIERS!
-!1: ld
-! st
-!
-!2: ld ,a
-! addxcc a,a,x
-! st x,
-!
-!3_unrolled:
-! ld ,a
-! addxcc a,a,x1 ! 2a + cy
-! addx %g0,%g0,x2
-! addcc a,x1,x ! 3a + c
-! st x,
-!
-! ld ,a
-! addxcc a,a,y1
-! addx %g0,%g0,y2
-! addcc a,y1,x
-! st x,
-!
-!4_unrolled:
-! ld ,a
-! srl a,2,x1 ! 4a
-! addxcc y2,x1,x
-! sll a,30,x2
-! st x,
-!
-! ld ,a
-! srl a,2,y1
-! addxcc x2,y1,y
-! sll a,30,y2
-! st x,
-!
-!5_unrolled:
-! ld ,a
-! srl a,2,x1 ! 4a
-! addxcc a,x1,x ! 5a + c
-! sll a,30,x2
-! addx %g0,x2,x2
-! st x,
-!
-! ld ,a
-! srl a,2,y1
-! addxcc a,y1,x
-! sll a,30,y2
-! addx %g0,y2,y2
-! st x,
-!
-!8_unrolled:
-! ld ,a
-! srl a,3,x1 ! 8a
-! addxcc y2,x1,x
-! sll a,29,x2
-! st x,
-!
-! ld ,a
-! srl a,3,y1
-! addxcc x2,y1,y
-! sll a,29,y2
-! st x,
+! res_ptr o0
+! s1_ptr o1
+! size o2
+! s2_limb o3
#include <sysdep.h>
ENTRY(__mpn_mul_1)
- ! Make S1_PTR and RES_PTR point at the end of their blocks
- ! and put (- 4 x SIZE) in index/loop counter.
- sll %o2,2,%o2
- add %o0,%o2,%o4 ! RES_PTR in o4 since o0 is retval
- add %o1,%o2,%o1
- sub %g0,%o2,%o2
+ sll %o2,4,%g1
+ mov %o7,%g4 ! Save return address register
+ and %g1,(4-1)<<4,%g1
+1: call 2f
+ add %o7,3f-1b,%g3
+2: mov %g4,%o7 ! Restore return address register
+ jmp %g3+%g1
+ ld [%o1+0],%o4 ! 1
- cmp %o3,0xfff
- bgu LOC(large)
+ .align 4
+3:
+LOC(00):
+ add %o0,-4,%o0
+ add %o1,-4,%o1
+ b LOC(loop00) /* 4, 8, 12, ... */
+ orcc %g0,%g0,%g2
+LOC(01):
+ b LOC(loop01) /* 1, 5, 9, ... */
+ orcc %g0,%g0,%g2
nop
+ nop
+LOC(10):
+ add %o0,-12,%o0 /* 2, 6, 10, ... */
+ add %o1,4,%o1
+ b LOC(loop10)
+ orcc %g0,%g0,%g2
+ nop
+LOC(11):
+ add %o0,-8,%o0 /* 3, 7, 11, ... */
+ add %o1,-8,%o1
+ b LOC(loop11)
+ orcc %g0,%g0,%g2
- ld [%o1+%o2],%o5
- mov 0,%o0
- b LOC(0)
- add %o4,-4,%o4
-LOC(loop0):
- st %g1,[%o4+%o2]
-LOC(0): wr %g0,%o3,%y
- sra %o5,31,%g2
- and %o3,%g2,%g2
- andcc %g1,0,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,%o5,%g1
- mulscc %g1,0,%g1
- sra %g1,20,%g4
- sll %g1,12,%g1
- rd %y,%g3
- srl %g3,20,%g3
- or %g1,%g3,%g1
-
- addcc %g1,%o0,%g1
- addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
- addcc %o2,4,%o2 ! loop counter
- bne,a LOC(loop0)
- ld [%o1+%o2],%o5
-
- retl
- st %g1,[%o4+%o2]
-
-
-LOC(large):
- ld [%o1+%o2],%o5
- mov 0,%o0
- sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
- b LOC(1)
- add %o4,-4,%o4
LOC(loop):
- st %g3,[%o4+%o2]
-LOC(1): wr %g0,%o5,%y
- and %o5,%g4,%g2 ! g2 = S1_LIMB iff S2_LIMB < 0, else 0
- andcc %g0,%g0,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%o3,%g1
- mulscc %g1,%g0,%g1
- rd %y,%g3
- addcc %g3,%o0,%g3
- addx %g2,%g1,%o0 ! add sign-compensation and cy to hi limb
- addcc %o2,4,%o2 ! loop counter
- bne,a LOC(loop)
- ld [%o1+%o2],%o5
+ addcc %g3,%g2,%g3 ! 1
+ ld [%o1+4],%o4 ! 2
+ st %g3,[%o0+0] ! 1
+ rd %y,%g2 ! 1
+LOC(loop00):
+ umul %o4,%o3,%g3 ! 2
+ addxcc %g3,%g2,%g3 ! 2
+ ld [%o1+8],%o4 ! 3
+ st %g3,[%o0+4] ! 2
+ rd %y,%g2 ! 2
+LOC(loop11):
+ umul %o4,%o3,%g3 ! 3
+ addxcc %g3,%g2,%g3 ! 3
+ ld [%o1+12],%o4 ! 4
+ add %o1,16,%o1
+ st %g3,[%o0+8] ! 3
+ rd %y,%g2 ! 3
+LOC(loop10):
+ umul %o4,%o3,%g3 ! 4
+ addxcc %g3,%g2,%g3 ! 4
+ ld [%o1+0],%o4 ! 1
+ st %g3,[%o0+12] ! 4
+ add %o0,16,%o0
+ rd %y,%g2 ! 4
+ addx %g0,%g2,%g2
+LOC(loop01):
+ addcc %o2,-4,%o2
+ bg LOC(loop)
+ umul %o4,%o3,%g3 ! 1
+ addcc %g3,%g2,%g3 ! 4
+ st %g3,[%o0+0] ! 4
+ rd %y,%g2 ! 4
retl
- st %g3,[%o4+%o2]
+ addx %g0,%g2,%o0
END(__mpn_mul_1)
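Both the removed v7 body (mulscc multiply steps) and the new v8 body
(umul plus rd %y) implement the same contract: multiply the size-limb
vector at s1_ptr by s2_limb, store the low limbs at res_ptr, and
return the final high limb. A minimal C model of that contract
(illustrative only; ref_mpn_mul_1 is a hypothetical name for this
sketch, not a glibc API):

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    /* Reference model of __mpn_mul_1: multiply a size-limb vector by
       a single limb, store the low limbs, return the final carry
       (high) limb.  */
    static uint32_t
    ref_mpn_mul_1 (uint32_t *res_ptr, const uint32_t *s1_ptr,
                   long size, uint32_t s2_limb)
    {
      uint32_t carry = 0;
      for (long i = 0; i < size; i++)
        {
          /* 32x32 -> 64-bit product, as v8 umul + rd %y produce.  */
          uint64_t prod = (uint64_t) s1_ptr[i] * s2_limb + carry;
          res_ptr[i] = (uint32_t) prod;      /* low limb to memory */
          carry = (uint32_t) (prod >> 32);   /* feeds the next step */
        }
      return carry;
    }

    int
    main (void)
    {
      /* (2^64 - 1) * 3: low limbs 0xfffffffd, 0xffffffff; the high
         limb 2 comes back as the return value.  */
      uint32_t src[2] = { 0xffffffffu, 0xffffffffu };
      uint32_t dst[2];
      uint32_t hi = ref_mpn_mul_1 (dst, src, 2, 3);
      printf ("hi=%" PRIu32 " lo=[%#" PRIx32 ",%#" PRIx32 "]\n",
              hi, dst[0], dst[1]);
      return 0;
    }

The v8 version unrolls this loop four ways: the call/jmp sequence at
entry computes (size mod 4) * 16 and jumps that many bytes into the
table of entry stubs LOC(00)..LOC(11), each of which adjusts %o0/%o1
as needed before branching into the unrolled loop body at the matching
LOC(loopNN) point.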