summaryrefslogtreecommitdiff
path: root/mpn/pa64
diff options
context:
space:
mode:
authortege <tege@gmplib.org>1997-09-07 04:16:42 +0200
committertege <tege@gmplib.org>1997-09-07 04:16:42 +0200
commit8cea4ee78e9981429e3d3e95c9169dac17d55b23 (patch)
treea1f04a6a6ea044476a7fde890e9337d58b44a507 /mpn/pa64
parent630a40df269c638a322a9b5f5a084c2efe966310 (diff)
downloadgmp-8cea4ee78e9981429e3d3e95c9169dac17d55b23.tar.gz
New files optimized for PA8000.
Diffstat (limited to 'mpn/pa64')
-rw-r--r--mpn/pa64/add_n.s90
-rw-r--r--mpn/pa64/lshift.s103
-rw-r--r--mpn/pa64/rshift.s100
-rw-r--r--mpn/pa64/sub_n.s90
4 files changed, 383 insertions, 0 deletions
diff --git a/mpn/pa64/add_n.s b/mpn/pa64/add_n.s
new file mode 100644
index 000000000..0bb1ab051
--- /dev/null
+++ b/mpn/pa64/add_n.s
@@ -0,0 +1,90 @@
+; HP-PA 2.0 __mpn_add_n -- Add two limb vectors of the same length > 0 and
+; store sum in a third limb vector.
+
+; Copyright (C) 1997 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .level 2.0n
+ .code
+ .export __mpn_add_n
+__mpn_add_n
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ sub %r0,%r23,%r22
+ depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7)
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ sub %r26,%r22,%r26 ; offset res_ptr
+ blr %r28,%r0 ; branch into loop
+ add %r0,%r0,%r0 ; reset carry
+
+L$loop ldd 0(%r25),%r20
+ ldd 0(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,0(%r26)
+L$7 ldd 8(%r25),%r21
+ ldd 8(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,8(%r26)
+L$6 ldd 16(%r25),%r20
+ ldd 16(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,16(%r26)
+L$5 ldd 24(%r25),%r21
+ ldd 24(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,24(%r26)
+L$4 ldd 32(%r25),%r20
+ ldd 32(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,32(%r26)
+L$3 ldd 40(%r25),%r21
+ ldd 40(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,40(%r26)
+L$2 ldd 48(%r25),%r20
+ ldd 48(%r24),%r31
+ add,dc %r20,%r31,%r20
+ std %r20,48(%r26)
+L$1 ldd 56(%r25),%r21
+ ldo 64(%r25),%r25
+ ldd 56(%r24),%r19
+ add,dc %r21,%r19,%r21
+ std %r21,56(%r26)
+ ldo 64(%r24),%r24
+ addib,> -8,%r23,L$loop
+ ldo 64(%r26),%r26
+
+ add,dc %r0,%r0,%r29
+ bve (%r2)
+ .exit
+ ldi 0,%r28
+ .procend
diff --git a/mpn/pa64/lshift.s b/mpn/pa64/lshift.s
new file mode 100644
index 000000000..eaefec8c2
--- /dev/null
+++ b/mpn/pa64/lshift.s
@@ -0,0 +1,103 @@
+; HP-PA 2.0 __mpn_lshift --
+
+; Copyright (C) 1997 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; size gr24
+; cnt gr23
+
+; This runs at 1.5 cycles/limb on PA8000.
+
+ .level 2.0n
+ .code
+ .export __mpn_lshift
+__mpn_lshift
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ shladd %r24,3,%r25,%r25
+ shladd %r24,3,%r26,%r26
+ subi 64,%r23,%r23
+ mtsar %r23
+ ldd -8(%r25),%r21
+ addib,= -1,%r24,L$end
+ shrpd %r0,%r21,%sar,%r29 ; compute carry out limb
+ depw,z %r24,31,3,%r28 ; r28 = (size & 7)
+ sub %r0,%r24,%r22
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7)
+ add %r25,%r22,%r25 ; offset s1_ptr
+ blr %r28,%r0 ; branch into jump table
+ add %r26,%r22,%r26 ; offset res_ptr
+ b L$0
+ nop
+ b L$1
+ copy %r21,%r20
+ b L$2
+ nop
+ b L$3
+ copy %r21,%r20
+ b L$4
+ nop
+ b L$5
+ copy %r21,%r20
+ b L$6
+ nop
+ b L$7
+ copy %r21,%r20
+
+L$loop
+L$0 ldd -16(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-8(%r26)
+L$7 ldd -24(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-16(%r26)
+L$6 ldd -32(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-24(%r26)
+L$5 ldd -40(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-32(%r26)
+L$4 ldd -48(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-40(%r26)
+L$3 ldd -56(%r25),%r21
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-48(%r26)
+L$2 ldd -64(%r25),%r20
+ shrpd %r21,%r20,%sar,%r21
+ std %r21,-56(%r26)
+L$1 ldd -72(%r25),%r21
+ ldo -64(%r25),%r25
+ shrpd %r20,%r21,%sar,%r20
+ std %r20,-64(%r26)
+ addib,> -8,%r24,L$loop
+ ldo -64(%r26),%r26
+
+L$end shrpd %r21,%r0,%sar,%r21
+ std %r21,-8(%r26)
+ bve (%r2)
+ .exit
+ extrd,u %r29,31,32,%r28
+ .procend
diff --git a/mpn/pa64/rshift.s b/mpn/pa64/rshift.s
new file mode 100644
index 000000000..5ff956b4c
--- /dev/null
+++ b/mpn/pa64/rshift.s
@@ -0,0 +1,100 @@
+; HP-PA 2.0 __mpn_rshift --
+
+; Copyright (C) 1997 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; size gr24
+; cnt gr23
+
+; This runs at 1.5 cycles/limb on PA8000.
+
+ .level 2.0n
+ .code
+ .export __mpn_rshift
+__mpn_rshift
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ mtsar %r23
+ ldd 0(%r25),%r21
+ addib,= -1,%r24,L$end
+ shrpd %r21,%r0,%sar,%r29 ; compute carry out limb
+ depw,z %r24,31,3,%r28 ; r28 = (size & 7)
+ sub %r0,%r24,%r22
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ blr %r28,%r0 ; branch into jump table
+ sub %r26,%r22,%r26 ; offset res_ptr
+ b L$0
+ nop
+ b L$1
+ copy %r21,%r20
+ b L$2
+ nop
+ b L$3
+ copy %r21,%r20
+ b L$4
+ nop
+ b L$5
+ copy %r21,%r20
+ b L$6
+ nop
+ b L$7
+ copy %r21,%r20
+
+L$loop
+L$0 ldd 8(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,0(%r26)
+L$7 ldd 16(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,8(%r26)
+L$6 ldd 24(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,16(%r26)
+L$5 ldd 32(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,24(%r26)
+L$4 ldd 40(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,32(%r26)
+L$3 ldd 48(%r25),%r21
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,40(%r26)
+L$2 ldd 56(%r25),%r20
+ shrpd %r20,%r21,%sar,%r21
+ std %r21,48(%r26)
+L$1 ldd 64(%r25),%r21
+ ldo 64(%r25),%r25
+ shrpd %r21,%r20,%sar,%r20
+ std %r20,56(%r26)
+ addib,> -8,%r24,L$loop
+ ldo 64(%r26),%r26
+
+L$end shrpd %r0,%r21,%sar,%r21
+ std %r21,0(%r26)
+ bve (%r2)
+ .exit
+ extrd,u %r29,31,32,%r28
+ .procend
diff --git a/mpn/pa64/sub_n.s b/mpn/pa64/sub_n.s
new file mode 100644
index 000000000..23a0462d0
--- /dev/null
+++ b/mpn/pa64/sub_n.s
@@ -0,0 +1,90 @@
+; HP-PA 2.0 __mpn_sub_n -- Subtract two limb vectors of the same length > 0
+; and store difference in a third limb vector.
+
+; Copyright (C) 1997 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr gr26
+; s1_ptr gr25
+; s2_ptr gr24
+; size gr23
+
+; This runs at 2 cycles/limb on PA8000.
+
+ .level 2.0n
+ .code
+ .export __mpn_sub_n
+__mpn_sub_n
+ .proc
+ .callinfo frame=0,args_saved
+ .entry
+
+ sub %r0,%r23,%r22
+ depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7)
+ depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7)
+ sub %r25,%r22,%r25 ; offset s1_ptr
+ sub %r24,%r22,%r24 ; offset s2_ptr
+ blr %r28,%r0 ; branch into loop
+ sub %r26,%r22,%r26 ; offset res_ptr and set carry
+
+L$loop ldd 0(%r25),%r20
+ ldd 0(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,0(%r26)
+L$7 ldd 8(%r25),%r21
+ ldd 8(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,8(%r26)
+L$6 ldd 16(%r25),%r20
+ ldd 16(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,16(%r26)
+L$5 ldd 24(%r25),%r21
+ ldd 24(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,24(%r26)
+L$4 ldd 32(%r25),%r20
+ ldd 32(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,32(%r26)
+L$3 ldd 40(%r25),%r21
+ ldd 40(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,40(%r26)
+L$2 ldd 48(%r25),%r20
+ ldd 48(%r24),%r31
+ sub,db %r20,%r31,%r20
+ std %r20,48(%r26)
+L$1 ldd 56(%r25),%r21
+ ldo 64(%r25),%r25
+ ldd 56(%r24),%r19
+ sub,db %r21,%r19,%r21
+ std %r21,56(%r26)
+ ldo 64(%r24),%r24
+ addib,> -8,%r23,L$loop
+ ldo 64(%r26),%r26
+
+ add,dc %r0,%r0,%r29
+ subi 1,%r29,%r29
+ bve (%r2)
+ .exit
+ ldi 0,%r28
+ .procend