diff options
author | tege <tege@gmplib.org> | 1997-09-07 04:16:42 +0200 |
---|---|---|
committer | tege <tege@gmplib.org> | 1997-09-07 04:16:42 +0200 |
commit | 8cea4ee78e9981429e3d3e95c9169dac17d55b23 (patch) | |
tree | a1f04a6a6ea044476a7fde890e9337d58b44a507 /mpn/pa64 | |
parent | 630a40df269c638a322a9b5f5a084c2efe966310 (diff) | |
download | gmp-8cea4ee78e9981429e3d3e95c9169dac17d55b23.tar.gz |
New files optimized for PA8000.
Diffstat (limited to 'mpn/pa64')
-rw-r--r-- | mpn/pa64/add_n.s | 90 | ||||
-rw-r--r-- | mpn/pa64/lshift.s | 103 | ||||
-rw-r--r-- | mpn/pa64/rshift.s | 100 | ||||
-rw-r--r-- | mpn/pa64/sub_n.s | 90 |
4 files changed, 383 insertions, 0 deletions
diff --git a/mpn/pa64/add_n.s b/mpn/pa64/add_n.s new file mode 100644 index 000000000..0bb1ab051 --- /dev/null +++ b/mpn/pa64/add_n.s @@ -0,0 +1,90 @@ +; HP-PA 2.0 __mpn_add_n -- Add two limb vectors of the same length > 0 and +; store sum in a third limb vector. + +; Copyright (C) 1997 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + +; This runs at 2 cycles/limb on PA8000. + + .level 2.0n + .code + .export __mpn_add_n +__mpn_add_n + .proc + .callinfo frame=0,args_saved + .entry + + sub %r0,%r23,%r22 + depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7) + depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7) + sub %r25,%r22,%r25 ; offset s1_ptr + sub %r24,%r22,%r24 ; offset s2_ptr + sub %r26,%r22,%r26 ; offset res_ptr + blr %r28,%r0 ; branch into loop + add %r0,%r0,%r0 ; reset carry + +L$loop ldd 0(%r25),%r20 + ldd 0(%r24),%r31 + add,dc %r20,%r31,%r20 + std %r20,0(%r26) +L$7 ldd 8(%r25),%r21 + ldd 8(%r24),%r19 + add,dc %r21,%r19,%r21 + std %r21,8(%r26) +L$6 ldd 16(%r25),%r20 + ldd 16(%r24),%r31 + add,dc %r20,%r31,%r20 + std %r20,16(%r26) +L$5 ldd 24(%r25),%r21 + ldd 24(%r24),%r19 + add,dc %r21,%r19,%r21 + std %r21,24(%r26) +L$4 ldd 32(%r25),%r20 + ldd 32(%r24),%r31 + add,dc %r20,%r31,%r20 + std %r20,32(%r26) +L$3 ldd 40(%r25),%r21 + ldd 40(%r24),%r19 + add,dc %r21,%r19,%r21 + std %r21,40(%r26) +L$2 ldd 48(%r25),%r20 + ldd 48(%r24),%r31 + add,dc %r20,%r31,%r20 + std %r20,48(%r26) +L$1 ldd 56(%r25),%r21 + ldo 64(%r25),%r25 + ldd 56(%r24),%r19 + add,dc %r21,%r19,%r21 + std %r21,56(%r26) + ldo 64(%r24),%r24 + addib,> -8,%r23,L$loop + ldo 64(%r26),%r26 + + add,dc %r0,%r0,%r29 + bve (%r2) + .exit + ldi 0,%r28 + .procend diff --git a/mpn/pa64/lshift.s b/mpn/pa64/lshift.s new file mode 100644 index 000000000..eaefec8c2 --- /dev/null +++ b/mpn/pa64/lshift.s @@ -0,0 +1,103 @@ +; HP-PA 2.0 __mpn_lshift -- + +; Copyright (C) 1997 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; size gr24 +; cnt gr23 + +; This runs at 1.5 cycles/limb on PA8000. + + .level 2.0n + .code + .export __mpn_lshift +__mpn_lshift + .proc + .callinfo frame=0,args_saved + .entry + + shladd %r24,3,%r25,%r25 + shladd %r24,3,%r26,%r26 + subi 64,%r23,%r23 + mtsar %r23 + ldd -8(%r25),%r21 + addib,= -1,%r24,L$end + shrpd %r0,%r21,%sar,%r29 ; compute carry out limb + depw,z %r24,31,3,%r28 ; r28 = (size & 7) + sub %r0,%r24,%r22 + depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7) + add %r25,%r22,%r25 ; offset s1_ptr + blr %r28,%r0 ; branch into jump table + add %r26,%r22,%r26 ; offset res_ptr + b L$0 + nop + b L$1 + copy %r21,%r20 + b L$2 + nop + b L$3 + copy %r21,%r20 + b L$4 + nop + b L$5 + copy %r21,%r20 + b L$6 + nop + b L$7 + copy %r21,%r20 + +L$loop +L$0 ldd -16(%r25),%r20 + shrpd %r21,%r20,%sar,%r21 + std %r21,-8(%r26) +L$7 ldd -24(%r25),%r21 + shrpd %r20,%r21,%sar,%r20 + std %r20,-16(%r26) +L$6 ldd -32(%r25),%r20 + shrpd %r21,%r20,%sar,%r21 + std %r21,-24(%r26) +L$5 ldd -40(%r25),%r21 + shrpd %r20,%r21,%sar,%r20 + std %r20,-32(%r26) +L$4 ldd -48(%r25),%r20 + shrpd %r21,%r20,%sar,%r21 + std %r21,-40(%r26) +L$3 ldd -56(%r25),%r21 + shrpd %r20,%r21,%sar,%r20 + std %r20,-48(%r26) +L$2 ldd -64(%r25),%r20 + shrpd %r21,%r20,%sar,%r21 + std %r21,-56(%r26) +L$1 ldd -72(%r25),%r21 + ldo -64(%r25),%r25 + shrpd %r20,%r21,%sar,%r20 + std %r20,-64(%r26) + addib,> -8,%r24,L$loop + ldo -64(%r26),%r26 + +L$end shrpd %r21,%r0,%sar,%r21 + std %r21,-8(%r26) + bve (%r2) + .exit + extrd,u %r29,31,32,%r28 + .procend diff --git a/mpn/pa64/rshift.s b/mpn/pa64/rshift.s new file mode 100644 index 000000000..5ff956b4c --- /dev/null +++ b/mpn/pa64/rshift.s @@ -0,0 +1,100 @@ +; HP-PA 2.0 __mpn_rshift -- + +; Copyright (C) 1997 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; size gr24 +; cnt gr23 + +; This runs at 1.5 cycles/limb on PA8000. + + .level 2.0n + .code + .export __mpn_rshift +__mpn_rshift + .proc + .callinfo frame=0,args_saved + .entry + + mtsar %r23 + ldd 0(%r25),%r21 + addib,= -1,%r24,L$end + shrpd %r21,%r0,%sar,%r29 ; compute carry out limb + depw,z %r24,31,3,%r28 ; r28 = (size & 7) + sub %r0,%r24,%r22 + depw,z %r22,28,3,%r22 ; r22 = 8 * (-size & 7) + sub %r25,%r22,%r25 ; offset s1_ptr + blr %r28,%r0 ; branch into jump table + sub %r26,%r22,%r26 ; offset res_ptr + b L$0 + nop + b L$1 + copy %r21,%r20 + b L$2 + nop + b L$3 + copy %r21,%r20 + b L$4 + nop + b L$5 + copy %r21,%r20 + b L$6 + nop + b L$7 + copy %r21,%r20 + +L$loop +L$0 ldd 8(%r25),%r20 + shrpd %r20,%r21,%sar,%r21 + std %r21,0(%r26) +L$7 ldd 16(%r25),%r21 + shrpd %r21,%r20,%sar,%r20 + std %r20,8(%r26) +L$6 ldd 24(%r25),%r20 + shrpd %r20,%r21,%sar,%r21 + std %r21,16(%r26) +L$5 ldd 32(%r25),%r21 + shrpd %r21,%r20,%sar,%r20 + std %r20,24(%r26) +L$4 ldd 40(%r25),%r20 + shrpd %r20,%r21,%sar,%r21 + std %r21,32(%r26) +L$3 ldd 48(%r25),%r21 + shrpd %r21,%r20,%sar,%r20 + std %r20,40(%r26) +L$2 ldd 56(%r25),%r20 + shrpd %r20,%r21,%sar,%r21 + std %r21,48(%r26) +L$1 ldd 64(%r25),%r21 + ldo 64(%r25),%r25 + shrpd %r21,%r20,%sar,%r20 + std %r20,56(%r26) + addib,> -8,%r24,L$loop + ldo 64(%r26),%r26 + +L$end shrpd %r0,%r21,%sar,%r21 + std %r21,0(%r26) + bve (%r2) + .exit + extrd,u %r29,31,32,%r28 + .procend diff --git a/mpn/pa64/sub_n.s b/mpn/pa64/sub_n.s new file mode 100644 index 000000000..23a0462d0 --- /dev/null +++ b/mpn/pa64/sub_n.s @@ -0,0 +1,90 @@ +; HP-PA 2.0 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 +; and store difference in a third limb vector. + +; Copyright (C) 1997 Free Software Foundation, Inc. + +; This file is part of the GNU MP Library. + +; The GNU MP Library is free software; you can redistribute it and/or modify +; it under the terms of the GNU Library General Public License as published by +; the Free Software Foundation; either version 2 of the License, or (at your +; option) any later version. + +; The GNU MP Library is distributed in the hope that it will be useful, but +; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +; License for more details. + +; You should have received a copy of the GNU Library General Public License +; along with the GNU MP Library; see the file COPYING.LIB. If not, write to +; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +; MA 02111-1307, USA. + + +; INPUT PARAMETERS +; res_ptr gr26 +; s1_ptr gr25 +; s2_ptr gr24 +; size gr23 + +; This runs at 2 cycles/limb on PA8000. + + .level 2.0n + .code + .export __mpn_sub_n +__mpn_sub_n + .proc + .callinfo frame=0,args_saved + .entry + + sub %r0,%r23,%r22 + depw,z %r22,30,3,%r28 ; r28 = 2 * (-n & 7) + depw,z %r22,28,3,%r22 ; r22 = 8 * (-n & 7) + sub %r25,%r22,%r25 ; offset s1_ptr + sub %r24,%r22,%r24 ; offset s2_ptr + blr %r28,%r0 ; branch into loop + sub %r26,%r22,%r26 ; offset res_ptr and set carry + +L$loop ldd 0(%r25),%r20 + ldd 0(%r24),%r31 + sub,db %r20,%r31,%r20 + std %r20,0(%r26) +L$7 ldd 8(%r25),%r21 + ldd 8(%r24),%r19 + sub,db %r21,%r19,%r21 + std %r21,8(%r26) +L$6 ldd 16(%r25),%r20 + ldd 16(%r24),%r31 + sub,db %r20,%r31,%r20 + std %r20,16(%r26) +L$5 ldd 24(%r25),%r21 + ldd 24(%r24),%r19 + sub,db %r21,%r19,%r21 + std %r21,24(%r26) +L$4 ldd 32(%r25),%r20 + ldd 32(%r24),%r31 + sub,db %r20,%r31,%r20 + std %r20,32(%r26) +L$3 ldd 40(%r25),%r21 + ldd 40(%r24),%r19 + sub,db %r21,%r19,%r21 + std %r21,40(%r26) +L$2 ldd 48(%r25),%r20 + ldd 48(%r24),%r31 + sub,db %r20,%r31,%r20 + std %r20,48(%r26) +L$1 ldd 56(%r25),%r21 + ldo 64(%r25),%r25 + ldd 56(%r24),%r19 + sub,db %r21,%r19,%r21 + std %r21,56(%r26) + ldo 64(%r24),%r24 + addib,> -8,%r23,L$loop + ldo 64(%r26),%r26 + + add,dc %r0,%r0,%r29 + subi 1,%r29,%r29 + bve (%r2) + .exit + ldi 0,%r28 + .procend |