summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Ryde <user42@zip.com.au>2003-11-19 00:19:54 +0100
committerKevin Ryde <user42@zip.com.au>2003-11-19 00:19:54 +0100
commitc8ebc7f80aab12736a8bab4062d9a7ed364317f2 (patch)
tree6eed63f37476642114d9cd95419a735f9a62784b
parentdb0c6d84301bee0cb8dddf5984b8d668aff88aa2 (diff)
downloadgmp-c8ebc7f80aab12736a8bab4062d9a7ed364317f2.tar.gz
* mpn/powerpc32/750/lshift.asm, mpn/powerpc32/750/rshift.asm: New
files.
-rw-r--r--mpn/powerpc32/750/lshift.asm146
-rw-r--r--mpn/powerpc32/750/rshift.asm144
2 files changed, 290 insertions, 0 deletions
diff --git a/mpn/powerpc32/750/lshift.asm b/mpn/powerpc32/750/lshift.asm
new file mode 100644
index 000000000..df7e541a0
--- /dev/null
+++ b/mpn/powerpc32/750/lshift.asm
@@ -0,0 +1,146 @@
+dnl PowerPC 750 mpn_lshift -- mpn left shift.
+
+dnl Copyright 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C 750: 3.0
+C 7400: 3.0
+
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C This code is the same per-limb speed as mpn/powerpc32/lshift.asm, but
+C smaller and saving about 30 or so cycles of overhead.
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+
+ C r3 dst
+ C r4 src
+ C r5 size
+ C r6 shift
+
+ mtctr r5 C size
+ slwi r5, r5, 2 C 4*size
+
+ subfic r7, r6, 32 C 32-shift
+ add r4, r4, r5 C &src[size]
+
+ add r5, r3, r5 C &dst[size]
+ lwz r8, -4(r4) C src[size-1]
+ bdz L(one)
+
+ lwzu r9, -8(r4) C src[size-2]
+
+ srw r3, r8, r7 C return value
+ slw r8, r8, r6 C src[size-1] << shift
+ bdz L(two)
+
+
+L(top):
+ C r3 return value
+ C r4 src, incrementing
+ C r5 dst, incrementing
+ C r6 lshift
+ C r7 32-shift
+ C r8 src[i+1] << shift
+ C r9 src[i]
+ C r10
+
+ lwzu r10, -4(r4)
+ srw r11, r9, r7
+
+ or r8, r8, r11
+ stwu r8, -4(r5)
+
+ slw r8, r9, r6
+ bdz L(odd)
+
+ C r8 src[i+1] << shift
+ C r9
+ C r10 src[i]
+
+ lwzu r9, -4(r4)
+ srw r11, r10, r7
+
+ or r8, r8, r11
+ stwu r8, -4(r5)
+
+ slw r8, r10, r6
+ bdnz L(top)
+
+
+L(two):
+ C r3 return value
+ C r4
+ C r5 &dst[2]
+ C r6 shift
+ C r7 32-shift
+ C r8 src[1] << shift
+ C r9 src[0]
+ C r10
+
+ srw r11, r9, r7
+ slw r12, r9, r6 C src[0] << shift
+
+ or r8, r8, r11
+ stw r12, -8(r5) C dst[0]
+
+ stw r8, -4(r5) C dst[1]
+ blr
+
+
+L(odd):
+ C r3 return value
+ C r4
+ C r5 &dst[2]
+ C r6 shift
+ C r7 32-shift
+ C r8 src[1] << shift
+ C r9
+ C r10 src[0]
+
+ srw r11, r10, r7
+ slw r12, r10, r6
+
+ or r8, r8, r11
+ stw r12, -8(r5) C dst[0]
+
+ stw r8, -4(r5) C dst[1]
+ blr
+
+
+L(one):
+ C r5 &dst[1]
+ C r6 shift
+ C r7 32-shift
+ C r8 src[0]
+
+ srw r3, r8, r7 C return value
+ slw r8, r8, r6 C src[size-1] << shift
+
+ stw r8, -4(r5) C dst[0]
+ blr
+
+EPILOGUE(mpn_lshift)
diff --git a/mpn/powerpc32/750/rshift.asm b/mpn/powerpc32/750/rshift.asm
new file mode 100644
index 000000000..393ebcb5b
--- /dev/null
+++ b/mpn/powerpc32/750/rshift.asm
@@ -0,0 +1,144 @@
+dnl PowerPC 750 mpn_rshift -- mpn right shift.
+
+dnl Copyright 2002, 2003 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 2.1 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with the GNU MP Library; see the file COPYING.LIB. If
+dnl not, write to the Free Software Foundation, Inc., 59 Temple Place -
+dnl Suite 330, Boston, MA 02111-1307, USA.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C 750: 3.0
+C 7400: 3.0
+
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned shift);
+C
+C This code is the same per-limb speed as mpn/powerpc32/rshift.asm, but
+C smaller and saving about 30 or so cycles of overhead.
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+
+ C r3 dst
+ C r4 src
+ C r5 size
+ C r6 shift
+
+ mtctr r5 C size
+ lwz r8, 0(r4) C src[0]
+
+ subfic r7, r6, 32 C 32-shift
+ addi r5, r3, -4 C dst-4
+
+ slw r3, r8, r7 C return value
+ bdz L(one)
+
+ lwzu r9, 4(r4) C src[1]
+ srw r8, r8, r6 C src[0] >> shift
+ bdz L(two)
+
+
+L(top):
+ C r3 return value
+ C r4 src, incrementing
+ C r5 dst, incrementing
+ C r6 shift
+ C r7 32-shift
+ C r8 src[i-1] >> shift
+ C r9 src[i]
+ C r10
+
+ lwzu r10, 4(r4)
+ slw r11, r9, r7
+
+ or r8, r8, r11
+ stwu r8, 4(r5)
+
+ srw r8, r9, r6
+ bdz L(odd)
+
+ C r8 src[i-1] >> shift
+ C r9
+ C r10 src[i]
+
+ lwzu r9, 4(r4)
+ slw r11, r10, r7
+
+ or r8, r8, r11
+ stwu r8, 4(r5)
+
+ srw r8, r10, r6
+ bdnz L(top)
+
+
+L(two):
+ C r3 return value
+ C r4
+ C r5 &dst[size-2]
+ C r6 shift
+ C r7 32-shift
+ C r8 src[size-2] >> shift
+ C r9 src[size-1]
+ C r10
+
+ slw r11, r9, r7
+ srw r12, r9, r6 C src[size-1] >> shift
+
+ or r8, r8, r11
+ stw r12, 8(r5) C dst[size-1]
+
+ stw r8, 4(r5) C dst[size-2]
+ blr
+
+
+L(odd):
+ C r3 return value
+ C r4
+ C r5 &dst[size-2]
+ C r6 shift
+ C r7 32-shift
+ C r8 src[size-2] >> shift
+ C r9
+ C r10 src[size-1]
+
+ slw r11, r10, r7
+ srw r12, r10, r6
+
+ or r8, r8, r11
+ stw r12, 8(r5) C dst[size-1]
+
+ stw r8, 4(r5) C dst[size-2]
+ blr
+
+
+L(one):
+ C r3 return value
+ C r4
+ C r5 dst-4
+ C r6 shift
+ C r7
+ C r8 src[0]
+
+ srw r8, r8, r6
+
+ stw r8, 4(r5) C dst[0]
+ blr
+
+EPILOGUE(mpn_rshift)