diff options
author | Kevin Ryde <user42@zip.com.au> | 2003-11-19 00:19:54 +0100 |
---|---|---|
committer | Kevin Ryde <user42@zip.com.au> | 2003-11-19 00:19:54 +0100 |
commit | c8ebc7f80aab12736a8bab4062d9a7ed364317f2 (patch) | |
tree | 6eed63f37476642114d9cd95419a735f9a62784b | |
parent | db0c6d84301bee0cb8dddf5984b8d668aff88aa2 (diff) | |
download | gmp-c8ebc7f80aab12736a8bab4062d9a7ed364317f2.tar.gz |
* mpn/powerpc32/750/lshift.asm, mpn/powerpc32/750/rshift.asm: New
files.
-rw-r--r-- | mpn/powerpc32/750/lshift.asm | 146 | ||||
-rw-r--r-- | mpn/powerpc32/750/rshift.asm | 144 |
2 files changed, 290 insertions, 0 deletions
diff --git a/mpn/powerpc32/750/lshift.asm b/mpn/powerpc32/750/lshift.asm new file mode 100644 index 000000000..df7e541a0 --- /dev/null +++ b/mpn/powerpc32/750/lshift.asm @@ -0,0 +1,146 @@ +dnl PowerPC 750 mpn_lshift -- mpn left shift. + +dnl Copyright 2002, 2003 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. +dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + +include(`../config.m4') + + +C cycles/limb +C 750: 3.0 +C 7400: 3.0 + + +C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); +C +C This code is the same per-limb speed as mpn/powerpc32/lshift.asm, but +C smaller and saving about 30 or so cycles of overhead. 
+ +ASM_START() +PROLOGUE(mpn_lshift) + C Shift src[0..size-1] left by shift bits into dst, high limb first; returns src[size-1] >> (32-shift) + C r3 dst + C r4 src + C r5 size + C r6 shift + + mtctr r5 C size + slwi r5, r5, 2 C 4*size + + subfic r7, r6, 32 C 32-shift + add r4, r4, r5 C &src[size] + + add r5, r3, r5 C &dst[size] + lwz r8, -4(r4) C src[size-1] + bdz L(one) C size==1 + + lwzu r9, -8(r4) C src[size-2] + + srw r3, r8, r7 C return value + slw r8, r8, r6 C src[size-1] << shift + bdz L(two) C size==2 + + +L(top): + C r3 return value + C r4 src, decrementing + C r5 dst, decrementing + C r6 shift + C r7 32-shift + C r8 src[i+1] << shift + C r9 src[i] + C r10 + + lwzu r10, -4(r4) C src[i-1] + srw r11, r9, r7 C src[i] >> (32-shift) + + or r8, r8, r11 + stwu r8, -4(r5) C dst[i+1] + + slw r8, r9, r6 + bdz L(odd) C exit with last limb in r10 + + C r8 src[i+1] << shift + C r9 + C r10 src[i] + + lwzu r9, -4(r4) C src[i-1] + srw r11, r10, r7 C src[i] >> (32-shift) + + or r8, r8, r11 + stwu r8, -4(r5) C dst[i+1] + + slw r8, r10, r6 + bdnz L(top) C else fall through with last limb in r9 + + +L(two): + C r3 return value + C r4 + C r5 &dst[2] + C r6 shift + C r7 32-shift + C r8 src[1] << shift + C r9 src[0] + C r10 + + srw r11, r9, r7 + slw r12, r9, r6 C src[0] << shift + + or r8, r8, r11 + stw r12, -8(r5) C dst[0] + + stw r8, -4(r5) C dst[1] + blr + + +L(odd): + C r3 return value + C r4 + C r5 &dst[2] + C r6 shift + C r7 32-shift + C r8 src[1] << shift + C r9 + C r10 src[0] + + srw r11, r10, r7 + slw r12, r10, r6 C src[0] << shift + + or r8, r8, r11 + stw r12, -8(r5) C dst[0] + + stw r8, -4(r5) C dst[1] + blr + + +L(one): + C r5 &dst[1] + C r6 shift + C r7 32-shift + C r8 src[0] + + srw r3, r8, r7 C return value + slw r8, r8, r6 C src[0] << shift + + stw r8, -4(r5) C dst[0] + blr + +EPILOGUE(mpn_lshift) diff --git a/mpn/powerpc32/750/rshift.asm b/mpn/powerpc32/750/rshift.asm new file mode 100644 index 000000000..393ebcb5b --- /dev/null +++ b/mpn/powerpc32/750/rshift.asm @@ -0,0 +1,144 @@ +dnl PowerPC 750 mpn_rshift -- mpn right shift. + +dnl Copyright 2002, 2003 Free Software Foundation, Inc. +dnl +dnl This file is part of the GNU MP Library. 
+dnl +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 2.1 of the +dnl License, or (at your option) any later version. +dnl +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with the GNU MP Library; see the file COPYING.LIB. If +dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - +dnl Suite 330, Boston, MA 02111-1307, USA. + +include(`../config.m4') + + +C cycles/limb +C 750: 3.0 +C 7400: 3.0 + + +C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, +C unsigned shift); +C +C This code is the same per-limb speed as mpn/powerpc32/rshift.asm, but +C smaller and saving about 30 or so cycles of overhead. 
+ +ASM_START() +PROLOGUE(mpn_rshift) + C Shift src[0..size-1] right by shift bits into dst, low limb first; returns src[0] << (32-shift) + C r3 dst + C r4 src + C r5 size + C r6 shift + + mtctr r5 C size + lwz r8, 0(r4) C src[0] + + subfic r7, r6, 32 C 32-shift + addi r5, r3, -4 C dst-4 + + slw r3, r8, r7 C return value + bdz L(one) C size==1 + + lwzu r9, 4(r4) C src[1] + srw r8, r8, r6 C src[0] >> shift + bdz L(two) C size==2 + + +L(top): + C r3 return value + C r4 src, incrementing + C r5 dst, incrementing + C r6 shift + C r7 32-shift + C r8 src[i-1] >> shift + C r9 src[i] + C r10 + + lwzu r10, 4(r4) C src[i+1] + slw r11, r9, r7 C src[i] << (32-shift) + + or r8, r8, r11 + stwu r8, 4(r5) C dst[i-1] + + srw r8, r9, r6 + bdz L(odd) C exit with last limb in r10 + + C r8 src[i-1] >> shift + C r9 + C r10 src[i] + + lwzu r9, 4(r4) C src[i+1] + slw r11, r10, r7 C src[i] << (32-shift) + + or r8, r8, r11 + stwu r8, 4(r5) C dst[i-1] + + srw r8, r10, r6 + bdnz L(top) C else fall through with last limb in r9 + + +L(two): + C r3 return value + C r4 + C r5 &dst[size-3] + C r6 shift + C r7 32-shift + C r8 src[size-2] >> shift + C r9 src[size-1] + C r10 + + slw r11, r9, r7 + srw r12, r9, r6 C src[size-1] >> shift + + or r8, r8, r11 + stw r12, 8(r5) C dst[size-1] + + stw r8, 4(r5) C dst[size-2] + blr + + +L(odd): + C r3 return value + C r4 + C r5 &dst[size-3] + C r6 shift + C r7 32-shift + C r8 src[size-2] >> shift + C r9 + C r10 src[size-1] + + slw r11, r10, r7 + srw r12, r10, r6 C src[size-1] >> shift + + or r8, r8, r11 + stw r12, 8(r5) C dst[size-1] + + stw r8, 4(r5) C dst[size-2] + blr + + +L(one): + C r3 return value + C r4 + C r5 dst-4 + C r6 shift + C r7 32-shift + C r8 src[0] + + srw r8, r8, r6 C src[0] >> shift + + stw r8, 4(r5) C dst[0] + blr + +EPILOGUE(mpn_rshift) |