diff options
author | Kevin Ryde <user42@zip.com.au> | 2002-03-19 00:06:41 +0100 |
---|---|---|
committer | Kevin Ryde <user42@zip.com.au> | 2002-03-19 00:06:41 +0100 |
commit | a5562755e000769d0b6fa39c4e4db6dc7ed51833 (patch) | |
tree | 5a8fbcf72edcc968c43a5bbf00e21987b0146de9 /mpn/powerpc32 | |
parent | b0fb055435c4da51ef3374a51344a08a59ebe9e7 (diff) | |
download | gmp-a5562755e000769d0b6fa39c4e4db6dc7ed51833.tar.gz |
* mpn/powerpc32/*.asm: Use L(), add some measured speeds.
Diffstat (limited to 'mpn/powerpc32')
-rw-r--r-- | mpn/powerpc32/add_n.asm | 20 |
-rw-r--r-- | mpn/powerpc32/lshift.asm | 40 |
-rw-r--r-- | mpn/powerpc32/rshift.asm | 20 |
-rw-r--r-- | mpn/powerpc32/sub_n.asm | 20 |
4 files changed, 62 insertions, 38 deletions
diff --git a/mpn/powerpc32/add_n.asm b/mpn/powerpc32/add_n.asm index bb257f2f3..43b50101e 100644 --- a/mpn/powerpc32/add_n.asm +++ b/mpn/powerpc32/add_n.asm @@ -1,7 +1,11 @@ dnl PowerPC-32 mpn_add_n -- Add two limb vectors of the same length > 0 and dnl store sum in a third limb vector. +dnl +dnl cycles/limb +dnl 604e: 2.67 +dnl 750: 4.5 -dnl Copyright 1995, 1997, 2000 Free Software Foundation, Inc. +dnl Copyright 1995, 1997, 2000, 2002 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -36,24 +40,26 @@ PROLOGUE(mpn_add_n) lwz r8,0(r4) C load least significant s1 limb lwz r0,0(r5) C load least significant s2 limb addi r3,r3,-4 C offset res_ptr, it's updated before it's used - bdz .Lend C If done, skip loop -.Loop: lwz r9,4(r4) C load s1 limb + bdz L(end) C If done, skip loop + +L(oop): lwz r9,4(r4) C load s1 limb lwz r10,4(r5) C load s2 limb adde r7,r0,r8 C add limbs with cy, set cy stw r7,4(r3) C store result limb - bdz .Lexit C decrement CTR and exit if done + bdz L(exit) C decrement CTR and exit if done lwzu r8,8(r4) C load s1 limb and update s1_ptr lwzu r0,8(r5) C load s2 limb and update s2_ptr adde r7,r10,r9 C add limbs with cy, set cy stwu r7,8(r3) C store result limb and update res_ptr - bdnz .Loop C decrement CTR and loop back + bdnz L(oop) C decrement CTR and loop back -.Lend: adde r7,r0,r8 +L(end): adde r7,r0,r8 stw r7,4(r3) C store ultimate result limb li r3,0 C load cy into ... addze r3,r3 C ... return value register blr -.Lexit: adde r7,r10,r9 +L(exit): + adde r7,r10,r9 stw r7,8(r3) li r3,0 C load cy into ... addze r3,r3 C ... return value register diff --git a/mpn/powerpc32/lshift.asm b/mpn/powerpc32/lshift.asm index a89d1b815..7305f2b9c 100644 --- a/mpn/powerpc32/lshift.asm +++ b/mpn/powerpc32/lshift.asm @@ -1,6 +1,10 @@ dnl PowerPC-32 mpn_lshift -- Shift a number left. +dnl +dnl cycles/limb +dnl 604e: 2.0 +dnl 750: 3.0 -dnl Copyright 1995, 1998, 2000 Free Software Foundation, Inc. 
+dnl Copyright 1995, 1998, 2000, 2002 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -34,35 +38,37 @@ PROLOGUE(mpn_lshift) slwi r0,r5,2 add r4,r4,r0 C make r4 point at end of s1 add r7,r3,r0 C make r7 point at end of res - bgt .LBIG C branch if more than 12 limbs + bgt L(BIG) C branch if more than 12 limbs mtctr r5 C copy size into CTR subfic r8,r6,32 lwzu r11,-4(r4) C load first s1 limb srw r3,r11,r8 C compute function return value - bdz .Lend1 + bdz L(end1) -.Loop: lwzu r10,-4(r4) +L(oop): lwzu r10,-4(r4) slw r9,r11,r6 srw r12,r10,r8 or r9,r9,r12 stwu r9,-4(r7) - bdz .Lend2 + bdz L(end2) lwzu r11,-4(r4) slw r9,r10,r6 srw r12,r11,r8 or r9,r9,r12 stwu r9,-4(r7) - bdnz .Loop + bdnz L(oop) -.Lend1: slw r0,r11,r6 +L(end1): + slw r0,r11,r6 stw r0,-4(r7) blr -.Lend2: slw r0,r10,r6 +L(end2): + slw r0,r10,r6 stw r0,-4(r7) blr -.LBIG: +L(BIG): stmw r24,-32(r1) C save registers we are supposed to preserve lwzu r9,-4(r4) subfic r8,r6,32 @@ -71,26 +77,26 @@ PROLOGUE(mpn_lshift) addi r5,r5,-1 andi. r10,r5,3 C count for spill loop - beq .Le + beq L(e) mtctr r10 lwzu r28,-4(r4) - bdz .Lxe0 + bdz L(xe0) -.Loop0: slw r12,r28,r6 +L(oop0): slw r12,r28,r6 srw r24,r28,r8 lwzu r28,-4(r4) or r24,r0,r24 stwu r24,-4(r7) mr r0,r12 - bdnz .Loop0 C taken at most once! + bdnz L(oop0) C taken at most once! 
-.Lxe0: slw r12,r28,r6 +L(xe0): slw r12,r28,r6 srw r24,r28,r8 or r24,r0,r24 stwu r24,-4(r7) mr r0,r12 -.Le: srwi r5,r5,2 C count for unrolled loop +L(e): srwi r5,r5,2 C count for unrolled loop addi r5,r5,-1 mtctr r5 lwz r28,-4(r4) @@ -98,7 +104,7 @@ PROLOGUE(mpn_lshift) lwz r30,-12(r4) lwzu r31,-16(r4) -.LoopU: slw r9,r28,r6 +L(oopU): slw r9,r28,r6 srw r24,r28,r8 lwz r28,-4(r4) slw r10,r29,r6 @@ -119,7 +125,7 @@ PROLOGUE(mpn_lshift) or r27,r11,r27 stwu r27,-16(r7) mr r0,r12 - bdnz .LoopU + bdnz L(oopU) slw r9,r28,r6 srw r24,r28,r8 diff --git a/mpn/powerpc32/rshift.asm b/mpn/powerpc32/rshift.asm index 62627fecd..f32db73b5 100644 --- a/mpn/powerpc32/rshift.asm +++ b/mpn/powerpc32/rshift.asm @@ -1,6 +1,10 @@ dnl PowerPC-32 mpn_rshift -- Shift a number right. +dnl +dnl cycles/limb +dnl 604e: 3.3 +dnl 750: 4.0 -dnl Copyright 1995, 2000 Free Software Foundation, Inc. +dnl Copyright 1995, 2000, 2002 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -35,26 +39,28 @@ PROLOGUE(mpn_rshift) subfic r8,r6,32 lwz r11,0(r4) C load first s1 limb slw r3,r11,r8 C compute function return value - bdz .Lend1 + bdz L(end1) -.Loop: lwzu r10,4(r4) +L(oop): lwzu r10,4(r4) srw r9,r11,r6 slw r12,r10,r8 or r9,r9,r12 stwu r9,4(r7) - bdz .Lend2 + bdz L(end2) lwzu r11,4(r4) srw r9,r10,r6 slw r12,r11,r8 or r9,r9,r12 stwu r9,4(r7) - bdnz .Loop + bdnz L(oop) -.Lend1: srw r0,r11,r6 +L(end1): + srw r0,r11,r6 stw r0,4(r7) blr -.Lend2: srw r0,r10,r6 +L(end2): + srw r0,r10,r6 stw r0,4(r7) blr EPILOGUE(mpn_rshift) diff --git a/mpn/powerpc32/sub_n.asm b/mpn/powerpc32/sub_n.asm index 354a86510..4993c5bff 100644 --- a/mpn/powerpc32/sub_n.asm +++ b/mpn/powerpc32/sub_n.asm @@ -1,7 +1,11 @@ dnl PowerPC-32 mpn_sub_n -- Subtract two limb vectors of the same length > 0 dnl and store difference in a third limb vector. +dnl +dnl cycles/limb +dnl 604e: 2.67 +dnl 750: 4.5 -dnl Copyright 1995, 1997, 2000 Free Software Foundation, Inc. 
+dnl Copyright 1995, 1997, 2000, 2002 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. @@ -36,24 +40,26 @@ PROLOGUE(mpn_sub_n) lwz r8,0(r4) C load least significant s1 limb lwz r0,0(r5) C load least significant s2 limb addi r3,r3,-4 C offset res_ptr, it's updated before it's used - bdz .Lend C If done, skip loop -.Loop: lwz r9,4(r4) C load s1 limb + bdz L(end) C If done, skip loop +L(oop): lwz r9,4(r4) C load s1 limb lwz r10,4(r5) C load s2 limb subfe r7,r0,r8 C subtract limbs with cy, set cy stw r7,4(r3) C store result limb - bdz .Lexit C decrement CTR and exit if done + bdz L(exit) C decrement CTR and exit if done lwzu r8,8(r4) C load s1 limb and update s1_ptr lwzu r0,8(r5) C load s2 limb and update s2_ptr subfe r7,r10,r9 C subtract limbs with cy, set cy stwu r7,8(r3) C store result limb and update res_ptr - bdnz .Loop C decrement CTR and loop back + bdnz L(oop) C decrement CTR and loop back -.Lend: subfe r7,r0,r8 +L(end): + subfe r7,r0,r8 stw r7,4(r3) C store ultimate result limb subfe r3,r0,r0 C load !cy into ... subfic r3,r3,0 C ... return value register blr -.Lexit: subfe r7,r10,r9 +L(exit): + subfe r7,r10,r9 stw r7,8(r3) subfe r3,r0,r0 C load !cy into ... subfic r3,r3,0 C ... return value register |