summaryrefslogtreecommitdiff
path: root/gmp/mpn/arm/v5
diff options
context:
space:
mode:
Diffstat (limited to 'gmp/mpn/arm/v5')
-rw-r--r--gmp/mpn/arm/v5/gcd_1.asm120
-rw-r--r--gmp/mpn/arm/v5/mod_1_1.asm129
-rw-r--r--gmp/mpn/arm/v5/mod_1_2.asm156
3 files changed, 0 insertions, 405 deletions
diff --git a/gmp/mpn/arm/v5/gcd_1.asm b/gmp/mpn/arm/v5/gcd_1.asm
deleted file mode 100644
index 169d154bf0..0000000000
--- a/gmp/mpn/arm/v5/gcd_1.asm
+++ /dev/null
@@ -1,120 +0,0 @@
-dnl ARM v5 mpn_gcd_1.
-
-dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for ARM by Torbjörn
-dnl Granlund.
-
-dnl Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/bit (approx)
-C StrongARM -
-C XScale ?
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 5.9
-C Cortex-A15 ?
-C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
-
-C TODO
-C * Optimise inner-loop better.
-
-C Threshold of when to call bmod when U is one limb. Should be about
-C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
-define(`BMOD_THRES_LOG2', 6)
-
-C INPUT PARAMETERS
-define(`up', `r0')
-define(`n', `r1')
-define(`v0', `r2')
-
-ifdef(`BMOD_1_TO_MOD_1_THRESHOLD',,
- `define(`BMOD_1_TO_MOD_1_THRESHOLD',0xffffffff)')
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_gcd_1)
- push {r4, r7, lr}   C entry: up=r0, n=r1, v0=r2, result returned in r0; save r4, r7, lr
- ldr r3, [up] C U low limb
-
- orr r3, r3, v0   C r3 = u0 | v0
- rsb r4, r3, #0   C r4 = -r3
- and r4, r4, r3   C isolate the lowest set bit of u0 | v0
- clz r4, r4 C r4 = 31 - min(ctz(u0),ctz(v0))
- rsb r4, r4, #31   C r4 = min(ctz(u0),ctz(v0)), the left shift applied at L(end)
-
- rsb r12, v0, #0   C r12 = -v0
- and r12, r12, v0   C isolate the lowest set bit of v0
- clz r12, r12   C r12 = 31 - ctz(v0)
- rsb r12, r12, #31   C r12 = ctz(v0)
- mov v0, v0, lsr r12   C shift the trailing zero bits out of v0
-
- mov r7, v0   C keep the shifted v0 in r7 for the reduction loop
-
- cmp n, #1
- bne L(nby1)   C n != 1: pick between the bmod and mod calls below
-
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
- ldr r3, [up]   C reload u0, r3 was overwritten by the orr above
- cmp v0, r3, lsr #BMOD_THRES_LOG2   C compare v0 with u0 >> BMOD_THRES_LOG2
- bhi L(red1)   C v0 is the larger: skip the call, reduce with r3 = u0
-
-L(bmod):mov r3, #0 C carry argument
- bl mpn_modexact_1c_odd   C args: r0=up, r1=n, r2=shifted v0, r3=0
- b L(red0)
-
-L(nby1):cmp n, #BMOD_1_TO_MOD_1_THRESHOLD
- blo L(bmod)   C n below the threshold: use the bmod call
-
- bl mpn_mod_1   C args: r0=up, r1=n, r2=shifted v0
-
-L(red0):mov r3, r0   C r3 = value returned by the call
-L(red1):rsbs r12, r3, #0   C r12 = -r3, Z is set iff r3 == 0
- and r12, r12, r3   C isolate the lowest set bit of r3
- clz r12, r12   C r12 = 31 - ctz(r3)
- rsb r12, r12, #31   C r12 = ctz(r3), flags still come from the rsbs
- bne L(mid)   C r3 != 0: enter the loop
- b L(end)   C r3 == 0: skip the loop, result is built from r7
-
- ALIGN(8)
-L(top): rsb r12, r12, #31   C turn the clz result into a trailing zero count
- movcc r3, r1 C if x-y < 0 (borrow from the rsbs below), r3 = y-x
- movcc r7, r0 C use x,y-x: r7 = x as it was before the subtraction
-L(mid): mov r3, r3, lsr r12 C shift the trailing zero bits out of r3
- mov r0, r3 C r0 = copy of r3 taken before the subtraction
- sub r1, r7, r3 C r1 = r7 - r3
- rsbs r3, r7, r3 C r3 = r3 - r7, carry clear when this borrows
- and r12, r1, r3 C d AND -d: lowest set bit of the difference
- clz r12, r12 C r12 = 31 - ctz(difference)
- bne L(top) C loop while the difference is nonzero (flags from the rsbs)
-
-L(end): mov r0, r7, lsl r4   C return r7 << r4
- pop {r4, r7, pc}   C restore r4, r7 and return
-EPILOGUE()
diff --git a/gmp/mpn/arm/v5/mod_1_1.asm b/gmp/mpn/arm/v5/mod_1_1.asm
deleted file mode 100644
index 3cf0cd7763..0000000000
--- a/gmp/mpn/arm/v5/mod_1_1.asm
+++ /dev/null
@@ -1,129 +0,0 @@
-dnl ARM mpn_mod_1_1p
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2012 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM -
-C XScale ?
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 7
-C Cortex-A15 6
-
-define(`ap', `r0')
-define(`n', `r1')
-define(`d', `r2')
-define(`cps',`r3')
-
-ASM_START()
-PROLOGUE(mpn_mod_1_1p)
- push {r4-r10}   C entry: ap=r0, n=r1, d=r2, cps=r3, result returned in r0; save r4-r10
- add r0, r0, r1, asl #2   C r0 = ap + 4*n, just past the top limb
- ldr r5, [r0, #-4]!   C r5 = ap[n-1], r0 steps down one limb
- ldr r12, [r0, #-4]!   C r12 = ap[n-2]; NOTE(review): two limbs are always read, so n >= 2 looks required - confirm with callers
- subs r1, r1, #2   C n -= 2
- ble L(4)   C no limbs beyond the first two: skip the loop
- ldr r8, [r3, #12]   C r8 = cps[3]
- mov r4, r12
- mov r10, r5
- umull r7, r5, r10, r8   C r5:r7 = r10 * cps[3]
- sub r1, r1, #1
- b L(mid)
-
-L(top): adds r12, r6, r7   C r12 = loaded limb + low product word
- adcs r10, r4, r5   C r10 = r4 + high product word + carry
- sub r1, r1, #1   C one limb fewer to go, flags untouched
- mov r6, #0
- movcs r6, r8   C r6 = cps[3] if the adcs carried out, else 0
- umull r7, r5, r10, r8   C r5:r7 = r10 * cps[3] for the next round
- adds r4, r12, r6
- subcs r4, r4, r2   C on carry out subtract d
-L(mid): ldr r6, [r0, #-4]!   C r6 = next lower limb
- teq r1, #0
- bne L(top)   C loop until the counter reaches zero
-
- adds r12, r6, r7   C fold in the last loaded limb
- adcs r5, r4, r5
- subcs r5, r5, r2   C on carry out subtract d
-L(4): ldr r1, [r3, #4]   C r1 = cps[1], the clz shift count stored by mpn_mod_1_1p_cps
- cmp r1, #0
- beq L(7)   C zero shift count: take the unshifted path
- ldr r4, [r3, #8]   C r4 = cps[2]
- umull r0, r6, r5, r4   C r6:r0 = r5 * cps[2]
- adds r12, r0, r12   C add the low product word into r12
- addcs r6, r6, #1   C propagate the carry into the high word
- rsb r0, r1, #32
- mov r0, r12, lsr r0   C r0 = r12 >> (32 - cnt)
- orr r5, r0, r6, asl r1   C r5 = (r6 << cnt) | (r12 >> (32 - cnt))
- mov r12, r12, asl r1   C r12 <<= cnt
- b L(8)
-L(7): cmp r5, r2
- subcs r5, r5, r2   C if r5 >= d then r5 -= d
-L(8): ldr r0, [r3, #0]   C r0 = cps[0], the mpn_invert_limb result stored by mpn_mod_1_1p_cps
- umull r4, r3, r5, r0   C r3:r4 = r5 * cps[0]
- add r5, r5, #1
- adds r0, r4, r12   C r0 = low product word + r12
- adc r5, r3, r5   C r5 = high product word + old r5 + 1 + carry
- mul r5, r2, r5   C r5 = d * r5, low 32 bits
- sub r12, r12, r5   C r12 -= d * r5
- cmp r12, r0
- addhi r12, r12, r2   C if r12 > r0 (unsigned) add d back
- cmp r2, r12
- subls r12, r12, r2   C if d <= r12 subtract d
- mov r0, r12, lsr r1   C return r12 >> cnt
- pop {r4-r10}
- bx r14
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1_1p_cps)
- stmfd sp!, {r4, r5, r6, r14}   C entry: r0 = table to fill, r1 = divisor limb; save r4-r6 and lr
- mov r5, r0   C r5 = table pointer, r0 is reused for the call
- clz r4, r1   C r4 = cnt = leading zero count of the divisor
- mov r0, r1, asl r4   C r0 = divisor << cnt
- rsb r6, r0, #0   C r6 = -(divisor << cnt)
- bl mpn_invert_limb   C argument: r0 = shifted divisor, result comes back in r0
- str r0, [r5, #0]   C cps[0] = value returned by mpn_invert_limb
- str r4, [r5, #4]   C cps[1] = cnt
- cmp r4, #0
- beq L(2)   C cnt == 0: cps[2] is left unwritten
- rsb r1, r4, #32
- mov r3, #1
- mov r3, r3, asl r4   C r3 = 1 << cnt
- orr r3, r3, r0, lsr r1   C r3 = (1 << cnt) | (cps[0] >> (32 - cnt))
- mul r3, r6, r3   C r3 = r6 * r3, low 32 bits
- mov r4, r3, lsr r4   C r4 = r3 >> cnt, cnt itself is no longer needed
- str r4, [r5, #8]   C cps[2] = r3 >> cnt
-L(2): mul r0, r6, r0   C r0 = r6 * cps[0], low 32 bits
- str r0, [r5, #12]   C cps[3] = that product
- ldmfd sp!, {r4, r5, r6, pc}   C restore r4-r6 and return
-EPILOGUE()
diff --git a/gmp/mpn/arm/v5/mod_1_2.asm b/gmp/mpn/arm/v5/mod_1_2.asm
deleted file mode 100644
index aa26ecb21c..0000000000
--- a/gmp/mpn/arm/v5/mod_1_2.asm
+++ /dev/null
@@ -1,156 +0,0 @@
-dnl ARM mpn_mod_1s_2p
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2012 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM -
-C XScale ?
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 4.25
-C Cortex-A15 3
-
-define(`ap', `r0')
-define(`n', `r1')
-define(`d', `r2')
-define(`cps',`r3')
-
-ASM_START()
-PROLOGUE(mpn_mod_1s_2p)
- push {r4-r10}   C entry: ap=r0, n=r1, d=r2, cps=r3, result returned in r0; save r4-r10
- tst n, #1   C Z set iff n is even, consumed by the beq below
- add r7, r3, #8   C r7 = address of cps[2]
- ldmia r7, {r7, r8, r12} C load B1, B2, B3 from cps[2], cps[3], cps[4]
- add ap, ap, n, lsl #2 C put ap at operand end
- beq L(evn)
-
-L(odd): subs n, n, #1
- beq L(1)   C n was 1: single limb case
- ldmdb ap!, {r4,r6,r9}   C take the top three limbs: r4 lowest address, r9 highest
- mov r10, #0
- umlal r4, r10, r6, r7   C r10:r4 += r6 * B1
- umlal r4, r10, r9, r8   C r10:r4 += r9 * B2
- b L(com)
-
-L(evn): ldmdb ap!, {r4,r10}   C take the top two limbs: r4 lower address, r10 higher
-L(com): subs n, n, #2
- ble L(end)   C no limbs left to fold
- ldmdb ap!, {r5,r6}   C next two limbs down: r5 lower address, r6 higher
- b L(mid)
-
-L(top): mov r9, #0
- umlal r5, r9, r6, r7 C B1: r9:r5 += r6 * B1
- umlal r5, r9, r4, r8 C B2: r9:r5 += r4 * B2
- ldmdb ap!, {r4,r6}   C next two limbs down: r4 lower address, r6 higher
- umlal r5, r9, r10, r12 C B3: r9:r5 += r10 * B3
- ble L(xit)   C flags from the subs at L(mid): taken when n reached zero
- mov r10, #0
- umlal r4, r10, r6, r7 C B1: r10:r4 += r6 * B1
- umlal r4, r10, r5, r8 C B2: r10:r4 += r5 * B2
- ldmdb ap!, {r5,r6}   C next two limbs down: r5 lower address, r6 higher
- umlal r4, r10, r9, r12 C B3: r10:r4 += r9 * B3
-L(mid): subs n, n, #4
- bge L(top)   C at least one more pass through L(top)
-
- mov r9, #0
- umlal r5, r9, r6, r7 C B1: r9:r5 += r6 * B1
- umlal r5, r9, r4, r8 C B2: r9:r5 += r4 * B2
- umlal r5, r9, r10, r12 C B3: r9:r5 += r10 * B3
- mov r4, r5   C low word to r4, high word is already in r9
-
-L(end): movge r9, r10 C runs when the last subs left n >= 0, i.e. on arrival via L(com) or L(xit); skipped on fall-through where n < 0
- ldr r6, [r3, #4] C cps[1] = cnt
- mov r5, #0
- umlal r4, r5, r9, r7   C r5:r4 = r4 + r9 * B1
- mov r7, r5, lsl r6   C r7 = high word << cnt
-L(x): rsb r1, r6, #32
- orr r8, r7, r4, lsr r1   C r8 = r7 | (r4 >> (32 - cnt))
- mov r9, r4, lsl r6   C r9 = r4 << cnt
- ldr r5, [r3, #0]   C r5 = cps[0]
- add r0, r8, #1
- umull r12, r1, r8, r5   C r1:r12 = r8 * cps[0]
- adds r4, r12, r9   C r4 = low product word + r9
- adc r1, r1, r0   C r1 = high product word + r8 + 1 + carry
- mul r5, r2, r1   C r5 = d * r1, low 32 bits
- sub r9, r9, r5   C r9 -= d * r1
- cmp r9, r4
- addhi r9, r9, r2   C if r9 > r4 (unsigned) add d back
- cmp r2, r9
- subls r9, r9, r2   C if d <= r9 subtract d
- mov r0, r9, lsr r6   C return r9 >> cnt
- pop {r4-r10}
- bx r14
-
-L(xit): mov r10, #0
- umlal r4, r10, r6, r7 C B1: r10:r4 += r6 * B1
- umlal r4, r10, r5, r8 C B2: r10:r4 += r5 * B2
- umlal r4, r10, r9, r12 C B3: r10:r4 += r9 * B3
- b L(end)   C flags are still those of the subs at L(mid)
-
-L(1): ldr r6, [r3, #4] C cps[1] = cnt
- ldr r4, [ap, #-4] C ap[0]
- mov r7, #0   C no high word in the single limb case
- b L(x)
-EPILOGUE()
-
-PROLOGUE(mpn_mod_1s_2p_cps)
- push {r4-r8, r14}   C entry: r0 = cps table to fill, r1 = b; save r4-r8 and lr
- clz r4, r1   C r4 = cnt = leading zero count of b
- mov r5, r1, lsl r4 C b <<= cnt
- mov r6, r0 C r6 = cps
- mov r0, r5   C argument for the call: shifted b
- bl mpn_invert_limb   C result comes back in r0
- rsb r3, r4, #32
- mov r3, r0, lsr r3   C r3 = r0 >> (32 - cnt)
- mov r2, #1
- orr r3, r3, r2, lsl r4   C r3 = (1 << cnt) | (r0 >> (32 - cnt))
- rsb r1, r5, #0   C r1 = -b
- mul r2, r1, r3   C r2 = -b * r3, low 32 bits; stored shifted as cps[2]
- umull r3, r12, r2, r0   C r12:r3 = r2 * r0
- add r12, r2, r12
- mvn r12, r12   C r12 = NOT (r2 + high product word)
- mul r1, r5, r12   C r1 = b * r12, low 32 bits
- cmp r1, r3
- addhi r1, r1, r5   C if r1 > low product word add b; stored shifted as cps[3]
- umull r12, r7, r1, r0   C r7:r12 = r1 * r0
- add r7, r1, r7
- mvn r7, r7   C r7 = NOT (r1 + high product word)
- mul r3, r5, r7   C r3 = b * r7, low 32 bits
- cmp r3, r12
- addhi r3, r3, r5   C if r3 > low product word add b; stored shifted as cps[4]
- mov r5, r2, lsr r4   C r5 = r2 >> cnt
- mov r7, r1, lsr r4   C r7 = r1 >> cnt
- mov r8, r3, lsr r4   C r8 = r3 >> cnt
- stmia r6, {r0,r4,r5,r7,r8} C fill cps: cps[0..4] = invert_limb result, cnt, r2>>cnt, r1>>cnt, r3>>cnt
- pop {r4-r8, pc}   C restore r4-r8 and return
-EPILOGUE()