diff options
Diffstat (limited to 'gmp/mpn/arm/v5')
-rw-r--r-- | gmp/mpn/arm/v5/gcd_1.asm | 120 | ||||
-rw-r--r-- | gmp/mpn/arm/v5/mod_1_1.asm | 129 | ||||
-rw-r--r-- | gmp/mpn/arm/v5/mod_1_2.asm | 156 |
3 files changed, 0 insertions, 405 deletions
diff --git a/gmp/mpn/arm/v5/gcd_1.asm b/gmp/mpn/arm/v5/gcd_1.asm deleted file mode 100644 index 169d154bf0..0000000000 --- a/gmp/mpn/arm/v5/gcd_1.asm +++ /dev/null @@ -1,120 +0,0 @@ -dnl ARM v5 mpn_gcd_1. - -dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for ARM by Torbjörn -dnl Granlund. - -dnl Copyright 2000-2002, 2005, 2009, 2011, 2012 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/bit (approx) -C StrongARM - -C XScale ? -C Cortex-A7 ? -C Cortex-A8 ? -C Cortex-A9 5.9 -C Cortex-A15 ? -C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1 - -C TODO -C * Optimise inner-loop better. - -C Threshold of when to call bmod when U is one limb. Should be about -C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit). 
-define(`BMOD_THRES_LOG2', 6)
-
-C INPUT PARAMETERS
-define(`up',    `r0')
-define(`n',     `r1')
-define(`v0',    `r2')
-
-ifdef(`BMOD_1_TO_MOD_1_THRESHOLD',,
-  `define(`BMOD_1_TO_MOD_1_THRESHOLD',0xffffffff)')
-
-ASM_START()
-	TEXT
-	ALIGN(16)
-C mpn_gcd_1 (up, n, v0)
-C
-C Greatest common divisor of the n-limb operand at up and the single limb
-C v0, returned in r0.
-C
-C Register use:
-C   r4   bit index of the lowest set bit of u0|v0, i.e. the power of two
-C        common to both operands; applied as a left shift at L(end)
-C   r7   y: v0 with its trailing zero bits removed, later the smaller of
-C        the two working values
-C   r3   x: u0 itself, or the value returned by the reduction call
-C   r12  scratch, mostly shift counts
-C
-C r3 is pushed purely as padding.  Saving four words instead of three keeps
-C sp a multiple of 8 (given an 8-byte aligned sp on entry) at the calls to
-C mpn_modexact_1c_odd and mpn_mod_1, as the AAPCS requires at public
-C interfaces.  The value popped back into r3 is never used.
-PROLOGUE(mpn_gcd_1)
-	push	{r3, r4, r7, lr}	C r3 = alignment padding only
-	ldr	r3, [up]	C U low limb
-
-	orr	r3, r3, v0
-	rsb	r4, r3, #0
-	and	r4, r4, r3	C isolate lowest set bit of u0|v0
-	clz	r4, r4		C min(ctz(u0),ctz(v0))
-	rsb	r4, r4, #31	C clz of a single bit -> its bit index
-
-	rsb	r12, v0, #0
-	and	r12, r12, v0	C isolate lowest set bit of v0
-	clz	r12, r12
-	rsb	r12, r12, #31	C r12 = ctz(v0)
-	mov	v0, v0, lsr r12	C strip trailing zeros from v0
-
-	mov	r7, v0		C copy kept in r7 across the calls below
-
-	cmp	n, #1
-	bne	L(nby1)
-
-C Both U and V are single limbs, reduce with bmod if u0 >> v0.
-	ldr	r3, [up]
-	cmp	v0, r3, lsr #BMOD_THRES_LOG2
-	bhi	L(red1)		C v0 > u0 >> BMOD_THRES_LOG2: skip the call
-
-L(bmod):mov	r3, #0		C carry argument
-	bl	mpn_modexact_1c_odd	C r0-r2 still hold up, n, v0
-	b	L(red0)
-
-L(nby1):cmp	n, #BMOD_1_TO_MOD_1_THRESHOLD
-	blo	L(bmod)		C unsigned: below threshold use bmod
-
-	bl	mpn_mod_1	C r0-r2 still hold up, n, v0
-
-L(red0):mov	r3, r0		C x = value returned by the call
-L(red1):rsbs	r12, r3, #0	C Z set iff x == 0
-	and	r12, r12, r3	C isolate lowest set bit of x
-	clz	r12, r12
-	rsb	r12, r12, #31	C r12 = ctz(x)
-	bne	L(mid)		C flags still from rsbs: x != 0
-	b	L(end)		C x == 0: result is y
-
-	ALIGN(8)
-L(top):	rsb	r12, r12, #31	C r12 = ctz(x-y)
-	movcc	r3, r1		C if x-y < 0
-	movcc	r7, r0		C use x,y-x
-L(mid):	mov	r3, r3, lsr r12	C strip trailing zeros from x
-	mov	r0, r3		C r0 = x
-	sub	r1, r7, r3	C r1 = y - x
-	rsbs	r3, r7, r3	C r3 = x - y, sets C and Z
-	and	r12, r1, r3	C lowest set bit of x - y
-	clz	r12, r12	C does not touch the flags
-	bne	L(top)		C loop while x != y
-
-L(end):	mov	r0, r7, lsl r4	C restore the common power of two
-	pop	{r3, r4, r7, pc}
-EPILOGUE()
diff --git a/gmp/mpn/arm/v5/mod_1_1.asm b/gmp/mpn/arm/v5/mod_1_1.asm
deleted file mode 100644
index 3cf0cd7763..0000000000
--- a/gmp/mpn/arm/v5/mod_1_1.asm
+++ /dev/null
@@ -1,129 +0,0 @@
-dnl  ARM mpn_mod_1_1p
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 -
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 7
-C Cortex-A15	 6
-
-define(`ap', `r0')
-define(`n',  `r1')
-define(`d',  `r2')
-define(`cps',`r3')
-
-ASM_START()
-C mpn_mod_1_1p (ap, n, d, cps)
-C
-C Remainder of the n-limb operand at ap with respect to d, processed from
-C the most significant limb downwards, using the table at cps:
-C   cps[0]  multiplier for the final quotient estimate at L(8)
-C   cps[1]  shift count cnt
-C   cps[2]  multiplier used only when cnt != 0
-C   cps[3]  multiplier that folds the high limb in the main loop
-C The result is returned in r0 after a right shift by cnt.
-C
-C The two topmost limbs are loaded before n is examined, so n >= 2 is
-C assumed.
-C NOTE(review): the final shift by cnt suggests d is expected to be already
-C shifted left by cnt -- confirm against the callers.
-PROLOGUE(mpn_mod_1_1p)
-	push	{r4-r10}		C saved here, restored before return
-	add	r0, r0, r1, asl #2	C r0 = ap + 4*n, just past the top limb
-	ldr	r5, [r0, #-4]!		C r5 = ap[n-1]
-	ldr	r12, [r0, #-4]!		C r12 = ap[n-2]
-	subs	r1, r1, #2
-	ble	L(4)			C n <= 2: nothing to fold
-	ldr	r8, [r3, #12]		C r8 = cps[3]
-	mov	r4, r12
-	mov	r10, r5
-	umull	r7, r5, r10, r8		C r5:r7 = r10 * cps[3]
-	sub	r1, r1, #1
-	b	L(mid)
-
-L(top):	adds	r12, r6, r7
-	adcs	r10, r4, r5
-	sub	r1, r1, #1		C no S suffix: carry from adcs survives
-	mov	r6, #0
-	movcs	r6, r8			C r6 = cps[3] if adcs carried, else 0
-	umull	r7, r5, r10, r8		C r5:r7 = r10 * cps[3]
-	adds	r4, r12, r6
-	subcs	r4, r4, r2		C on carry out, subtract d
-L(mid):	ldr	r6, [r0, #-4]!		C r6 = next lower limb
-	teq	r1, #0
-	bne	L(top)			C loop until the counter in r1 is 0
-
-	adds	r12, r6, r7
-	adcs	r5, r4, r5
-	subcs	r5, r5, r2		C on carry out, subtract d
-L(4):	ldr	r1, [r3, #4]		C r1 = cps[1] = cnt
-	cmp	r1, #0
-	beq	L(7)			C cnt == 0: no shifting needed
-	ldr	r4, [r3, #8]		C r4 = cps[2]
-	umull	r0, r6, r5, r4		C r6:r0 = r5 * cps[2]
-	adds	r12, r0, r12
-	addcs	r6, r6, #1		C propagate carry into the high word
-	rsb	r0, r1, #32
-	mov	r0, r12, lsr r0		C top cnt bits of r12
-	orr	r5, r0, r6, asl r1	C r5:r12 = r6:r12 << cnt
-	mov	r12, r12, asl r1
-	b	L(8)
-L(7):	cmp	r5, r2
-	subcs	r5, r5, r2		C if r5 >= d (unsigned), r5 -= d
-L(8):	ldr	r0, [r3, #0]		C r0 = cps[0]
-	umull	r4, r3, r5, r0		C r3:r4 = r5 * cps[0]
-	add	r5, r5, #1
-	adds	r0, r4, r12
-	adc	r5, r3, r5		C r5 = quotient estimate
-	mul	r5, r2, r5
-	sub	r12, r12, r5		C r12 = candidate remainder
-	cmp	r12, r0
-	addhi	r12, r12, r2		C first adjustment: add d back
-	cmp	r2, r12
-	subls	r12, r12, r2		C second adjustment: if d <= r12, r12 -= d
-	mov	r0, r12, lsr r1		C return value, shifted right by cnt
-	pop	{r4-r10}
-	bx	r14
-EPILOGUE()
-
-C mpn_mod_1_1p_cps (cps, b)
-C
-C Fills the table at cps (r0) for the divisor b (r1):
-C   cps[0] = value returned by mpn_invert_limb (b << cnt)
-C   cps[1] = cnt = clz (b)
-C   cps[2] = (-(b << cnt) * ((1 << cnt) | (cps[0] >> (32-cnt)))) >> cnt
-C            (stored only when cnt != 0)
-C   cps[3] = -(b << cnt) * cps[0], low 32 bits
-PROLOGUE(mpn_mod_1_1p_cps)
-	stmfd	sp!, {r4, r5, r6, r14}
-	mov	r5, r0			C r5 = cps, kept across the call
-	clz	r4, r1			C r4 = cnt
-	mov	r0, r1, asl r4		C r0 = b << cnt
-	rsb	r6, r0, #0		C r6 = -(b << cnt)
-	bl	mpn_invert_limb
-	str	r0, [r5, #0]		C cps[0] = returned value
-	str	r4, [r5, #4]		C cps[1] = cnt
-	cmp	r4, #0
-	beq	L(2)			C cnt == 0: cps[2] is left unwritten
-	rsb	r1, r4, #32
-	mov	r3, #1
-	mov	r3, r3, asl r4
-	orr	r3, r3, r0, lsr r1	C (1 << cnt) | (cps[0] >> (32-cnt))
-	mul	r3, r6, r3
-	mov	r4, r3, lsr r4
-	str	r4, [r5, #8]		C cps[2]
-L(2):	mul	r0, r6, r0
-	str	r0, [r5, #12]		C cps[3]
-	ldmfd	sp!, {r4, r5, r6, pc}
-EPILOGUE()
diff --git a/gmp/mpn/arm/v5/mod_1_2.asm b/gmp/mpn/arm/v5/mod_1_2.asm
deleted file mode 100644
index aa26ecb21c..0000000000
--- a/gmp/mpn/arm/v5/mod_1_2.asm
+++ /dev/null
@@ -1,156 +0,0 @@
-dnl  ARM mpn_mod_1s_2p
-
-dnl  Contributed to the GNU project by Torbjörn Granlund.
-
-dnl  Copyright 2012 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
-dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
-dnl
-dnl  or both in parallel, as here.
-dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
-dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C	     cycles/limb
-C StrongARM	 -
-C XScale	 ?
-C Cortex-A7	 ?
-C Cortex-A8	 ?
-C Cortex-A9	 4.25
-C Cortex-A15	 3
-
-define(`ap', `r0')
-define(`n',  `r1')
-define(`d',  `r2')
-define(`cps',`r3')
-
-ASM_START()
-C mpn_mod_1s_2p (ap, n, d, cps)
-C
-C Remainder of the n-limb operand at ap with respect to d, consuming two
-C limbs per step from the most significant end.  Uses the table at cps:
-C   cps[0]     multiplier for the final quotient estimate at L(x)
-C   cps[1]     shift count cnt
-C   cps[2..4]  B1, B2, B3
-C The result is returned in r0 after a right shift by cnt.
-C
-C The loop at L(top) is unrolled twice: each half loads two further limbs
-C and folds the running value into them with B1, B2 and B3, alternating
-C between the accumulator pairs r9:r5 and r10:r4.
-PROLOGUE(mpn_mod_1s_2p)
-	push	{r4-r10}		C saved here, restored before return
-	tst	n, #1			C Z set iff n is even; used by beq below
-	add	r7, r3, #8
-	ldmia	r7, {r7, r8, r12}	C load B1, B2, B3
-	add	ap, ap, n, lsl #2	C put ap at operand end
-	beq	L(evn)
-
-L(odd):	subs	n, n, #1
-	beq	L(1)			C n was 1: single-limb case
-	ldmdb	ap!, {r4,r6,r9}		C three top limbs, r9 the highest
-	mov	r10, #0
-	umlal	r4, r10, r6, r7		C r10:r4 += r6 * B1
-	umlal	r4, r10, r9, r8		C r10:r4 += r9 * B2
-	b	L(com)
-
-L(evn):	ldmdb	ap!, {r4,r10}		C two top limbs, r10 the higher
-L(com):	subs	n, n, #2
-	ble	L(end)			C no limbs left to fold
-	ldmdb	ap!, {r5,r6}
-	b	L(mid)
-
-L(top):	mov	r9, #0
-	umlal	r5, r9, r6, r7		C B1
-	umlal	r5, r9, r4, r8		C B2
-	ldmdb	ap!, {r4,r6}
-	umlal	r5, r9, r10, r12	C B3
-	ble	L(xit)			C flags still from subs at L(mid)
-	mov	r10, #0
-	umlal	r4, r10, r6, r7		C B1
-	umlal	r4, r10, r5, r8		C B2
-	ldmdb	ap!, {r5,r6}
-	umlal	r4, r10, r9, r12	C B3
-L(mid):	subs	n, n, #4
-	bge	L(top)
-
-	mov	r9, #0
-	umlal	r5, r9, r6, r7		C B1
-	umlal	r5, r9, r4, r8		C B2
-	umlal	r5, r9, r10, r12	C B3
-	mov	r4, r5			C result pair is now r9:r4
-
-C movge runs when L(end) is reached with flags GE: from L(xit), and from the
-C direct branch at L(com) when the count reached exactly 0.  It is skipped
-C on the fall-through from the loop (flags LT), where r9 is already set.
-L(end):	movge	r9, r10			C take high word from r10
-	ldr	r6, [r3, #4]		C cps[1] = cnt
-	mov	r5, #0
-	umlal	r4, r5, r9, r7		C r5:r4 = r4 + r9 * B1
-	mov	r7, r5, lsl r6
-L(x):	rsb	r1, r6, #32
-	orr	r8, r7, r4, lsr r1	C r8:r9 = r5:r4 << cnt
-	mov	r9, r4, lsl r6
-	ldr	r5, [r3, #0]		C r5 = cps[0]
-	add	r0, r8, #1
-	umull	r12, r1, r8, r5		C r1:r12 = r8 * cps[0]
-	adds	r4, r12, r9
-	adc	r1, r1, r0		C r1 = quotient estimate
-	mul	r5, r2, r1
-	sub	r9, r9, r5		C r9 = candidate remainder
-	cmp	r9, r4
-	addhi	r9, r9, r2		C first adjustment: add d back
-	cmp	r2, r9
-	subls	r9, r9, r2		C second adjustment: if d <= r9, r9 -= d
-	mov	r0, r9, lsr r6		C return value, shifted right by cnt
-	pop	{r4-r10}
-	bx	r14
-
-L(xit):	mov	r10, #0
-	umlal	r4, r10, r6, r7		C B1
-	umlal	r4, r10, r5, r8		C B2
-	umlal	r4, r10, r9, r12	C B3
-	b	L(end)
-
-L(1):	ldr	r6, [r3, #4]		C cps[1] = cnt
-	ldr	r4, [ap, #-4]		C ap[0]
-	mov	r7, #0			C high word is zero
-	b	L(x)
-EPILOGUE()
-
-C mpn_mod_1s_2p_cps (cps, b)
-C
-C Fills the five-word table at cps (r0) for the divisor b (r1):
-C   cps[0] = value returned by mpn_invert_limb (b << cnt)
-C   cps[1] = cnt = clz (b)
-C   cps[2] = B1 >> cnt, cps[3] = B2 >> cnt, cps[4] = B3 >> cnt
-C where B1, B2 and B3 are the values left in r2, r1 and r3 below.
-PROLOGUE(mpn_mod_1s_2p_cps)
-	push	{r4-r8, r14}
-	clz	r4, r1			C r4 = cnt
-	mov	r5, r1, lsl r4		C b <<= cnt
-	mov	r6, r0			C r6 = cps
-	mov	r0, r5
-	bl	mpn_invert_limb		C r0 = inverse of the shifted divisor
-	rsb	r3, r4, #32
-	mov	r3, r0, lsr r3		C register LSR by 32 gives 0 when cnt == 0
-	mov	r2, #1
-	orr	r3, r3, r2, lsl r4	C (1 << cnt) | (r0 >> (32-cnt))
-	rsb	r1, r5, #0		C r1 = -b
-	mul	r2, r1, r3		C r2 = B1
-	umull	r3, r12, r2, r0		C r12:r3 = B1 * inverse
-	add	r12, r2, r12
-	mvn	r12, r12
-	mul	r1, r5, r12
-	cmp	r1, r3
-	addhi	r1, r1, r5		C r1 = B2
-	umull	r12, r7, r1, r0		C r7:r12 = B2 * inverse
-	add	r7, r1, r7
-	mvn	r7, r7
-	mul	r3, r5, r7
-	cmp	r3, r12
-	addhi	r3, r3, r5		C r3 = B3
-	mov	r5, r2, lsr r4		C B1 >> cnt
-	mov	r7, r1, lsr r4		C B2 >> cnt
-	mov	r8, r3, lsr r4		C B3 >> cnt
-	stmia	r6, {r0,r4,r5,r7,r8}	C fill cps
-	pop	{r4-r8, pc}
-EPILOGUE() |