summary refs log tree commit diff
path: root/gmp/mpn/arm/v7a
diff options
context:
space:
mode:
Diffstat (limited to 'gmp/mpn/arm/v7a')
-rw-r--r-- gmp/mpn/arm/v7a/cora15/addmul_1.asm 145
-rw-r--r-- gmp/mpn/arm/v7a/cora15/aors_n.asm 162
-rw-r--r-- gmp/mpn/arm/v7a/cora15/cnd_aors_n.asm 158
-rw-r--r-- gmp/mpn/arm/v7a/cora15/com.asm 180
-rw-r--r-- gmp/mpn/arm/v7a/cora15/gmp-mparam.h 197
-rw-r--r-- gmp/mpn/arm/v7a/cora15/logops_n.asm 253
-rw-r--r-- gmp/mpn/arm/v7a/cora15/mul_1.asm 104
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm 43
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm 43
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm 144
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/com.asm 97
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/copyd.asm 110
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/copyi.asm 90
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm 177
-rw-r--r-- gmp/mpn/arm/v7a/cora15/submul_1.asm 159
-rw-r--r-- gmp/mpn/arm/v7a/cora9/gmp-mparam.h 209
16 files changed, 0 insertions, 2271 deletions
diff --git a/gmp/mpn/arm/v7a/cora15/addmul_1.asm b/gmp/mpn/arm/v7a/cora15/addmul_1.asm
deleted file mode 100644
index c2277b32b2..0000000000
--- a/gmp/mpn/arm/v7a/cora15/addmul_1.asm
+++ /dev/null
@@ -1,145 +0,0 @@
-dnl ARM mpn_addmul_1 optimised for A15.
-
-dnl Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb best
-C StrongARM: -
-C XScale ?
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 6 3.25
-C Cortex-A15 2 this
-
-C This code uses umlal for adding in the rp[] data, keeping the recurrency path
-C separate from any multiply instructions. It performs well on A15, at umlal's
-C bandwidth.
-C
-C An A9 variant should perhaps stick to 3-way unrolling, and use ldm and stm
-C for all loads and stores. Alternatively, it could do 2-way or 4-way, but
-C then alignment aware code will be necessary (adding O(1) bookkeeping
-C overhead).
-C
-C We don't use r12 due to ldrd and strd limitations.
-
-C Architecture requirements:
-C v5 -
-C v5t -
-C v5te ldrd strd
-C v6 -
-C v6t2 -
-C v7a -
-
-define(`rp', `r0') C destination limbs, also read as the addend
-define(`up', `r1') C source limbs
-define(`n', `r2') C limb count
-define(`v0', `r3') C multiplier limb
-
-define(`w0', `r10') define(`w1', `r11') C limbs loaded from rp, accumulated into by umlal
-define(`u0', `r8') define(`u1', `r9') C limbs loaded from up
-
-ASM_START()
-PROLOGUE(mpn_addmul_1) C rp[] += up[] * v0, returning the carry-out limb in r0
- push { r4-r11 } C every one of r4-r11 is used as a temporary below
-
- ands r6, n, #3 C r6 = n mod 4 selects the feed-in case
- sub n, n, #3 C biased count, the flags from ands are kept
- beq L(b00)
- cmp r6, #2
- bcc L(b01)
- beq L(b10)
-
-L(b11): mov r6, #0
- cmn r13, #0 C carry clear
- ldr u1, [up], #-4 C first limb, then bias up for the offsets used from L(mid)
- ldr w1, [rp], #-4
- mov r7, #0
- b L(mid)
-
-L(b00): ldrd u0, u1, [up]
- ldrd w0, w1, [rp]
- mov r6, #0
- umlal w0, r6, u0, v0
- cmn r13, #0 C carry clear
- mov r7, #0
- str w0, [rp] C lowest result limb needs no carry from below
- b L(mid)
-
-L(b10): ldrd u0, u1, [up], #8
- ldrd w0, w1, [rp]
- mov r4, #0
- umlal w0, r4, u0, v0
- cmn r13, #0 C carry clear
- mov r5, #0
- str w0, [rp], #8
- umlal w1, r5, u1, v0
- tst n, n
- bmi L(end) C biased count negative, n was 2
- b L(top)
-
-L(b01): mov r4, #0 C carry is already clear here because bcc was taken
- ldr u1, [up], #4
- ldr w1, [rp], #4
- mov r5, #0
- umlal w1, r5, u1, v0
- tst n, n
- bmi L(end) C biased count negative, n was 1
-
- ALIGN(16)
-L(top): ldrd u0, u1, [up, #0]
- adcs r4, r4, w1 C result limb = high word of one product + low word of the next
- ldrd w0, w1, [rp, #0]
- mov r6, #0 C umlal accumulates, so the high word must start at zero
- umlal w0, r6, u0, v0 C 1 2
- adcs r5, r5, w0
- mov r7, #0
- strd r4, r5, [rp, #-4]
-L(mid): umlal w1, r7, u1, v0 C 2 3
- ldrd u0, u1, [up, #8]
- adcs r6, r6, w1
- ldrd w0, w1, [rp, #8]
- mov r4, #0
- umlal w0, r4, u0, v0 C 3 4
- adcs r7, r7, w0
- mov r5, #0
- strd r6, r7, [rp, #4]
- umlal w1, r5, u1, v0 C 0 1
- sub n, n, #4 C four limbs per pass
- add up, up, #16
- add rp, rp, #16
- tst n, n C sets N without touching the carry
- bpl L(top)
-
-L(end): adcs r4, r4, w1 C last result limb
- str r4, [rp, #-4]
- adc r0, r5, #0 C return the final high word plus the carry
- pop { r4-r11 }
- bx r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/aors_n.asm b/gmp/mpn/arm/v7a/cora15/aors_n.asm
deleted file mode 100644
index dc3f83992e..0000000000
--- a/gmp/mpn/arm/v7a/cora15/aors_n.asm
+++ /dev/null
@@ -1,162 +0,0 @@
-dnl ARM mpn_add_n/mpn_sub_n optimised for A15.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb best
-C StrongARM: -
-C XScale ?
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 3.55 2.5
-C Cortex-A15 1.27 this
-
-C This was a major improvement compared to the code we had before, but it might
-C not be the best 8-way code possible. We've tried some permutations of auto-
-C increments and separate pointer updates, but they all ran at the same speed
-C on A15.
-
-C Architecture requirements:
-C v5 -
-C v5t -
-C v5te ldrd strd
-C v6 -
-C v6t2 -
-C v7a -
-
-define(`rp', `r0') C result limbs
-define(`up', `r1') C first source operand
-define(`vp', `r2') C second source operand
-define(`n', `r3') C limb count, later the number of whole 4-limb groups
-
-ifdef(`OPERATION_add_n', `
- define(`ADDSUBC', adcs)
- define(`IFADD', `$1') C not referenced in this file
- define(`SETCY', `cmp $1, #1') C carry flag set iff the argument is nonzero
- define(`RETVAL', `adc r0, n, #0') C used where n is 0, so r0 = carry
- define(`RETVAL2', `adc r0, n, #1') C used at L(dne) where n is -1, so r0 = carry
- define(`func', mpn_add_n)
- define(`func_nc', mpn_add_nc)')
-ifdef(`OPERATION_sub_n', `
- define(`ADDSUBC', sbcs)
- define(`IFADD', `') C not referenced in this file
- define(`SETCY', `rsbs $1, $1, #0') C carry flag set, meaning no borrow, iff the argument is zero
- define(`RETVAL', `sbc r0, r0, r0
- and r0, r0, #1') C r0 = 1 if a borrow came out, else 0
- define(`RETVAL2', `RETVAL')
- define(`func', mpn_sub_n)
- define(`func_nc', mpn_sub_nc)')
-
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-ASM_START()
-PROLOGUE(func_nc) C entry point that also takes a carry-in word
- ldr r12, [sp] C carry-in is read from the top of the stack, presumably the fifth argument
- b L(ent)
-EPILOGUE()
-PROLOGUE(func) C rp[] = up[] +/- vp[] over n limbs, carry or borrow returned in r0
- mov r12, #0 C no carry-in
-L(ent): push { r4-r9 }
-
- ands r6, n, #3 C r6 = n mod 4 selects the feed-in case
- mov n, n, lsr #2 C n = number of whole 4-limb groups, flags from ands are kept
- beq L(b00)
- cmp r6, #2
- bcc L(b01)
- beq L(b10)
-
-L(b11): ldr r5, [up], #4
- ldr r7, [vp], #4
- SETCY( r12)
- ADDSUBC r9, r5, r7 C the odd limb is handled first
- ldrd r4, r5, [up, #0]
- ldrd r6, r7, [vp, #0]
- str r9, [rp], #-4
- b L(lo)
-
-L(b00): ldrd r4, r5, [up], #-8
- ldrd r6, r7, [vp], #-8
- SETCY( r12)
- sub rp, rp, #16 C bias rp for the store offsets used from L(mid)
- b L(mid)
-
-L(b01): ldr r5, [up], #-4
- ldr r7, [vp], #-4
- SETCY( r12)
- ADDSUBC r9, r5, r7
- str r9, [rp], #-12
- tst n, n
- beq L(wd1) C no whole group follows the single limb
-L(gt1): ldrd r4, r5, [up, #8]
- ldrd r6, r7, [vp, #8]
- b L(mid)
-
-L(b10): ldrd r4, r5, [up]
- ldrd r6, r7, [vp]
- SETCY( r12)
- sub rp, rp, #8
- b L(lo)
-
- ALIGN(16)
-L(top): ldrd r4, r5, [up, #8]
- ldrd r6, r7, [vp, #8]
- strd r8, r9, [rp, #8] C store the pair computed at L(lo)
-L(mid): ADDSUBC r8, r4, r6
- ADDSUBC r9, r5, r7
- ldrd r4, r5, [up, #16]
- ldrd r6, r7, [vp, #16]
- strd r8, r9, [rp, #16]
- ADDSUBC r8, r4, r6
- ADDSUBC r9, r5, r7
- sub n, n, #2 C one full pass covers two 4-limb groups
- tst n, n C sets N and Z without touching the carry chain
- bmi L(dne) C odd group count, leave from the middle of the loop
- ldrd r4, r5, [up, #24]
- ldrd r6, r7, [vp, #24]
- strd r8, r9, [rp, #24]
- ADDSUBC r8, r4, r6
- ADDSUBC r9, r5, r7
- ldrd r4, r5, [up, #32]! C writeback advances the pointers by eight limbs per pass
- ldrd r6, r7, [vp, #32]!
- strd r8, r9, [rp, #32]!
-L(lo): ADDSUBC r8, r4, r6
- ADDSUBC r9, r5, r7
- tst n, n
- bne L(top)
-
-L(end): strd r8, r9, [rp, #8] C final pair, n is 0 here
-L(wd1): RETVAL
- pop { r4-r9 }
- bx r14
-L(dne): strd r8, r9, [rp, #24] C final pair, n is -1 here
- RETVAL2
- pop { r4-r9 }
- bx r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/cnd_aors_n.asm b/gmp/mpn/arm/v7a/cora15/cnd_aors_n.asm
deleted file mode 100644
index b9e5cd3f79..0000000000
--- a/gmp/mpn/arm/v7a/cora15/cnd_aors_n.asm
+++ /dev/null
@@ -1,158 +0,0 @@
-dnl ARM mpn_cnd_add_n/mpn_cnd_sub_n optimised for A15.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb best
-C StrongARM: -
-C XScale ?
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 3.75 3
-C Cortex-A15 1.78 this
-
-C This code does not run as well as one could have hoped, since 1.5 c/l seems
-C realistic for this insn mix.
-
-C Architecture requirements:
-C v5 -
-C v5t -
-C v5te ldrd strd
-C v6 -
-C v6t2 -
-C v7a -
-
-define(`cnd',`r0') C condition word, turned into a mask below
-define(`rp', `r1') C result limbs
-define(`up', `r2') C first source operand
-define(`vp', `r3') C second source operand, masked by the condition
-define(`n', `r12') C limb count loaded from the stack, later the number of 4-limb groups
-
-ifdef(`OPERATION_cnd_add_n', `
- define(`ADDSUB', adds)
- define(`ADDSUBC', adcs)
- define(`IFADD', `$1') C not referenced in this file
- define(`INITCY', `cmn r0, #0') C adding zero clears the carry flag
- define(`RETVAL', `adc r0, n, #0') C used where n is 0, so r0 = carry
- define(`RETVAL2', `adc r0, n, #1') C not referenced in this file
- define(`func', mpn_cnd_add_n)
- define(`func_nc', mpn_add_nc)') C func_nc is not referenced in this file
-ifdef(`OPERATION_cnd_sub_n', `
- define(`ADDSUB', subs)
- define(`ADDSUBC', sbcs)
- define(`IFADD', `') C not referenced in this file
- define(`INITCY', `cmp r0, #0') C subtracting zero sets the carry flag, meaning no borrow
- define(`RETVAL', `sbc r0, r0, r0
- and r0, r0, #1') C r0 = 1 if a borrow came out, else 0
- define(`RETVAL2', `RETVAL') C not referenced in this file
- define(`func', mpn_cnd_sub_n)
- define(`func_nc', mpn_sub_nc)') C func_nc is not referenced in this file
-
-MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)
-
-ASM_START()
-PROLOGUE(func) C rp[] = up[] +/- vp[] if cnd is nonzero, else rp[] = up[], carry or borrow in r0
- ldr n, [sp] C n is read from the top of the stack, presumably the fifth argument
- push { r4-r9 }
-
- cmp cnd, #1 C carry flag set iff cnd is nonzero
- sbc cnd, cnd, cnd C conditionally set to 0xffffffff
-
- ands r6, n, #3 C r6 = n mod 4 selects the feed-in case
- mov n, n, lsr #2 C n = number of whole 4-limb groups, flags from ands are kept
- beq L(b00)
- cmp r6, #2
- bcc L(b01)
- beq L(b10)
-
-L(b11): ldr r5, [up], #4
- ldr r7, [vp], #4
- bic r7, r7, cnd C the vp limb becomes zero when the mask is all ones
- ADDSUB r9, r5, r7 C first limb uses the non-carry form, so INITCY is not needed
- ldrd r4, r5, [up, #0]
- ldrd r6, r7, [vp, #0]
- bic r6, r6, cnd
- bic r7, r7, cnd
- str r9, [rp], #-4
- b L(lo)
-
-L(b00): ldrd r4, r5, [up], #-8
- ldrd r6, r7, [vp], #-8
- bic r6, r6, cnd
- bic r7, r7, cnd
- INITCY
- sub rp, rp, #16 C bias rp for the store offsets used from L(mid)
- b L(mid)
-
-L(b01): ldr r5, [up], #-4
- ldr r7, [vp], #-4
- bic r7, r7, cnd
- ADDSUB r9, r5, r7
- str r9, [rp], #-12
- tst n, n
- beq L(wd1) C no whole group follows the single limb
-L(gt1): ldrd r4, r5, [up, #8]
- ldrd r6, r7, [vp, #8]
- bic r6, r6, cnd
- bic r7, r7, cnd
- b L(mid)
-
-L(b10): ldrd r4, r5, [up]
- ldrd r6, r7, [vp]
- bic r6, r6, cnd
- bic r7, r7, cnd
- INITCY
- sub rp, rp, #8
- b L(lo)
-
- ALIGN(16)
-L(top): ldrd r6, r7, [vp, #8]
- ldrd r4, r5, [up, #8]
- bic r6, r6, cnd
- bic r7, r7, cnd
- strd r8, r9, [rp, #8] C store the pair computed at L(lo)
-L(mid): ADDSUBC r8, r4, r6
- ADDSUBC r9, r5, r7
- ldrd r6, r7, [vp, #16]! C writeback advances the pointers by four limbs per pass
- ldrd r4, r5, [up, #16]!
- bic r6, r6, cnd
- bic r7, r7, cnd
- sub n, n, #1 C one pass covers one 4-limb group
- strd r8, r9, [rp, #16]!
-L(lo): ADDSUBC r8, r4, r6
- ADDSUBC r9, r5, r7
- tst n, n C sets Z without touching the carry chain
- bne L(top)
-
-L(end): strd r8, r9, [rp, #8] C final pair, n is 0 here
-L(wd1): RETVAL
- pop { r4-r9 }
- bx r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/com.asm b/gmp/mpn/arm/v7a/cora15/com.asm
deleted file mode 100644
index a258afe934..0000000000
--- a/gmp/mpn/arm/v7a/cora15/com.asm
+++ /dev/null
@@ -1,180 +0,0 @@
-dnl ARM mpn_com optimised for A15.
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM ?
-C XScale ?
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 2.5
-C Cortex-A15 1.0
-
-C This is great A15 core register code, but it is a bit large.
-C We use FEEDIN_VARIANT 1 to save some space, but use 8-way unrolling.
-
-C Architecture requirements:
-C v5 -
-C v5t -
-C v5te ldrd strd
-C v6 -
-C v6t2 -
-C v7a -
-
-define(`FEEDIN_VARIANT', 1) C alternatives: 0 1 2
-define(`UNROLL', 4x2) C alternatives: 4 4x2
-
-define(`rp', `r0') C destination limbs
-define(`up', `r1') C source limbs
-define(`n', `r2') C limb count, later the number of whole 4-limb groups
-
-ASM_START()
-PROLOGUE(mpn_com) C rp[] = bitwise complement of up[] over n limbs
- push { r4-r5,r8-r9 } C temporaries holding the loaded and complemented limb pairs
-
-ifelse(FEEDIN_VARIANT,0,`
- ands r12, n, #3
- mov n, n, lsr #2
- beq L(b00a)
- tst r12, #1
- beq L(bx0)
- ldr r5, [up], #4
- mvn r9, r5
- str r9, [rp], #4
- tst r12, #2
- beq L(b00)
-L(bx0): ldrd r4, r5, [up, #0]
- sub rp, rp, #8
- b L(lo)
-L(b00): tst n, n
- beq L(wd1)
-L(b00a):ldrd r4, r5, [up], #-8
- sub rp, rp, #16
- b L(mid)
-')
-ifelse(FEEDIN_VARIANT,1,`
- and r12, n, #3 C r12 = n mod 4
- mov n, n, lsr #2 C n = number of whole 4-limb groups
- tst r12, #1
- beq L(bx0)
- ldr r5, [up], #4 C odd count, complement one limb up front
- mvn r9, r5
- str r9, [rp], #4
-L(bx0): tst r12, #2
- beq L(b00)
- ldrd r4, r5, [up, #0] C two extra limbs, enter the loop at L(lo)
- sub rp, rp, #8
- b L(lo)
-L(b00): tst n, n
- beq L(wd1) C no whole group left, nothing more to do
- ldrd r4, r5, [up], #-8
- sub rp, rp, #16 C bias rp for the store offsets used from L(mid)
- b L(mid)
-')
-ifelse(FEEDIN_VARIANT,2,`
- ands r12, n, #3
- mov n, n, lsr #2
- beq L(b00)
- cmp r12, #2
- bcc L(b01)
- beq L(b10)
-
-L(b11): ldr r5, [up], #4
- mvn r9, r5
- ldrd r4, r5, [up, #0]
- str r9, [rp], #-4
- b L(lo)
-
-L(b00): ldrd r4, r5, [up], #-8
- sub rp, rp, #16
- b L(mid)
-
-L(b01): ldr r5, [up], #-4
- mvn r9, r5
- str r9, [rp], #-12
- tst n, n
- beq L(wd1)
-L(gt1): ldrd r4, r5, [up, #8]
- b L(mid)
-
-L(b10): ldrd r4, r5, [up]
- sub rp, rp, #8
- b L(lo)
-')
- ALIGN(16)
-ifelse(UNROLL,4,`
-L(top): ldrd r4, r5, [up, #8]
- strd r8, r9, [rp, #8]
-L(mid): mvn r8, r4
- mvn r9, r5
- ldrd r4, r5, [up, #16]!
- strd r8, r9, [rp, #16]!
- sub n, n, #1
-L(lo): mvn r8, r4
- mvn r9, r5
- tst n, n
- bne L(top)
-')
-ifelse(UNROLL,4x2,`
-L(top): ldrd r4, r5, [up, #8]
- strd r8, r9, [rp, #8] C store the pair computed at L(lo)
-L(mid): mvn r8, r4
- mvn r9, r5
- ldrd r4, r5, [up, #16]
- strd r8, r9, [rp, #16]
- mvn r8, r4
- mvn r9, r5
- sub n, n, #2 C one full pass covers two 4-limb groups
- tst n, n
- bmi L(dne) C odd group count, leave from the middle of the loop
- ldrd r4, r5, [up, #24]
- strd r8, r9, [rp, #24]
- mvn r8, r4
- mvn r9, r5
- ldrd r4, r5, [up, #32]! C writeback advances the pointers by eight limbs per pass
- strd r8, r9, [rp, #32]!
-L(lo): mvn r8, r4
- mvn r9, r5
- tst n, n
- bne L(top)
-')
-
-L(end): strd r8, r9, [rp, #8] C final pair when leaving from L(lo)
-L(wd1): pop { r4-r5,r8-r9 }
- bx r14
-ifelse(UNROLL,4x2,`
-L(dne): strd r8, r9, [rp, #24] C final pair when leaving from the middle of the loop
- pop { r4-r5,r8-r9 }
- bx r14
-')
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/gmp-mparam.h b/gmp/mpn/arm/v7a/cora15/gmp-mparam.h
deleted file mode 100644
index 2a06532b3e..0000000000
--- a/gmp/mpn/arm/v7a/cora15/gmp-mparam.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/* gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2003, 2009, 2010, 2012-2014 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 1700MHz Cortex-A15 with Neon (in spite of file position) */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.6 */
-
-#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 9
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 15
-
-#define MUL_TOOM22_THRESHOLD 23
-#define MUL_TOOM33_THRESHOLD 90
-#define MUL_TOOM44_THRESHOLD 262
-#define MUL_TOOM6H_THRESHOLD 351
-#define MUL_TOOM8H_THRESHOLD 557
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 90
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 160
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 89
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 169
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 130
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 43
-#define SQR_TOOM3_THRESHOLD 138
-#define SQR_TOOM4_THRESHOLD 363
-#define SQR_TOOM6_THRESHOLD 517
-#define SQR_TOOM8_THRESHOLD 725
-
-#define MULMID_TOOM42_THRESHOLD 52
-
-#define MULMOD_BNM1_THRESHOLD 17
-#define SQRMOD_BNM1_THRESHOLD 23
-
-#define MUL_FFT_MODF_THRESHOLD 550 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 550, 5}, { 25, 6}, { 27, 7}, { 15, 6}, \
- { 31, 7}, { 19, 6}, { 39, 7}, { 25, 6}, \
- { 51, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
- { 19, 7}, { 41, 8}, { 23, 7}, { 51, 8}, \
- { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
- { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \
- { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \
- { 47, 8}, { 99, 9}, { 55,10}, { 31, 9}, \
- { 79,10}, { 47, 9}, { 103,11}, { 31,10}, \
- { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \
- { 95, 9}, { 191,10}, { 111,11}, { 63,10}, \
- { 159,11}, { 95,10}, { 191, 9}, { 383,10}, \
- { 207,12}, { 63,11}, { 127,10}, { 255, 9}, \
- { 511,10}, { 271, 9}, { 543,11}, { 159,10}, \
- { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \
- { 351,11}, { 191,10}, { 383, 9}, { 767,10}, \
- { 399, 9}, { 799,10}, { 415,11}, { 223,12}, \
- { 127,11}, { 255,10}, { 543,11}, { 287,10}, \
- { 607,11}, { 319,10}, { 671,11}, { 351,12}, \
- { 191,11}, { 383,10}, { 799,11}, { 415,10}, \
- { 831,13}, { 127,12}, { 255,11}, { 543,10}, \
- { 1087,11}, { 607,12}, { 319,11}, { 671,10}, \
- { 1343,11}, { 735,12}, { 383,11}, { 799,10}, \
- { 1599,11}, { 831,12}, { 447,11}, { 895,13}, \
- { 255,12}, { 511,11}, { 1023,12}, { 575,11}, \
- { 1151,12}, { 639,11}, { 1279,12}, { 703,13}, \
- { 383,12}, { 767,11}, { 1599,12}, { 831,11}, \
- { 1663,12}, { 895,13}, { 511,12}, { 1087,13}, \
- { 639,12}, { 1407,13}, { 767,12}, { 1599,13}, \
- { 895,14}, { 511,13}, { 1023,12}, { 2111,13}, \
- { 1151,12}, { 2431,13}, { 1279,14}, { 767,13}, \
- { 1535,12}, { 3071,15}, { 511,14}, { 1023,13}, \
- { 2175,14}, { 1279,13}, { 2559,12}, { 5119,13}, \
- { 2815,12}, { 5631,13}, { 2943,14}, { 16384,15}, \
- { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 137
-#define MUL_FFT_THRESHOLD 5760
-
-#define SQR_FFT_MODF_THRESHOLD 525 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 525, 5}, { 25, 6}, { 27, 7}, { 15, 6}, \
- { 32, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
- { 39, 7}, { 25, 6}, { 51, 7}, { 27, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 51, 8}, { 27, 7}, { 55, 9}, \
- { 15, 8}, { 31, 7}, { 63, 8}, { 39, 9}, \
- { 23, 8}, { 55,10}, { 15, 9}, { 31, 8}, \
- { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
- { 95, 9}, { 55,10}, { 31, 9}, { 79,10}, \
- { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
- { 135,10}, { 79, 9}, { 159,10}, { 95, 9}, \
- { 191,10}, { 111,11}, { 63,10}, { 143, 9}, \
- { 287,10}, { 159,11}, { 95,10}, { 191, 9}, \
- { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \
- { 511,10}, { 271, 9}, { 543,10}, { 287,11}, \
- { 159,10}, { 335, 9}, { 671,10}, { 351,11}, \
- { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \
- { 799,10}, { 415,11}, { 223,12}, { 127,11}, \
- { 255,10}, { 543,11}, { 287,10}, { 607,11}, \
- { 319,10}, { 671,11}, { 351,12}, { 191,11}, \
- { 383,10}, { 799,11}, { 415,10}, { 831,13}, \
- { 127,12}, { 255,11}, { 543,10}, { 1087,11}, \
- { 607,12}, { 319,11}, { 671,10}, { 1343,11}, \
- { 735,12}, { 383,11}, { 799,10}, { 1599,11}, \
- { 831,12}, { 447,11}, { 895,12}, { 511,11}, \
- { 1023,12}, { 575,11}, { 1151,12}, { 639,11}, \
- { 1343,12}, { 703,13}, { 383,12}, { 767,11}, \
- { 1599,12}, { 831,11}, { 1663,12}, { 895,13}, \
- { 511,12}, { 1087,13}, { 639,12}, { 1407,13}, \
- { 767,12}, { 1727,13}, { 895,14}, { 511,13}, \
- { 1023,12}, { 2047,13}, { 1151,12}, { 2431,13}, \
- { 1279,14}, { 767,13}, { 1535,12}, { 3071,15}, \
- { 511,14}, { 1023,13}, { 2047,12}, { 4095,13}, \
- { 2175,14}, { 1279,13}, { 2559,12}, { 5119,13}, \
- { 2687,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 139
-#define SQR_FFT_THRESHOLD 4736
-
-#define MULLO_BASECASE_THRESHOLD 9
-#define MULLO_DC_THRESHOLD 39
-#define MULLO_MUL_N_THRESHOLD 11278
-
-#define DC_DIV_QR_THRESHOLD 54
-#define DC_DIVAPPR_Q_THRESHOLD 296
-#define DC_BDIV_QR_THRESHOLD 52
-#define DC_BDIV_Q_THRESHOLD 300
-
-#define INV_MULMOD_BNM1_THRESHOLD 44
-#define INV_NEWTON_THRESHOLD 294
-#define INV_APPR_THRESHOLD 294
-
-#define BINV_NEWTON_THRESHOLD 375
-#define REDC_1_TO_REDC_2_THRESHOLD 102
-#define REDC_2_TO_REDC_N_THRESHOLD 0 /* always */
-
-#define MU_DIV_QR_THRESHOLD 1718
-#define MU_DIVAPPR_Q_THRESHOLD 1718
-#define MUPI_DIV_QR_THRESHOLD 108
-#define MU_BDIV_QR_THRESHOLD 1528
-#define MU_BDIV_Q_THRESHOLD 1718
-
-#define POWM_SEC_TABLE 3,32,70,416,1464
-
-#define MATRIX22_STRASSEN_THRESHOLD 22
-#define HGCD_THRESHOLD 152
-#define HGCD_APPR_THRESHOLD 230
-#define HGCD_REDUCE_THRESHOLD 3259
-#define GCD_DC_THRESHOLD 702
-#define GCDEXT_DC_THRESHOLD 538
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 18
-#define GET_STR_PRECOMPUTE_THRESHOLD 32
-#define SET_STR_DC_THRESHOLD 119
-#define SET_STR_PRECOMPUTE_THRESHOLD 1063
-
-#define FAC_DSC_THRESHOLD 262
-#define FAC_ODD_THRESHOLD 26
diff --git a/gmp/mpn/arm/v7a/cora15/logops_n.asm b/gmp/mpn/arm/v7a/cora15/logops_n.asm
deleted file mode 100644
index 06026143e1..0000000000
--- a/gmp/mpn/arm/v7a/cora15/logops_n.asm
+++ /dev/null
@@ -1,253 +0,0 @@
-dnl ARM mpn_and_n, mpn_andn_n, mpn_nand_n, etc, optimised for A15.
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb cycles/limb
-C and andn ior xor nand iorn nior xnor
-C StrongARM ? ?
-C XScale ? ?
-C Cortex-A7 ? ?
-C Cortex-A8 ? ?
-C Cortex-A9 3.5 3.56
-C Cortex-A15 1.27 1.64
-
-C This is great A15 core register code, but it is a bit large.
-C We use FEEDIN_VARIANT 1 to save some space, but use 8-way unrolling.
-
-C Architecture requirements:
-C v5 -
-C v5t -
-C v5te ldrd strd
-C v6 -
-C v6t2 -
-C v7a -
-
-define(`FEEDIN_VARIANT', 1) C alternatives: 0 1 2
-define(`UNROLL', 4x2) C alternatives: 4 4x2
-
-define(`rp', `r0') C result limbs
-define(`up', `r1') C first source operand
-define(`vp', `r2') C second source operand
-define(`n', `r3') C limb count, later the number of whole 4-limb groups
-
-define(`POSTOP') C empty by default, redefined below for the complemented operations
-
-ifdef(`OPERATION_and_n',`
- define(`func', `mpn_and_n')
- define(`LOGOP', `and $1, $2, $3')') C up AND vp
-ifdef(`OPERATION_andn_n',`
- define(`func', `mpn_andn_n')
- define(`LOGOP', `bic $1, $2, $3')') C up AND NOT vp
-ifdef(`OPERATION_nand_n',`
- define(`func', `mpn_nand_n')
- define(`POSTOP', `mvn $1, $1')
- define(`LOGOP', `and $1, $2, $3')') C NOT (up AND vp) once POSTOP is applied
-ifdef(`OPERATION_ior_n',`
- define(`func', `mpn_ior_n')
- define(`LOGOP', `orr $1, $2, $3')') C up OR vp
-ifdef(`OPERATION_iorn_n',`
- define(`func', `mpn_iorn_n')
- define(`POSTOP', `mvn $1, $1')
- define(`LOGOP', `bic $1, $3, $2')') C bic gives vp AND NOT up, POSTOP turns that into up OR NOT vp
-ifdef(`OPERATION_nior_n',`
- define(`func', `mpn_nior_n')
- define(`POSTOP', `mvn $1, $1')
- define(`LOGOP', `orr $1, $2, $3')') C NOT (up OR vp) once POSTOP is applied
-ifdef(`OPERATION_xor_n',`
- define(`func', `mpn_xor_n')
- define(`LOGOP', `eor $1, $2, $3')') C up XOR vp
-ifdef(`OPERATION_xnor_n',`
- define(`func', `mpn_xnor_n')
- define(`POSTOP', `mvn $1, $1')
- define(`LOGOP', `eor $1, $2, $3')') C NOT (up XOR vp) once POSTOP is applied
-
-MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
-
-ASM_START()
-PROLOGUE(func) C rp[] = LOGOP of up[] and vp[], followed by POSTOP, over n limbs
- push { r4-r9 } C temporaries holding the loaded limb pairs and the results
-
-ifelse(FEEDIN_VARIANT,0,`
- ands r6, n, #3
- mov n, n, lsr #2
- beq L(b00a)
- tst r6, #1
- beq L(bx0)
- ldr r5, [up], #4
- ldr r7, [vp], #4
- LOGOP( r9, r5, r7)
- POSTOP( r9)
- str r9, [rp], #4
- tst r6, #2
- beq L(b00)
-L(bx0): ldrd r4, r5, [up, #0]
- ldrd r6, r7, [vp, #0]
- sub rp, rp, #8
- b L(lo)
-L(b00): tst n, n
- beq L(wd1)
-L(b00a):ldrd r4, r5, [up], #-8
- ldrd r6, r7, [vp], #-8
- sub rp, rp, #16
- b L(mid)
-')
-ifelse(FEEDIN_VARIANT,1,`
- and r6, n, #3 C r6 = n mod 4
- mov n, n, lsr #2 C n = number of whole 4-limb groups
- tst r6, #1
- beq L(bx0)
- ldr r5, [up], #4 C odd count, process one limb up front
- ldr r7, [vp], #4
- LOGOP( r9, r5, r7)
- POSTOP( r9)
- str r9, [rp], #4
-L(bx0): tst r6, #2 C r6 is tested before the ldrd below overwrites it
- beq L(b00)
- ldrd r4, r5, [up, #0] C two extra limbs, enter the loop at L(lo)
- ldrd r6, r7, [vp, #0]
- sub rp, rp, #8
- b L(lo)
-L(b00): tst n, n
- beq L(wd1) C no whole group left, nothing more to do
- ldrd r4, r5, [up], #-8
- ldrd r6, r7, [vp], #-8
- sub rp, rp, #16 C bias rp for the store offsets used from L(mid)
- b L(mid)
-')
-ifelse(FEEDIN_VARIANT,2,`
- ands r6, n, #3
- mov n, n, lsr #2
- beq L(b00)
- cmp r6, #2
- bcc L(b01)
- beq L(b10)
-
-L(b11): ldr r5, [up], #4
- ldr r7, [vp], #4
- LOGOP( r9, r5, r7)
- ldrd r4, r5, [up, #0]
- ldrd r6, r7, [vp, #0]
- POSTOP( r9)
- str r9, [rp], #-4
- b L(lo)
-
-L(b00): ldrd r4, r5, [up], #-8
- ldrd r6, r7, [vp], #-8
- sub rp, rp, #16
- b L(mid)
-
-L(b01): ldr r5, [up], #-4
- ldr r7, [vp], #-4
- LOGOP( r9, r5, r7)
- POSTOP( r9)
- str r9, [rp], #-12
- tst n, n
- beq L(wd1)
-L(gt1): ldrd r4, r5, [up, #8]
- ldrd r6, r7, [vp, #8]
- b L(mid)
-
-L(b10): ldrd r4, r5, [up]
- ldrd r6, r7, [vp]
- sub rp, rp, #8
- b L(lo)
-')
- ALIGN(16)
-ifelse(UNROLL,4,`
-L(top): ldrd r4, r5, [up, #8]
- ldrd r6, r7, [vp, #8]
- POSTOP( r8)
- POSTOP( r9)
- strd r8, r9, [rp, #8]
-L(mid): LOGOP( r8, r4, r6)
- LOGOP( r9, r5, r7)
- ldrd r4, r5, [up, #16]!
- ldrd r6, r7, [vp, #16]!
- POSTOP( r8)
- POSTOP( r9)
- strd r8, r9, [rp, #16]!
- sub n, n, #1
-L(lo): LOGOP( r8, r4, r6)
- LOGOP( r9, r5, r7)
- tst n, n
- bne L(top)
-')
-ifelse(UNROLL,4x2,`
-L(top): ldrd r4, r5, [up, #8]
- ldrd r6, r7, [vp, #8]
- POSTOP( r8)
- POSTOP( r9)
- strd r8, r9, [rp, #8] C store the pair computed at L(lo)
-L(mid): LOGOP( r8, r4, r6)
- LOGOP( r9, r5, r7)
- ldrd r4, r5, [up, #16]
- ldrd r6, r7, [vp, #16]
- POSTOP( r8)
- POSTOP( r9)
- strd r8, r9, [rp, #16]
- LOGOP( r8, r4, r6)
- LOGOP( r9, r5, r7)
- sub n, n, #2 C one full pass covers two 4-limb groups
- tst n, n
- bmi L(dne) C odd group count, leave from the middle of the loop
- ldrd r4, r5, [up, #24]
- ldrd r6, r7, [vp, #24]
- POSTOP( r8)
- POSTOP( r9)
- strd r8, r9, [rp, #24]
- LOGOP( r8, r4, r6)
- LOGOP( r9, r5, r7)
- ldrd r4, r5, [up, #32]! C writeback advances the pointers by eight limbs per pass
- ldrd r6, r7, [vp, #32]!
- POSTOP( r8)
- POSTOP( r9)
- strd r8, r9, [rp, #32]!
-L(lo): LOGOP( r8, r4, r6)
- LOGOP( r9, r5, r7)
- tst n, n
- bne L(top)
-')
-
-L(end): POSTOP( r8)
- POSTOP( r9)
- strd r8, r9, [rp, #8] C final pair when leaving from L(lo)
-L(wd1): pop { r4-r9 }
- bx r14
-ifelse(UNROLL,4x2,`
-L(dne): POSTOP( r8)
- POSTOP( r9)
- strd r8, r9, [rp, #24] C final pair when leaving from the middle of the loop
- pop { r4-r9 }
- bx r14
-')
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/mul_1.asm b/gmp/mpn/arm/v7a/cora15/mul_1.asm
deleted file mode 100644
index 766ba5c57f..0000000000
--- a/gmp/mpn/arm/v7a/cora15/mul_1.asm
+++ /dev/null
@@ -1,104 +0,0 @@
-dnl ARM mpn_mul_1 optimised for A15.
-
-dnl Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb best
-C StrongARM: -
-C XScale ?
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 5.25 3.25
-C Cortex-A15 2.25 this
-
-
-C This runs well on A15 but very poorly on A9. By scheduling loads and adds
-C it is possible to get good A9 performance as well, but at the cost of using
-C many more (callee-saves) registers.
-
-C This is armv5 code, optimized for the armv7a cpu A15. Its location in the
-C GMP file structure might be misleading.
-
-
-define(`rp', `r0') C destination limbs
-define(`up', `r1') C source limbs
-define(`n', `r2') C limb count
-define(`v0', `r3') C multiplier limb
-
-ASM_START()
-PROLOGUE(mpn_mul_1c) C same as mpn_mul_1 but with a carry-in limb
- ldr r12, [sp] C carry-in is read from the top of the stack, presumably the fifth argument
- b L(ent)
-EPILOGUE()
-PROLOGUE(mpn_mul_1) C rp[] = up[] * v0, returning the high limb in r0
- mov r12, #0 C no carry-in
-L(ent): push {r4-r7}
-
- ldr r6, [up], #4 C first limb is loaded before the dispatch on n mod 4
- tst n, #1
- beq L(bx0)
-
-L(bx1): umull r4, r7, r6, v0
- adds r4, r4, r12 C add the carry-in to the lowest product word
- tst n, #2 C the carry from adds is still consumed after this tst
- beq L(lo1)
- b L(lo3)
-
-L(bx0): umull r4, r5, r6, v0
- adds r4, r4, r12 C add the carry-in to the lowest product word
- tst n, #2 C the carry from adds is still consumed after this tst
- beq L(lo0)
- b L(lo2)
-
-L(top): ldr r6, [up], #4
- str r4, [rp], #4
- umull r4, r5, r6, v0
- adds r4, r4, r7 C plain adds, the carry was folded into r7 at L(lo1)
-L(lo0): ldr r6, [up], #4
- str r4, [rp], #4
- umull r4, r7, r6, v0
- adcs r4, r4, r5 C low word + previous high word + carry
-L(lo3): ldr r6, [up], #4
- str r4, [rp], #4
- umull r4, r5, r6, v0
- adcs r4, r4, r7
-L(lo2): ldr r6, [up], #4
- str r4, [rp], #4
- umull r4, r7, r6, v0
- adcs r4, r4, r5
-L(lo1): adc r7, r7, #0 C fold the carry into the high word before subs overwrites the flags
- subs n, n, #4 C four limbs per pass
- bgt L(top)
-
- str r4, [rp] C last result limb
- mov r0, r7 C return the final high word
- pop {r4-r7}
- bx lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm b/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm
deleted file mode 100644
index d8cfe3f78f..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm
+++ /dev/null
@@ -1,43 +0,0 @@
-dnl ARM mpn_addlsh1_n, mpn_sublsh1_n, mpn_rsblsh1_n
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 1) C shift count; referenced as LSH by the included aorsorrlshC_n.asm
-
-ifdef(`OPERATION_addlsh1_n',`define(`DO_add')') C translate the requested operation into the
-ifdef(`OPERATION_sublsh1_n',`define(`DO_sub')') C DO_add / DO_sub / DO_rsb switch that the
-ifdef(`OPERATION_rsblsh1_n',`define(`DO_rsb')') C included file tests
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n mpn_rsblsh1_n) C the three function names this source covers
-
-include_mpn(`arm/v7a/cora15/neon/aorsorrlshC_n.asm') C shared implementation, parameterised by LSH
diff --git a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm b/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm
deleted file mode 100644
index b48204d926..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm
+++ /dev/null
@@ -1,43 +0,0 @@
-dnl ARM mpn_addlsh2_n, mpn_sublsh2_n, mpn_rsblsh2_n
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-define(LSH, 2) C shift count; referenced as LSH by the included aorsorrlshC_n.asm
-
-ifdef(`OPERATION_addlsh2_n',`define(`DO_add')') C translate the requested operation into the
-ifdef(`OPERATION_sublsh2_n',`define(`DO_sub')') C DO_add / DO_sub / DO_rsb switch that the
-ifdef(`OPERATION_rsblsh2_n',`define(`DO_rsb')') C included file tests
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n mpn_rsblsh2_n) C the three function names this source covers
-
-include_mpn(`arm/v7a/cora15/neon/aorsorrlshC_n.asm') C shared implementation, parameterised by LSH
diff --git a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm b/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm
deleted file mode 100644
index 16c34a2699..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm
+++ /dev/null
@@ -1,144 +0,0 @@
-dnl ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-
-C cycles/limb
-C StrongARM -
-C XScale -
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 5.25
-C Cortex-A15 2.25
-
-C TODO
-C * Consider using 4-way feed-in code.
-C * This is ad-hoc scheduled, perhaps unnecessarily so for A15, and perhaps
-C insufficiently for A7 and A8.
-
-define(`rp', `r0') C result pointer
-define(`up', `r1') C operand used unshifted (core registers)
-define(`vp', `r2') C operand shifted left by LSH bits (Neon registers)
-define(`n', `r3') C limb count
-
-ifdef(`DO_add', `
- define(`ADCSBCS', `adcs $1, $2, $3')
- define(`CLRCY', `cmn r13, #1')
- define(`RETVAL', `adc r0, $1, #0')
- define(`func', mpn_addlsh`'LSH`'_n)')
-ifdef(`DO_sub', `
- define(`ADCSBCS', `sbcs $1, $2, $3')
- define(`CLRCY', `cmp r13, #0')
- define(`RETVAL', `sbc $2, $2, $2
- cmn $2, #1
- adc r0, $1, #0')
- define(`func', mpn_sublsh`'LSH`'_n)')
-ifdef(`DO_rsb', `
- define(`ADCSBCS', `sbcs $1, $3, $2')
- define(`CLRCY', `cmp r13, #0')
- define(`RETVAL', `sbc r0, $1, #0')
- define(`func', mpn_rsblsh`'LSH`'_n)')
-
-
-ASM_START()
-PROLOGUE(func) C add: up + (vp << LSH); sub: up - (vp << LSH); rsb: (vp << LSH) - up; result to rp[]
- push {r4-r10}
- vmov.i8 d0, #0 C could feed carry through here
- CLRCY C put the carry flag in its starting state for the ADCSBCS chain
- tst n, #1
- beq L(bb0)
-
-L(bb1): vld1.32 {d3[0]}, [vp]! C odd n: handle a single limb first
- vsli.u32 d0, d3, #LSH C d0 = limb shifted left by LSH (d0 was zero)
- ldr r12, [up], #4
- vmov.32 r5, d0[0]
- vshr.u32 d0, d3, #32-LSH C keep the bits shifted out; they go below the next limb
- ADCSBCS( r12, r12, r5)
- str r12, [rp], #4
- bics n, n, #1 C drop the handled limb; finished if nothing remains
- beq L(rtn)
-
-L(bb0): tst n, #2
- beq L(b00)
-
-L(b10): vld1.32 {d3}, [vp]! C two limbs, treated as one 64-bit lane
- vsli.u64 d0, d3, #LSH C shifted pair, previous shift-out bits kept in the low LSH bits
- ldmia up!, {r10,r12}
- vmov r4, r5, d0
- vshr.u64 d0, d3, #64-LSH C new shift-out bits
- ADCSBCS( r10, r10, r4)
- ADCSBCS( r12, r12, r5)
- stmia rp!, {r10,r12}
- bics n, n, #2
- beq L(rtn)
-
-L(b00): vld1.32 {d2}, [vp]! C prime the pipeline: shift four vp limbs ahead of their use
- vsli.u64 d0, d2, #LSH
- vshr.u64 d1, d2, #64-LSH
- vld1.32 {d3}, [vp]!
- vsli.u64 d1, d3, #LSH
- vmov r6, r7, d0
- vshr.u64 d0, d3, #64-LSH
- sub n, n, #4 C sub + tst instead of subs, so the carry flag of the chain survives
- tst n, n
- beq L(end)
-
- ALIGN(16)
-L(top): ldmia up!, {r8,r9,r10,r12} C four limbs per iteration; vp data is shifted one step ahead
- vld1.32 {d2}, [vp]!
- vsli.u64 d0, d2, #LSH
- vmov r4, r5, d1
- vshr.u64 d1, d2, #64-LSH
- ADCSBCS( r8, r8, r6)
- ADCSBCS( r9, r9, r7)
- vld1.32 {d3}, [vp]!
- vsli.u64 d1, d3, #LSH
- vmov r6, r7, d0
- vshr.u64 d0, d3, #64-LSH
- ADCSBCS( r10, r10, r4)
- ADCSBCS( r12, r12, r5)
- stmia rp!, {r8,r9,r10,r12}
- sub n, n, #4
- tst n, n
- bne L(top)
-
-L(end): ldmia up!, {r8,r9,r10,r12} C drain the pipeline: last four limbs, no further vp loads
- vmov r4, r5, d1
- ADCSBCS( r8, r8, r6)
- ADCSBCS( r9, r9, r7)
- ADCSBCS( r10, r10, r4)
- ADCSBCS( r12, r12, r5)
- stmia rp!, {r8,r9,r10,r12}
-L(rtn): vmov.32 r0, d0[0] C bits shifted out of the most significant vp limb
- RETVAL( r0, r1) C combine them with the final carry/borrow flag into r0
- pop {r4-r10}
- bx r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/com.asm b/gmp/mpn/arm/v7a/cora15/neon/com.asm
deleted file mode 100644
index 9e7a629287..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/com.asm
+++ /dev/null
@@ -1,97 +0,0 @@
-dnl ARM Neon mpn_com optimised for A15.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM ?
-C XScale ?
-C Cortex-A8 ?
-C Cortex-A9 2.1
-C Cortex-A15 0.65
-
-define(`rp', `r0') C destination pointer
-define(`up', `r1') C source pointer
-define(`n', `r2') C limb count
-
-ASM_START()
-PROLOGUE(mpn_com) C rp[] = bitwise complement of up[], n limbs
- cmp n, #7
- ble L(bc) C 7 limbs or fewer: only the tail code is needed
-
-C Perform a few initial operations until rp is 128-bit aligned
- tst rp, #4
- beq L(al1)
- vld1.32 {d0[0]}, [up]! C one limb when bit 2 of rp is set
- sub n, n, #1
- vmvn d0, d0
- vst1.32 {d0[0]}, [rp]!
-L(al1): tst rp, #8
- beq L(al2)
- vld1.32 {d0}, [up]! C two limbs when bit 3 of rp is set
- sub n, n, #2
- vmvn d0, d0
- vst1.32 {d0}, [rp:64]!
-L(al2): vld1.32 {q2}, [up]! C preload four limbs for the pipelined loop
- subs n, n, #12 C 4 preloaded limbs + 8 per loop iteration
- blt L(end)
-
- ALIGN(16)
-L(top): vld1.32 {q0}, [up]! C eight limbs per iteration, stores carry a 128-bit alignment hint
- vmvn q2, q2
- subs n, n, #8
- vst1.32 {q2}, [rp:128]!
- vld1.32 {q2}, [up]!
- vmvn q0, q0
- vst1.32 {q0}, [rp:128]!
- bge L(top)
-
-L(end): vmvn q2, q2 C flush the four limbs still held in q2
- vst1.32 {q2}, [rp:128]!
-
-C Handle last 0-7 limbs. Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-L(bc): tst n, #4 C n may be biased by the loop; its low three bits still count the leftover limbs
- beq L(tl1)
- vld1.32 {q0}, [up]!
- vmvn q0, q0
- vst1.32 {q0}, [rp]!
-L(tl1): tst n, #2
- beq L(tl2)
- vld1.32 {d0}, [up]!
- vmvn d0, d0
- vst1.32 {d0}, [rp]!
-L(tl2): tst n, #1
- beq L(tl3)
- vld1.32 {d0[0]}, [up]
- vmvn d0, d0
- vst1.32 {d0[0]}, [rp]
-L(tl3): bx lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/copyd.asm b/gmp/mpn/arm/v7a/cora15/neon/copyd.asm
deleted file mode 100644
index 98fe535def..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/copyd.asm
+++ /dev/null
@@ -1,110 +0,0 @@
-dnl ARM Neon mpn_copyd optimised for A15.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM -
-C XScale -
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 1.75 slower than core register code
-C Cortex-A15 0.52
-
-define(`rp', `r0') C destination pointer
-define(`up', `r1') C source pointer
-define(`n', `r2') C limb count
-
-ASM_START()
-PROLOGUE(mpn_copyd) C copy n limbs from up[] to rp[], working from the high end downwards
- add rp, rp, n, lsl #2 C point both pointers just past the last limb (4 bytes per limb)
- add up, up, n, lsl #2
-
- cmp n, #7
- ble L(bc) C 7 limbs or fewer: only the tail code is needed
-
-C Copy until rp is 128-bit aligned
- tst rp, #4
- beq L(al1)
- sub up, up, #4 C one limb when bit 2 of rp is set
- vld1.32 {d22[0]}, [up]
- sub n, n, #1
- sub rp, rp, #4
- vst1.32 {d22[0]}, [rp]
-L(al1): tst rp, #8
- beq L(al2)
- sub up, up, #8 C two limbs when bit 3 of rp is set
- vld1.32 {d22}, [up]
- sub n, n, #2
- sub rp, rp, #8
- vst1.32 {d22}, [rp:64]
-L(al2): sub up, up, #16 C preload four limbs for the pipelined loop
- vld1.32 {d26-d27}, [up]
- subs n, n, #12 C 4 preloaded limbs + 8 per loop iteration
- sub rp, rp, #16 C offset rp for loop
- blt L(end)
-
- sub up, up, #16 C offset up for loop
- mov r12, #-16 C post-index step: walk downwards 16 bytes at a time
-
- ALIGN(16)
-L(top): vld1.32 {d22-d23}, [up], r12 C eight limbs per iteration
- vst1.32 {d26-d27}, [rp:128], r12
- vld1.32 {d26-d27}, [up], r12
- vst1.32 {d22-d23}, [rp:128], r12
- subs n, n, #8
- bge L(top)
-
- add up, up, #16 C undo up offset
- C rp offset undoing folded
-L(end): vst1.32 {d26-d27}, [rp:128] C flush the four limbs still held in d26-d27
-
-C Copy last 0-7 limbs. Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-L(bc): tst n, #4 C n may be biased by the loop; its low three bits still count the leftover limbs
- beq L(tl1)
- sub up, up, #16
- vld1.32 {d22-d23}, [up]
- sub rp, rp, #16
- vst1.32 {d22-d23}, [rp]
-L(tl1): tst n, #2
- beq L(tl2)
- sub up, up, #8
- vld1.32 {d22}, [up]
- sub rp, rp, #8
- vst1.32 {d22}, [rp]
-L(tl2): tst n, #1
- beq L(tl3)
- sub up, up, #4
- vld1.32 {d22[0]}, [up]
- sub rp, rp, #4
- vst1.32 {d22[0]}, [rp]
-L(tl3): bx lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/copyi.asm b/gmp/mpn/arm/v7a/cora15/neon/copyi.asm
deleted file mode 100644
index 2e05afe5e8..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/copyi.asm
+++ /dev/null
@@ -1,90 +0,0 @@
-dnl ARM Neon mpn_copyi optimised for A15.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM -
-C XScale -
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 1.75 slower than core register code
-C Cortex-A15 0.52
-
-define(`rp', `r0') C destination pointer
-define(`up', `r1') C source pointer
-define(`n', `r2') C limb count
-
-ASM_START()
-PROLOGUE(mpn_copyi) C copy n limbs from up[] to rp[], working from the low end upwards
- cmp n, #7
- ble L(bc) C 7 limbs or fewer: only the tail code is needed
-
-C Copy until rp is 128-bit aligned
- tst rp, #4
- beq L(al1)
- vld1.32 {d22[0]}, [up]! C one limb when bit 2 of rp is set
- sub n, n, #1
- vst1.32 {d22[0]}, [rp]!
-L(al1): tst rp, #8
- beq L(al2)
- vld1.32 {d22}, [up]! C two limbs when bit 3 of rp is set
- sub n, n, #2
- vst1.32 {d22}, [rp:64]!
-L(al2): vld1.32 {d26-d27}, [up]! C preload four limbs for the pipelined loop
- subs n, n, #12 C 4 preloaded limbs + 8 per loop iteration
- blt L(end)
-
- ALIGN(16)
-L(top): vld1.32 {d22-d23}, [up]! C eight limbs per iteration, stores carry a 128-bit alignment hint
- vst1.32 {d26-d27}, [rp:128]!
- vld1.32 {d26-d27}, [up]!
- vst1.32 {d22-d23}, [rp:128]!
- subs n, n, #8
- bge L(top)
-
-L(end): vst1.32 {d26-d27}, [rp:128]! C flush the four limbs still held in d26-d27
-
-C Copy last 0-7 limbs. Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-L(bc): tst n, #4 C n may be biased by the loop; its low three bits still count the leftover limbs
- beq L(tl1)
- vld1.32 {d22-d23}, [up]!
- vst1.32 {d22-d23}, [rp]!
-L(tl1): tst n, #2
- beq L(tl2)
- vld1.32 {d22}, [up]!
- vst1.32 {d22}, [rp]!
-L(tl2): tst n, #1
- beq L(tl3)
- vld1.32 {d22[0]}, [up]
- vst1.32 {d22[0]}, [rp]
-L(tl3): bx lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm b/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm
deleted file mode 100644
index 2c11d6debd..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm
+++ /dev/null
@@ -1,177 +0,0 @@
-dnl ARM Neon mpn_rsh1add_n, mpn_rsh1sub_n.
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM -
-C XScale -
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 4-5
-C Cortex-A15 2.5
-
-C TODO
-C * Try to make this smaller; its size (384 bytes) is excessive.
-C * Try to reach 2.25 c/l on A15, to match the addlsh_1 family.
-C * This is ad-hoc scheduled, perhaps unnecessarily so for A15, and perhaps
-C insufficiently for A7 and A8.
-
-define(`rp', `r0') C result pointer
-define(`up', `r1') C first source operand
-define(`vp', `r2') C second source operand (added to or subtracted from up[])
-define(`n', `r3') C limb count
-
-ifdef(`OPERATION_rsh1add_n', `
- define(`ADDSUBS', `adds $1, $2, $3')
- define(`ADCSBCS', `adcs $1, $2, $3')
- define(`IFADD', `$1')
- define(`IFSUB', `')
- define(`func', mpn_rsh1add_n)')
-ifdef(`OPERATION_rsh1sub_n', `
- define(`ADDSUBS', `subs $1, $2, $3')
- define(`ADCSBCS', `sbcs $1, $2, $3')
- define(`IFADD', `')
- define(`IFSUB', `$1')
- define(`func', mpn_rsh1sub_n)')
-
-MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
-
-ASM_START()
-PROLOGUE(func) C rp[] = (up[] +/- vp[]) >> 1 over n limbs; returns the bit shifted out at the low end
- push {r4-r10}
-
- ands r4, n, #3 C dispatch on n mod 4
- beq L(b00)
- cmp r4, #2
- blo L(b01)
- beq L(b10)
-
-L(b11): ldmia up!, {r9,r10,r12} C n = 3 mod 4: start with three limbs
- ldmia vp!, {r5,r6,r7}
- ADDSUBS( r9, r9, r5)
- vmov d4, r9, r9 C d4 keeps the lowest sum limb; its bit 0 is the return value
- ADCSBCS( r10, r10, r6)
- ADCSBCS( r12, r12, r7)
- vshr.u64 d3, d4, #1
- vmov d1, r10, r12
- vsli.u64 d3, d1, #31 C bring bit 0 of the next limb in as bit 31 of the low word
- vshr.u64 d2, d1, #1
- vst1.32 d3[0], [rp]!
- bics n, n, #3
- beq L(wd2)
-L(gt3): ldmia up!, {r8,r9,r10,r12}
- ldmia vp!, {r4,r5,r6,r7}
- b L(mi0)
-
-L(b10): ldmia up!, {r10,r12} C n = 2 mod 4: start with two limbs
- ldmia vp!, {r6,r7}
- ADDSUBS( r10, r10, r6)
- ADCSBCS( r12, r12, r7)
- vmov d4, r10, r12 C lowest sum limb in d4[0] for the return value
- bics n, n, #2
- vshr.u64 d2, d4, #1
- beq L(wd2)
-L(gt2): ldmia up!, {r8,r9,r10,r12}
- ldmia vp!, {r4,r5,r6,r7}
- b L(mi0)
-
-L(b01): ldr r12, [up], #4 C n = 1 mod 4: start with one limb
- ldr r7, [vp], #4
- ADDSUBS( r12, r12, r7)
- vmov d4, r12, r12 C lowest sum limb in d4[0] for the return value
- bics n, n, #1
- bne L(gt1)
- mov r5, r12, lsr #1 C n was exactly 1: finish in core registers
-IFADD(` adc r1, n, #0')
-IFSUB(` adc r1, n, #1')
- bfi r5, r1, #31, #1 C bit 0 of r1 (from the final carry flag; n is zero here) becomes the top result bit
- str r5, [rp]
- and r0, r12, #1 C return the bit shifted out
- pop {r4-r10}
- bx r14
-L(gt1): ldmia up!, {r8,r9,r10,r12}
- ldmia vp!, {r4,r5,r6,r7}
- vshr.u64 d2, d4, #1
- ADCSBCS( r8, r8, r4)
- ADCSBCS( r9, r9, r5)
- vmov d0, r8, r9
- ADCSBCS( r10, r10, r6)
- ADCSBCS( r12, r12, r7)
- vsli.u64 d2, d0, #31
- vshr.u64 d3, d0, #1
- vst1.32 d2[0], [rp]!
- b L(mi1)
-
-L(b00): ldmia up!, {r8,r9,r10,r12} C n = 0 mod 4: start with four limbs
- ldmia vp!, {r4,r5,r6,r7}
- ADDSUBS( r8, r8, r4)
- ADCSBCS( r9, r9, r5)
- vmov d4, r8, r9 C lowest sum limb in d4[0] for the return value
- ADCSBCS( r10, r10, r6)
- ADCSBCS( r12, r12, r7)
- vshr.u64 d3, d4, #1
- b L(mi1)
-
- ALIGN(16)
-L(top): ldmia up!, {r8,r9,r10,r12} C four limbs per iteration: add/sub in core registers, shift in Neon
- ldmia vp!, {r4,r5,r6,r7}
- vsli.u64 d3, d1, #63 C bit 0 of the following pair becomes bit 63 of this pair
- vshr.u64 d2, d1, #1
- vst1.32 d3, [rp]!
-L(mi0): ADCSBCS( r8, r8, r4)
- ADCSBCS( r9, r9, r5)
- vmov d0, r8, r9
- ADCSBCS( r10, r10, r6)
- ADCSBCS( r12, r12, r7)
- vsli.u64 d2, d0, #63
- vshr.u64 d3, d0, #1
- vst1.32 d2, [rp]!
-L(mi1): vmov d1, r10, r12
- sub n, n, #4 C sub + tst instead of subs, so the carry flag of the chain survives
- tst n, n
- bne L(top)
-
-L(end): vsli.u64 d3, d1, #63
- vshr.u64 d2, d1, #1
- vst1.32 d3, [rp]!
-L(wd2): vmov r4, r5, d2 C last two result limbs, finished in core registers
-IFADD(` adc r1, n, #0')
-IFSUB(` adc r1, n, #1')
- bfi r5, r1, #31, #1 C bit 0 of r1 (from the final carry flag; n is zero here) becomes the top result bit
- stm rp, {r4,r5}
-
-L(rtn): vmov.32 r0, d4[0] C lowest sum limb, saved at entry
- and r0, r0, #1 C return the bit shifted out
- pop {r4-r10}
- bx r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/submul_1.asm b/gmp/mpn/arm/v7a/cora15/submul_1.asm
deleted file mode 100644
index ed7bfe820b..0000000000
--- a/gmp/mpn/arm/v7a/cora15/submul_1.asm
+++ /dev/null
@@ -1,159 +0,0 @@
-dnl ARM mpn_submul_1 optimised for A15.
-
-dnl Copyright 2012, 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb best
-C StrongARM: -
-C XScale ?
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 5.75 3.75
-C Cortex-A15 2.32 this
-
-C This code uses umlal and umaal for adding in the rp[] data, keeping the
-C recurrence path separate from any multiply instructions. It performs well on
-C A15, but not quite at the multiply bandwidth like the corresponding addmul_1
-C code.
-C
-C We don't use r12 due to ldrd and strd limitations.
-C
-C This loop complements U on the fly,
-C U' = B^n - 1 - U
-C and then uses that
-C R - U*v = R + U'*v + v - B^n v
-
-C Architecture requirements:
-C v5 -
-C v5t -
-C v5te ldrd strd
-C v6 umaal
-C v6t2 -
-C v7a -
-
-define(`rp', `r0') C pointer to the limbs that are read, updated and written back
-define(`up', `r1') C source pointer; its limbs are complemented on the fly
-define(`n', `r2') C limb count
-define(`v0', `r3') C the single multiplier limb
-
-define(`w0', `r10') define(`w1', `r11') C register pair loaded from rp[] with ldrd
-define(`u0', `r8') define(`u1', `r9') C register pair loaded from up[] with ldrd
-
-ASM_START()
-PROLOGUE(mpn_submul_1) C rp[] -= up[] * v0 over n limbs, via the U' identity in the header comment
- sub sp, sp, #32 C save r4-r11 with strd; restored by the pop at the end
- strd r10, r11, [sp, #24]
- strd r8, r9, [sp, #16]
- strd r6, r7, [sp, #8]
- strd r4, r5, [sp, #0]
-C push { r4-r11 }
-
- ands r6, n, #3 C dispatch on n mod 4
- sub n, n, #3 C bias the counter; sub leaves the flags from ands for the branches below
- beq L(b00)
- cmp r6, #2
- bcc L(b01)
- beq L(b10)
-
-L(b11): mov r6, #0
- ldr u1, [up], #-4
- ldr w1, [rp], #-16
- mvn u1, u1 C complement the source limb
- adds r7, v0, #0 C r7 = v0 (the "+ v" term); adding zero also clears the carry flag
- b L(mid)
-
-L(b00): ldrd u0, u1, [up]
- ldrd w0, w1, [rp], #-12
- mvn u0, u0
- mvn u1, u1
- mov r6, v0 C the "+ v" term enters through the umaal accumulator
- umaal w0, r6, u0, v0
- cmn r13, #0 C carry clear
- mov r7, #0
- str w0, [rp, #12]
- b L(mid)
-
-L(b10): ldrd u0, u1, [up], #8
- ldrd w0, w1, [rp]
- mvn u0, u0
- mvn u1, u1
- mov r4, v0 C the "+ v" term enters through the umaal accumulator
- umaal w0, r4, u0, v0
- mov r5, #0
- str w0, [rp], #-4
- umlal w1, r5, u1, v0
- adds n, n, #0 C set N for the test below; adding zero also clears the carry flag
- bmi L(end)
- b L(top)
-
-L(b01): ldr u1, [up], #4
- ldr w1, [rp], #-8
- mvn u1, u1
- mov r5, v0 C the "+ v" term enters through the umaal accumulator
- mov r4, #0
- umaal w1, r5, u1, v0
- tst n, n C carry is already clear here: this path was entered through bcc
- bmi L(end)
-
-C ALIGN(16)
-L(top): ldrd u0, u1, [up, #0] C four limbs per iteration, entered at L(top) or L(mid)
- adcs r4, r4, w1
- mvn u0, u0
- ldrd w0, w1, [rp, #12]
- mvn u1, u1
- mov r6, #0
- umlal w0, r6, u0, v0 C 1 2
- adcs r5, r5, w0
- mov r7, #0
- strd r4, r5, [rp, #8]
-L(mid): umaal w1, r7, u1, v0 C 2 3
- ldrd u0, u1, [up, #8]
- add up, up, #16
- adcs r6, r6, w1
- mvn u0, u0
- ldrd w0, w1, [rp, #20]
- mvn u1, u1
- mov r4, #0
- umlal w0, r4, u0, v0 C 3 4
- adcs r7, r7, w0
- mov r5, #0
- strd r6, r7, [rp, #16]!
- sub n, n, #4 C sub + tst instead of subs, so the carry flag of the adcs chain survives
- umlal w1, r5, u1, v0 C 0 1
- tst n, n
- bpl L(top)
-
-L(end): adcs r4, r4, w1
- str r4, [rp, #8] C last result limb
- adc r0, r5, #0 C top limb of the R + U'*v + v sum
- sub r0, v0, r0 C return value = v0 minus that top limb
- pop { r4-r11 }
- bx r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora9/gmp-mparam.h b/gmp/mpn/arm/v7a/cora9/gmp-mparam.h
deleted file mode 100644
index 9660257820..0000000000
--- a/gmp/mpn/arm/v7a/cora9/gmp-mparam.h
+++ /dev/null
@@ -1,209 +0,0 @@
-/* gmp-mparam.h -- Compiler/machine parameter header file.
-
-Copyright 1991, 1993, 1994, 1999-2003, 2009, 2010, 2012-2014 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define GMP_LIMB_BITS 32
-#define GMP_LIMB_BYTES 4
-
-/* 1000MHz Cortex-A9 */
-/* FFT tuning limit = 25000000 */
-/* Generated by tuneup.c, 2014-03-12, gcc 4.6 */
-
-#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 12
-#define USE_PREINV_DIVREM_1 1 /* native */
-#define DIV_QR_1N_PI1_METHOD 1
-#define DIV_QR_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
-#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 20
-
-#define MUL_TOOM22_THRESHOLD 45
-#define MUL_TOOM33_THRESHOLD 129
-#define MUL_TOOM44_THRESHOLD 387
-#define MUL_TOOM6H_THRESHOLD 517
-#define MUL_TOOM8H_THRESHOLD 774
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 137
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 222
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 137
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 235
-#define MUL_TOOM43_TO_TOOM54_THRESHOLD 208
-
-#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 54
-#define SQR_TOOM3_THRESHOLD 181
-#define SQR_TOOM4_THRESHOLD 490
-#define SQR_TOOM6_THRESHOLD 656
-#define SQR_TOOM8_THRESHOLD 0 /* always */
-
-#define MULMID_TOOM42_THRESHOLD 64
-
-#define MULMOD_BNM1_THRESHOLD 26
-#define SQRMOD_BNM1_THRESHOLD 28
-
-#define MUL_FFT_MODF_THRESHOLD 624 /* k = 5 */
-#define MUL_FFT_TABLE3 \
- { { 624, 5}, { 28, 6}, { 15, 5}, { 34, 6}, \
- { 18, 5}, { 37, 6}, { 28, 7}, { 15, 6}, \
- { 36, 7}, { 19, 6}, { 40, 7}, { 21, 6}, \
- { 43, 7}, { 23, 6}, { 47, 7}, { 25, 6}, \
- { 51, 7}, { 27, 6}, { 55, 7}, { 29, 8}, \
- { 15, 7}, { 31, 6}, { 63, 7}, { 37, 8}, \
- { 19, 7}, { 43, 8}, { 23, 7}, { 51, 8}, \
- { 27, 7}, { 57, 9}, { 15, 8}, { 31, 7}, \
- { 65, 8}, { 35, 7}, { 71, 8}, { 43, 9}, \
- { 23, 8}, { 55,10}, { 15, 9}, { 31, 8}, \
- { 71, 9}, { 39, 8}, { 83, 9}, { 47, 8}, \
- { 99, 9}, { 55,10}, { 31, 9}, { 79,10}, \
- { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \
- { 135,10}, { 79, 9}, { 167,10}, { 95, 9}, \
- { 191,10}, { 111,11}, { 63,10}, { 159,11}, \
- { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \
- { 127,10}, { 255, 9}, { 511,10}, { 271,11}, \
- { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \
- { 671,10}, { 351,11}, { 191,10}, { 383, 9}, \
- { 767,10}, { 399, 9}, { 799,10}, { 415,11}, \
- { 223,12}, { 127,11}, { 255,10}, { 511, 9}, \
- { 1023,10}, { 543,11}, { 287,10}, { 575, 9}, \
- { 1151,11}, { 319,10}, { 671,11}, { 351,12}, \
- { 191,11}, { 383,10}, { 799,11}, { 415,10}, \
- { 831,13}, { 127,12}, { 255,11}, { 511,10}, \
- { 1023,11}, { 607,12}, { 319,11}, { 735,12}, \
- { 383,11}, { 863,12}, { 447,11}, { 959,13}, \
- { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
- { 1215,12}, { 639,11}, { 1279,12}, { 703,13}, \
- { 383,12}, { 767,11}, { 1535,12}, { 831,11}, \
- { 1663,12}, { 959,14}, { 255,13}, { 511,12}, \
- { 1023,11}, { 2047,12}, { 1215,13}, { 639,12}, \
- { 1407,13}, { 767,12}, { 1663,13}, { 895,12}, \
- { 1791,14}, { 511,13}, { 1023,12}, { 2111,13}, \
- { 1151,12}, { 2431,13}, { 1279,12}, { 2559,13}, \
- { 1407,14}, { 767,13}, { 1535,12}, { 3071,13}, \
- { 1663,12}, { 3455,13}, { 1791,15}, { 511,14}, \
- { 1023,13}, { 2047,12}, { 4095,13}, { 2175,12}, \
- { 4351,13}, { 2431,14}, { 1279,13}, { 2559,12}, \
- { 5119,13}, { 2815,14}, { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 160
-#define MUL_FFT_THRESHOLD 6784
-
-#define SQR_FFT_MODF_THRESHOLD 560 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 560, 5}, { 19, 4}, { 39, 5}, { 21, 4}, \
- { 43, 5}, { 29, 6}, { 15, 5}, { 33, 6}, \
- { 17, 5}, { 35, 6}, { 36, 7}, { 19, 6}, \
- { 40, 7}, { 21, 6}, { 43, 7}, { 23, 6}, \
- { 47, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
- { 19, 7}, { 43, 8}, { 23, 7}, { 49, 8}, \
- { 27, 7}, { 55, 9}, { 15, 8}, { 31, 7}, \
- { 65, 8}, { 35, 7}, { 71, 8}, { 43, 9}, \
- { 23, 8}, { 55, 9}, { 31, 8}, { 71, 9}, \
- { 39, 8}, { 83, 9}, { 47, 8}, { 95, 9}, \
- { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \
- { 103,11}, { 31,10}, { 63, 9}, { 135,10}, \
- { 79, 9}, { 159,10}, { 95, 9}, { 191,10}, \
- { 111,11}, { 63,10}, { 159,11}, { 95,10}, \
- { 191, 9}, { 383,10}, { 207,12}, { 63,11}, \
- { 127,10}, { 255, 9}, { 511, 8}, { 1023, 9}, \
- { 543,10}, { 287,11}, { 159,10}, { 319, 9}, \
- { 639,10}, { 335, 9}, { 671,10}, { 351,11}, \
- { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \
- { 799,10}, { 415, 9}, { 831,11}, { 223,12}, \
- { 127,11}, { 255,10}, { 511, 9}, { 1023,10}, \
- { 543,11}, { 287,10}, { 575, 9}, { 1151,10}, \
- { 607,11}, { 319,10}, { 671,11}, { 351,10}, \
- { 703,12}, { 191,11}, { 383,10}, { 799,11}, \
- { 415,10}, { 831,13}, { 127,11}, { 511,10}, \
- { 1023,11}, { 543,10}, { 1087,11}, { 575,10}, \
- { 1151,11}, { 607,12}, { 319,11}, { 671,10}, \
- { 1343,11}, { 735,12}, { 383,11}, { 863,12}, \
- { 447,11}, { 959,12}, { 511,11}, { 1087,12}, \
- { 575,11}, { 1215,12}, { 639,11}, { 1343,12}, \
- { 703,11}, { 1407,13}, { 383,12}, { 767,11}, \
- { 1599,12}, { 831,11}, { 1663,12}, { 895,11}, \
- { 1791,12}, { 959,13}, { 511,12}, { 1023,11}, \
- { 2047,12}, { 1215,13}, { 639,12}, { 1407,13}, \
- { 767,12}, { 1663,13}, { 895,12}, { 1791,14}, \
- { 511,13}, { 1023,12}, { 2111,13}, { 1151,12}, \
- { 2431,13}, { 1279,12}, { 2559,13}, { 1407,14}, \
- { 767,13}, { 1535,12}, { 3071,13}, { 1663,12}, \
- { 3455,13}, { 1791,15}, { 511,14}, { 1023,13}, \
- { 2047,12}, { 4095,13}, { 2175,12}, { 4351,13}, \
- { 2431,14}, { 1279,13}, { 2559,12}, { 5119,13}, \
- { 2815,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 167
-#define SQR_FFT_THRESHOLD 5312
-
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 38
-#define MULLO_MUL_N_THRESHOLD 13463
-
-#define DC_DIV_QR_THRESHOLD 42
-#define DC_DIVAPPR_Q_THRESHOLD 100
-#define DC_BDIV_QR_THRESHOLD 43
-#define DC_BDIV_Q_THRESHOLD 104
-
-#define INV_MULMOD_BNM1_THRESHOLD 98
-#define INV_NEWTON_THRESHOLD 138
-#define INV_APPR_THRESHOLD 133
-
-#define BINV_NEWTON_THRESHOLD 333
-#define REDC_1_TO_REDC_2_THRESHOLD 2
-#define REDC_2_TO_REDC_N_THRESHOLD 142
-
-#define MU_DIV_QR_THRESHOLD 2350
-#define MU_DIVAPPR_Q_THRESHOLD 2259
-#define MUPI_DIV_QR_THRESHOLD 70
-#define MU_BDIV_QR_THRESHOLD 2089
-#define MU_BDIV_Q_THRESHOLD 2172
-
-#define POWM_SEC_TABLE 37,48,81,615,1925
-
-#define MATRIX22_STRASSEN_THRESHOLD 22
-#define HGCD_THRESHOLD 64
-#define HGCD_APPR_THRESHOLD 50
-#define HGCD_REDUCE_THRESHOLD 4284
-#define GCD_DC_THRESHOLD 416
-#define GCDEXT_DC_THRESHOLD 298
-#define JACOBI_BASE_METHOD 4
-
-#define GET_STR_DC_THRESHOLD 18
-#define GET_STR_PRECOMPUTE_THRESHOLD 33
-#define SET_STR_DC_THRESHOLD 140
-#define SET_STR_PRECOMPUTE_THRESHOLD 748
-
-#define FAC_DSC_THRESHOLD 309
-#define FAC_ODD_THRESHOLD 29