summary | refs | log | tree | commit | diff
path: root/gmp/mpn/arm/v7a/cora15/neon
diff options
context:
space:
mode:
Diffstat (limited to 'gmp/mpn/arm/v7a/cora15/neon')
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm 43
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm 43
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm 144
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/com.asm 97
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/copyd.asm 110
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/copyi.asm 90
-rw-r--r-- gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm 177
7 files changed, 0 insertions, 704 deletions
diff --git a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm b/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm
deleted file mode 100644
index d8cfe3f78f..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm
+++ /dev/null
@@ -1,43 +0,0 @@
-dnl ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-dnl Shift count substituted for LSH in the shared implementation included below.
-define(LSH, 1)
-
-dnl Map the OPERATION_* selector onto the DO_* symbol tested by the shared
-dnl implementation.
-ifdef(`OPERATION_addlsh1_n',`define(`DO_add')')
-ifdef(`OPERATION_sublsh1_n',`define(`DO_sub')')
-ifdef(`OPERATION_rsblsh1_n',`define(`DO_rsb')')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n mpn_rsblsh1_n)
-
-dnl The instructions themselves come from the generic shift-count file.
-include_mpn(`arm/v7a/cora15/neon/aorsorrlshC_n.asm')
diff --git a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm b/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm
deleted file mode 100644
index b48204d926..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm
+++ /dev/null
@@ -1,43 +0,0 @@
-dnl ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-dnl Shift count substituted for LSH in the shared implementation included below.
-define(LSH, 2)
-
-dnl Map the OPERATION_* selector onto the DO_* symbol tested by the shared
-dnl implementation.
-ifdef(`OPERATION_addlsh2_n',`define(`DO_add')')
-ifdef(`OPERATION_sublsh2_n',`define(`DO_sub')')
-ifdef(`OPERATION_rsblsh2_n',`define(`DO_rsb')')
-
-MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n mpn_rsblsh2_n)
-
-dnl The instructions themselves come from the generic shift-count file.
-include_mpn(`arm/v7a/cora15/neon/aorsorrlshC_n.asm')
diff --git a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm b/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm
deleted file mode 100644
index 16c34a2699..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm
+++ /dev/null
@@ -1,144 +0,0 @@
-dnl ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-
-C cycles/limb
-C StrongARM -
-C XScale -
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 5.25
-C Cortex-A15 2.25
-
-C TODO
-C * Consider using 4-way feed-in code.
-C * This is ad-hoc scheduled, perhaps unnecessarily so for A15, and perhaps
-C insufficiently for A7 and A8.
-
-C Register roles: rp = destination, up and vp = sources, n = limb count.
-define(`rp', `r0')
-define(`up', `r1')
-define(`vp', `r2')
-define(`n', `r3')
-
-C Exactly one of DO_add, DO_sub, DO_rsb is expected to be defined by the file
-C including this one.  Each variant supplies:
-C   ADCSBCS  the carry-propagating limb instruction (operands swapped for rsb)
-C   CLRCY    a compare against r13 used to initialise the carry flag
-C   RETVAL   folds the final carry flag into its first argument, result in r0
-C   func     the name of the function being assembled, built from LSH
-ifdef(`DO_add', `
- define(`ADCSBCS', `adcs $1, $2, $3')
- define(`CLRCY', `cmn r13, #1')
- define(`RETVAL', `adc r0, $1, #0')
- define(`func', mpn_addlsh`'LSH`'_n)')
-ifdef(`DO_sub', `
- define(`ADCSBCS', `sbcs $1, $2, $3')
- define(`CLRCY', `cmp r13, #0')
- define(`RETVAL', `sbc $2, $2, $2
- cmn $2, #1
- adc r0, $1, #0')
- define(`func', mpn_sublsh`'LSH`'_n)')
-ifdef(`DO_rsb', `
- define(`ADCSBCS', `sbcs $1, $3, $2')
- define(`CLRCY', `cmp r13, #0')
- define(`RETVAL', `sbc r0, $1, #0')
- define(`func', mpn_rsblsh`'LSH`'_n)')
-
-
-ASM_START()
-C func(rp, up, vp, n): for each of the n limbs, combine up[i] with the matching
-C limb of the vp operand shifted left by LSH bits, using ADCSBCS, and store the
-C result at rp[i].  The shifting is done in NEON registers d0-d3, the
-C carry-propagating arithmetic in core registers.  d0 carries the bits shifted
-C out of one group of vp limbs into the next group.
-C NOTE(review): n = 0 is not special-cased; presumably callers pass n >= 1.
-PROLOGUE(func)
- push {r4-r10}
- vmov.i8 d0, #0 C could feed carry through here
- CLRCY
- tst n, #1
- beq L(bb0)
-
-C n is odd: handle a single limb.
-L(bb1): vld1.32 {d3[0]}, [vp]!
- vsli.u32 d0, d3, #LSH
- ldr r12, [up], #4
- vmov.32 r5, d0[0]
- vshr.u32 d0, d3, #32-LSH C bits shifted out, kept for the next limb
- ADCSBCS( r12, r12, r5)
- str r12, [rp], #4
- bics n, n, #1 C drop the limb just handled
- beq L(rtn)
-
-L(bb0): tst n, #2
- beq L(b00)
-
-C Bit 1 of n is set: handle two limbs.
-L(b10): vld1.32 {d3}, [vp]!
- vsli.u64 d0, d3, #LSH
- ldmia up!, {r10,r12}
- vmov r4, r5, d0
- vshr.u64 d0, d3, #64-LSH C bits shifted out, kept for the next group
- ADCSBCS( r10, r10, r4)
- ADCSBCS( r12, r12, r5)
- stmia rp!, {r10,r12}
- bics n, n, #2 C drop the two limbs just handled
- beq L(rtn)
-
-C n is now a multiple of 4.  Preload: shift the first four vp limbs.  r6,r7
-C receive the first pair, d1 keeps the second pair, and d0 keeps the bits
-C shifted out of the second pair.
-L(b00): vld1.32 {d2}, [vp]!
- vsli.u64 d0, d2, #LSH
- vshr.u64 d1, d2, #64-LSH
- vld1.32 {d3}, [vp]!
- vsli.u64 d1, d3, #LSH
- vmov r6, r7, d0
- vshr.u64 d0, d3, #64-LSH
-C NOTE(review): the counter is updated with sub and tested with tst, presumably
-C so that the carry flag of the ADCSBCS chain is not disturbed.
- sub n, n, #4
- tst n, n
- beq L(end)
-
-C Main loop, four limbs per iteration: finish the four limbs shifted during the
-C previous pass while shifting the next four.
- ALIGN(16)
-L(top): ldmia up!, {r8,r9,r10,r12}
- vld1.32 {d2}, [vp]!
- vsli.u64 d0, d2, #LSH
- vmov r4, r5, d1 C fetch the pending second pair before d1 is reused
- vshr.u64 d1, d2, #64-LSH
- ADCSBCS( r8, r8, r6)
- ADCSBCS( r9, r9, r7)
- vld1.32 {d3}, [vp]!
- vsli.u64 d1, d3, #LSH
- vmov r6, r7, d0
- vshr.u64 d0, d3, #64-LSH
- ADCSBCS( r10, r10, r4)
- ADCSBCS( r12, r12, r5)
- stmia rp!, {r8,r9,r10,r12}
- sub n, n, #4
- tst n, n
- bne L(top)
-
-C Wind down: finish the last four limbs, which have already been shifted.
-L(end): ldmia up!, {r8,r9,r10,r12}
- vmov r4, r5, d1
- ADCSBCS( r8, r8, r6)
- ADCSBCS( r9, r9, r7)
- ADCSBCS( r10, r10, r4)
- ADCSBCS( r12, r12, r5)
- stmia rp!, {r8,r9,r10,r12}
-C r0 = bits shifted out of the last vp limb; RETVAL then folds in the carry flag.
-L(rtn): vmov.32 r0, d0[0]
- RETVAL( r0, r1)
- pop {r4-r10}
- bx r14
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/com.asm b/gmp/mpn/arm/v7a/cora15/neon/com.asm
deleted file mode 100644
index 9e7a629287..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/com.asm
+++ /dev/null
@@ -1,97 +0,0 @@
-dnl ARM Neon mpn_com optimised for A15.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM ?
-C XScale ?
-C Cortex-A8 ?
-C Cortex-A9 2.1
-C Cortex-A15 0.65
-
-C Register roles: rp = destination, up = source, n = limb count.
-define(`rp', `r0')
-define(`up', `r1')
-define(`n', `r2')
-
-ASM_START()
-C mpn_com: store the bitwise complement (vmvn) of the n limbs at up to rp.
-PROLOGUE(mpn_com)
- cmp n, #7
- ble L(bc) C 7 or fewer limbs: only the tail code is needed
-
-C Perform a few initial operations until rp is 128-bit aligned
- tst rp, #4
- beq L(al1)
- vld1.32 {d0[0]}, [up]!
- sub n, n, #1
- vmvn d0, d0
- vst1.32 {d0[0]}, [rp]!
-L(al1): tst rp, #8
- beq L(al2)
- vld1.32 {d0}, [up]!
- sub n, n, #2
- vmvn d0, d0
- vst1.32 {d0}, [rp:64]!
-L(al2): vld1.32 {q2}, [up]! C preload 4 limbs for the pipelined loop
- subs n, n, #12 C 4 preloaded limbs + 8 for one loop iteration
- blt L(end)
-
-C Main loop, 8 limbs per iteration; q2 always holds 4 limbs loaded but not yet
-C stored.
- ALIGN(16)
-L(top): vld1.32 {q0}, [up]!
- vmvn q2, q2
- subs n, n, #8
- vst1.32 {q2}, [rp:128]!
- vld1.32 {q2}, [up]!
- vmvn q0, q0
- vst1.32 {q0}, [rp:128]!
- bge L(top)
-
-L(end): vmvn q2, q2 C flush the 4 limbs still pending in q2
- vst1.32 {q2}, [rp:128]!
-
-C Handle last 0-7 limbs. Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-C When arriving from the loop n is negative (the counter is biased by 8), but
-C its three low bits still equal the number of limbs left, and the tests below
-C only look at those bits.
-L(bc): tst n, #4
- beq L(tl1)
- vld1.32 {q0}, [up]!
- vmvn q0, q0
- vst1.32 {q0}, [rp]!
-L(tl1): tst n, #2
- beq L(tl2)
- vld1.32 {d0}, [up]!
- vmvn d0, d0
- vst1.32 {d0}, [rp]!
-L(tl2): tst n, #1
- beq L(tl3)
- vld1.32 {d0[0]}, [up]
- vmvn d0, d0
- vst1.32 {d0[0]}, [rp]
-L(tl3): bx lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/copyd.asm b/gmp/mpn/arm/v7a/cora15/neon/copyd.asm
deleted file mode 100644
index 98fe535def..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/copyd.asm
+++ /dev/null
@@ -1,110 +0,0 @@
-dnl ARM Neon mpn_copyd optimised for A15.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM -
-C XScale -
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 1.75 slower than core register code
-C Cortex-A15 0.52
-
-C Register roles: rp = destination, up = source, n = limb count.
-define(`rp', `r0')
-define(`up', `r1')
-define(`n', `r2')
-
-ASM_START()
-C mpn_copyd: copy the n limbs at up to rp, working from the highest address
-C down to the lowest.
-PROLOGUE(mpn_copyd)
- add rp, rp, n, lsl #2 C rp and up now point just past the last limb
- add up, up, n, lsl #2
-
- cmp n, #7
- ble L(bc) C 7 or fewer limbs: only the tail code is needed
-
-C Copy until rp is 128-bit aligned
- tst rp, #4
- beq L(al1)
- sub up, up, #4
- vld1.32 {d22[0]}, [up]
- sub n, n, #1
- sub rp, rp, #4
- vst1.32 {d22[0]}, [rp]
-L(al1): tst rp, #8
- beq L(al2)
- sub up, up, #8
- vld1.32 {d22}, [up]
- sub n, n, #2
- sub rp, rp, #8
- vst1.32 {d22}, [rp:64]
-L(al2): sub up, up, #16
- vld1.32 {d26-d27}, [up] C preload 4 limbs for the pipelined loop
- subs n, n, #12 C 4 preloaded limbs + 8 for one loop iteration
- sub rp, rp, #16 C offset rp for loop
- blt L(end)
-
- sub up, up, #16 C offset up for loop
- mov r12, #-16 C post-index step: move down 16 bytes per access
-
-C Main loop, 8 limbs per iteration; d26-d27 always hold 4 limbs loaded but not
-C yet stored.
- ALIGN(16)
-L(top): vld1.32 {d22-d23}, [up], r12
- vst1.32 {d26-d27}, [rp:128], r12
- vld1.32 {d26-d27}, [up], r12
- vst1.32 {d22-d23}, [rp:128], r12
- subs n, n, #8
- bge L(top)
-
- add up, up, #16 C undo up offset
- C rp offset undoing folded
-L(end): vst1.32 {d26-d27}, [rp:128] C flush the 4 limbs still pending
-
-C Copy last 0-7 limbs. Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-C When arriving from the loop n is negative (the counter is biased by 8), but
-C its three low bits still equal the number of limbs left, and the tests below
-C only look at those bits.
-L(bc): tst n, #4
- beq L(tl1)
- sub up, up, #16
- vld1.32 {d22-d23}, [up]
- sub rp, rp, #16
- vst1.32 {d22-d23}, [rp]
-L(tl1): tst n, #2
- beq L(tl2)
- sub up, up, #8
- vld1.32 {d22}, [up]
- sub rp, rp, #8
- vst1.32 {d22}, [rp]
-L(tl2): tst n, #1
- beq L(tl3)
- sub up, up, #4
- vld1.32 {d22[0]}, [up]
- sub rp, rp, #4
- vst1.32 {d22[0]}, [rp]
-L(tl3): bx lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/copyi.asm b/gmp/mpn/arm/v7a/cora15/neon/copyi.asm
deleted file mode 100644
index 2e05afe5e8..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/copyi.asm
+++ /dev/null
@@ -1,90 +0,0 @@
-dnl ARM Neon mpn_copyi optimised for A15.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM -
-C XScale -
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 1.75 slower than core register code
-C Cortex-A15 0.52
-
-C Register roles: rp = destination, up = source, n = limb count.
-define(`rp', `r0')
-define(`up', `r1')
-define(`n', `r2')
-
-ASM_START()
-C mpn_copyi: copy the n limbs at up to rp, working from the lowest address
-C upwards.
-PROLOGUE(mpn_copyi)
- cmp n, #7
- ble L(bc) C 7 or fewer limbs: only the tail code is needed
-
-C Copy until rp is 128-bit aligned
- tst rp, #4
- beq L(al1)
- vld1.32 {d22[0]}, [up]!
- sub n, n, #1
- vst1.32 {d22[0]}, [rp]!
-L(al1): tst rp, #8
- beq L(al2)
- vld1.32 {d22}, [up]!
- sub n, n, #2
- vst1.32 {d22}, [rp:64]!
-L(al2): vld1.32 {d26-d27}, [up]! C preload 4 limbs for the pipelined loop
- subs n, n, #12 C 4 preloaded limbs + 8 for one loop iteration
- blt L(end)
-
-C Main loop, 8 limbs per iteration; d26-d27 always hold 4 limbs loaded but not
-C yet stored.
- ALIGN(16)
-L(top): vld1.32 {d22-d23}, [up]!
- vst1.32 {d26-d27}, [rp:128]!
- vld1.32 {d26-d27}, [up]!
- vst1.32 {d22-d23}, [rp:128]!
- subs n, n, #8
- bge L(top)
-
-L(end): vst1.32 {d26-d27}, [rp:128]! C flush the 4 limbs still pending
-
-C Copy last 0-7 limbs. Note that rp is aligned after loop, but not when we
-C arrive here via L(bc)
-C When arriving from the loop n is negative (the counter is biased by 8), but
-C its three low bits still equal the number of limbs left, and the tests below
-C only look at those bits.
-L(bc): tst n, #4
- beq L(tl1)
- vld1.32 {d22-d23}, [up]!
- vst1.32 {d22-d23}, [rp]!
-L(tl1): tst n, #2
- beq L(tl2)
- vld1.32 {d22}, [up]!
- vst1.32 {d22}, [rp]!
-L(tl2): tst n, #1
- beq L(tl3)
- vld1.32 {d22[0]}, [up]
- vst1.32 {d22[0]}, [rp]
-L(tl3): bx lr
-EPILOGUE()
diff --git a/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm b/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm
deleted file mode 100644
index 2c11d6debd..0000000000
--- a/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm
+++ /dev/null
@@ -1,177 +0,0 @@
-dnl ARM Neon mpn_rsh1add_n, mpn_rsh1sub_n.
-
-dnl Contributed to the GNU project by Torbjörn Granlund.
-
-dnl Copyright 2013 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
-dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
-dnl
-dnl or both in parallel, as here.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
-dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM -
-C XScale -
-C Cortex-A7 ?
-C Cortex-A8 ?
-C Cortex-A9 4-5
-C Cortex-A15 2.5
-
-C TODO
-C * Try to make this smaller, its size (384 bytes) is excessive.
-C * Try to reach 2.25 c/l on A15, to match the addlsh_1 family.
-C * This is ad-hoc scheduled, perhaps unnecessarily so for A15, and perhaps
-C insufficiently for A7 and A8.
-
-C Register roles: rp = destination, up and vp = sources, n = limb count.
-define(`rp', `r0')
-define(`up', `r1')
-define(`vp', `r2')
-define(`n', `r3')
-
-C Per-operation macros, selected by the OPERATION_* symbol:
-C   ADDSUBS  flag-setting add or subtract used for the lowest limb
-C   ADCSBCS  the same operation with carry, used for all further limbs
-C   IFADD    expands to its argument only in the add variant
-C   IFSUB    expands to its argument only in the sub variant
-C   func     the name of the function being assembled
-ifdef(`OPERATION_rsh1add_n', `
- define(`ADDSUBS', `adds $1, $2, $3')
- define(`ADCSBCS', `adcs $1, $2, $3')
- define(`IFADD', `$1')
- define(`IFSUB', `')
- define(`func', mpn_rsh1add_n)')
-ifdef(`OPERATION_rsh1sub_n', `
- define(`ADDSUBS', `subs $1, $2, $3')
- define(`ADCSBCS', `sbcs $1, $2, $3')
- define(`IFADD', `')
- define(`IFSUB', `$1')
- define(`func', mpn_rsh1sub_n)')
-
-MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
-
-ASM_START()
-C func(rp, up, vp, n): add or subtract (ADDSUBS/ADCSBCS) the n-limb operands at
-C up and vp, shift the result right by one bit and store n limbs at rp.  The
-C final carry (add) or borrow (sub) is inserted as the top bit of the most
-C significant limb, and the bit shifted out at the low end is returned in r0.
-C The arithmetic is done in core registers, the shifting in NEON registers.
-C The low lane of d4 keeps the least significant sum limb for the return value.
-PROLOGUE(func)
- push {r4-r10}
-
- ands r4, n, #3 C dispatch on n mod 4
- beq L(b00)
- cmp r4, #2
- blo L(b01)
- beq L(b10)
-
-C n mod 4 = 3: process three limbs, store the lowest result limb, and leave the
-C other two (shifted, top bit still pending) in d2.
-L(b11): ldmia up!, {r9,r10,r12}
- ldmia vp!, {r5,r6,r7}
- ADDSUBS( r9, r9, r5)
- vmov d4, r9, r9
- ADCSBCS( r10, r10, r6)
- ADCSBCS( r12, r12, r7)
- vshr.u64 d3, d4, #1
- vmov d1, r10, r12
- vsli.u64 d3, d1, #31 C bit 0 of the next limb becomes bit 31 of this one
- vshr.u64 d2, d1, #1
- vst1.32 d3[0], [rp]!
- bics n, n, #3
- beq L(wd2)
-L(gt3): ldmia up!, {r8,r9,r10,r12}
- ldmia vp!, {r4,r5,r6,r7}
- b L(mi0)
-
-C n mod 4 = 2: process two limbs and leave them, shifted, in d2.
-L(b10): ldmia up!, {r10,r12}
- ldmia vp!, {r6,r7}
- ADDSUBS( r10, r10, r6)
- ADCSBCS( r12, r12, r7)
- vmov d4, r10, r12
- bics n, n, #2
- vshr.u64 d2, d4, #1
- beq L(wd2)
-L(gt2): ldmia up!, {r8,r9,r10,r12}
- ldmia vp!, {r4,r5,r6,r7}
- b L(mi0)
-
-C n mod 4 = 1: process one limb.  When n = 1 the whole result is produced in
-C core registers and the function returns from here.
-L(b01): ldr r12, [up], #4
- ldr r7, [vp], #4
- ADDSUBS( r12, r12, r7)
- vmov d4, r12, r12
- bics n, n, #1
- bne L(gt1)
- mov r5, r12, lsr #1
-C n is zero here, so bit 0 of r1 becomes the carry flag (add) or its inverse,
-C that is the borrow (sub).
-IFADD(` adc r1, n, #0')
-IFSUB(` adc r1, n, #1')
- bfi r5, r1, #31, #1 C insert that bit as the top bit of the result limb
- str r5, [rp]
- and r0, r12, #1 C return the bit shifted out
- pop {r4-r10}
- bx r14
-L(gt1): ldmia up!, {r8,r9,r10,r12}
- ldmia vp!, {r4,r5,r6,r7}
- vshr.u64 d2, d4, #1
- ADCSBCS( r8, r8, r4)
- ADCSBCS( r9, r9, r5)
- vmov d0, r8, r9
- ADCSBCS( r10, r10, r6)
- ADCSBCS( r12, r12, r7)
- vsli.u64 d2, d0, #31 C bit 0 of the next limb becomes bit 31 of this one
- vshr.u64 d3, d0, #1
- vst1.32 d2[0], [rp]!
- b L(mi1)
-
-C n mod 4 = 0: process four limbs and enter the loop at L(mi1).
-L(b00): ldmia up!, {r8,r9,r10,r12}
- ldmia vp!, {r4,r5,r6,r7}
- ADDSUBS( r8, r8, r4)
- ADCSBCS( r9, r9, r5)
- vmov d4, r8, r9
- ADCSBCS( r10, r10, r6)
- ADCSBCS( r12, r12, r7)
- vshr.u64 d3, d4, #1
- b L(mi1)
-
-C Main loop, four limbs per iteration.  d2 and d3 alternately hold a shifted
-C limb pair that still lacks its top bit; vsli #63 supplies that bit from the
-C next pair before the store.  L(mi0) and L(mi1) are the entry points used by
-C the feed-in code above.
-C NOTE(review): the counter is updated with sub and tested with tst, presumably
-C so that the carry flag of the ADCSBCS chain is not disturbed.
- ALIGN(16)
-L(top): ldmia up!, {r8,r9,r10,r12}
- ldmia vp!, {r4,r5,r6,r7}
- vsli.u64 d3, d1, #63
- vshr.u64 d2, d1, #1
- vst1.32 d3, [rp]!
-L(mi0): ADCSBCS( r8, r8, r4)
- ADCSBCS( r9, r9, r5)
- vmov d0, r8, r9
- ADCSBCS( r10, r10, r6)
- ADCSBCS( r12, r12, r7)
- vsli.u64 d2, d0, #63
- vshr.u64 d3, d0, #1
- vst1.32 d2, [rp]!
-L(mi1): vmov d1, r10, r12
- sub n, n, #4
- tst n, n
- bne L(top)
-
-L(end): vsli.u64 d3, d1, #63
- vshr.u64 d2, d1, #1
- vst1.32 d3, [rp]!
-C d2 holds the two most significant result limbs with the top bit missing.
-C n is zero here, so bit 0 of r1 becomes the carry flag (add) or its inverse,
-C that is the borrow (sub).
-L(wd2): vmov r4, r5, d2
-IFADD(` adc r1, n, #0')
-IFSUB(` adc r1, n, #1')
- bfi r5, r1, #31, #1 C insert that bit as the top bit of the result
- stm rp, {r4,r5}
-
-L(rtn): vmov.32 r0, d4[0]
- and r0, r0, #1 C return the bit shifted out at the low end
- pop {r4-r10}
- bx r14
-EPILOGUE()