diff options
Diffstat (limited to 'gmp/mpn/arm/v7a/cora15/neon')
-rw-r--r-- | gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm | 43 | ||||
-rw-r--r-- | gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm | 43 | ||||
-rw-r--r-- | gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm | 144 | ||||
-rw-r--r-- | gmp/mpn/arm/v7a/cora15/neon/com.asm | 97 | ||||
-rw-r--r-- | gmp/mpn/arm/v7a/cora15/neon/copyd.asm | 110 | ||||
-rw-r--r-- | gmp/mpn/arm/v7a/cora15/neon/copyi.asm | 90 | ||||
-rw-r--r-- | gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm | 177 |
7 files changed, 0 insertions, 704 deletions
diff --git a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm b/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm deleted file mode 100644 index d8cfe3f78f..0000000000 --- a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh1_n.asm +++ /dev/null @@ -1,43 +0,0 @@ -dnl ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n - -dnl Contributed to the GNU project by Torbjörn Granlund. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -define(LSH, 1) - -ifdef(`OPERATION_addlsh1_n',`define(`DO_add')') -ifdef(`OPERATION_sublsh1_n',`define(`DO_sub')') -ifdef(`OPERATION_rsblsh1_n',`define(`DO_rsb')') - -MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n mpn_rsblsh1_n) - -include_mpn(`arm/v7a/cora15/neon/aorsorrlshC_n.asm') diff --git a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm b/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm deleted file mode 100644 index b48204d926..0000000000 --- a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlsh2_n.asm +++ /dev/null @@ -1,43 +0,0 @@ -dnl ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n - -dnl Contributed to the GNU project by Torbjörn Granlund. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -define(LSH, 2) - -ifdef(`OPERATION_addlsh2_n',`define(`DO_add')') -ifdef(`OPERATION_sublsh2_n',`define(`DO_sub')') -ifdef(`OPERATION_rsblsh2_n',`define(`DO_rsb')') - -MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n mpn_rsblsh2_n) - -include_mpn(`arm/v7a/cora15/neon/aorsorrlshC_n.asm') diff --git a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm b/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm deleted file mode 100644 index 16c34a2699..0000000000 --- a/gmp/mpn/arm/v7a/cora15/neon/aorsorrlshC_n.asm +++ /dev/null @@ -1,144 +0,0 @@ -dnl ARM mpn_addlshC_n, mpn_sublshC_n, mpn_rsblshC_n - -dnl Contributed to the GNU project by Torbjörn Granlund. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - - -C cycles/limb -C StrongARM - -C XScale - -C Cortex-A7 ? -C Cortex-A8 ? -C Cortex-A9 5.25 -C Cortex-A15 2.25 - -C TODO -C * Consider using 4-way feed-in code. -C * This is ad-hoc scheduled, perhaps unnecessarily so for A15, and perhaps -C insufficiently for A7 and A8. - -define(`rp', `r0') -define(`up', `r1') -define(`vp', `r2') -define(`n', `r3') - -ifdef(`DO_add', ` - define(`ADCSBCS', `adcs $1, $2, $3') - define(`CLRCY', `cmn r13, #1') - define(`RETVAL', `adc r0, $1, #0') - define(`func', mpn_addlsh`'LSH`'_n)') -ifdef(`DO_sub', ` - define(`ADCSBCS', `sbcs $1, $2, $3') - define(`CLRCY', `cmp r13, #0') - define(`RETVAL', `sbc $2, $2, $2 - cmn $2, #1 - adc r0, $1, #0') - define(`func', mpn_sublsh`'LSH`'_n)') -ifdef(`DO_rsb', ` - define(`ADCSBCS', `sbcs $1, $3, $2') - define(`CLRCY', `cmp r13, #0') - define(`RETVAL', `sbc r0, $1, #0') - define(`func', mpn_rsblsh`'LSH`'_n)') - - -ASM_START() -PROLOGUE(func) - push {r4-r10} - vmov.i8 d0, #0 C could feed carry through here - CLRCY - tst n, #1 - beq L(bb0) - -L(bb1): vld1.32 {d3[0]}, [vp]! - vsli.u32 d0, d3, #LSH - ldr r12, [up], #4 - vmov.32 r5, d0[0] - vshr.u32 d0, d3, #32-LSH - ADCSBCS( r12, r12, r5) - str r12, [rp], #4 - bics n, n, #1 - beq L(rtn) - -L(bb0): tst n, #2 - beq L(b00) - -L(b10): vld1.32 {d3}, [vp]! - vsli.u64 d0, d3, #LSH - ldmia up!, {r10,r12} - vmov r4, r5, d0 - vshr.u64 d0, d3, #64-LSH - ADCSBCS( r10, r10, r4) - ADCSBCS( r12, r12, r5) - stmia rp!, {r10,r12} - bics n, n, #2 - beq L(rtn) - -L(b00): vld1.32 {d2}, [vp]! - vsli.u64 d0, d2, #LSH - vshr.u64 d1, d2, #64-LSH - vld1.32 {d3}, [vp]! - vsli.u64 d1, d3, #LSH - vmov r6, r7, d0 - vshr.u64 d0, d3, #64-LSH - sub n, n, #4 - tst n, n - beq L(end) - - ALIGN(16) -L(top): ldmia up!, {r8,r9,r10,r12} - vld1.32 {d2}, [vp]! - vsli.u64 d0, d2, #LSH - vmov r4, r5, d1 - vshr.u64 d1, d2, #64-LSH - ADCSBCS( r8, r8, r6) - ADCSBCS( r9, r9, r7) - vld1.32 {d3}, [vp]! - vsli.u64 d1, d3, #LSH - vmov r6, r7, d0 - vshr.u64 d0, d3, #64-LSH - ADCSBCS( r10, r10, r4) - ADCSBCS( r12, r12, r5) - stmia rp!, {r8,r9,r10,r12} - sub n, n, #4 - tst n, n - bne L(top) - -L(end): ldmia up!, {r8,r9,r10,r12} - vmov r4, r5, d1 - ADCSBCS( r8, r8, r6) - ADCSBCS( r9, r9, r7) - ADCSBCS( r10, r10, r4) - ADCSBCS( r12, r12, r5) - stmia rp!, {r8,r9,r10,r12} -L(rtn): vmov.32 r0, d0[0] - RETVAL( r0, r1) - pop {r4-r10} - bx r14 -EPILOGUE() diff --git a/gmp/mpn/arm/v7a/cora15/neon/com.asm b/gmp/mpn/arm/v7a/cora15/neon/com.asm deleted file mode 100644 index 9e7a629287..0000000000 --- a/gmp/mpn/arm/v7a/cora15/neon/com.asm +++ /dev/null @@ -1,97 +0,0 @@ -dnl ARM Neon mpn_com optimised for A15. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C StrongARM ? -C XScale ? -C Cortex-A8 ? -C Cortex-A9 2.1 -C Cortex-A15 0.65 - -define(`rp', `r0') -define(`up', `r1') -define(`n', `r2') - -ASM_START() -PROLOGUE(mpn_com) - cmp n, #7 - ble L(bc) - -C Perform a few initial operation until rp is 128-bit aligned - tst rp, #4 - beq L(al1) - vld1.32 {d0[0]}, [up]! - sub n, n, #1 - vmvn d0, d0 - vst1.32 {d0[0]}, [rp]! -L(al1): tst rp, #8 - beq L(al2) - vld1.32 {d0}, [up]! - sub n, n, #2 - vmvn d0, d0 - vst1.32 {d0}, [rp:64]! -L(al2): vld1.32 {q2}, [up]! - subs n, n, #12 - blt L(end) - - ALIGN(16) -L(top): vld1.32 {q0}, [up]! - vmvn q2, q2 - subs n, n, #8 - vst1.32 {q2}, [rp:128]! - vld1.32 {q2}, [up]! - vmvn q0, q0 - vst1.32 {q0}, [rp:128]! - bge L(top) - -L(end): vmvn q2, q2 - vst1.32 {q2}, [rp:128]! - -C Handle last 0-7 limbs. Note that rp is aligned after loop, but not when we -C arrive here via L(bc) -L(bc): tst n, #4 - beq L(tl1) - vld1.32 {q0}, [up]! - vmvn q0, q0 - vst1.32 {q0}, [rp]! -L(tl1): tst n, #2 - beq L(tl2) - vld1.32 {d0}, [up]! - vmvn d0, d0 - vst1.32 {d0}, [rp]! -L(tl2): tst n, #1 - beq L(tl3) - vld1.32 {d0[0]}, [up] - vmvn d0, d0 - vst1.32 {d0[0]}, [rp] -L(tl3): bx lr -EPILOGUE() diff --git a/gmp/mpn/arm/v7a/cora15/neon/copyd.asm b/gmp/mpn/arm/v7a/cora15/neon/copyd.asm deleted file mode 100644 index 98fe535def..0000000000 --- a/gmp/mpn/arm/v7a/cora15/neon/copyd.asm +++ /dev/null @@ -1,110 +0,0 @@ -dnl ARM Neon mpn_copyd optimised for A15. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C StrongARM - -C XScale - -C Cortex-A7 ? -C Cortex-A8 ? -C Cortex-A9 1.75 slower than core register code -C Cortex-A15 0.52 - -define(`rp', `r0') -define(`up', `r1') -define(`n', `r2') - -ASM_START() -PROLOGUE(mpn_copyd) - add rp, rp, n, lsl #2 - add up, up, n, lsl #2 - - cmp n, #7 - ble L(bc) - -C Copy until rp is 128-bit aligned - tst rp, #4 - beq L(al1) - sub up, up, #4 - vld1.32 {d22[0]}, [up] - sub n, n, #1 - sub rp, rp, #4 - vst1.32 {d22[0]}, [rp] -L(al1): tst rp, #8 - beq L(al2) - sub up, up, #8 - vld1.32 {d22}, [up] - sub n, n, #2 - sub rp, rp, #8 - vst1.32 {d22}, [rp:64] -L(al2): sub up, up, #16 - vld1.32 {d26-d27}, [up] - subs n, n, #12 - sub rp, rp, #16 C offset rp for loop - blt L(end) - - sub up, up, #16 C offset up for loop - mov r12, #-16 - - ALIGN(16) -L(top): vld1.32 {d22-d23}, [up], r12 - vst1.32 {d26-d27}, [rp:128], r12 - vld1.32 {d26-d27}, [up], r12 - vst1.32 {d22-d23}, [rp:128], r12 - subs n, n, #8 - bge L(top) - - add up, up, #16 C undo up offset - C rp offset undoing folded -L(end): vst1.32 {d26-d27}, [rp:128] - -C Copy last 0-7 limbs. Note that rp is aligned after loop, but not when we -C arrive here via L(bc) -L(bc): tst n, #4 - beq L(tl1) - sub up, up, #16 - vld1.32 {d22-d23}, [up] - sub rp, rp, #16 - vst1.32 {d22-d23}, [rp] -L(tl1): tst n, #2 - beq L(tl2) - sub up, up, #8 - vld1.32 {d22}, [up] - sub rp, rp, #8 - vst1.32 {d22}, [rp] -L(tl2): tst n, #1 - beq L(tl3) - sub up, up, #4 - vld1.32 {d22[0]}, [up] - sub rp, rp, #4 - vst1.32 {d22[0]}, [rp] -L(tl3): bx lr -EPILOGUE() diff --git a/gmp/mpn/arm/v7a/cora15/neon/copyi.asm b/gmp/mpn/arm/v7a/cora15/neon/copyi.asm deleted file mode 100644 index 2e05afe5e8..0000000000 --- a/gmp/mpn/arm/v7a/cora15/neon/copyi.asm +++ /dev/null @@ -1,90 +0,0 @@ -dnl ARM Neon mpn_copyi optimised for A15. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C StrongARM - -C XScale - -C Cortex-A7 ? -C Cortex-A8 ? -C Cortex-A9 1.75 slower than core register code -C Cortex-A15 0.52 - -define(`rp', `r0') -define(`up', `r1') -define(`n', `r2') - -ASM_START() -PROLOGUE(mpn_copyi) - cmp n, #7 - ble L(bc) - -C Copy until rp is 128-bit aligned - tst rp, #4 - beq L(al1) - vld1.32 {d22[0]}, [up]! - sub n, n, #1 - vst1.32 {d22[0]}, [rp]! -L(al1): tst rp, #8 - beq L(al2) - vld1.32 {d22}, [up]! - sub n, n, #2 - vst1.32 {d22}, [rp:64]! -L(al2): vld1.32 {d26-d27}, [up]! - subs n, n, #12 - blt L(end) - - ALIGN(16) -L(top): vld1.32 {d22-d23}, [up]! - vst1.32 {d26-d27}, [rp:128]! - vld1.32 {d26-d27}, [up]! - vst1.32 {d22-d23}, [rp:128]! - subs n, n, #8 - bge L(top) - -L(end): vst1.32 {d26-d27}, [rp:128]! - -C Copy last 0-7 limbs. Note that rp is aligned after loop, but not when we -C arrive here via L(bc) -L(bc): tst n, #4 - beq L(tl1) - vld1.32 {d22-d23}, [up]! - vst1.32 {d22-d23}, [rp]! -L(tl1): tst n, #2 - beq L(tl2) - vld1.32 {d22}, [up]! - vst1.32 {d22}, [rp]! -L(tl2): tst n, #1 - beq L(tl3) - vld1.32 {d22[0]}, [up] - vst1.32 {d22[0]}, [rp] -L(tl3): bx lr -EPILOGUE() diff --git a/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm b/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm deleted file mode 100644 index 2c11d6debd..0000000000 --- a/gmp/mpn/arm/v7a/cora15/neon/rsh1aors_n.asm +++ /dev/null @@ -1,177 +0,0 @@ -dnl ARM Neon mpn_rsh1add_n, mpn_rsh1sub_n. - -dnl Contributed to the GNU project by Torbjörn Granlund. - -dnl Copyright 2013 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. - -include(`../config.m4') - -C cycles/limb -C StrongARM - -C XScale - -C Cortex-A7 ? -C Cortex-A8 ? -C Cortex-A9 4-5 -C Cortex-A15 2.5 - -C TODO -C * Try to make this smaller, its size (384 bytes) is excessive. -C * Try to reach 2.25 c/l on A15, to match the addlsh_1 family. -C * This is ad-hoc scheduled, perhaps unnecessarily so for A15, and perhaps -C insufficiently for A7 and A8. - -define(`rp', `r0') -define(`up', `r1') -define(`vp', `r2') -define(`n', `r3') - -ifdef(`OPERATION_rsh1add_n', ` - define(`ADDSUBS', `adds $1, $2, $3') - define(`ADCSBCS', `adcs $1, $2, $3') - define(`IFADD', `$1') - define(`IFSUB', `') - define(`func', mpn_rsh1add_n)') -ifdef(`OPERATION_rsh1sub_n', ` - define(`ADDSUBS', `subs $1, $2, $3') - define(`ADCSBCS', `sbcs $1, $2, $3') - define(`IFADD', `') - define(`IFSUB', `$1') - define(`func', mpn_rsh1sub_n)') - -MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n) - -ASM_START() -PROLOGUE(func) - push {r4-r10} - - ands r4, n, #3 - beq L(b00) - cmp r4, #2 - blo L(b01) - beq L(b10) - -L(b11): ldmia up!, {r9,r10,r12} - ldmia vp!, {r5,r6,r7} - ADDSUBS( r9, r9, r5) - vmov d4, r9, r9 - ADCSBCS( r10, r10, r6) - ADCSBCS( r12, r12, r7) - vshr.u64 d3, d4, #1 - vmov d1, r10, r12 - vsli.u64 d3, d1, #31 - vshr.u64 d2, d1, #1 - vst1.32 d3[0], [rp]! - bics n, n, #3 - beq L(wd2) -L(gt3): ldmia up!, {r8,r9,r10,r12} - ldmia vp!, {r4,r5,r6,r7} - b L(mi0) - -L(b10): ldmia up!, {r10,r12} - ldmia vp!, {r6,r7} - ADDSUBS( r10, r10, r6) - ADCSBCS( r12, r12, r7) - vmov d4, r10, r12 - bics n, n, #2 - vshr.u64 d2, d4, #1 - beq L(wd2) -L(gt2): ldmia up!, {r8,r9,r10,r12} - ldmia vp!, {r4,r5,r6,r7} - b L(mi0) - -L(b01): ldr r12, [up], #4 - ldr r7, [vp], #4 - ADDSUBS( r12, r12, r7) - vmov d4, r12, r12 - bics n, n, #1 - bne L(gt1) - mov r5, r12, lsr #1 -IFADD(` adc r1, n, #0') -IFSUB(` adc r1, n, #1') - bfi r5, r1, #31, #1 - str r5, [rp] - and r0, r12, #1 - pop {r4-r10} - bx r14 -L(gt1): ldmia up!, {r8,r9,r10,r12} - ldmia vp!, {r4,r5,r6,r7} - vshr.u64 d2, d4, #1 - ADCSBCS( r8, r8, r4) - ADCSBCS( r9, r9, r5) - vmov d0, r8, r9 - ADCSBCS( r10, r10, r6) - ADCSBCS( r12, r12, r7) - vsli.u64 d2, d0, #31 - vshr.u64 d3, d0, #1 - vst1.32 d2[0], [rp]! - b L(mi1) - -L(b00): ldmia up!, {r8,r9,r10,r12} - ldmia vp!, {r4,r5,r6,r7} - ADDSUBS( r8, r8, r4) - ADCSBCS( r9, r9, r5) - vmov d4, r8, r9 - ADCSBCS( r10, r10, r6) - ADCSBCS( r12, r12, r7) - vshr.u64 d3, d4, #1 - b L(mi1) - - ALIGN(16) -L(top): ldmia up!, {r8,r9,r10,r12} - ldmia vp!, {r4,r5,r6,r7} - vsli.u64 d3, d1, #63 - vshr.u64 d2, d1, #1 - vst1.32 d3, [rp]! -L(mi0): ADCSBCS( r8, r8, r4) - ADCSBCS( r9, r9, r5) - vmov d0, r8, r9 - ADCSBCS( r10, r10, r6) - ADCSBCS( r12, r12, r7) - vsli.u64 d2, d0, #63 - vshr.u64 d3, d0, #1 - vst1.32 d2, [rp]! -L(mi1): vmov d1, r10, r12 - sub n, n, #4 - tst n, n - bne L(top) - -L(end): vsli.u64 d3, d1, #63 - vshr.u64 d2, d1, #1 - vst1.32 d3, [rp]! -L(wd2): vmov r4, r5, d2 -IFADD(` adc r1, n, #0') -IFSUB(` adc r1, n, #1') - bfi r5, r1, #31, #1 - stm rp, {r4,r5} - -L(rtn): vmov.32 r0, d4[0] - and r0, r0, #1 - pop {r4-r10} - bx r14 -EPILOGUE() |