diff options
Diffstat (limited to 'gmp/mpn/x86_64/copyi.asm')
-rw-r--r-- | gmp/mpn/x86_64/copyi.asm | 117 |
1 files changed, 49 insertions, 68 deletions
diff --git a/gmp/mpn/x86_64/copyi.asm b/gmp/mpn/x86_64/copyi.asm index bafce7a09e..506142be79 100644 --- a/gmp/mpn/x86_64/copyi.asm +++ b/gmp/mpn/x86_64/copyi.asm @@ -1,92 +1,73 @@ dnl AMD64 mpn_copyi -- copy limb vector, incrementing. -dnl Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc. +dnl Copyright 2003, 2005, 2007 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. -dnl + dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or -dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. -dnl -dnl or both in parallel, as here. -dnl +dnl it under the terms of the GNU Lesser General Public License as published +dnl by the Free Software Foundation; either version 3 of the License, or (at +dnl your option) any later version. + dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. -dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. If not, -dnl see https://www.gnu.org/licenses/. +dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +dnl License for more details. + +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C AMD K8,K9 1 -C AMD K10 1 -C AMD bd1 1.36 -C AMD bobcat 1.71 -C Intel P4 2-3 -C Intel core2 1 -C Intel NHM 1 -C Intel SBR 1 -C Intel atom 2 -C VIA nano 2 +C cycles/limb +C K8,K9: 1 +C K10: 1 +C P4: 2.8 +C P6-15: 1.2 -IFSTD(`define(`rp',`%rdi')') -IFSTD(`define(`up',`%rsi')') -IFSTD(`define(`n', `%rdx')') -IFDOS(`define(`rp',`%rcx')') -IFDOS(`define(`up',`%rdx')') -IFDOS(`define(`n', `%r8')') +C INPUT PARAMETERS +C rp rdi +C up rsi +C n rdx -ABI_SUPPORT(DOS64) -ABI_SUPPORT(STD64) +define(`rp',`%rdi') +define(`up',`%rsi') +define(`n',`%rdx') ASM_START() TEXT - ALIGN(64) - .byte 0,0,0,0,0,0 + ALIGN(16) PROLOGUE(mpn_copyi) - lea -8(rp), rp - sub $4, n + leaq -8(rp), rp + subq $4, n jc L(end) + ALIGN(16) +L(oop): movq (up), %r8 + movq 8(up), %r9 + leaq 32(rp), rp + movq 16(up), %r10 + movq 24(up), %r11 + leaq 32(up), up + movq %r8, -24(rp) + movq %r9, -16(rp) + subq $4, n + movq %r10, -8(rp) + movq %r11, (rp) + jnc L(oop) -L(top): mov (up), %rax - mov 8(up), %r9 - lea 32(rp), rp - mov 16(up), %r10 - mov 24(up), %r11 - lea 32(up), up - mov %rax, -24(rp) - mov %r9, -16(rp) - sub $4, n - mov %r10, -8(rp) - mov %r11, (rp) - jnc L(top) - -L(end): shr R32(n) +L(end): shrl %edx C edx = lowpart(n) jnc 1f - mov (up), %rax - mov %rax, 8(rp) - lea 8(rp), rp - lea 8(up), up -1: shr R32(n) + movq (up), %r8 + movq %r8, 8(rp) + leaq 8(rp), rp + leaq 8(up), up +1: shrl %edx C edx = lowpart(n) jnc 1f - mov (up), %rax - mov 8(up), %r9 - mov %rax, 8(rp) - mov %r9, 16(rp) + movq (up), %r8 + movq 8(up), %r9 + movq %r8, 8(rp) + movq %r9, 16(rp) 1: ret EPILOGUE() |