diff options
author | Pedro Alvarez <pedro.alvarez@codethink.co.uk> | 2016-05-27 17:39:31 +0100 |
---|---|---|
committer | Pedro Alvarez <pedro.alvarez@codethink.co.uk> | 2016-05-27 17:53:32 +0100 |
commit | 26c75cf8267919f81a1759c9c965a52c660233f9 (patch) | |
tree | cf2a39cf56c2c8ac45760854413ab233e6263974 /gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm | |
parent | 56892c1d217baea02092b51a09bbc924130ca84c (diff) | |
download | gcc-tarball-baserock/pedroalvarez/gcc-5.3.0-gmp432.tar.gz |
go to gmp 4.3.2 [baserock/pedroalvarez/gcc-5.3.0-gmp432]
Diffstat (limited to 'gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm')
-rw-r--r-- | gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm | 76 |
1 files changed, 32 insertions, 44 deletions
diff --git a/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm b/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm index 93b63b2018..46b0903c50 100644 --- a/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm +++ b/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm @@ -1,45 +1,33 @@ dnl Intel Pentium-4 mpn_addlsh1_n -- mpn x+2*y. -dnl Copyright 2001-2004, 2006 Free Software Foundation, Inc. - -dnl This file is part of the GNU MP Library. -dnl -dnl The GNU MP Library is free software; you can redistribute it and/or modify -dnl it under the terms of either: -dnl -dnl * the GNU Lesser General Public License as published by the Free -dnl Software Foundation; either version 3 of the License, or (at your -dnl option) any later version. -dnl -dnl or +dnl Copyright 2001, 2002, 2003, 2004, 2006 Free Software Foundation, Inc. dnl -dnl * the GNU General Public License as published by the Free Software -dnl Foundation; either version 2 of the License, or (at your option) any -dnl later version. +dnl This file is part of the GNU MP Library. dnl -dnl or both in parallel, as here. +dnl The GNU MP Library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License as +dnl published by the Free Software Foundation; either version 3 of the +dnl License, or (at your option) any later version. dnl -dnl The GNU MP Library is distributed in the hope that it will be useful, but -dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -dnl for more details. +dnl The GNU MP Library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. dnl -dnl You should have received copies of the GNU General Public License and the -dnl GNU Lesser General Public License along with the GNU MP Library. 
If not, -dnl see https://www.gnu.org/licenses/. +dnl You should have received a copy of the GNU Lesser General Public License +dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. include(`../config.m4') -C cycles/limb -C dst!=src1,2 dst==src1 dst==src2 -C P6 model 0-8,10-12 - -C P6 model 9 (Banias) ? -C P6 model 13 (Dothan) ? -C P4 model 0-1 (Willamette) ? -C P4 model 2 (Northwood) 4.25 6 6 -C P4 model 3-4 (Prescott) 5 8.5 8.5 +C cycles/limb (approx) +C dst!=src1,2 dst==src1 dst==src2 +C P4 m2: 4.5 ?7.25 ?6.75 +C P4 m3: 5.3 ? ? +C mp_limb_t mpn_addlsh1_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, +C mp_size_t size); +C C The slightly strange combination of indexing and pointer incrementing C that's used seems to work best. Not sure why, but %ecx,4 with src1 and/or C src2 is a slowdown. @@ -63,18 +51,18 @@ define(SAVE_EBX,`PARAM_SRC1') PROLOGUE(mpn_addlsh1_n) deflit(`FRAME',0) - mov PARAM_SRC1, %eax - mov %ebx, SAVE_EBX + movl PARAM_SRC1, %eax + movl %ebx, SAVE_EBX - mov PARAM_SRC2, %ebx + movl PARAM_SRC2, %ebx pxor %mm0, %mm0 C initial carry - mov PARAM_DST, %edx + movl PARAM_DST, %edx - mov PARAM_SIZE, %ecx + movl PARAM_SIZE, %ecx - lea (%edx,%ecx,4), %edx C dst end - neg %ecx C -size + leal (%edx,%ecx,4), %edx C dst end + negl %ecx C -size L(top): C eax src1 end @@ -83,24 +71,24 @@ L(top): C edx dst end C mm0 carry - movd (%ebx), %mm2 movd (%eax), %mm1 + movd (%ebx), %mm2 psrlq $32, %mm0 - lea 4(%eax), %eax - lea 4(%ebx), %ebx + leal 4(%eax), %eax + leal 4(%ebx), %ebx - psllq $1, %mm2 + paddq %mm2, %mm1 paddq %mm2, %mm1 paddq %mm1, %mm0 movd %mm0, (%edx,%ecx,4) - add $1, %ecx + addl $1, %ecx jnz L(top) psrlq $32, %mm0 - mov SAVE_EBX, %ebx + movl SAVE_EBX, %ebx movd %mm0, %eax emms ret |