summary | refs | log | tree | commit | diff
path: root/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
diff options
context:
space:
mode:
Diffstat (limited to 'gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm')
-rw-r--r-- gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm 76
1 files changed, 32 insertions, 44 deletions
diff --git a/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm b/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
index 93b63b2018..46b0903c50 100644
--- a/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
+++ b/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
@@ -1,45 +1,33 @@
dnl Intel Pentium-4 mpn_addlsh1_n -- mpn x+2*y.
-dnl Copyright 2001-2004, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of either:
-dnl
-dnl * the GNU Lesser General Public License as published by the Free
-dnl Software Foundation; either version 3 of the License, or (at your
-dnl option) any later version.
-dnl
-dnl or
+dnl Copyright 2001, 2002, 2003, 2004, 2006 Free Software Foundation, Inc.
dnl
-dnl * the GNU General Public License as published by the Free Software
-dnl Foundation; either version 2 of the License, or (at your option) any
-dnl later version.
+dnl This file is part of the GNU MP Library.
dnl
-dnl or both in parallel, as here.
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-dnl for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
dnl
-dnl You should have received copies of the GNU General Public License and the
-dnl GNU Lesser General Public License along with the GNU MP Library. If not,
-dnl see https://www.gnu.org/licenses/.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C dst!=src1,2 dst==src1 dst==src2
-C P6 model 0-8,10-12 -
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) ?
-C P4 model 0-1 (Willamette) ?
-C P4 model 2 (Northwood) 4.25 6 6
-C P4 model 3-4 (Prescott) 5 8.5 8.5
+C cycles/limb (approx)
+C dst!=src1,2 dst==src1 dst==src2
+C P4 m2: 4.5 ?7.25 ?6.75
+C P4 m3: 5.3 ? ?
+C mp_limb_t mpn_addlsh1_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C
C The slightly strange combination of indexing and pointer incrementing
C that's used seems to work best. Not sure why, but %ecx,4 with src1 and/or
C src2 is a slowdown.
@@ -63,18 +51,18 @@ define(SAVE_EBX,`PARAM_SRC1')
PROLOGUE(mpn_addlsh1_n)
deflit(`FRAME',0)
- mov PARAM_SRC1, %eax
- mov %ebx, SAVE_EBX
+ movl PARAM_SRC1, %eax
+ movl %ebx, SAVE_EBX
- mov PARAM_SRC2, %ebx
+ movl PARAM_SRC2, %ebx
pxor %mm0, %mm0 C initial carry
- mov PARAM_DST, %edx
+ movl PARAM_DST, %edx
- mov PARAM_SIZE, %ecx
+ movl PARAM_SIZE, %ecx
- lea (%edx,%ecx,4), %edx C dst end
- neg %ecx C -size
+ leal (%edx,%ecx,4), %edx C dst end
+ negl %ecx C -size
L(top):
C eax src1 end
@@ -83,24 +71,24 @@ L(top):
C edx dst end
C mm0 carry
- movd (%ebx), %mm2
movd (%eax), %mm1
+ movd (%ebx), %mm2
psrlq $32, %mm0
- lea 4(%eax), %eax
- lea 4(%ebx), %ebx
+ leal 4(%eax), %eax
+ leal 4(%ebx), %ebx
- psllq $1, %mm2
+ paddq %mm2, %mm1
paddq %mm2, %mm1
paddq %mm1, %mm0
movd %mm0, (%edx,%ecx,4)
- add $1, %ecx
+ addl $1, %ecx
jnz L(top)
psrlq $32, %mm0
- mov SAVE_EBX, %ebx
+ movl SAVE_EBX, %ebx
movd %mm0, %eax
emms
ret