go to gmp 4.3.2 (baserock/pedroalvarez/gcc-5.3.0-gmp432)

author: Pedro Alvarez <pedro.alvarez@codethink.co.uk> 2016-05-27 17:39:31 +0100
committer: Pedro Alvarez <pedro.alvarez@codethink.co.uk> 2016-05-27 17:53:32 +0100
commit: 26c75cf8267919f81a1759c9c965a52c660233f9 (patch)
tree: cf2a39cf56c2c8ac45760854413ab233e6263974 /gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
parent: 56892c1d217baea02092b51a09bbc924130ca84c (diff)
download: gcc-tarball-baserock/pedroalvarez/gcc-5.3.0-gmp432.tar.gz
1 files changed, 32 insertions, 44 deletions
diff --git a/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm b/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
index 93b63b2018..46b0903c50 100644
--- a/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
+++ b/gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
@@ -1,45 +1,33 @@
 dnl  Intel Pentium-4 mpn_addlsh1_n -- mpn x+2*y.
 
-dnl  Copyright 2001-2004, 2006 Free Software Foundation, Inc.
-
-dnl  This file is part of the GNU MP Library.
-dnl
-dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-dnl  it under the terms of either:
-dnl
-dnl    * the GNU Lesser General Public License as published by the Free
-dnl      Software Foundation; either version 3 of the License, or (at your
-dnl      option) any later version.
-dnl
-dnl  or
+dnl  Copyright 2001, 2002, 2003, 2004, 2006 Free Software Foundation, Inc.
 dnl
-dnl    * the GNU General Public License as published by the Free Software
-dnl      Foundation; either version 2 of the License, or (at your option) any
-dnl      later version.
+dnl  This file is part of the GNU MP Library.
 dnl
-dnl  or both in parallel, as here.
+dnl  The GNU MP Library is free software; you can redistribute it and/or
+dnl  modify it under the terms of the GNU Lesser General Public License as
+dnl  published by the Free Software Foundation; either version 3 of the
+dnl  License, or (at your option) any later version.
 dnl
-dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-dnl  for more details.
+dnl  The GNU MP Library is distributed in the hope that it will be useful,
+dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+dnl  Lesser General Public License for more details.
 dnl
-dnl  You should have received copies of the GNU General Public License and the
-dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-dnl  see https://www.gnu.org/licenses/.
+dnl  You should have received a copy of the GNU Lesser General Public License
+dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
 
 include(`../config.m4')
 
 
-C					cycles/limb
-C			     dst!=src1,2  dst==src1  dst==src2
-C P6 model 0-8,10-12		-
-C P6 model 9   (Banias)		?
-C P6 model 13  (Dothan)		?
-C P4 model 0-1 (Willamette)	?
-C P4 model 2   (Northwood)	4.25	     6		6
-C P4 model 3-4 (Prescott)	5	     8.5	8.5
+C          cycles/limb (approx)
+C          dst!=src1,2  dst==src1  dst==src2
+C P4 m2:      4.5         ?7.25      ?6.75
+C P4 m3:      5.3         ?	     ?
 
+C mp_limb_t mpn_addlsh1_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C
 C The slightly strange combination of indexing and pointer incrementing
 C that's used seems to work best.  Not sure why, but %ecx,4 with src1 and/or
 C src2 is a slowdown.
@@ -63,18 +51,18 @@ define(SAVE_EBX,`PARAM_SRC1')
 PROLOGUE(mpn_addlsh1_n)
 deflit(`FRAME',0)
 
-	mov	PARAM_SRC1, %eax
-	mov	%ebx, SAVE_EBX
+	movl	PARAM_SRC1, %eax
+	movl	%ebx, SAVE_EBX
 
-	mov	PARAM_SRC2, %ebx
+	movl	PARAM_SRC2, %ebx
 	pxor	%mm0, %mm0		C initial carry
 
-	mov	PARAM_DST, %edx
+	movl	PARAM_DST, %edx
 
-	mov	PARAM_SIZE, %ecx
+	movl	PARAM_SIZE, %ecx
 
-	lea	(%edx,%ecx,4), %edx	C dst end
-	neg	%ecx			C -size
+	leal	(%edx,%ecx,4), %edx	C dst end
+	negl	%ecx			C -size
 
 L(top):
 	C eax	src1 end
@@ -83,24 +71,24 @@ L(top):
 	C edx	dst end
 	C mm0	carry
 
-	movd	(%ebx), %mm2
 	movd	(%eax), %mm1
+	movd	(%ebx), %mm2
 	psrlq	$32, %mm0
-	lea	4(%eax), %eax
-	lea	4(%ebx), %ebx
+	leal	4(%eax), %eax
+	leal	4(%ebx), %ebx
 
-	psllq	$1, %mm2
+	paddq	%mm2, %mm1
 	paddq	%mm2, %mm1
 
 	paddq	%mm1, %mm0
 
 	movd	%mm0, (%edx,%ecx,4)
-	add	$1, %ecx
+	addl	$1, %ecx
 	jnz	L(top)
 
 
 	psrlq	$32, %mm0
-	mov	SAVE_EBX, %ebx
+	movl	SAVE_EBX, %ebx
 	movd	%mm0, %eax
 	emms
 	ret
author	Pedro Alvarez <pedro.alvarez@codethink.co.uk>	2016-05-27 17:39:31 +0100
committer	Pedro Alvarez <pedro.alvarez@codethink.co.uk>	2016-05-27 17:53:32 +0100
commit	26c75cf8267919f81a1759c9c965a52c660233f9 (patch)
tree	cf2a39cf56c2c8ac45760854413ab233e6263974 /gmp/mpn/x86/pentium4/sse2/addlsh1_n.asm
parent	56892c1d217baea02092b51a09bbc924130ca84c (diff)
download	gcc-tarball-baserock/pedroalvarez/gcc-5.3.0-gmp432.tar.gz