30 files changed, 3720 insertions, 0 deletions
diff --git a/gmp/mpn/x86/atom/aorrlsh1_n.asm b/gmp/mpn/x86/atom/aorrlsh1_n.asm
new file mode 100644
index 0000000000..cd1a650022
--- /dev/null
+++ b/gmp/mpn/x86/atom/aorrlsh1_n.asm
@@ -0,0 +1,53 @@
+dnl  Intel Atom mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 31)
+
+ifdef(`OPERATION_addlsh1_n', `
+	define(M4_inst,        adc)
+	define(M4_opp,         sub)
+	define(M4_function,    mpn_addlsh1_n)
+	define(M4_function_c,  mpn_addlsh1_nc)
+',`ifdef(`OPERATION_rsblsh1_n', `
+	define(M4_inst,        sbb)
+	define(M4_opp,         add)
+	define(M4_function,    mpn_rsblsh1_n)
+	define(M4_function_c,  mpn_rsblsh1_nc)
+',`m4_error(`Need OPERATION_addlsh1_n or OPERATION_rsblsh1_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
+
+include_mpn(`x86/atom/aorrlshC_n.asm')
diff --git a/gmp/mpn/x86/atom/aorrlsh2_n.asm b/gmp/mpn/x86/atom/aorrlsh2_n.asm
new file mode 100644
index 0000000000..10f4419de9
--- /dev/null
+++ b/gmp/mpn/x86/atom/aorrlsh2_n.asm
@@ -0,0 +1,53 @@
+dnl  Intel Atom mpn_addlsh2_n/mpn_rsblsh2_n -- rp[] = (vp[] << 2) +- up[]
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+define(RSH, 30)
+
+ifdef(`OPERATION_addlsh2_n', `
+	define(M4_inst,        adcl)
+	define(M4_opp,         subl)
+	define(M4_function,    mpn_addlsh2_n)
+	define(M4_function_c,  mpn_addlsh2_nc)
+',`ifdef(`OPERATION_rsblsh2_n', `
+	define(M4_inst,        sbbl)
+	define(M4_opp,         addl)
+	define(M4_function,    mpn_rsblsh2_n)
+	define(M4_function_c,  mpn_rsblsh2_nc)
+',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_rsblsh2_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc)
+
+include_mpn(`x86/atom/aorrlshC_n.asm')
diff --git a/gmp/mpn/x86/atom/aorrlshC_n.asm b/gmp/mpn/x86/atom/aorrlshC_n.asm
new file mode 100644
index 0000000000..71cfe490d6
--- /dev/null
+++ b/gmp/mpn/x86/atom/aorrlshC_n.asm
@@ -0,0 +1,156 @@
+dnl  Intel Atom mpn_addlshC_n/mpn_rsblshC_n -- rp[] = (vp[] << C) +- up[]
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                           mp_size_t size, mp_limb_t carry);
+C mp_limb_t mpn_rsblshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size);
+C mp_limb_t mpn_rsblshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                           mp_size_t size, mp_signed_limb_t carry);
+
+C				cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom			 6
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+defframe(PARAM_CORB,	20)
+defframe(PARAM_SIZE,	16)
+defframe(PARAM_DBLD,	12)
+defframe(PARAM_SRC,	 8)
+defframe(PARAM_DST,	 4)
+
+dnl  re-use parameter space
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBP,`PARAM_DBLD')
+define(SAVE_VP,`PARAM_SRC')
+define(SAVE_UP,`PARAM_DST')
+
+define(M, eval(m4_lshift(1,LSH)))
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`vp',  `%ebx')
+
+ASM_START()
+	TEXT
+	ALIGN(8)
+
+PROLOGUE(M4_function_c)
+deflit(`FRAME',0)
+	movl	PARAM_CORB, %eax
+	movl	%eax, %edx
+	shr	$LSH, %edx
+	andl	$1, %edx
+	M4_opp	%edx, %eax
+	jmp	L(start_nc)
+EPILOGUE()
+
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+
+	xor	%eax, %eax
+	xor	%edx, %edx
+L(start_nc):
+	push	rp			FRAME_pushl()
+
+	mov	PARAM_SIZE, %ecx	C size
+	mov	PARAM_DST, rp
+	mov	up, SAVE_UP
+	incl	%ecx			C size + 1
+	mov	PARAM_SRC, up
+	mov	vp, SAVE_VP
+	shr	%ecx			C (size+1)\2
+	mov	PARAM_DBLD, vp
+	mov	%ebp, SAVE_EBP
+	mov	%ecx, VAR_COUNT
+	jnc	L(entry)		C size odd
+
+	shr	%edx			C size even
+	mov	(vp), %ecx
+	lea	4(vp), vp
+	lea	(%eax,%ecx,M), %edx
+	mov	%ecx, %eax
+	lea	-4(up), up
+	lea	-4(rp), rp
+	jmp	L(enteven)
+
+	ALIGN(16)
+L(oop):
+	lea	(%eax,%ecx,M), %ebp
+	shr	$RSH, %ecx
+	mov	4(vp), %eax
+	shr	%edx
+	lea	8(vp), vp
+	M4_inst	(up), %ebp
+	lea	(%ecx,%eax,M), %edx
+	mov	%ebp, (rp)
+L(enteven):
+	M4_inst	4(up), %edx
+	lea	8(up), up
+	mov	%edx, 4(rp)
+	adc	%edx, %edx
+	shr	$RSH, %eax
+	lea	8(rp), rp
+L(entry):
+	mov	(vp), %ecx
+	decl	VAR_COUNT
+	jnz	L(oop)
+
+	lea	(%eax,%ecx,M), %ebp
+	shr	$RSH, %ecx
+	shr	%edx
+	mov	SAVE_VP, vp
+	M4_inst	(up), %ebp
+	mov	%ecx, %eax
+	mov	SAVE_UP, up
+	M4_inst	$0, %eax
+	mov	%ebp, (rp)
+	mov	SAVE_EBP, %ebp
+	pop	rp			FRAME_popl()
+	ret
+EPILOGUE()
+
+ASM_END()
diff --git a/gmp/mpn/x86/atom/aors_n.asm b/gmp/mpn/x86/atom/aors_n.asm
new file mode 100644
index 0000000000..45ec287c3a
--- /dev/null
+++ b/gmp/mpn/x86/atom/aors_n.asm
@@ -0,0 +1,159 @@
+dnl  Intel Atom mpn_add_n/mpn_sub_n -- rp[] = up[] +- vp[].
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C			    cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom			 3
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+ifdef(`OPERATION_add_n', `
+	define(M4_inst,        adcl)
+	define(M4_function_n,  mpn_add_n)
+	define(M4_function_nc, mpn_add_nc)
+	define(M4_description, add)
+',`ifdef(`OPERATION_sub_n', `
+	define(M4_inst,        sbbl)
+	define(M4_function_n,  mpn_sub_n)
+	define(M4_function_nc, mpn_sub_nc)
+	define(M4_description, subtract)
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                         mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C	                   mp_size_t size, mp_limb_t carry);
+C
+C Calculate src1,size M4_description src2,size, and store the result in
+C dst,size.  The return value is the carry bit from the top of the result (1
+C or 0).
+C
+C The _nc version accepts 1 or 0 for an initial carry into the low limb of
+C the calculation.  Note values other than 1 or 0 here will lead to garbage
+C results.
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space
+define(SAVE_RP,`PARAM_SIZE')
+define(SAVE_VP,`PARAM_SRC1')
+define(SAVE_UP,`PARAM_DST')
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`vp',  `%ebx')
+define(`cy',  `%ecx')
+define(`r1',  `%ecx')
+define(`r2',  `%edx')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+deflit(`FRAME',0)
+
+PROLOGUE(M4_function_n)
+	xor	cy, cy			C carry
+L(start):
+	mov	PARAM_SIZE, %eax	C size
+	mov	rp, SAVE_RP
+	mov	PARAM_DST, rp
+	mov	up, SAVE_UP
+	mov	PARAM_SRC1, up
+	shr	%eax			C size >> 1
+	mov	vp, SAVE_VP
+	mov	PARAM_SRC2, vp
+	jz	L(one)			C size == 1
+	jc	L(three)		C size % 2 == 1
+
+	shr	cy
+	mov	(up), r2
+	lea	4(up), up
+	lea	4(vp), vp
+	lea	-4(rp), rp
+	jmp	L(entry)
+L(one):
+	shr	cy
+	mov	(up), r1
+	jmp	L(end)
+L(three):
+	shr	cy
+	mov	(up), r1
+
+	ALIGN(16)
+L(oop):
+	M4_inst	(vp), r1
+	lea	8(up), up
+	mov	-4(up), r2
+	lea	8(vp), vp
+	mov	r1, (rp)
+L(entry):
+	M4_inst	-4(vp), r2
+	lea	8(rp), rp
+	dec	%eax
+	mov	(up), r1
+	mov	r2, -4(rp)
+	jnz	L(oop)
+
+L(end):					C %eax is zero here
+	mov	SAVE_UP, up
+	M4_inst	(vp), r1
+	mov	SAVE_VP, vp
+	mov	r1, (rp)
+	adc	%eax, %eax
+	mov	SAVE_RP, rp
+	ret
+EPILOGUE()
+
+PROLOGUE(M4_function_nc)
+	mov	PARAM_CARRY, cy		C carry
+	jmp	L(start)
+EPILOGUE()
+ASM_END()
diff --git a/gmp/mpn/x86/atom/aorslshC_n.asm b/gmp/mpn/x86/atom/aorslshC_n.asm
new file mode 100644
index 0000000000..75ace65e51
--- /dev/null
+++ b/gmp/mpn/x86/atom/aorslshC_n.asm
@@ -0,0 +1,247 @@
+dnl  Intel Atom mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C)
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_addlshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C mp_limb_t mpn_addlshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C				mp_limb_t carry);
+C mp_limb_t mpn_sublshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,);
+C mp_limb_t mpn_sublshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C				mp_signed_limb_t borrow);
+
+defframe(PARAM_CORB,	16)
+defframe(PARAM_SIZE,	12)
+defframe(PARAM_SRC,	 8)
+defframe(PARAM_DST,	 4)
+
+C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size,);
+C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                           mp_size_t size, mp_limb_t carry);
+C mp_limb_t mpn_sublshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                          mp_size_t size,);
+C mp_limb_t mpn_sublshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C                           mp_size_t size, mp_limb_t borrow);
+
+C if src1 == dst, _ip1 is used
+
+C					cycles/limb
+C				dst!=src1,src2	dst==src1
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom			 7		 6
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+defframe(GPARAM_CORB,	20)
+defframe(GPARAM_SIZE,	16)
+defframe(GPARAM_SRC2,	12)
+
+dnl  re-use parameter space
+define(SAVE_EBP,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_UP,`PARAM_DST')
+
+define(M, eval(m4_lshift(1,LSH)))
+define(`rp',  `%edi')
+define(`up',  `%esi')
+
+ASM_START()
+	TEXT
+	ALIGN(8)
+
+PROLOGUE(M4_ip_function_c)
+deflit(`FRAME',0)
+	movl	PARAM_CORB, %ecx
+	movl	%ecx, %edx
+	shr	$LSH, %edx
+	andl	$1, %edx
+	M4_opp	%edx, %ecx
+	jmp	L(start_nc)
+EPILOGUE()
+
+PROLOGUE(M4_ip_function)
+deflit(`FRAME',0)
+
+	xor	%ecx, %ecx
+	xor	%edx, %edx
+L(start_nc):
+	push	rp			FRAME_pushl()
+	mov	PARAM_DST, rp
+	mov	up, SAVE_UP
+	mov	PARAM_SRC, up
+	mov	%ebx, SAVE_EBX
+	mov	PARAM_SIZE, %ebx	C size
+L(inplace):
+	incl	%ebx			C size + 1
+	shr	%ebx			C (size+1)\2
+	mov	%ebp, SAVE_EBP
+	jnc	L(entry)		C size odd
+
+	add	%edx, %edx		C size even
+	mov	%ecx, %ebp
+	mov	(up), %ecx
+	lea	-4(rp), rp
+	lea	(%ebp,%ecx,M), %eax
+	lea	4(up), up
+	jmp	L(enteven)
+
+	ALIGN(16)
+L(oop):
+	lea	(%ecx,%eax,M), %ebp
+	shr	$RSH, %eax
+	mov	4(up), %ecx
+	add	%edx, %edx
+	lea	8(up), up
+	M4_inst	%ebp, (rp)
+	lea	(%eax,%ecx,M), %eax
+
+L(enteven):
+	M4_inst	%eax, 4(rp)
+	lea	8(rp), rp
+
+	sbb	%edx, %edx
+	shr	$RSH, %ecx
+
+L(entry):
+	mov	(up), %eax
+	decl	%ebx
+	jnz	L(oop)
+
+	lea	(%ecx,%eax,M), %ebp
+	shr	$RSH, %eax
+	shr	%edx
+	M4_inst	%ebp, (rp)
+	mov	SAVE_UP, up
+	adc	$0, %eax
+	mov	SAVE_EBP, %ebp
+	mov	SAVE_EBX, %ebx
+	pop	rp			FRAME_popl()
+	ret
+EPILOGUE()
+
+PROLOGUE(M4_function_c)
+deflit(`FRAME',0)
+	movl	GPARAM_CORB, %ecx
+	movl	%ecx, %edx
+	shr	$LSH, %edx
+	andl	$1, %edx
+	M4_opp	%edx, %ecx
+	jmp	L(generic_nc)
+EPILOGUE()
+
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+
+	xor	%ecx, %ecx
+	xor	%edx, %edx
+L(generic_nc):
+	push	rp			FRAME_pushl()
+	mov	PARAM_DST, rp
+	mov	up, SAVE_UP
+	mov	PARAM_SRC, up
+	cmp	rp, up
+	mov	%ebx, SAVE_EBX
+	jne	L(general)
+	mov	GPARAM_SIZE, %ebx	C size
+	mov	GPARAM_SRC2, up
+	jmp	L(inplace)
+
+L(general):
+	mov	GPARAM_SIZE, %eax	C size
+	mov	%ebx, SAVE_EBX
+	incl	%eax			C size + 1
+	mov	up, %ebx		C vp
+	mov	GPARAM_SRC2, up		C up
+	shr	%eax			C (size+1)\2
+	mov	%ebp, SAVE_EBP
+	mov	%eax, GPARAM_SIZE
+	jnc	L(entry2)		C size odd
+
+	add	%edx, %edx		C size even
+	mov	%ecx, %ebp
+	mov	(up), %ecx
+	lea	-4(rp), rp
+	lea	-4(%ebx), %ebx
+	lea	(%ebp,%ecx,M), %eax
+	lea	4(up), up
+	jmp	L(enteven2)
+
+	ALIGN(16)
+L(oop2):
+	lea	(%ecx,%eax,M), %ebp
+	shr	$RSH, %eax
+	mov	4(up), %ecx
+	add	%edx, %edx
+	lea	8(up), up
+	mov	(%ebx), %edx
+	M4_inst	%ebp, %edx
+	lea	(%eax,%ecx,M), %eax
+	mov	%edx, (rp)
+L(enteven2):
+	mov	4(%ebx), %edx
+	lea	8(%ebx), %ebx
+	M4_inst	%eax, %edx
+	mov	%edx, 4(rp)
+	sbb	%edx, %edx
+	shr	$RSH, %ecx
+	lea	8(rp), rp
+L(entry2):
+	mov	(up), %eax
+	decl	GPARAM_SIZE
+	jnz	L(oop2)
+
+	lea	(%ecx,%eax,M), %ebp
+	shr	$RSH, %eax
+	shr	%edx
+	mov	(%ebx), %edx
+	M4_inst	%ebp, %edx
+	mov	%edx, (rp)
+	mov	SAVE_UP, up
+	adc	$0, %eax
+	mov	SAVE_EBP, %ebp
+	mov	SAVE_EBX, %ebx
+	pop	rp			FRAME_popl()
+	ret
+EPILOGUE()
+
+ASM_END()
diff --git a/gmp/mpn/x86/atom/bdiv_q_1.asm b/gmp/mpn/x86/atom/bdiv_q_1.asm
new file mode 100644
index 0000000000..31e908ec44
--- /dev/null
+++ b/gmp/mpn/x86/atom/bdiv_q_1.asm
@@ -0,0 +1,35 @@
+dnl  Intel Atom mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel
+dnl  division by 1-limb divisor, returning quotient only.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+include_mpn(`x86/pentium/bdiv_q_1.asm')
diff --git a/gmp/mpn/x86/atom/cnd_add_n.asm b/gmp/mpn/x86/atom/cnd_add_n.asm
new file mode 100644
index 0000000000..50bf2ad64b
--- /dev/null
+++ b/gmp/mpn/x86/atom/cnd_add_n.asm
@@ -0,0 +1,113 @@
+dnl  X86 mpn_cnd_add_n optimised for Intel Atom.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C			    cycles/limb
+C P5				 ?
+C P6 model 0-8,10-12		 ?
+C P6 model 9   (Banias)		 ?
+C P6 model 13  (Dothan)		 ?
+C P4 model 0-1 (Willamette)	 ?
+C P4 model 2   (Northwood)	 ?
+C P4 model 3-4 (Prescott)	 ?
+C Intel atom			 4.67
+C AMD K6			 ?
+C AMD K7			 ?
+C AMD K8			 ?
+
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`vp',  `%ebp')
+define(`n',   `%ecx')
+define(`cnd', `20(%esp)')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_cnd_add_n)
+	push	%edi
+	push	%esi
+	push	%ebx
+	push	%ebp
+
+	mov	cnd, %eax		C make cnd into a mask (1)
+	mov	24(%esp), rp
+	neg	%eax			C make cnd into a mask (1)
+	mov	28(%esp), up
+	sbb	%eax, %eax		C make cnd into a mask (1)
+	mov	32(%esp), vp
+	mov	%eax, cnd		C make cnd into a mask (1)
+	mov	36(%esp), n
+
+	xor	%edx, %edx
+
+	shr	$1, n
+	jnc	L(top)
+
+	mov	0(vp), %eax
+	and	cnd, %eax
+	lea	4(vp), vp
+	add	0(up), %eax
+	lea	4(rp), rp
+	lea	4(up), up
+	sbb	%edx, %edx
+	mov	%eax, -4(rp)
+	inc	n
+	dec	n
+	je	L(end)
+
+L(top):	sbb	%edx, %edx
+	mov	0(vp), %eax
+	and	cnd, %eax
+	lea	8(vp), vp
+	lea	8(rp), rp
+	mov	-4(vp), %ebx
+	and	cnd, %ebx
+	add	%edx, %edx
+	adc	0(up), %eax
+	lea	8(up), up
+	mov	%eax, -8(rp)
+	adc	-4(up), %ebx
+	dec	n
+	mov	%ebx, -4(rp)
+	jne	L(top)
+
+L(end):	mov	$0, %eax
+	adc	%eax, %eax
+
+	pop	%ebp
+	pop	%ebx
+	pop	%esi
+	pop	%edi
+	ret
+EPILOGUE()
+ASM_END()
diff --git a/gmp/mpn/x86/atom/cnd_sub_n.asm b/gmp/mpn/x86/atom/cnd_sub_n.asm
new file mode 100644
index 0000000000..221bedca37
--- /dev/null
+++ b/gmp/mpn/x86/atom/cnd_sub_n.asm
@@ -0,0 +1,124 @@
+dnl  X86 mpn_cnd_sub_n optimised for Intel Atom.
+
+dnl  Copyright 2013 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C			    cycles/limb
+C P5				 ?
+C P6 model 0-8,10-12		 ?
+C P6 model 9   (Banias)		 ?
+C P6 model 13  (Dothan)		 ?
+C P4 model 0-1 (Willamette)	 ?
+C P4 model 2   (Northwood)	 ?
+C P4 model 3-4 (Prescott)	 ?
+C Intel atom			 5.67
+C AMD K6			 ?
+C AMD K7			 ?
+C AMD K8			 ?
+
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`vp',  `%ebp')
+define(`n',   `%ecx')
+define(`cnd', `20(%esp)')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_cnd_sub_n)
+	push	%edi
+	push	%esi
+	push	%ebx
+	push	%ebp
+
+	mov	cnd, %eax		C make cnd into a mask (1)
+	mov	24(%esp), rp
+	neg	%eax			C make cnd into a mask (1)
+	mov	28(%esp), up
+	sbb	%eax, %eax		C make cnd into a mask (1)
+	mov	32(%esp), vp
+	mov	%eax, cnd		C make cnd into a mask (1)
+	mov	36(%esp), n
+
+	xor	%edx, %edx
+
+	inc	n
+	shr	n
+	jnc	L(ent)
+
+	mov	0(vp), %eax
+	and	cnd, %eax
+	lea	4(vp), vp
+	mov	0(up), %edx
+	sub	%eax, %edx
+	lea	4(rp), rp
+	lea	4(up), up
+	mov	%edx, -4(rp)
+	sbb	%edx, %edx		C save cy
+
+L(ent):	mov	0(vp), %ebx
+	and	cnd, %ebx
+	add	%edx, %edx		C restore cy
+	mov	0(up), %edx
+	dec	n
+	je	L(end)
+
+L(top):	sbb	%ebx, %edx
+	mov	4(vp), %eax
+	mov	%edx, 0(rp)
+	sbb	%edx, %edx		C save cy
+	mov	8(vp), %ebx
+	lea	8(up), up
+	and	cnd, %ebx
+	and	cnd, %eax
+	add	%edx, %edx		C restore cy
+	mov	-4(up), %edx
+	lea	8(rp), rp
+	sbb	%eax, %edx
+	mov	%edx, -4(rp)
+	dec	n
+	mov	0(up), %edx
+	lea	8(vp), vp
+	jne	L(top)
+
+L(end):	sbb	%ebx, %edx
+	mov	%edx, 0(rp)
+
+	mov	$0, %eax
+	adc	%eax, %eax
+
+	pop	%ebp
+	pop	%ebx
+	pop	%esi
+	pop	%edi
+	ret
+EPILOGUE()
+ASM_END()
diff --git a/gmp/mpn/x86/atom/dive_1.asm b/gmp/mpn/x86/atom/dive_1.asm
new file mode 100644
index 0000000000..71036a15a4
--- /dev/null
+++ b/gmp/mpn/x86/atom/dive_1.asm
@@ -0,0 +1,34 @@
+dnl  Intel Atom mpn_divexact_1 -- mpn by limb exact division.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_divexact_1)
+include_mpn(`x86/pentium/dive_1.asm')
diff --git a/gmp/mpn/x86/atom/gmp-mparam.h b/gmp/mpn/x86/atom/gmp-mparam.h
new file mode 100644
index 0000000000..45df12806c
--- /dev/null
+++ b/gmp/mpn/x86/atom/gmp-mparam.h
@@ -0,0 +1,201 @@
+/* Intel Atom/32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000-2011, 2014 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+  * the GNU Lesser General Public License as published by the Free
+    Software Foundation; either version 3 of the License, or (at your
+    option) any later version.
+
+or
+
+  * the GNU General Public License as published by the Free Software
+    Foundation; either version 2 of the License, or (at your option) any
+    later version.
+
+or both in parallel, as here.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received copies of the GNU General Public License and the
+GNU Lesser General Public License along with the GNU MP Library.  If not,
+see https://www.gnu.org/licenses/.  */
+
+#define GMP_LIMB_BITS 32
+#define GMP_LIMB_BYTES 4
+
+/* 1667 MHz Pineview (Atom D510) */
+/* FFT tuning limit = 25000000 */
+/* Generated by tuneup.c, 2014-03-14, gcc 4.5 */
+
+#define MOD_1_NORM_THRESHOLD                 3
+#define MOD_1_UNNORM_THRESHOLD               5
+#define MOD_1N_TO_MOD_1_1_THRESHOLD         11
+#define MOD_1U_TO_MOD_1_1_THRESHOLD          5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD        10
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD         0  /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD     13
+#define USE_PREINV_DIVREM_1                  1  /* native */
+#define DIV_QR_1N_PI1_METHOD                 1
+#define DIV_QR_1_NORM_THRESHOLD              4
+#define DIV_QR_1_UNNORM_THRESHOLD        MP_SIZE_T_MAX  /* never */
+#define DIV_QR_2_PI2_THRESHOLD           MP_SIZE_T_MAX  /* never */
+#define DIVEXACT_1_THRESHOLD                 0  /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD           31
+
+#define MUL_TOOM22_THRESHOLD                20
+#define MUL_TOOM33_THRESHOLD                74
+#define MUL_TOOM44_THRESHOLD               178
+#define MUL_TOOM6H_THRESHOLD               270
+#define MUL_TOOM8H_THRESHOLD               399
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD      73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD     122
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD     115
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD     127
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD     106
+
+#define SQR_BASECASE_THRESHOLD               0  /* always (native) */
+#define SQR_TOOM2_THRESHOLD                 30
+#define SQR_TOOM3_THRESHOLD                105
+#define SQR_TOOM4_THRESHOLD                178
+#define SQR_TOOM6_THRESHOLD                303
+#define SQR_TOOM8_THRESHOLD                527
+
+#define MULMID_TOOM42_THRESHOLD             54
+
+#define MULMOD_BNM1_THRESHOLD               13
+#define SQRMOD_BNM1_THRESHOLD               18
+
+#define MUL_FFT_MODF_THRESHOLD             380  /* k = 5 */
+#define MUL_FFT_TABLE3                                      \
+  { {    380, 5}, {     19, 6}, {     10, 5}, {     21, 6}, \
+    {     11, 5}, {     23, 6}, {     21, 7}, {     11, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     39, 8}, \
+    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
+    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
+    {     31, 8}, {     67, 9}, {     39, 8}, {     79, 9}, \
+    {     47, 8}, {     95,10}, {     31, 9}, {     79,10}, \
+    {     47, 9}, {     95,11}, {     31,10}, {     63, 9}, \
+    {    127, 8}, {    255, 9}, {    135,10}, {     79, 9}, \
+    {    159,10}, {     95, 9}, {    191,11}, {     63,10}, \
+    {    127, 9}, {    255, 8}, {    511, 9}, {    271,10}, \
+    {    143, 9}, {    287, 8}, {    575,10}, {    159,11}, \
+    {     95,10}, {    191, 9}, {    383,12}, {     63,11}, \
+    {    127,10}, {    255, 9}, {    511,10}, {    271, 9}, \
+    {    543,10}, {    287, 9}, {    575,10}, {    303,11}, \
+    {    159,10}, {    319, 9}, {    639,10}, {    335, 9}, \
+    {    671,10}, {    351, 9}, {    703,11}, {    191,10}, \
+    {    383, 9}, {    767,10}, {    415, 9}, {    831,11}, \
+    {    223,10}, {    447,12}, {    127,11}, {    255,10}, \
+    {    543,11}, {    287,10}, {    607, 9}, {   1215,11}, \
+    {    319,10}, {    671,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
+    {    831,11}, {    447,13}, {    127,12}, {    255,11}, \
+    {    543,10}, {   1087,11}, {    607,10}, {   1215,12}, \
+    {    319,11}, {    735,12}, {    383,11}, {    831,12}, \
+    {    447,11}, {    959,13}, {    255,12}, {    511,11}, \
+    {   1087,12}, {    575,11}, {   1151,12}, {    703,11}, \
+    {   1471,13}, {    383,12}, {    831,11}, {   1663,12}, \
+    {    959,14}, {    255,13}, {    511,12}, {   1215,13}, \
+    {    639,12}, {   1471,11}, {   2943,13}, {    767,12}, \
+    {   1663,13}, {    895,12}, {   1919,14}, {    511,13}, \
+    {   1023,12}, {   2111,13}, {   1151,12}, {   2431,13}, \
+    {   1407,12}, {   2943,14}, {    767,13}, {   1663,12}, \
+    {   3455,13}, {   1919,15}, {    511,14}, {   1023,13}, \
+    {   2431,14}, {   1279,13}, {   2943,12}, {   5887,14}, \
+    {  16384,15}, {  32768,16} }
+#define MUL_FFT_TABLE3_SIZE 150
+#define MUL_FFT_THRESHOLD                 4544
+
+#define SQR_FFT_MODF_THRESHOLD             340  /* k = 5 */
+#define SQR_FFT_TABLE3                                      \
+  { {    340, 5}, {     21, 6}, {     11, 5}, {     23, 6}, \
+    {     12, 5}, {     25, 6}, {     21, 7}, {     11, 6}, \
+    {     25, 7}, {     13, 6}, {     27, 7}, {     15, 6}, \
+    {     31, 7}, {     21, 8}, {     11, 7}, {     27, 8}, \
+    {     15, 7}, {     35, 8}, {     19, 7}, {     41, 8}, \
+    {     23, 7}, {     47, 8}, {     27, 9}, {     15, 8}, \
+    {     39, 9}, {     23, 8}, {     51,10}, {     15, 9}, \
+    {     31, 8}, {     63, 9}, {     39, 8}, {     79, 9}, \
+    {     47,10}, {     31, 9}, {     79,10}, {     47, 9}, \
+    {     95,11}, {     31,10}, {     63, 9}, {    127, 8}, \
+    {    255,10}, {     79, 9}, {    159, 8}, {    319,10}, \
+    {     95, 9}, {    191,11}, {     63,10}, {    127, 9}, \
+    {    255, 8}, {    511, 9}, {    271,10}, {    143, 9}, \
+    {    287, 8}, {    575, 9}, {    303, 8}, {    607,10}, \
+    {    159, 9}, {    319,11}, {     95,10}, {    191, 9}, \
+    {    383,12}, {     63,11}, {    127,10}, {    255, 9}, \
+    {    511,10}, {    271, 9}, {    543,10}, {    287, 9}, \
+    {    575,10}, {    303, 9}, {    607,10}, {    319, 9}, \
+    {    639,10}, {    335, 9}, {    671,10}, {    351, 9}, \
+    {    703,11}, {    191,10}, {    383, 9}, {    767,10}, \
+    {    415,11}, {    223,10}, {    447,12}, {    127,11}, \
+    {    255,10}, {    543,11}, {    287,10}, {    607,11}, \
+    {    319,10}, {    671,11}, {    351,10}, {    703,12}, \
+    {    191,11}, {    383,10}, {    767,11}, {    415,10}, \
+    {    831,11}, {    479,13}, {    127,12}, {    255,11}, \
+    {    543,10}, {   1087,11}, {    607,12}, {    319,11}, \
+    {    671,10}, {   1343,11}, {    735,12}, {    383,11}, \
+    {    831,12}, {    447,11}, {    959,13}, {    255,12}, \
+    {    511,11}, {   1087,12}, {    575,11}, {   1215,12}, \
+    {    639,11}, {   1343,12}, {    703,11}, {   1407,13}, \
+    {    383,12}, {    831,11}, {   1663,12}, {    959,14}, \
+    {    255,13}, {    511,12}, {   1215,13}, {    639,12}, \
+    {   1471,13}, {    767,12}, {   1663,13}, {    895,12}, \
+    {   1791,14}, {    511,13}, {   1023,12}, {   2111,13}, \
+    {   1151,12}, {   2431,13}, {   1407,14}, {    767,13}, \
+    {   1663,12}, {   3455,13}, {   1791,15}, {    511,14}, \
+    {   1023,13}, {   2431,14}, {   1279,13}, {   2943,12}, \
+    {   5887,14}, {  16384,15}, {  32768,16} }
+#define SQR_FFT_TABLE3_SIZE 151
+#define SQR_FFT_THRESHOLD                 2880
+
+#define MULLO_BASECASE_THRESHOLD             6
+#define MULLO_DC_THRESHOLD                  48
+#define MULLO_MUL_N_THRESHOLD             8907
+
+#define DC_DIV_QR_THRESHOLD                 59
+#define DC_DIVAPPR_Q_THRESHOLD             250
+#define DC_BDIV_QR_THRESHOLD                59
+#define DC_BDIV_Q_THRESHOLD                169
+
+#define INV_MULMOD_BNM1_THRESHOLD           38
+#define INV_NEWTON_THRESHOLD               246
+#define INV_APPR_THRESHOLD                 246
+
+#define BINV_NEWTON_THRESHOLD              276
+#define REDC_1_TO_REDC_N_THRESHOLD          67
+
+#define MU_DIV_QR_THRESHOLD               1334
+#define MU_DIVAPPR_Q_THRESHOLD            1442
+#define MUPI_DIV_QR_THRESHOLD              114
+#define MU_BDIV_QR_THRESHOLD              1142
+#define MU_BDIV_Q_THRESHOLD               1334
+
+#define POWM_SEC_TABLE  1,22,98,416,1378
+
+#define MATRIX22_STRASSEN_THRESHOLD         13
+#define HGCD_THRESHOLD                     133
+#define HGCD_APPR_THRESHOLD                169
+#define HGCD_REDUCE_THRESHOLD             2479
+#define GCD_DC_THRESHOLD                   460
+#define GCDEXT_DC_THRESHOLD                342
+#define JACOBI_BASE_METHOD                   3
+
+#define GET_STR_DC_THRESHOLD                12
+#define GET_STR_PRECOMPUTE_THRESHOLD        23
+#define SET_STR_DC_THRESHOLD               321
+#define SET_STR_PRECOMPUTE_THRESHOLD      1099
+
+#define FAC_DSC_THRESHOLD                  198
+#define FAC_ODD_THRESHOLD                   34
diff --git a/gmp/mpn/x86/atom/logops_n.asm b/gmp/mpn/x86/atom/logops_n.asm
new file mode 100644
index 0000000000..3cb6d7310c
--- /dev/null
+++ b/gmp/mpn/x86/atom/logops_n.asm
@@ -0,0 +1,151 @@
+dnl  Intel Atom mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C				   cycles/limb
+C				op	nop	opn
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom			 3	 3.5	 3.5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+define(M4_choose_op,
+`ifdef(`OPERATION_$1',`
+define(`M4_function', `mpn_$1')
+define(`M4_want_pre', `$4')
+define(`M4_inst',     `$3')
+define(`M4_want_post',`$2')
+')')
+define(M4pre, `ifelse(M4_want_pre, yes,`$1')')
+define(M4post,`ifelse(M4_want_post,yes,`$1')')
+
+M4_choose_op( and_n,     , andl,    )
+M4_choose_op( andn_n,    , andl, yes)
+M4_choose_op( nand_n, yes, andl,    )
+M4_choose_op( ior_n,     ,  orl,    )
+M4_choose_op( iorn_n,    ,  orl, yes)
+M4_choose_op( nior_n, yes,  orl,    )
+M4_choose_op( xor_n,     , xorl,    )
+M4_choose_op( xnor_n, yes, xorl,    )
+
+ifdef(`M4_function',,
+`m4_error(`Unrecognised or undefined OPERATION symbol
+')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+C void M4_function (mp_ptr dst, mp_srcptr src2, mp_srcptr src1, mp_size_t size);
+C
+
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC1, 12)
+defframe(PARAM_SRC2, 8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space
+define(SAVE_RP,`PARAM_SIZE')
+define(SAVE_VP,`PARAM_SRC1')
+define(SAVE_UP,`PARAM_DST')
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`vp',  `%ebx')
+define(`cnt', `%eax')
+define(`r1',  `%ecx')
+define(`r2',  `%edx')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+deflit(`FRAME',0)
+
+PROLOGUE(M4_function)
+	mov	PARAM_SIZE, cnt		C size
+	mov	rp, SAVE_RP
+	mov	PARAM_DST, rp
+	mov	up, SAVE_UP
+	mov	PARAM_SRC1, up
+	shr	cnt			C size >> 1
+	mov	vp, SAVE_VP
+	mov	PARAM_SRC2, vp
+	mov	(up), r1
+	jz	L(end)			C size == 1
+	jnc	L(even)			C size % 2 == 0
+
+	ALIGN(16)
+L(oop):
+M4pre(`	notl_or_xorl_GMP_NUMB_MASK(r1)')
+	M4_inst	(vp), r1
+	lea	8(up), up
+	mov	-4(up), r2
+M4post(`	notl_or_xorl_GMP_NUMB_MASK(r1)')
+	lea	8(vp), vp
+	mov	r1, (rp)
+L(entry):
+M4pre(`	notl_or_xorl_GMP_NUMB_MASK(r2)')
+	M4_inst	-4(vp), r2
+	lea	8(rp), rp
+M4post(`	notl_or_xorl_GMP_NUMB_MASK(r2)')
+	dec	cnt
+	mov	(up), r1
+	mov	r2, -4(rp)
+	jnz	L(oop)
+
+L(end):
+M4pre(`	notl_or_xorl_GMP_NUMB_MASK(r1)')
+	mov	SAVE_UP, up
+	M4_inst	(vp), r1
+M4post(`notl_or_xorl_GMP_NUMB_MASK(r1)')
+	mov	SAVE_VP, vp
+	mov	r1, (rp)
+	mov	SAVE_RP, rp
+	ret
+
+L(even):
+	mov	r1, r2
+	lea	4(up), up
+	lea	4(vp), vp
+	lea	-4(rp), rp
+	jmp	L(entry)
+EPILOGUE()
+ASM_END()
diff --git a/gmp/mpn/x86/atom/lshift.asm b/gmp/mpn/x86/atom/lshift.asm
new file mode 100644
index 0000000000..f2c70dd3e8
--- /dev/null
+++ b/gmp/mpn/x86/atom/lshift.asm
@@ -0,0 +1,218 @@
+dnl  Intel Atom mpn_lshift -- mpn left shift.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C			unsigned cnt);
+
+C				  cycles/limb
+C				cnt!=1	cnt==1
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom			 5	 2.5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+defframe(PARAM_CNT, 16)
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space
+define(SAVE_UP,`PARAM_CNT')
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_EBP,`PARAM_DST')
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`cnt',  `%ecx')
+
+ASM_START()
+	TEXT
+	ALIGN(8)
+deflit(`FRAME',0)
+PROLOGUE(mpn_lshift)
+	mov	PARAM_CNT, cnt
+	mov	PARAM_SIZE, %edx
+	mov	up, SAVE_UP
+	mov	PARAM_SRC, up
+	push	rp			FRAME_pushl()
+	mov	PARAM_DST, rp
+
+C We can use faster code for shift-by-1 under certain conditions.
+	cmp	$1,cnt
+	jne	L(normal)
+	cmpl	rp, up
+	jnc	L(special)		C jump if s_ptr + 1 >= res_ptr
+	leal	(up,%edx,4),%eax
+	cmpl	%eax,rp
+	jnc	L(special)		C jump if res_ptr >= s_ptr + size
+
+L(normal):
+	lea	-4(up,%edx,4), up
+	mov	%ebx, SAVE_EBX
+	lea	-4(rp,%edx,4), rp
+
+	shr	%edx
+	mov	(up), %eax
+	mov	%edx, VAR_COUNT
+	jnc	L(evn)
+
+	mov	%eax, %ebx
+	shl	%cl, %ebx
+	neg	cnt
+	shr	%cl, %eax
+	test	%edx, %edx
+	jnz	L(gt1)
+	mov	%ebx, (rp)
+	jmp	L(quit)
+
+L(gt1):	mov	%ebp, SAVE_EBP
+	push	%eax
+	mov	-4(up), %eax
+	mov	%eax, %ebp
+	shr	%cl, %eax
+	jmp	L(lo1)
+
+L(evn):	mov	%ebp, SAVE_EBP
+	neg	cnt
+	mov	%eax, %ebp
+	mov	-4(up), %edx
+	shr	%cl, %eax
+	mov	%edx, %ebx
+	shr	%cl, %edx
+	neg	cnt
+	decl	VAR_COUNT
+	lea	4(rp), rp
+	lea	-4(up), up
+	jz	L(end)
+	push	%eax			FRAME_pushl()
+
+	ALIGN(8)
+L(top):	shl	%cl, %ebp
+	or	%ebp, %edx
+	shl	%cl, %ebx
+	neg	cnt
+	mov	-4(up), %eax
+	mov	%eax, %ebp
+	mov	%edx, -4(rp)
+	shr	%cl, %eax
+	lea	-8(rp), rp
+L(lo1):	mov	-8(up), %edx
+	or	%ebx, %eax
+	mov	%edx, %ebx
+	shr	%cl, %edx
+	lea	-8(up), up
+	neg	cnt
+	mov	%eax, (rp)
+	decl	VAR_COUNT
+	jg	L(top)
+
+	pop	%eax			FRAME_popl()
+L(end):
+	shl	%cl, %ebp
+	shl	%cl, %ebx
+	or	%ebp, %edx
+	mov	SAVE_EBP, %ebp
+	mov	%edx, -4(rp)
+	mov	%ebx, -8(rp)
+
+L(quit):
+	mov	SAVE_UP, up
+	mov	SAVE_EBX, %ebx
+	pop	rp			FRAME_popl()
+	ret
+
+L(special):
+deflit(`FRAME',4)
+	lea	3(%edx), %eax		C size + 3
+	dec	%edx			C size - 1
+	mov	(up), %ecx
+	shr	$2, %eax		C (size + 3) / 4
+	and	$3, %edx		C (size - 1) % 4
+	jz	L(goloop)		C jmp if  size == 1 (mod 4)
+	shr	%edx
+	jnc	L(odd)			C jum if  size == 3 (mod 4)
+
+	add	%ecx, %ecx
+	lea	4(up), up
+	mov	%ecx, (rp)
+	mov	(up), %ecx
+	lea	4(rp), rp
+
+	dec	%edx
+	jnz	L(goloop)		C jump if  size == 0 (mod 4)
+L(odd):	lea	-8(up), up
+	lea	-8(rp), rp
+	jmp	L(sentry)		C reached if size == 2 or 3 (mod 4)
+
+L(sloop):
+	adc	%ecx, %ecx
+	mov	4(up), %edx
+	mov	%ecx, (rp)
+	adc	%edx, %edx
+	mov	8(up), %ecx
+	mov	%edx, 4(rp)
+L(sentry):
+	adc	%ecx, %ecx
+	mov	12(up), %edx
+	mov	%ecx, 8(rp)
+	adc	%edx, %edx
+	lea	16(up), up
+	mov	%edx, 12(rp)
+	lea	16(rp), rp
+	mov	(up), %ecx
+L(goloop):
+	decl	%eax
+	jnz	L(sloop)
+
+L(squit):
+	adc	%ecx, %ecx
+	mov	%ecx, (rp)
+	adc	%eax, %eax
+
+	mov	SAVE_UP, up
+	pop	rp			FRAME_popl()
+	ret
+EPILOGUE()
+ASM_END()
diff --git a/gmp/mpn/x86/atom/lshiftc.asm b/gmp/mpn/x86/atom/lshiftc.asm
new file mode 100644
index 0000000000..5be53ed19d
--- /dev/null
+++ b/gmp/mpn/x86/atom/lshiftc.asm
@@ -0,0 +1,159 @@
+dnl  Intel Atom mpn_lshiftc -- mpn left shift with complement.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_lshiftc (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C			 unsigned cnt);
+
+C				cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom			 5.5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+defframe(PARAM_CNT, 16)
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space
+define(SAVE_UP,`PARAM_CNT')
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_EBP,`PARAM_DST')
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`cnt',  `%ecx')
+
+ASM_START()
+	TEXT
+
+PROLOGUE(mpn_lshiftc)
+deflit(`FRAME',0)
+	mov	PARAM_CNT, cnt
+	mov	PARAM_SIZE, %edx
+	mov	up, SAVE_UP
+	mov	PARAM_SRC, up
+	push	rp			FRAME_pushl()
+	mov	PARAM_DST, rp
+
+	lea	-4(up,%edx,4), up
+	mov	%ebx, SAVE_EBX
+	lea	-4(rp,%edx,4), rp
+
+	shr	%edx
+	mov	(up), %eax
+	mov	%edx, VAR_COUNT
+	jnc	L(evn)
+
+	mov	%eax, %ebx
+	shl	%cl, %ebx
+	neg	cnt
+	shr	%cl, %eax
+	test	%edx, %edx
+	jnz	L(gt1)
+	not	%ebx
+	mov	%ebx, (rp)
+	jmp	L(quit)
+
+L(gt1):	mov	%ebp, SAVE_EBP
+	push	%eax
+	mov	-4(up), %eax
+	mov	%eax, %ebp
+	shr	%cl, %eax
+	jmp	L(lo1)
+
+L(evn):	mov	%ebp, SAVE_EBP
+	neg	cnt
+	mov	%eax, %ebp
+	mov	-4(up), %edx
+	shr	%cl, %eax
+	mov	%edx, %ebx
+	shr	%cl, %edx
+	neg	cnt
+	decl	VAR_COUNT
+	lea	4(rp), rp
+	lea	-4(up), up
+	jz	L(end)
+	push	%eax			FRAME_pushl()
+
+L(top):	shl	%cl, %ebp
+	or	%ebp, %edx
+	shl	%cl, %ebx
+	neg	cnt
+	not	%edx
+	mov	-4(up), %eax
+	mov	%eax, %ebp
+	mov	%edx, -4(rp)
+	shr	%cl, %eax
+	lea	-8(rp), rp
+L(lo1):	mov	-8(up), %edx
+	or	%ebx, %eax
+	mov	%edx, %ebx
+	shr	%cl, %edx
+	not	%eax
+	lea	-8(up), up
+	neg	cnt
+	mov	%eax, (rp)
+	decl	VAR_COUNT
+	jg	L(top)
+
+	pop	%eax			FRAME_popl()
+L(end):
+	shl	%cl, %ebp
+	shl	%cl, %ebx
+	or	%ebp, %edx
+	mov	SAVE_EBP, %ebp
+	not	%edx
+	not	%ebx
+	mov	%edx, -4(rp)
+	mov	%ebx, -8(rp)
+
+L(quit):
+	mov	SAVE_UP, up
+	mov	SAVE_EBX, %ebx
+	pop	rp			FRAME_popl()
+	ret
+EPILOGUE()
+ASM_END()
diff --git a/gmp/mpn/x86/atom/mmx/copyd.asm b/gmp/mpn/x86/atom/mmx/copyd.asm
new file mode 100644
index 0000000000..b80fb033fe
--- /dev/null
+++ b/gmp/mpn/x86/atom/mmx/copyd.asm
@@ -0,0 +1,34 @@
+dnl  Intel Atom mpn_copyd -- copy limb vector, decrementing.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_copyd)
+include_mpn(`x86/k7/mmx/copyd.asm')
diff --git a/gmp/mpn/x86/atom/mmx/copyi.asm b/gmp/mpn/x86/atom/mmx/copyi.asm
new file mode 100644
index 0000000000..49b6b8d662
--- /dev/null
+++ b/gmp/mpn/x86/atom/mmx/copyi.asm
@@ -0,0 +1,34 @@
+dnl  Intel Atom mpn_copyi -- copy limb vector, incrementing.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_copyi)
+include_mpn(`x86/k7/mmx/copyi.asm')
diff --git a/gmp/mpn/x86/atom/mmx/hamdist.asm b/gmp/mpn/x86/atom/mmx/hamdist.asm
new file mode 100644
index 0000000000..3fe8253240
--- /dev/null
+++ b/gmp/mpn/x86/atom/mmx/hamdist.asm
@@ -0,0 +1,34 @@
+dnl  Intel Atom mpn_hamdist -- hamming distance.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_hamdist)
+include_mpn(`x86/k7/mmx/popham.asm')
diff --git a/gmp/mpn/x86/atom/mod_34lsub1.asm b/gmp/mpn/x86/atom/mod_34lsub1.asm
new file mode 100644
index 0000000000..6d57ba385d
--- /dev/null
+++ b/gmp/mpn/x86/atom/mod_34lsub1.asm
@@ -0,0 +1,34 @@
+dnl  Intel Atom mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_mod_34lsub1)
+include_mpn(`x86/p6/mod_34lsub1.asm')
diff --git a/gmp/mpn/x86/atom/mode1o.asm b/gmp/mpn/x86/atom/mode1o.asm
new file mode 100644
index 0000000000..c9ee6bd2db
--- /dev/null
+++ b/gmp/mpn/x86/atom/mode1o.asm
@@ -0,0 +1,34 @@
+dnl  Intel Atom mpn_modexact_1_odd -- exact division style remainder.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_modexact_1_odd mpn_modexact_1c_odd)
+include_mpn(`x86/pentium/mode1o.asm')
diff --git a/gmp/mpn/x86/atom/rshift.asm b/gmp/mpn/x86/atom/rshift.asm
new file mode 100644
index 0000000000..1cb5dbefe9
--- /dev/null
+++ b/gmp/mpn/x86/atom/rshift.asm
@@ -0,0 +1,152 @@
+dnl  Intel Atom mpn_rshift -- mpn right shift.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  Converted from AMD64 by Marco Bodrato.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C			unsigned cnt);
+
+C				cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom			 5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+defframe(PARAM_CNT, 16)
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+dnl  re-use parameter space
+define(SAVE_UP,`PARAM_CNT')
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_EBP,`PARAM_DST')
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`cnt',  `%ecx')
+
+ASM_START()
+	TEXT
+	ALIGN(8)
+deflit(`FRAME',0)
+PROLOGUE(mpn_rshift)
+	mov	PARAM_CNT, cnt
+	mov	PARAM_SIZE, %edx
+	mov	up, SAVE_UP
+	mov	PARAM_SRC, up
+	push	rp			FRAME_pushl()
+	mov	PARAM_DST, rp
+	mov	%ebx, SAVE_EBX
+
+	shr	%edx
+	mov	(up), %eax
+	mov	%edx, VAR_COUNT
+	jnc	L(evn)
+
+	mov	%eax, %ebx
+	shr	%cl, %ebx
+	neg	cnt
+	shl	%cl, %eax
+	test	%edx, %edx
+	jnz	L(gt1)
+	mov	%ebx, (rp)
+	jmp	L(quit)
+
+L(gt1):	mov	%ebp, SAVE_EBP
+	push	%eax
+	mov	4(up), %eax
+	mov	%eax, %ebp
+	shl	%cl, %eax
+	jmp	L(lo1)
+
+L(evn):	mov	%ebp, SAVE_EBP
+	neg	cnt
+	mov	%eax, %ebp
+	mov	4(up), %edx
+	shl	%cl, %eax
+	mov	%edx, %ebx
+	shl	%cl, %edx
+	neg	cnt
+	decl	VAR_COUNT
+	lea	-4(rp), rp
+	lea	4(up), up
+	jz	L(end)
+	push	%eax			FRAME_pushl()
+
+	ALIGN(8)
+L(top):	shr	%cl, %ebp
+	or	%ebp, %edx
+	shr	%cl, %ebx
+	neg	cnt
+	mov	4(up), %eax
+	mov	%eax, %ebp
+	mov	%edx, 4(rp)
+	shl	%cl, %eax
+	lea	8(rp), rp
+L(lo1):	mov	8(up), %edx
+	or	%ebx, %eax
+	mov	%edx, %ebx
+	shl	%cl, %edx
+	lea	8(up), up
+	neg	cnt
+	mov	%eax, (rp)
+	decl	VAR_COUNT
+	jg	L(top)
+
+	pop	%eax			FRAME_popl()
+L(end):
+	shr	%cl, %ebp
+	shr	%cl, %ebx
+	or	%ebp, %edx
+	mov	SAVE_EBP, %ebp
+	mov	%edx, 4(rp)
+	mov	%ebx, 8(rp)
+
+L(quit):
+	mov	SAVE_UP, up
+	mov	SAVE_EBX, %ebx
+	pop	rp			FRAME_popl()
+	ret
+EPILOGUE()
+ASM_END()
diff --git a/gmp/mpn/x86/atom/sse2/aorsmul_1.asm b/gmp/mpn/x86/atom/sse2/aorsmul_1.asm
new file mode 100644
index 0000000000..969a14a919
--- /dev/null
+++ b/gmp/mpn/x86/atom/sse2/aorsmul_1.asm
@@ -0,0 +1,174 @@
+dnl x86-32 mpn_addmul_1 and mpn_submul_1 optimised for Intel Atom.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C			    cycles/limb
+C			    cycles/limb
+C P5				 -
+C P6 model 0-8,10-12		 -
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom			 8
+C AMD K6
+C AMD K7			 -
+C AMD K8
+C AMD K10
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`n',  `%ecx')
+
+ifdef(`OPERATION_addmul_1',`
+	define(ADDSUB,  add)
+	define(func_1,  mpn_addmul_1)
+	define(func_1c, mpn_addmul_1c)')
+ifdef(`OPERATION_submul_1',`
+	define(ADDSUB,  sub)
+	define(func_1,  mpn_submul_1)
+	define(func_1c, mpn_submul_1c)')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+	TEXT
+	ALIGN(16)
+PROLOGUE(func_1)
+	xor	%edx, %edx
+L(ent):	push	%edi
+	push	%esi
+	push	%ebx
+	mov	16(%esp), rp
+	mov	20(%esp), up
+	mov	24(%esp), n
+	movd	28(%esp), %mm7
+	test	$1, n
+	jz	L(fi0or2)
+	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	shr	$2, n
+	jnc	L(fi1)
+
+L(fi3):	lea	-8(up), up
+	lea	-8(rp), rp
+	movd	12(up), %mm1
+	movd	%mm0, %ebx
+	pmuludq	%mm7, %mm1
+	add	$1, n			C increment and clear carry
+	jmp	L(lo3)
+
+L(fi1):	movd	%mm0, %ebx
+	jz	L(wd1)
+	movd	4(up), %mm1
+	pmuludq	%mm7, %mm1
+	jmp	L(lo1)
+
+L(fi0or2):
+	movd	(up), %mm1
+	pmuludq	%mm7, %mm1
+	shr	$2, n
+	movd	4(up), %mm0
+	jc	L(fi2)
+	lea	-4(up), up
+	lea	-4(rp), rp
+	movd	%mm1, %eax
+	pmuludq	%mm7, %mm0
+	jmp	L(lo0)
+
+L(fi2):	lea	4(up), up
+	add	$1, n			C increment and clear carry
+	movd	%mm1, %eax
+	lea	-12(rp), rp
+	jmp	L(lo2)
+
+C	ALIGN(16)			C alignment seems irrelevant
+L(top):	movd	4(up), %mm1
+	adc	$0, %edx
+	ADDSUB	%eax, 12(rp)
+	movd	%mm0, %ebx
+	pmuludq	%mm7, %mm1
+	lea	16(rp), rp
+L(lo1):	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %edx
+	movd	%mm1, %eax
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	adc	$0, %edx
+	ADDSUB	%ebx, (rp)
+L(lo0):	psrlq	$32, %mm1
+	adc	%edx, %eax
+	movd	%mm1, %edx
+	movd	%mm0, %ebx
+	movd	12(up), %mm1
+	pmuludq	%mm7, %mm1
+	adc	$0, %edx
+	ADDSUB	%eax, 4(rp)
+L(lo3):	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %edx
+	movd	%mm1, %eax
+	lea	16(up), up
+	movd	(up), %mm0
+	adc	$0, %edx
+	ADDSUB	%ebx, 8(rp)
+L(lo2):	psrlq	$32, %mm1
+	adc	%edx, %eax
+	movd	%mm1, %edx
+	pmuludq	%mm7, %mm0
+	dec	n
+	jnz	L(top)
+
+L(end):	adc	n, %edx			C n is zero here
+	ADDSUB	%eax, 12(rp)
+	movd	%mm0, %ebx
+	lea	16(rp), rp
+L(wd1):	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %eax
+	adc	n, %eax
+	ADDSUB	%ebx, (rp)
+	emms
+	adc	n, %eax
+	pop	%ebx
+	pop	%esi
+	pop	%edi
+	ret
+EPILOGUE()
+PROLOGUE(func_1c)
+	mov	20(%esp), %edx		C carry
+	jmp	L(ent)
+EPILOGUE()
diff --git a/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm b/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm
new file mode 100644
index 0000000000..782e914019
--- /dev/null
+++ b/gmp/mpn/x86/atom/sse2/bdiv_dbm1c.asm
@@ -0,0 +1,34 @@
+dnl  Intel Atom  mpn_bdiv_dbm1.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_bdiv_dbm1c)
+include_mpn(`x86/pentium4/sse2/bdiv_dbm1c.asm')
diff --git a/gmp/mpn/x86/atom/sse2/divrem_1.asm b/gmp/mpn/x86/atom/sse2/divrem_1.asm
new file mode 100644
index 0000000000..f84709a22e
--- /dev/null
+++ b/gmp/mpn/x86/atom/sse2/divrem_1.asm
@@ -0,0 +1,34 @@
+dnl  Intel Atom mpn_divrem_1 -- mpn by limb division.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_preinv_divrem_1 mpn_divrem_1c mpn_divrem_1)
+include_mpn(`x86/pentium4/sse2/divrem_1.asm')
diff --git a/gmp/mpn/x86/atom/sse2/mod_1_1.asm b/gmp/mpn/x86/atom/sse2/mod_1_1.asm
new file mode 100644
index 0000000000..ae6581d9b6
--- /dev/null
+++ b/gmp/mpn/x86/atom/sse2/mod_1_1.asm
@@ -0,0 +1,34 @@
+dnl  Intel Atom/SSE2 mpn_mod_1_1.
+
+dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_mod_1_1p)
+include_mpn(`x86/pentium4/sse2/mod_1_1.asm')
diff --git a/gmp/mpn/x86/atom/sse2/mod_1_4.asm b/gmp/mpn/x86/atom/sse2/mod_1_4.asm
new file mode 100644
index 0000000000..31faa3f0a3
--- /dev/null
+++ b/gmp/mpn/x86/atom/sse2/mod_1_4.asm
@@ -0,0 +1,34 @@
+dnl  Intel Atom/SSE2 mpn_mod_1_4.
+
+dnl  Copyright 2009, 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_mod_1s_4p)
+include_mpn(`x86/pentium4/sse2/mod_1_4.asm')
diff --git a/gmp/mpn/x86/atom/sse2/mul_1.asm b/gmp/mpn/x86/atom/sse2/mul_1.asm
new file mode 100644
index 0000000000..aa3bb974bb
--- /dev/null
+++ b/gmp/mpn/x86/atom/sse2/mul_1.asm
@@ -0,0 +1,124 @@
+dnl  Intel Atom mpn_mul_1.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C			    cycles/limb
+C			    cycles/limb
+C P5				 -
+C P6 model 0-8,10-12		 -
+C P6 model 9  (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0  (Willamette)
+C P4 model 1  (?)
+C P4 model 2  (Northwood)
+C P4 model 3  (Prescott)
+C P4 model 4  (Nocona)
+C Intel Atom			 7.5
+C AMD K6			 -
+C AMD K7			 -
+C AMD K8
+C AMD K10
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_MUL,  16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC,  8)
+defframe(PARAM_DST,  4)
+
+define(`rp', `%edx')
+define(`up', `%esi')
+define(`n',  `%ecx')
+
+ASM_START()
+	TEXT
+	ALIGN(16)
+deflit(`FRAME',0)
+
+PROLOGUE(mpn_mul_1c)
+	movd	PARAM_CARRY, %mm6	C carry
+	jmp	L(ent)
+EPILOGUE()
+
+	ALIGN(8)			C for compact code
+PROLOGUE(mpn_mul_1)
+	pxor	%mm6, %mm6
+L(ent):	push	%esi			FRAME_pushl()
+	mov	PARAM_SRC, up
+	mov	PARAM_SIZE, %eax	C size
+	movd	PARAM_MUL, %mm7
+	movd	(up), %mm0
+	mov	%eax, n
+	and	$3, %eax
+	pmuludq	%mm7, %mm0
+	mov	PARAM_DST, rp
+	jz	L(lo0)
+	cmp	$2, %eax
+	lea	-16(up,%eax,4),up
+	lea	-16(rp,%eax,4),rp
+	jc	L(lo1)
+	jz	L(lo2)
+	jmp	L(lo3)
+
+	ALIGN(16)
+L(top):	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	psrlq	$32, %mm6
+	lea	16(rp), rp
+L(lo0):	paddq	%mm0, %mm6
+	movd	4(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, (rp)
+	psrlq	$32, %mm6
+L(lo3):	paddq	%mm0, %mm6
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, 4(rp)
+	psrlq	$32, %mm6
+L(lo2):	paddq	%mm0, %mm6
+	movd	12(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, 8(rp)
+	psrlq	$32, %mm6
+L(lo1):	paddq	%mm0, %mm6
+	sub	$4, n
+	movd	%mm6, 12(rp)
+	lea	16(up), up
+	ja	L(top)
+
+	psrlq	$32, %mm6
+	movd	%mm6, %eax
+	emms
+	pop	%esi			FRAME_popl()
+	ret
+EPILOGUE()
+ASM_END()
diff --git a/gmp/mpn/x86/atom/sse2/mul_basecase.asm b/gmp/mpn/x86/atom/sse2/mul_basecase.asm
new file mode 100644
index 0000000000..97d3aeb5ad
--- /dev/null
+++ b/gmp/mpn/x86/atom/sse2/mul_basecase.asm
@@ -0,0 +1,501 @@
+dnl  x86 mpn_mul_basecase -- Multiply two limb vectors and store the result in
+dnl  a third limb vector.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C  * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the
+C    4 large loops into one; we could use it for the outer loop branch.
+C  * Optimise code outside of inner loops.
+C  * Write combined addmul_1 feed-in a wind-down code, and use when iterating
+C    outer each loop.  ("Overlapping software pipelining")
+C  * Postpone push of ebx until we know vn > 1.  Perhaps use caller-saves regs
+C    for inlined mul_1, allowing us to postpone all pushes.
+C  * Perhaps write special code for vn <= un < M, for some small M.
+
+C void mpn_mul_basecase (mp_ptr wp,
+C                        mp_srcptr xp, mp_size_t xn,
+C                        mp_srcptr yp, mp_size_t yn);
+C
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`un',  `%ecx')
+define(`vp',  `%ebp')
+define(`vn',  `36(%esp)')
+
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_mul_basecase)
+	push	%edi
+	push	%esi
+	push	%ebx
+	push	%ebp
+	mov	20(%esp), rp
+	mov	24(%esp), up
+	mov	28(%esp), un
+	mov	32(%esp), vp
+
+	movd	(up), %mm0
+	movd	(vp), %mm7
+	pmuludq	%mm7, %mm0
+	pxor	%mm6, %mm6
+
+	mov	un, %eax
+	and	$3, %eax
+	jz	L(of0)
+	cmp	$2, %eax
+	jc	L(of1)
+	jz	L(of2)
+
+C ================================================================
+	jmp	L(m3)
+	ALIGN(16)
+L(lm3):	movd	-4(up), %mm0
+	pmuludq	%mm7, %mm0
+	psrlq	$32, %mm6
+	lea	16(rp), rp
+	paddq	%mm0, %mm6
+	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, -4(rp)
+	psrlq	$32, %mm6
+L(m3):	paddq	%mm0, %mm6
+	movd	4(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, (rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, 4(rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	sub	$4, un
+	movd	%mm6, 8(rp)
+	lea	16(up), up
+	ja	L(lm3)
+
+	psrlq	$32, %mm6
+	movd	%mm6, 12(rp)
+
+	decl	vn
+	jz	L(done)
+	lea	-8(rp), rp
+
+L(ol3):	mov	28(%esp), un
+	neg	un
+	lea	4(vp), vp
+	movd	(vp), %mm7	C read next V limb
+	mov	24(%esp), up
+	lea	16(rp,un,4), rp
+
+	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	sar	$2, un
+	movd	4(up), %mm1
+	movd	%mm0, %ebx
+	pmuludq	%mm7, %mm1
+	lea	-8(up), up
+	xor	%edx, %edx	C zero edx and CF
+	jmp	L(a3)
+
+L(la3):	movd	4(up), %mm1
+	adc	$0, %edx
+	add	%eax, 12(rp)
+	movd	%mm0, %ebx
+	pmuludq	%mm7, %mm1
+	lea	16(rp), rp
+	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %edx
+	movd	%mm1, %eax
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	adc	$0, %edx
+	add	%ebx, (rp)
+	psrlq	$32, %mm1
+	adc	%edx, %eax
+	movd	%mm1, %edx
+	movd	%mm0, %ebx
+	movd	12(up), %mm1
+	pmuludq	%mm7, %mm1
+	adc	$0, %edx
+	add	%eax, 4(rp)
+L(a3):	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %edx
+	movd	%mm1, %eax
+	lea	16(up), up
+	movd	(up), %mm0
+	adc	$0, %edx
+	add	%ebx, 8(rp)
+	psrlq	$32, %mm1
+	adc	%edx, %eax
+	movd	%mm1, %edx
+	pmuludq	%mm7, %mm0
+	inc	un
+	jnz	L(la3)
+
+	adc	un, %edx	C un is zero here
+	add	%eax, 12(rp)
+	movd	%mm0, %ebx
+	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %eax
+	adc	un, %eax
+	add	%ebx, 16(rp)
+	adc	un, %eax
+	mov	%eax, 20(rp)
+
+	decl	vn
+	jnz	L(ol3)
+	jmp	L(done)
+
+C ================================================================
+	ALIGN(16)
+L(lm0):	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	psrlq	$32, %mm6
+	lea	16(rp), rp
+L(of0):	paddq	%mm0, %mm6
+	movd	4(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, (rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, 4(rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	movd	12(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, 8(rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	sub	$4, un
+	movd	%mm6, 12(rp)
+	lea	16(up), up
+	ja	L(lm0)
+
+	psrlq	$32, %mm6
+	movd	%mm6, 16(rp)
+
+	decl	vn
+	jz	L(done)
+	lea	-4(rp), rp
+
+L(ol0):	mov	28(%esp), un
+	neg	un
+	lea	4(vp), vp
+	movd	(vp), %mm7	C read next V limb
+	mov	24(%esp), up
+	lea	20(rp,un,4), rp
+
+	movd	(up), %mm1
+	pmuludq	%mm7, %mm1
+	sar	$2, un
+	movd	4(up), %mm0
+	lea	-4(up), up
+	movd	%mm1, %eax
+	pmuludq	%mm7, %mm0
+	xor	%edx, %edx	C zero edx and CF
+	jmp	L(a0)
+
+L(la0):	movd	4(up), %mm1
+	adc	$0, %edx
+	add	%eax, 12(rp)
+	movd	%mm0, %ebx
+	pmuludq	%mm7, %mm1
+	lea	16(rp), rp
+	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %edx
+	movd	%mm1, %eax
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	adc	$0, %edx
+	add	%ebx, (rp)
+L(a0):	psrlq	$32, %mm1
+	adc	%edx, %eax
+	movd	%mm1, %edx
+	movd	%mm0, %ebx
+	movd	12(up), %mm1
+	pmuludq	%mm7, %mm1
+	adc	$0, %edx
+	add	%eax, 4(rp)
+	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %edx
+	movd	%mm1, %eax
+	lea	16(up), up
+	movd	(up), %mm0
+	adc	$0, %edx
+	add	%ebx, 8(rp)
+	psrlq	$32, %mm1
+	adc	%edx, %eax
+	movd	%mm1, %edx
+	pmuludq	%mm7, %mm0
+	inc	un
+	jnz	L(la0)
+
+	adc	un, %edx	C un is zero here
+	add	%eax, 12(rp)
+	movd	%mm0, %ebx
+	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %eax
+	adc	un, %eax
+	add	%ebx, 16(rp)
+	adc	un, %eax
+	mov	%eax, 20(rp)
+
+	decl	vn
+	jnz	L(ol0)
+	jmp	L(done)
+
+C ================================================================
+	ALIGN(16)
+L(lm1):	movd	-12(up), %mm0
+	pmuludq	%mm7, %mm0
+	psrlq	$32, %mm6
+	lea	16(rp), rp
+	paddq	%mm0, %mm6
+	movd	-8(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, -12(rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	movd	-4(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, -8(rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, -4(rp)
+	psrlq	$32, %mm6
+L(of1):	paddq	%mm0, %mm6
+	sub	$4, un
+	movd	%mm6, (rp)
+	lea	16(up), up
+	ja	L(lm1)
+
+	psrlq	$32, %mm6
+	movd	%mm6, 4(rp)
+
+	decl	vn
+	jz	L(done)
+	lea	-16(rp), rp
+
+L(ol1):	mov	28(%esp), un
+	neg	un
+	lea	4(vp), vp
+	movd	(vp), %mm7	C read next V limb
+	mov	24(%esp), up
+	lea	24(rp,un,4), rp
+
+	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	sar	$2, un
+	movd	%mm0, %ebx
+	movd	4(up), %mm1
+	pmuludq	%mm7, %mm1
+	xor	%edx, %edx	C zero edx and CF
+	inc	un
+	jmp	L(a1)
+
+L(la1):	movd	4(up), %mm1
+	adc	$0, %edx
+	add	%eax, 12(rp)
+	movd	%mm0, %ebx
+	pmuludq	%mm7, %mm1
+	lea	16(rp), rp
+L(a1):	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %edx
+	movd	%mm1, %eax
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	adc	$0, %edx
+	add	%ebx, (rp)
+	psrlq	$32, %mm1
+	adc	%edx, %eax
+	movd	%mm1, %edx
+	movd	%mm0, %ebx
+	movd	12(up), %mm1
+	pmuludq	%mm7, %mm1
+	adc	$0, %edx
+	add	%eax, 4(rp)
+	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %edx
+	movd	%mm1, %eax
+	lea	16(up), up
+	movd	(up), %mm0
+	adc	$0, %edx
+	add	%ebx, 8(rp)
+	psrlq	$32, %mm1
+	adc	%edx, %eax
+	movd	%mm1, %edx
+	pmuludq	%mm7, %mm0
+	inc	un
+	jnz	L(la1)
+
+	adc	un, %edx	C un is zero here
+	add	%eax, 12(rp)
+	movd	%mm0, %ebx
+	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %eax
+	adc	un, %eax
+	add	%ebx, 16(rp)
+	adc	un, %eax
+	mov	%eax, 20(rp)
+
+	decl	vn
+	jnz	L(ol1)
+	jmp	L(done)
+
+C ================================================================
+	ALIGN(16)
+L(lm2):	movd	-8(up), %mm0
+	pmuludq	%mm7, %mm0
+	psrlq	$32, %mm6
+	lea	16(rp), rp
+	paddq	%mm0, %mm6
+	movd	-4(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, -8(rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, -4(rp)
+	psrlq	$32, %mm6
+L(of2):	paddq	%mm0, %mm6
+	movd	4(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, (rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	sub	$4, un
+	movd	%mm6, 4(rp)
+	lea	16(up), up
+	ja	L(lm2)
+
+	psrlq	$32, %mm6
+	movd	%mm6, 8(rp)
+
+	decl	vn
+	jz	L(done)
+	lea	-12(rp), rp
+
+L(ol2):	mov	28(%esp), un
+	neg	un
+	lea	4(vp), vp
+	movd	(vp), %mm7	C read next V limb
+	mov	24(%esp), up
+	lea	12(rp,un,4), rp
+
+	movd	(up), %mm1
+	pmuludq	%mm7, %mm1
+	sar	$2, un
+	movd	4(up), %mm0
+	lea	4(up), up
+	movd	%mm1, %eax
+	xor	%edx, %edx	C zero edx and CF
+	jmp	L(lo2)
+
+L(la2):	movd	4(up), %mm1
+	adc	$0, %edx
+	add	%eax, 12(rp)
+	movd	%mm0, %ebx
+	pmuludq	%mm7, %mm1
+	lea	16(rp), rp
+	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %edx
+	movd	%mm1, %eax
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	adc	$0, %edx
+	add	%ebx, (rp)
+	psrlq	$32, %mm1
+	adc	%edx, %eax
+	movd	%mm1, %edx
+	movd	%mm0, %ebx
+	movd	12(up), %mm1
+	pmuludq	%mm7, %mm1
+	adc	$0, %edx
+	add	%eax, 4(rp)
+	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %edx
+	movd	%mm1, %eax
+	lea	16(up), up
+	movd	(up), %mm0
+	adc	$0, %edx
+	add	%ebx, 8(rp)
+L(lo2):	psrlq	$32, %mm1
+	adc	%edx, %eax
+	movd	%mm1, %edx
+	pmuludq	%mm7, %mm0
+	inc	un
+	jnz	L(la2)
+
+	adc	un, %edx	C un is zero here
+	add	%eax, 12(rp)
+	movd	%mm0, %ebx
+	psrlq	$32, %mm0
+	adc	%edx, %ebx
+	movd	%mm0, %eax
+	adc	un, %eax
+	add	%ebx, 16(rp)
+	adc	un, %eax
+	mov	%eax, 20(rp)
+
+	decl	vn
+	jnz	L(ol2)
+C	jmp	L(done)
+
+C ================================================================
+L(done):
+	emms
+	pop	%ebp
+	pop	%ebx
+	pop	%esi
+	pop	%edi
+	ret
+EPILOGUE()
diff --git a/gmp/mpn/x86/atom/sse2/popcount.asm b/gmp/mpn/x86/atom/sse2/popcount.asm
new file mode 100644
index 0000000000..7847aec8e6
--- /dev/null
+++ b/gmp/mpn/x86/atom/sse2/popcount.asm
@@ -0,0 +1,35 @@
+dnl  Intel Atom mpn_popcount -- population count.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86/pentium4/sse2/popcount.asm')
diff --git a/gmp/mpn/x86/atom/sse2/sqr_basecase.asm b/gmp/mpn/x86/atom/sse2/sqr_basecase.asm
new file mode 100644
index 0000000000..af19ed854d
--- /dev/null
+++ b/gmp/mpn/x86/atom/sse2/sqr_basecase.asm
@@ -0,0 +1,634 @@
+dnl  x86 mpn_sqr_basecase -- square an mpn number, optimised for atom.
+
+dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C  * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the
+C    4 large loops into one; we could use it for the outer loop branch.
+C  * Optimise code outside of inner loops.
+C  * Write combined addmul_1 feed-in a wind-down code, and use when iterating
+C    outer each loop.  ("Overlapping software pipelining")
+C  * Perhaps use caller-saves regs for inlined mul_1, allowing us to postpone
+C    all pushes.
+C  * Perhaps write special code for n < M, for some small M.
+C  * Replace inlined addmul_1 with smaller code from aorsmul_1.asm, or perhaps
+C    with even less pipelined code.
+C  * We run the outer loop until we have a 2-limb by 1-limb addmul_1 left.
+C    Consider breaking out earlier, saving high the cost of short loops.
+
+C void mpn_sqr_basecase (mp_ptr wp,
+C                        mp_srcptr xp, mp_size_t xn);
+
+define(`rp',  `%edi')
+define(`up',  `%esi')
+define(`n',   `%ecx')
+
+define(`un',  `%ebp')
+
+	TEXT
+	ALIGN(16)
+PROLOGUE(mpn_sqr_basecase)
+	push	%edi
+	push	%esi
+	mov	12(%esp), rp
+	mov	16(%esp), up
+	mov	20(%esp), n
+
+	lea	4(rp), rp	C write triangular product starting at rp[1]
+	dec	n
+	movd	(up), %mm7
+
+	jz	L(one)
+	lea	4(up), up
+	push	%ebx
+	push	%ebp
+	mov	n, %eax
+
+	movd	(up), %mm0
+	neg	n
+	pmuludq	%mm7, %mm0
+	pxor	%mm6, %mm6
+	mov	n, un
+
+	and	$3, %eax
+	jz	L(of0)
+	cmp	$2, %eax
+	jc	L(of1)
+	jz	L(of2)
+
+C ================================================================
+	jmp	L(m3)
+	ALIGN(16)
+L(lm3):	movd	-4(up), %mm0
+	pmuludq	%mm7, %mm0
+	psrlq	$32, %mm6
+	lea	16(rp), rp
+	paddq	%mm0, %mm6
+	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, -4(rp)
+	psrlq	$32, %mm6
+L(m3):	paddq	%mm0, %mm6
+	movd	4(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, (rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, 4(rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	add	$4, un
+	movd	%mm6, 8(rp)
+	lea	16(up), up
+	js	L(lm3)
+
+	psrlq	$32, %mm6
+	movd	%mm6, 12(rp)
+
+	inc	n
+C	jz	L(done)
+  lea	-12(up), up
+  lea	4(rp), rp
+	jmp	L(ol2)
+
+C ================================================================
+	ALIGN(16)
+L(lm0):	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	psrlq	$32, %mm6
+	lea	16(rp), rp
+L(of0):	paddq	%mm0, %mm6
+	movd	4(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, (rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, 4(rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	movd	12(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, 8(rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	add	$4, un
+	movd	%mm6, 12(rp)
+	lea	16(up), up
+	js	L(lm0)
+
+	psrlq	$32, %mm6
+	movd	%mm6, 16(rp)
+
+	inc	n
+C	jz	L(done)
+  lea	-8(up), up
+  lea	8(rp), rp
+	jmp	L(ol3)
+
+C ================================================================
+	ALIGN(16)
+L(lm1):	movd	-12(up), %mm0
+	pmuludq	%mm7, %mm0
+	psrlq	$32, %mm6
+	lea	16(rp), rp
+	paddq	%mm0, %mm6
+	movd	-8(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, -12(rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	movd	-4(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, -8(rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, -4(rp)
+	psrlq	$32, %mm6
+L(of1):	paddq	%mm0, %mm6
+	add	$4, un
+	movd	%mm6, (rp)
+	lea	16(up), up
+	js	L(lm1)
+
+	psrlq	$32, %mm6
+	movd	%mm6, 4(rp)
+
+	inc	n
+	jz	L(done)		C goes away when we add special n=2 code
+  lea	-20(up), up
+  lea	-4(rp), rp
+	jmp	L(ol0)
+
+C ================================================================
+	ALIGN(16)
+L(lm2):	movd	-8(up), %mm0
+	pmuludq	%mm7, %mm0
+	psrlq	$32, %mm6
+	lea	16(rp), rp
+	paddq	%mm0, %mm6
+	movd	-4(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, -8(rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, -4(rp)
+	psrlq	$32, %mm6
+L(of2):	paddq	%mm0, %mm6
+	movd	4(up), %mm0
+	pmuludq	%mm7, %mm0
+	movd	%mm6, (rp)
+	psrlq	$32, %mm6
+	paddq	%mm0, %mm6
+	add	$4, un
+	movd	%mm6, 4(rp)
+	lea	16(up), up
+	js	L(lm2)
+
+	psrlq	$32, %mm6
+	movd	%mm6, 8(rp)
+
+	inc	n
+C	jz	L(done)
+  lea	-16(up), up
+C  lea	(rp), rp
+C	jmp	L(ol1)
+
+C ================================================================
+
+L(ol1):	lea	4(up,n,4), up
+	movd	(up), %mm7	C read next U invariant limb
+	lea	8(rp,n,4), rp
+	mov	n, un
+
+	movd	4(up), %mm1
+	pmuludq	%mm7, %mm1
+	sar	$2, un
+	movd	%mm1, %ebx
+	inc	un
+	jz	L(re1)
+
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	xor	%edx, %edx	C zero edx and CF
+	jmp	L(a1)
+
+L(la1):	adc	$0, %edx
+	add	%ebx, 12(rp)
+	movd	%mm0, %eax
+	pmuludq	%mm7, %mm1
+	lea	16(rp), rp
+	psrlq	$32, %mm0
+	adc	%edx, %eax
+	movd	%mm0, %edx
+	movd	%mm1, %ebx
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	adc	$0, %edx
+	add	%eax, (rp)
+L(a1):	psrlq	$32, %mm1
+	adc	%edx, %ebx
+	movd	%mm1, %edx
+	movd	%mm0, %eax
+	movd	12(up), %mm1
+	pmuludq	%mm7, %mm1
+	adc	$0, %edx
+	add	%ebx, 4(rp)
+	psrlq	$32, %mm0
+	adc	%edx, %eax
+	movd	%mm0, %edx
+	movd	%mm1, %ebx
+	lea	16(up), up
+	movd	(up), %mm0
+	adc	$0, %edx
+	add	%eax, 8(rp)
+	psrlq	$32, %mm1
+	adc	%edx, %ebx
+	movd	%mm1, %edx
+	pmuludq	%mm7, %mm0
+	inc	un
+	movd	4(up), %mm1
+	jnz	L(la1)
+
+	adc	un, %edx	C un is zero here
+	add	%ebx, 12(rp)
+	movd	%mm0, %eax
+	pmuludq	%mm7, %mm1
+	lea	16(rp), rp
+	psrlq	$32, %mm0
+	adc	%edx, %eax
+	movd	%mm0, %edx
+	movd	%mm1, %ebx
+	adc	un, %edx
+	add	%eax, (rp)
+	psrlq	$32, %mm1
+	adc	%edx, %ebx
+	movd	%mm1, %eax
+	adc	un, %eax
+	add	%ebx, 4(rp)
+	adc	un, %eax
+	mov	%eax, 8(rp)
+
+	inc	n
+
+C ================================================================
+
+L(ol0):	lea	(up,n,4), up
+	movd	4(up), %mm7	C read next U invariant limb
+	lea	4(rp,n,4), rp
+	mov	n, un
+
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	sar	$2, un
+	movd	12(up), %mm1
+	movd	%mm0, %eax
+	pmuludq	%mm7, %mm1
+	xor	%edx, %edx	C zero edx and CF
+	jmp	L(a0)
+
+L(la0):	adc	$0, %edx
+	add	%ebx, 12(rp)
+	movd	%mm0, %eax
+	pmuludq	%mm7, %mm1
+	lea	16(rp), rp
+	psrlq	$32, %mm0
+	adc	%edx, %eax
+	movd	%mm0, %edx
+	movd	%mm1, %ebx
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	adc	$0, %edx
+	add	%eax, (rp)
+	psrlq	$32, %mm1
+	adc	%edx, %ebx
+	movd	%mm1, %edx
+	movd	%mm0, %eax
+	movd	12(up), %mm1
+	pmuludq	%mm7, %mm1
+	adc	$0, %edx
+	add	%ebx, 4(rp)
+L(a0):	psrlq	$32, %mm0
+	adc	%edx, %eax
+	movd	%mm0, %edx
+	movd	%mm1, %ebx
+	lea	16(up), up
+	movd	(up), %mm0
+	adc	$0, %edx
+	add	%eax, 8(rp)
+	psrlq	$32, %mm1
+	adc	%edx, %ebx
+	movd	%mm1, %edx
+	pmuludq	%mm7, %mm0
+	inc	un
+	movd	4(up), %mm1
+	jnz	L(la0)
+
+	adc	un, %edx	C un is zero here
+	add	%ebx, 12(rp)
+	movd	%mm0, %eax
+	pmuludq	%mm7, %mm1
+	lea	16(rp), rp
+	psrlq	$32, %mm0
+	adc	%edx, %eax
+	movd	%mm0, %edx
+	movd	%mm1, %ebx
+	adc	un, %edx
+	add	%eax, (rp)
+	psrlq	$32, %mm1
+	adc	%edx, %ebx
+	movd	%mm1, %eax
+	adc	un, %eax
+	add	%ebx, 4(rp)
+	adc	un, %eax
+	mov	%eax, 8(rp)
+
+	inc	n
+
+C ================================================================
+
+L(ol3):	lea	12(up,n,4), up
+	movd	-8(up), %mm7	C read next U invariant limb
+	lea	(rp,n,4), rp	C put rp back
+	mov	n, un
+
+	movd	-4(up), %mm1
+	pmuludq	%mm7, %mm1
+	sar	$2, un
+	movd	%mm1, %ebx
+	movd	(up), %mm0
+	xor	%edx, %edx	C zero edx and CF
+	jmp	L(a3)
+
+L(la3):	adc	$0, %edx
+	add	%ebx, 12(rp)
+	movd	%mm0, %eax
+	pmuludq	%mm7, %mm1
+	lea	16(rp), rp
+	psrlq	$32, %mm0
+	adc	%edx, %eax
+	movd	%mm0, %edx
+	movd	%mm1, %ebx
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	adc	$0, %edx
+	add	%eax, (rp)
+	psrlq	$32, %mm1
+	adc	%edx, %ebx
+	movd	%mm1, %edx
+	movd	%mm0, %eax
+	movd	12(up), %mm1
+	pmuludq	%mm7, %mm1
+	adc	$0, %edx
+	add	%ebx, 4(rp)
+	psrlq	$32, %mm0
+	adc	%edx, %eax
+	movd	%mm0, %edx
+	movd	%mm1, %ebx
+	lea	16(up), up
+	movd	(up), %mm0
+	adc	$0, %edx
+	add	%eax, 8(rp)
+L(a3):	psrlq	$32, %mm1
+	adc	%edx, %ebx
+	movd	%mm1, %edx
+	pmuludq	%mm7, %mm0
+	inc	un
+	movd	4(up), %mm1
+	jnz	L(la3)
+
+	adc	un, %edx	C un is zero here
+	add	%ebx, 12(rp)
+	movd	%mm0, %eax
+	pmuludq	%mm7, %mm1
+	lea	16(rp), rp
+	psrlq	$32, %mm0
+	adc	%edx, %eax
+	movd	%mm0, %edx
+	movd	%mm1, %ebx
+	adc	un, %edx
+	add	%eax, (rp)
+	psrlq	$32, %mm1
+	adc	%edx, %ebx
+	movd	%mm1, %eax
+	adc	un, %eax
+	add	%ebx, 4(rp)
+	adc	un, %eax
+	mov	%eax, 8(rp)
+
+	inc	n
+
+C ================================================================
+
+L(ol2):	lea	8(up,n,4), up
+	movd	-4(up), %mm7	C read next U invariant limb
+	lea	12(rp,n,4), rp
+	mov	n, un
+
+	movd	(up), %mm0
+	pmuludq	%mm7, %mm0
+	xor	%edx, %edx
+	sar	$2, un
+	movd	4(up), %mm1
+	test	un, un		C clear carry
+	movd	%mm0, %eax
+	pmuludq	%mm7, %mm1
+	inc	un
+	jnz	L(a2)
+	jmp	L(re2)
+
+L(la2):	adc	$0, %edx
+	add	%ebx, 12(rp)
+	movd	%mm0, %eax
+	pmuludq	%mm7, %mm1
+	lea	16(rp), rp
+L(a2):	psrlq	$32, %mm0
+	adc	%edx, %eax
+	movd	%mm0, %edx
+	movd	%mm1, %ebx
+	movd	8(up), %mm0
+	pmuludq	%mm7, %mm0
+	adc	$0, %edx
+	add	%eax, (rp)
+	psrlq	$32, %mm1
+	adc	%edx, %ebx
+	movd	%mm1, %edx
+	movd	%mm0, %eax
+	movd	12(up), %mm1
+	pmuludq	%mm7, %mm1
+	adc	$0, %edx
+	add	%ebx, 4(rp)
+	psrlq	$32, %mm0
+	adc	%edx, %eax
+	movd	%mm0, %edx
+	movd	%mm1, %ebx
+	lea	16(up), up
+	movd	(up), %mm0
+	adc	$0, %edx
+	add	%eax, 8(rp)
+	psrlq	$32, %mm1
+	adc	%edx, %ebx
+	movd	%mm1, %edx
+	pmuludq	%mm7, %mm0
+	inc	un
+	movd	4(up), %mm1
+	jnz	L(la2)
+
+	adc	un, %edx	C un is zero here
+	add	%ebx, 12(rp)
+	movd	%mm0, %eax
+	pmuludq	%mm7, %mm1
+	lea	16(rp), rp
+	psrlq	$32, %mm0
+	adc	%edx, %eax
+	movd	%mm0, %edx
+	movd	%mm1, %ebx
+	adc	un, %edx
+	add	%eax, (rp)
+	psrlq	$32, %mm1
+	adc	%edx, %ebx
+	movd	%mm1, %eax
+	adc	un, %eax
+	add	%ebx, 4(rp)
+	adc	un, %eax
+	mov	%eax, 8(rp)
+
+	inc	n
+	jmp	L(ol1)
+
+C ================================================================
+L(re2):	psrlq	$32, %mm0
+	movd	(up), %mm7	C read next U invariant limb
+	adc	%edx, %eax
+	movd	%mm0, %edx
+	movd	%mm1, %ebx
+	adc	un, %edx
+	add	%eax, (rp)
+	lea	4(rp), rp
+	psrlq	$32, %mm1
+	adc	%edx, %ebx
+	movd	%mm1, %eax
+	movd	4(up), %mm1
+	adc	un, %eax
+	add	%ebx, (rp)
+	pmuludq	%mm7, %mm1
+	adc	un, %eax
+	mov	%eax, 4(rp)
+	movd	%mm1, %ebx
+
+L(re1):	psrlq	$32, %mm1
+	add	%ebx, 4(rp)
+	movd	%mm1, %eax
+	adc	un, %eax
+	xor	n, n		C make n zeroness assumption below true
+	mov	%eax, 8(rp)
+
+L(done):			C n is zero here
+	mov	24(%esp), up
+	mov	28(%esp), %eax
+
+	movd	(up), %mm0
+	inc	%eax
+	pmuludq	%mm0, %mm0
+	lea	4(up), up
+	mov	20(%esp), rp
+	shr	%eax
+	movd	%mm0, (rp)
+	psrlq	$32, %mm0
+	lea	-12(rp), rp
+	mov	%eax, 28(%esp)
+	jnc	L(odd)
+
+	movd	%mm0, %ebp
+	movd	(up), %mm0
+	lea	8(rp), rp
+	pmuludq	%mm0, %mm0
+	lea	-4(up), up
+	add	8(rp), %ebp
+	movd	%mm0, %edx
+	adc	12(rp), %edx
+	rcr	n
+	jmp	L(ent)
+
+C	ALIGN(16)		C alignment seems irrelevant
+L(top):	movd	(up), %mm1
+	adc	n, n
+	movd	%mm0, %eax
+	pmuludq	%mm1, %mm1
+	movd	4(up), %mm0
+	adc	(rp), %eax
+	movd	%mm1, %ebx
+	pmuludq	%mm0, %mm0
+	psrlq	$32, %mm1
+	adc	4(rp), %ebx
+	movd	%mm1, %ebp
+	movd	%mm0, %edx
+	adc	8(rp), %ebp
+	adc	12(rp), %edx
+	rcr	n		C FIXME: isn't this awfully slow on atom???
+	adc	%eax, (rp)
+	adc	%ebx, 4(rp)
+L(ent):	lea	8(up), up
+	adc	%ebp, 8(rp)
+	psrlq	$32, %mm0
+	adc	%edx, 12(rp)
+L(odd):	decl	28(%esp)
+	lea	16(rp), rp
+	jnz	L(top)
+
+L(end):	adc	n, n
+	movd	%mm0, %eax
+	adc	n, %eax
+	mov	%eax, (rp)
+
+L(rtn):	emms
+	pop	%ebp
+	pop	%ebx
+	pop	%esi
+	pop	%edi
+	ret
+
+L(one):	pmuludq	%mm7, %mm7
+	movq	%mm7, -4(rp)
+	emms
+	pop	%esi
+	pop	%edi
+	ret
+EPILOGUE()
diff --git a/gmp/mpn/x86/atom/sublsh1_n.asm b/gmp/mpn/x86/atom/sublsh1_n.asm
new file mode 100644
index 0000000000..d3e7e5b5cb
--- /dev/null
+++ b/gmp/mpn/x86/atom/sublsh1_n.asm
@@ -0,0 +1,34 @@
+dnl  Intel Atom mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_sublsh1_n_ip1)
+include_mpn(`x86/k7/sublsh1_n.asm')
diff --git a/gmp/mpn/x86/atom/sublsh2_n.asm b/gmp/mpn/x86/atom/sublsh2_n.asm
new file mode 100644
index 0000000000..79405cf9f4
--- /dev/null
+++ b/gmp/mpn/x86/atom/sublsh2_n.asm
@@ -0,0 +1,57 @@
+dnl  Intel Atom mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).
+
+dnl  Contributed to the GNU project by Marco Bodrato.
+
+dnl  Copyright 2011 Free Software Foundation, Inc.
+
+dnl  This file is part of the GNU MP Library.
+dnl
+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
+dnl  it under the terms of either:
+dnl
+dnl    * the GNU Lesser General Public License as published by the Free
+dnl      Software Foundation; either version 3 of the License, or (at your
+dnl      option) any later version.
+dnl
+dnl  or
+dnl
+dnl    * the GNU General Public License as published by the Free Software
+dnl      Foundation; either version 2 of the License, or (at your option) any
+dnl      later version.
+dnl
+dnl  or both in parallel, as here.
+dnl
+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+dnl  for more details.
+dnl
+dnl  You should have received copies of the GNU General Public License and the
+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
+dnl  see https://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+define(RSH, 30)
+
+ifdef(`OPERATION_addlsh2_n', `
+	define(M4_inst,		adcl)
+	define(M4_opp,		subl)
+	define(M4_function,	mpn_addlsh2_n)
+	define(M4_function_c,	mpn_addlsh2_nc)
+	define(M4_ip_function_c, mpn_addlsh2_nc_ip1)
+	define(M4_ip_function,	mpn_addlsh2_n_ip1)
+',`ifdef(`OPERATION_sublsh2_n', `
+	define(M4_inst,		sbbl)
+	define(M4_opp,		addl)
+	define(M4_function,	mpn_sublsh2_n)
+	define(M4_function_c,	mpn_sublsh2_nc)
+	define(M4_ip_function_c, mpn_sublsh2_nc_ip1)
+	define(M4_ip_function,	mpn_sublsh2_n_ip1)
+',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_sublsh2_n
+')')')
+
+MULFUNC_PROLOGUE(mpn_sublsh2_n mpn_sublsh2_nc mpn_sublsh2_n_ip1 mpn_sublsh2_nc_ip1)
+
+include_mpn(`x86/atom/aorslshC_n.asm')