Initial revision

author: tege <tege@gmplib.org> 1996-05-08 09:10:48 +0200
committer: tege <tege@gmplib.org> 1996-05-08 09:10:48 +0200
commit: c6d715868f53b08c62a129ffd77fb585fd89c43b (patch)
tree: 82f36d2d8cbe7e07ad3e18d5c6e047e8796d861e /mpn
download: gmp-c6d715868f53b08c62a129ffd77fb585fd89c43b.tar.gz
207 files changed, 21582 insertions, 0 deletions
diff --git a/mpn/Makefile.in b/mpn/Makefile.in
new file mode 100644
index 000000000..5028d14b1
--- /dev/null
+++ b/mpn/Makefile.in
@@ -0,0 +1,92 @@
+# Makefile for GNU MP/mpn functions
+# Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+srcdir = .
+
+MPN_OBJECTS = This gets filled in by configure.in.
+MPN_LINKS = This gets filled in by configure.in.
+CC = gcc
+CPP = $(CC) -E
+CFLAGS = -g -O
+INCLUDES = -I. -I.. -I$(srcdir) -I$(srcdir)/..
+AR = ar
+AR_FLAGS = rc
+SFLAGS=
+
+#### host and target specific makefile fragments come in here.
+###
+
+libmpn.a: Makefile.in mp_bases.o $(MPN_OBJECTS)
+	rm -f $@
+	$(AR) $(AR_FLAGS) $@ mp_bases.o $(MPN_OBJECTS)
+
+.SUFFIXES: .c .s .S
+
+.c.o:
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(XCFLAGS) $<
+
+.s.o:
+	$(CC) -c $(CFLAGS) $<
+
+.S.o:
+	$(CPP) $(SFLAGS) $(INCLUDES) $(CFLAGS) $< | grep -v '^#' >tmp-$*.s
+	$(CC) -c tmp-$*.s -o $@
+	rm -f tmp-$*.s
+
+clean mostlyclean:
+	rm -f *.o tmp-* libmpn.a
+	#-cd tests; $(MAKE) $@
+distclean maintainer-clean: clean
+	rm -f asm-syntax.h Makefile config.status $(MPN_LINKS)
+	-cd tests; $(MAKE) $@
+
+Makefile: $(srcdir)/Makefile.in
+	$(SHELL) ./config.status
+
+
+# Maybe configure could add dependencies here..?
+
+H = $(srcdir)/../gmp.h $(srcdir)/../gmp-impl.h gmp-mparam.h
+L = $(srcdir)/../longlong.h
+
+mp_bases.o: $(srcdir)/mp_bases.c $(H)
+bdivmod.o: bdivmod.c $(H) $(L)
+cmp.o: cmp.c $(H)
+divmod_1.o: divmod_1.c $(H) $(L)
+divrem.o: divrem.c $(H) $(L)
+divrem_1.o: divrem_1.c $(H) $(L)
+dump.o: dump.c $(H)
+gcd.o: gcd.c $(H) $(L)
+gcd_1.o: gcd_1.c $(H) $(L)
+gcdext.o: gcdext.c $(H) $(L)
+get_str.o: get_str.c $(H) $(L)
+hamdist.o: hamdist.c $(H)
+inlines.o: inlines.c $(srcdir)/../gmp.h
+mod_1.o: mod_1.c $(H) $(L)
+mul.o: mul.c $(H)
+mul_n.o: mul_n.c $(H)
+perfsqr.o: perfsqr.c $(H) $(L)
+popcount.o: popcount.c $(H)
+pre_mod_1.o: pre_mod_1.c $(H) $(L)
+random2.o: random2.c $(H)
+scan0.o: scan0.c $(H) $(L)
+scan1.o: scan1.c $(H) $(L)
+set_str.o: set_str.c $(H)
+sqrtrem.o: sqrtrem.c $(H) $(L)
diff --git a/mpn/README b/mpn/README
new file mode 100644
index 000000000..3da559e50
--- /dev/null
+++ b/mpn/README
@@ -0,0 +1,15 @@
+This directory contains all code for the mpn layer of GMP.
+
+Most subdirectories contain machine-dependent code, written in assembly or
+C.  The `generic' subdirectory contains default code, used when there is no
+machine-dependent replacement for a particular machine.
+
+There is one subdirectory for each architecture.  Note that e.g., 32-bit
+sparc and 64-bit sparc cannot share any code, and are therefore considered
+completely different architecture.
+
+A particular machine will only use code from one such subdirectory, and the
+`generic' subdirectory.  The architecture-specific subdirectory contains a
+hierachy of directories for various architecture variants and
+implementations; the top-most level contains code that runs correctly on all
+variants.
diff --git a/mpn/a29k/add_n.s b/mpn/a29k/add_n.s
new file mode 100644
index 000000000..74c20e3f7
--- /dev/null
+++ b/mpn/a29k/add_n.s
@@ -0,0 +1,120 @@
+; 29000 __mpn_add -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; s2_ptr	lr4
+; size		lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+; The 29k has no addition or subtraction instructions that doesn't
+; affect carry, so we need to save and restore that as soon as we
+; adjust the pointers.  gr116 is used for this purpose.  Note that
+; gr116==0 means that carry should be set.
+
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___mpn_add_n
+	.word	0x60000
+___mpn_add_n:
+	srl	gr117,lr5,3
+	sub	gr118,gr117,1
+	jmpt	gr118,Ltail
+	 constn	gr116,-1		; init cy reg
+	sub	gr117,gr117,2		; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop:	mtsrim	cr,(8-1)
+	loadm	0,0,gr96,lr3
+	add	lr3,lr3,32
+	mtsrim	cr,(8-1)
+	loadm	0,0,gr104,lr4
+	add	lr4,lr4,32
+
+	subr	gr116,gr116,0		; restore carry
+	addc	gr96,gr96,gr104
+	addc	gr97,gr97,gr105
+	addc	gr98,gr98,gr106
+	addc	gr99,gr99,gr107
+	addc	gr100,gr100,gr108
+	addc	gr101,gr101,gr109
+	addc	gr102,gr102,gr110
+	addc	gr103,gr103,gr111
+	subc	gr116,gr116,gr116	; gr116 = not(cy)
+
+	mtsrim	cr,(8-1)
+	storem	0,0,gr96,lr2
+	jmpfdec	gr117,Loop
+	 add	lr2,lr2,32
+
+; Code for the last up-to-7 limbs.
+; This code might look very strange, but it's hard to write it
+; differently without major slowdown.
+
+	and	lr5,lr5,(8-1)
+Ltail:	sub	gr118,lr5,1		; count for CR
+	jmpt	gr118,Lend
+	 sub	gr117,lr5,2		; count for jmpfdec
+
+	mtsr	cr,gr118
+	loadm	0,0,gr96,lr3
+	mtsr	cr,gr118
+	loadm	0,0,gr104,lr4
+
+	subr	gr116,gr116,0		; restore carry
+
+	jmpfdec	gr117,L1
+	 addc	gr96,gr96,gr104
+	jmp	Lstore
+	 mtsr	cr,gr118
+L1:	jmpfdec	gr117,L2
+	 addc	gr97,gr97,gr105
+	jmp	Lstore
+	 mtsr	cr,gr118
+L2:	jmpfdec	gr117,L3
+	 addc	gr98,gr98,gr106
+	jmp	Lstore
+	 mtsr	cr,gr118
+L3:	jmpfdec	gr117,L4
+	 addc	gr99,gr99,gr107
+	jmp	Lstore
+	 mtsr	cr,gr118
+L4:	jmpfdec	gr117,L5
+	 addc	gr100,gr100,gr108
+	jmp	Lstore
+	 mtsr	cr,gr118
+L5:	jmpfdec	gr117,L6
+	 addc	gr101,gr101,gr109
+	jmp	Lstore
+	 mtsr	cr,gr118
+L6:	addc	gr102,gr102,gr110
+
+Lstore:	storem	0,0,gr96,lr2
+	subc	gr116,gr116,gr116	; gr116 = not(cy)
+
+Lend:	jmpi	lr0
+	 add	gr96,gr116,1
diff --git a/mpn/a29k/addmul_1.s b/mpn/a29k/addmul_1.s
new file mode 100644
index 000000000..8c0ec96ce
--- /dev/null
+++ b/mpn/a29k/addmul_1.s
@@ -0,0 +1,113 @@
+; 29000 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
+; add the product to a second limb vector.
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; size		lr4
+; s2_limb	lr5
+
+	.cputype 29050
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___mpn_addmul_1
+	.word	0x60000
+___mpn_addmul_1:
+	sub	lr4,lr4,8
+	jmpt	lr4,Ltail
+	 const	gr120,0			; init cylimb reg
+
+	srl	gr117,lr4,3		; divide by 8
+	sub	gr117,gr117,1		; count for jmpfdec
+
+Loop:	mtsrim	cr,(8-1)
+	loadm	0,0,gr96,lr3
+	add	lr3,lr3,32
+
+	multiplu gr104,gr96,lr5
+	multmu	 gr96,gr96,lr5
+	multiplu gr105,gr97,lr5
+	multmu	 gr97,gr97,lr5
+	multiplu gr106,gr98,lr5
+	multmu	 gr98,gr98,lr5
+	multiplu gr107,gr99,lr5
+	multmu	 gr99,gr99,lr5
+	multiplu gr108,gr100,lr5
+	multmu	 gr100,gr100,lr5
+	multiplu gr109,gr101,lr5
+	multmu	 gr101,gr101,lr5
+	multiplu gr110,gr102,lr5
+	multmu	 gr102,gr102,lr5
+	multiplu gr111,gr103,lr5
+	multmu	 gr103,gr103,lr5
+
+	add	gr104,gr104,gr120
+	addc	gr105,gr105,gr96
+	addc	gr106,gr106,gr97
+	addc	gr107,gr107,gr98
+	addc	gr108,gr108,gr99
+	addc	gr109,gr109,gr100
+	addc	gr110,gr110,gr101
+	addc	gr111,gr111,gr102
+	addc	gr120,gr103,0
+
+	mtsrim	cr,(8-1)
+	loadm	0,0,gr96,lr2
+
+	add	gr104,gr96,gr104
+	addc	gr105,gr97,gr105
+	addc	gr106,gr98,gr106
+	addc	gr107,gr99,gr107
+	addc	gr108,gr100,gr108
+	addc	gr109,gr101,gr109
+	addc	gr110,gr102,gr110
+	addc	gr111,gr103,gr111
+	addc	gr120,gr120,0
+
+	mtsrim	cr,(8-1)
+	storem	0,0,gr104,lr2
+	jmpfdec	gr117,Loop
+	 add	lr2,lr2,32
+
+Ltail:	and	lr4,lr4,(8-1)
+	sub	gr118,lr4,1		; count for CR
+	jmpt	gr118,Lend
+	 sub	lr4,lr4,2
+	sub	lr2,lr2,4		; offset res_ptr by one limb
+
+Loop2:	load	0,0,gr116,lr3
+	add	lr3,lr3,4
+	multiplu gr117,gr116,lr5
+	multmu	gr118,gr116,lr5
+	add	lr2,lr2,4
+	load	0,0,gr119,lr2
+	add	gr117,gr117,gr120
+	addc	gr118,gr118,0
+	add	gr117,gr117,gr119
+	store	0,0,gr117,lr2
+	jmpfdec	lr4,Loop2
+	 addc	gr120,gr118,0
+
+Lend:	jmpi	lr0
+	 or	gr96,gr120,0		; copy
diff --git a/mpn/a29k/lshift.s b/mpn/a29k/lshift.s
new file mode 100644
index 000000000..7554e2cbb
--- /dev/null
+++ b/mpn/a29k/lshift.s
@@ -0,0 +1,93 @@
+; 29000 __mpn_lshift --
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; s2_ptr	lr4
+; size		lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___mpn_lshift
+	.word	0x60000
+___mpn_lshift:
+	sll	gr116,lr4,2
+	add	lr3,gr116,lr3
+	add	lr2,gr116,lr2
+	sub	lr3,lr3,4
+	load	0,0,gr119,lr3
+
+	subr	gr116,lr5,32
+	srl	gr96,gr119,gr116	; return value
+	sub	lr4,lr4,1		; actual loop count is SIZE - 1
+
+	srl	gr117,lr4,3		; chuck count = (actual count) / 8
+	cpeq	gr118,gr117,0
+	jmpt	gr118,Ltail
+	 mtsr	fc,lr5
+
+	sub	gr117,gr117,2		; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop:	sub	lr3,lr3,32
+	mtsrim	cr,(8-1)
+	loadm	0,0,gr100,lr3
+
+	extract	gr109,gr119,gr107
+	extract	gr108,gr107,gr106
+	extract	gr107,gr106,gr105
+	extract	gr106,gr105,gr104
+	extract	gr105,gr104,gr103
+	extract	gr104,gr103,gr102
+	extract	gr103,gr102,gr101
+	extract	gr102,gr101,gr100
+
+	sub	lr2,lr2,32
+	mtsrim	cr,(8-1)
+	storem	0,0,gr102,lr2
+	jmpfdec	gr117,Loop
+	 or	gr119,gr100,0
+
+; Code for the last up-to-7 limbs.
+
+	and	lr4,lr4,(8-1)
+Ltail:	cpeq	gr118,lr4,0
+	jmpt	gr118,Lend
+	 sub	lr4,lr4,2		; count for jmpfdec
+
+Loop2:	sub	lr3,lr3,4
+	load	0,0,gr116,lr3
+	extract	gr117,gr119,gr116
+	sub	lr2,lr2,4
+	store	0,0,gr117,lr2
+	jmpfdec	lr4,Loop2
+	 or	gr119,gr116,0
+
+Lend:	extract	gr117,gr119,0
+	sub	lr2,lr2,4
+	jmpi	lr0
+	 store	0,0,gr117,lr2
diff --git a/mpn/a29k/mul_1.s b/mpn/a29k/mul_1.s
new file mode 100644
index 000000000..5d120f48e
--- /dev/null
+++ b/mpn/a29k/mul_1.s
@@ -0,0 +1,97 @@
+; 29000 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; size		lr4
+; s2_limb	lr5
+
+	.cputype 29050
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___mpn_mul_1
+	.word	0x60000
+___mpn_mul_1:
+	sub	lr4,lr4,8
+	jmpt	lr4,Ltail
+	 const	gr120,0			; init cylimb reg
+
+	srl	gr117,lr4,3		; divide by 8
+	sub	gr117,gr117,1		; count for jmpfdec
+
+Loop:	mtsrim	cr,(8-1)
+	loadm	0,0,gr96,lr3
+	add	lr3,lr3,32
+
+	multiplu gr104,gr96,lr5
+	multmu	 gr96,gr96,lr5
+	multiplu gr105,gr97,lr5
+	multmu	 gr97,gr97,lr5
+	multiplu gr106,gr98,lr5
+	multmu	 gr98,gr98,lr5
+	multiplu gr107,gr99,lr5
+	multmu	 gr99,gr99,lr5
+	multiplu gr108,gr100,lr5
+	multmu	 gr100,gr100,lr5
+	multiplu gr109,gr101,lr5
+	multmu	 gr101,gr101,lr5
+	multiplu gr110,gr102,lr5
+	multmu	 gr102,gr102,lr5
+	multiplu gr111,gr103,lr5
+	multmu	 gr103,gr103,lr5
+
+	add	gr104,gr104,gr120
+	addc	gr105,gr105,gr96
+	addc	gr106,gr106,gr97
+	addc	gr107,gr107,gr98
+	addc	gr108,gr108,gr99
+	addc	gr109,gr109,gr100
+	addc	gr110,gr110,gr101
+	addc	gr111,gr111,gr102
+	addc	gr120,gr103,0
+
+	mtsrim	cr,(8-1)
+	storem	0,0,gr104,lr2
+	jmpfdec	gr117,Loop
+	 add	lr2,lr2,32
+
+Ltail:	and	lr4,lr4,(8-1)
+	sub	gr118,lr4,1		; count for CR
+	jmpt	gr118,Lend
+	 sub	lr4,lr4,2
+	sub	lr2,lr2,4		; offset res_ptr by one limb
+
+Loop2:	load	0,0,gr116,lr3
+	add	lr3,lr3,4
+	multiplu gr117,gr116,lr5
+	multmu	gr118,gr116,lr5
+	add	lr2,lr2,4
+	add	gr117,gr117,gr120
+	store	0,0,gr117,lr2
+	jmpfdec	lr4,Loop2
+	 addc	gr120,gr118,0
+
+Lend:	jmpi	lr0
+	 or	gr96,gr120,0		; copy
diff --git a/mpn/a29k/rshift.s b/mpn/a29k/rshift.s
new file mode 100644
index 000000000..fe53b71e2
--- /dev/null
+++ b/mpn/a29k/rshift.s
@@ -0,0 +1,89 @@
+; 29000 __mpn_rshift --
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; s2_ptr	lr4
+; size		lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___mpn_rshift
+	.word	0x60000
+___mpn_rshift:
+	load	0,0,gr119,lr3
+	add	lr3,lr3,4
+
+	subr	gr116,lr5,32
+	sll	gr96,gr119,gr116	; return value
+	sub	lr4,lr4,1		; actual loop count is SIZE - 1
+
+	srl	gr117,lr4,3		; chuck count = (actual count) / 8
+	cpeq	gr118,gr117,0
+	jmpt	gr118,Ltail
+	 mtsr	fc,gr116
+
+	sub	gr117,gr117,2		; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop:	mtsrim	cr,(8-1)
+	loadm	0,0,gr100,lr3
+	add	lr3,lr3,32
+
+	extract	gr98,gr100,gr119
+	extract	gr99,gr101,gr100
+	extract	gr100,gr102,gr101
+	extract	gr101,gr103,gr102
+	extract	gr102,gr104,gr103
+	extract	gr103,gr105,gr104
+	extract	gr104,gr106,gr105
+	extract	gr105,gr107,gr106
+
+	mtsrim	cr,(8-1)
+	storem	0,0,gr98,lr2
+	add	lr2,lr2,32
+	jmpfdec	gr117,Loop
+	 or	gr119,gr107,0
+
+; Code for the last up-to-7 limbs.
+
+	and	lr4,lr4,(8-1)
+Ltail:	cpeq	gr118,lr4,0
+	jmpt	gr118,Lend
+	 sub	lr4,lr4,2		; count for jmpfdec
+
+Loop2:	load	0,0,gr100,lr3
+	add	lr3,lr3,4
+	extract	gr117,gr100,gr119
+	store	0,0,gr117,lr2
+	add	lr2,lr2,4
+	jmpfdec	lr4,Loop2
+	 or	gr119,gr100,0
+
+Lend:	srl	gr117,gr119,lr5
+	jmpi	lr0
+	 store	0,0,gr117,lr2
diff --git a/mpn/a29k/sub_n.s b/mpn/a29k/sub_n.s
new file mode 100644
index 000000000..3c8d61065
--- /dev/null
+++ b/mpn/a29k/sub_n.s
@@ -0,0 +1,120 @@
+; 29000 __mpn_sub -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; s2_ptr	lr4
+; size		lr5
+
+; We use the loadm/storem instructions and operate on chunks of 8
+; limbs/per iteration, until less than 8 limbs remain.
+
+; The 29k has no addition or subtraction instructions that doesn't
+; affect carry, so we need to save and restore that as soon as we
+; adjust the pointers.  gr116 is used for this purpose.  Note that
+; gr116==0 means that carry should be set.
+
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___mpn_sub_n
+	.word	0x60000
+___mpn_sub_n:
+	srl	gr117,lr5,3
+	sub	gr118,gr117,1
+	jmpt	gr118,Ltail
+	 constn	gr116,-1		; init cy reg
+	sub	gr117,gr117,2		; count for jmpfdec
+
+; Main loop working 8 limbs/iteration.
+Loop:	mtsrim	cr,(8-1)
+	loadm	0,0,gr96,lr3
+	add	lr3,lr3,32
+	mtsrim	cr,(8-1)
+	loadm	0,0,gr104,lr4
+	add	lr4,lr4,32
+
+	subr	gr116,gr116,0		; restore carry
+	subc	gr96,gr96,gr104
+	subc	gr97,gr97,gr105
+	subc	gr98,gr98,gr106
+	subc	gr99,gr99,gr107
+	subc	gr100,gr100,gr108
+	subc	gr101,gr101,gr109
+	subc	gr102,gr102,gr110
+	subc	gr103,gr103,gr111
+	subc	gr116,gr116,gr116	; gr116 = not(cy)
+
+	mtsrim	cr,(8-1)
+	storem	0,0,gr96,lr2
+	jmpfdec	gr117,Loop
+	 add	lr2,lr2,32
+
+; Code for the last up-to-7 limbs.
+; This code might look very strange, but it's hard to write it
+; differently without major slowdown.
+
+	and	lr5,lr5,(8-1)
+Ltail:	sub	gr118,lr5,1		; count for CR
+	jmpt	gr118,Lend
+	 sub	gr117,lr5,2		; count for jmpfdec
+
+	mtsr	cr,gr118
+	loadm	0,0,gr96,lr3
+	mtsr	cr,gr118
+	loadm	0,0,gr104,lr4
+
+	subr	gr116,gr116,0		; restore carry
+
+	jmpfdec	gr117,L1
+	 subc	gr96,gr96,gr104
+	jmp	Lstore
+	 mtsr	cr,gr118
+L1:	jmpfdec	gr117,L2
+	 subc	gr97,gr97,gr105
+	jmp	Lstore
+	 mtsr	cr,gr118
+L2:	jmpfdec	gr117,L3
+	 subc	gr98,gr98,gr106
+	jmp	Lstore
+	 mtsr	cr,gr118
+L3:	jmpfdec	gr117,L4
+	 subc	gr99,gr99,gr107
+	jmp	Lstore
+	 mtsr	cr,gr118
+L4:	jmpfdec	gr117,L5
+	 subc	gr100,gr100,gr108
+	jmp	Lstore
+	 mtsr	cr,gr118
+L5:	jmpfdec	gr117,L6
+	 subc	gr101,gr101,gr109
+	jmp	Lstore
+	 mtsr	cr,gr118
+L6:	subc	gr102,gr102,gr110
+
+Lstore:	storem	0,0,gr96,lr2
+	subc	gr116,gr116,gr116	; gr116 = not(cy)
+
+Lend:	jmpi	lr0
+	 add	gr96,gr116,1
diff --git a/mpn/a29k/submul_1.s b/mpn/a29k/submul_1.s
new file mode 100644
index 000000000..ca2ef72a9
--- /dev/null
+++ b/mpn/a29k/submul_1.s
@@ -0,0 +1,116 @@
+; 29000 __mpn_submul_1 -- Multiply a limb vector with a single limb and
+; subtract the product from a second limb vector.
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	lr2
+; s1_ptr	lr3
+; size		lr4
+; s2_limb	lr5
+
+	.cputype 29050
+	.sect .lit,lit
+	.text
+	.align	4
+	.global	___mpn_submul_1
+	.word	0x60000
+___mpn_submul_1:
+	sub	lr4,lr4,8
+	jmpt	lr4,Ltail
+	 const	gr120,0			; init cylimb reg
+
+	srl	gr117,lr4,3		; divide by 8
+	sub	gr117,gr117,1		; count for jmpfdec
+
+Loop:	mtsrim	cr,(8-1)
+	loadm	0,0,gr96,lr3
+	add	lr3,lr3,32
+
+	multiplu gr104,gr96,lr5
+	multmu	 gr96,gr96,lr5
+	multiplu gr105,gr97,lr5
+	multmu	 gr97,gr97,lr5
+	multiplu gr106,gr98,lr5
+	multmu	 gr98,gr98,lr5
+	multiplu gr107,gr99,lr5
+	multmu	 gr99,gr99,lr5
+	multiplu gr108,gr100,lr5
+	multmu	 gr100,gr100,lr5
+	multiplu gr109,gr101,lr5
+	multmu	 gr101,gr101,lr5
+	multiplu gr110,gr102,lr5
+	multmu	 gr102,gr102,lr5
+	multiplu gr111,gr103,lr5
+	multmu	 gr103,gr103,lr5
+
+	add	gr104,gr104,gr120
+	addc	gr105,gr105,gr96
+	addc	gr106,gr106,gr97
+	addc	gr107,gr107,gr98
+	addc	gr108,gr108,gr99
+	addc	gr109,gr109,gr100
+	addc	gr110,gr110,gr101
+	addc	gr111,gr111,gr102
+	addc	gr120,gr103,0
+
+	mtsrim	cr,(8-1)
+	loadm	0,0,gr96,lr2
+
+	sub	gr96,gr96,gr104
+	subc	gr97,gr97,gr105
+	subc	gr98,gr98,gr106
+	subc	gr99,gr99,gr107
+	subc	gr100,gr100,gr108
+	subc	gr101,gr101,gr109
+	subc	gr102,gr102,gr110
+	subc	gr103,gr103,gr111
+
+	add	gr104,gr103,gr111	; invert carry from previus sub
+	addc	gr120,gr120,0
+
+	mtsrim	cr,(8-1)
+	storem	0,0,gr96,lr2
+	jmpfdec	gr117,Loop
+	 add	lr2,lr2,32
+
+Ltail:	and	lr4,lr4,(8-1)
+	sub	gr118,lr4,1		; count for CR
+	jmpt	gr118,Lend
+	 sub	lr4,lr4,2
+	sub	lr2,lr2,4		; offset res_ptr by one limb
+
+Loop2:	load	0,0,gr116,lr3
+	add	lr3,lr3,4
+	multiplu gr117,gr116,lr5
+	multmu	gr118,gr116,lr5
+	add	lr2,lr2,4
+	load	0,0,gr119,lr2
+	add	gr117,gr117,gr120
+	addc	gr118,gr118,0
+	sub	gr119,gr119,gr117
+	add	gr104,gr119,gr117	; invert carry from previus sub
+	store	0,0,gr119,lr2
+	jmpfdec	lr4,Loop2
+	 addc	gr120,gr118,0
+
+Lend:	jmpi	lr0
+	 or	gr96,gr120,0		; copy
diff --git a/mpn/alpha/README b/mpn/alpha/README
new file mode 100644
index 000000000..55c0a2917
--- /dev/null
+++ b/mpn/alpha/README
@@ -0,0 +1,53 @@
+This directory contains mpn functions optimized for DEC Alpha processors.
+
+RELEVANT OPTIMIZATION ISSUES
+
+EV4
+
+1. This chip has very limited store bandwidth.  The on-chip L1 cache is
+write-through, and a cache line is transfered from the store buffer to the
+off-chip L2 in as much 15 cycles on most systems.  This delay hurts
+mpn_add_n, mpn_sub_n, mpn_lshift, and mpn_rshift.
+
+2. Pairing is possible between memory instructions and integer arithmetic
+instructions.
+
+3. mulq and umulh is documented to have a latency of 23 cycles, but 2 of
+these cycles are pipelined.  Thus, multiply instructions can be issued at a
+rate of one each 21nd cycle.
+
+EV5
+
+1. The memory bandwidth of this chip seems excellent, both for loads and
+stores.  Even when the working set is larger than the on-chip L1 and L2
+caches, the perfromance remain almost unaffected.
+
+2. mulq has a measured latency of 13 cycles and an issue rate of 1 each 8th
+cycle.  umulh has a measured latency of 15 cycles and an issue rate of 1
+each 10th cycle.  But the exact timing is somewhat confusing.
+
+3. mpn_add_n.  With 4-fold unrolling, we need 37 instructions, whereof 12
+   are memory operations.  This will take at least
+	ceil(37/2) [dual issue] + 1 [taken branch] = 20 cycles
+   We have 12 memory cycles, plus 4 after-store conflict cycles, or 16 data
+   cache cycles, which should be completely hidden in the 20 issue cycles.
+   The computation is inherently serial, with these dependencies:
+     addq
+     /   \
+   addq  cmpult
+     |     |
+   cmpult  |
+       \  /
+        or
+   I.e., there is a 4 cycle path for each limb, making 16 cycles the absolute
+   minimum.  We could replace the `or' with a cmoveq/cmovne, which would save
+   a cycle on EV5, but that might waste a cycle on EV4.  Also, cmov takes 2
+   cycles.
+     addq
+     /   \
+   addq  cmpult
+     |      \
+   cmpult -> cmovne
+
+STATUS
+
diff --git a/mpn/alpha/add_n.s b/mpn/alpha/add_n.s
new file mode 100644
index 000000000..426556e39
--- /dev/null
+++ b/mpn/alpha/add_n.s
@@ -0,0 +1,120 @@
+ # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
+ # store sum in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$16
+ # s1_ptr	$17
+ # s2_ptr	$18
+ # size		$19
+
+	.set	noreorder
+	.set	noat
+.text
+	.align	3
+	.globl	__mpn_add_n
+	.ent	__mpn_add_n
+__mpn_add_n:
+	.frame	$30,0,$26,0
+
+	ldq	$3,0($17)
+	ldq	$4,0($18)
+
+	subq	$19,1,$19
+	and	$19,4-1,$2	# number of limbs in first loop
+	bis	$31,$31,$0
+	beq	$2,.L0		# if multiple of 4 limbs, skip first loop
+
+	subq	$19,$2,$19
+
+.Loop0:	subq	$2,1,$2
+	ldq	$5,8($17)
+	addq	$4,$0,$4
+	ldq	$6,8($18)
+	cmpult	$4,$0,$1
+	addq	$3,$4,$4
+	cmpult	$4,$3,$0
+	stq	$4,0($16)
+	or	$0,$1,$0
+
+	addq	$17,8,$17
+	addq	$18,8,$18
+	bis	$5,$5,$3
+	bis	$6,$6,$4
+	addq	$16,8,$16
+	bne	$2,.Loop0
+
+.L0:	beq	$19,.Lend
+
+	.align	3
+.Loop:	subq	$19,4,$19
+
+	ldq	$5,8($17)
+	addq	$4,$0,$4
+	ldq	$6,8($18)
+	cmpult	$4,$0,$1
+	addq	$3,$4,$4
+	cmpult	$4,$3,$0
+	stq	$4,0($16)
+	or	$0,$1,$0
+
+	ldq	$3,16($17)
+	addq	$6,$0,$6
+	ldq	$4,16($18)
+	cmpult	$6,$0,$1
+	addq	$5,$6,$6
+	cmpult	$6,$5,$0
+	stq	$6,8($16)
+	or	$0,$1,$0
+
+	ldq	$5,24($17)
+	addq	$4,$0,$4
+	ldq	$6,24($18)
+	cmpult	$4,$0,$1
+	addq	$3,$4,$4
+	cmpult	$4,$3,$0
+	stq	$4,16($16)
+	or	$0,$1,$0
+
+	ldq	$3,32($17)
+	addq	$6,$0,$6
+	ldq	$4,32($18)
+	cmpult	$6,$0,$1
+	addq	$5,$6,$6
+	cmpult	$6,$5,$0
+	stq	$6,24($16)
+	or	$0,$1,$0
+
+	addq	$17,32,$17
+	addq	$18,32,$18
+	addq	$16,32,$16
+	bne	$19,.Loop
+
+.Lend:	addq	$4,$0,$4
+	cmpult	$4,$0,$1
+	addq	$3,$4,$4
+	cmpult	$4,$3,$0
+	stq	$4,0($16)
+	or	$0,$1,$0
+	ret	$31,($26),1
+
+	.end	__mpn_add_n
diff --git a/mpn/alpha/addmul_1.s b/mpn/alpha/addmul_1.s
new file mode 100644
index 000000000..048238ae9
--- /dev/null
+++ b/mpn/alpha/addmul_1.s
@@ -0,0 +1,92 @@
+ # Alpha 21064 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+ # the result to a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	r16
+ # s1_ptr	r17
+ # size		r18
+ # s2_limb	r19
+
+ # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+
+	.set	noreorder
+	.set	noat
+.text
+	.align	3
+	.globl	__mpn_addmul_1
+	.ent	__mpn_addmul_1 2
+__mpn_addmul_1:
+	.frame	$30,0,$26
+
+	ldq	$2,0($17)	# $2 = s1_limb
+	addq	$17,8,$17	# s1_ptr++
+	subq	$18,1,$18	# size--
+	mulq	$2,$19,$3	# $3 = prod_low
+	ldq	$5,0($16)	# $5 = *res_ptr
+	umulh	$2,$19,$0	# $0 = prod_high
+	beq	$18,.Lend1	# jump if size was == 1
+	ldq	$2,0($17)	# $2 = s1_limb
+	addq	$17,8,$17	# s1_ptr++
+	subq	$18,1,$18	# size--
+	addq	$5,$3,$3
+	cmpult	$3,$5,$4
+	stq	$3,0($16)
+	addq	$16,8,$16	# res_ptr++
+	beq	$18,.Lend2	# jump if size was == 2
+
+	.align	3
+.Loop:	mulq	$2,$19,$3	# $3 = prod_low
+	ldq	$5,0($16)	# $5 = *res_ptr
+	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
+	subq	$18,1,$18	# size--
+	umulh	$2,$19,$4	# $4 = cy_limb
+	ldq	$2,0($17)	# $2 = s1_limb
+	addq	$17,8,$17	# s1_ptr++
+	addq	$3,$0,$3	# $3 = cy_limb + prod_low
+	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
+	addq	$5,$3,$3
+	cmpult	$3,$5,$5
+	stq	$3,0($16)
+	addq	$16,8,$16	# res_ptr++
+	addq	$5,$0,$0	# combine carries
+	bne	$18,.Loop
+
+.Lend2:	mulq	$2,$19,$3	# $3 = prod_low
+	ldq	$5,0($16)	# $5 = *res_ptr
+	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
+	umulh	$2,$19,$4	# $4 = cy_limb
+	addq	$3,$0,$3	# $3 = cy_limb + prod_low
+	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
+	addq	$5,$3,$3
+	cmpult	$3,$5,$5
+	stq	$3,0($16)
+	addq	$5,$0,$0	# combine carries
+	addq	$4,$0,$0	# cy_limb = prod_high + cy
+	ret	$31,($26),1
+.Lend1:	addq	$5,$3,$3
+	cmpult	$3,$5,$5
+	stq	$3,0($16)
+	addq	$0,$5,$0
+	ret	$31,($26),1
+
+	.end	__mpn_addmul_1
diff --git a/mpn/alpha/ev5/add_n.s b/mpn/alpha/ev5/add_n.s
new file mode 100644
index 000000000..1251a1fb7
--- /dev/null
+++ b/mpn/alpha/ev5/add_n.s
@@ -0,0 +1,148 @@
+ # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
+ # store sum in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$16
+ # s1_ptr	$17
+ # s2_ptr	$18
+ # size		$19
+
+	.set	noreorder
+	.set	noat
+.text
+	.align	3
+	.globl	__mpn_add_n
+	.ent	__mpn_add_n
+__mpn_add_n:
+	.frame	$30,0,$26,0
+
+	or	$31,$31,$25		# clear cy
+	subq	$19,4,$19		# decr loop cnt
+	blt	$19,.Lend2		# if less than 4 limbs, goto 2nd loop
+ # Start software pipeline for 1st loop
+	ldq	$0,0($18)
+	ldq	$1,8($18)
+	ldq	$4,0($17)
+	ldq	$5,8($17)
+	addq	$17,32,$17		# update s1_ptr
+	ldq	$2,16($18)
+	addq	$0,$4,$20		# 1st main add
+	ldq	$3,24($18)
+	subq	$19,4,$19		# decr loop cnt
+	ldq	$6,-16($17)
+	cmpult	$20,$0,$25		# compute cy from last add
+	ldq	$7,-8($17)
+	addq	$1,$25,$28		# cy add
+	addq	$18,32,$18		# update s2_ptr
+	addq	$5,$28,$21		# 2nd main add
+	cmpult	$28,$25,$8		# compute cy from last add
+	blt	$19,.Lend1		# if less than 4 limbs remain, jump
+ # 1st loop handles groups of 4 limbs in a software pipeline
+	.align	4
+.Loop:	cmpult	$21,$28,$25		# compute cy from last add
+	ldq	$0,0($18)
+	or	$8,$25,$25		# combine cy from the two adds
+	ldq	$1,8($18)
+	addq	$2,$25,$28		# cy add
+	ldq	$4,0($17)
+	addq	$28,$6,$22		# 3rd main add
+	ldq	$5,8($17)
+	cmpult	$28,$25,$8		# compute cy from last add
+	cmpult	$22,$28,$25		# compute cy from last add
+	stq	$20,0($16)
+	or	$8,$25,$25		# combine cy from the two adds
+	stq	$21,8($16)
+	addq	$3,$25,$28		# cy add
+	addq	$28,$7,$23		# 4th main add
+	cmpult	$28,$25,$8		# compute cy from last add
+	cmpult	$23,$28,$25		# compute cy from last add
+	addq	$17,32,$17		# update s1_ptr
+	or	$8,$25,$25		# combine cy from the two adds
+	addq	$16,32,$16		# update res_ptr
+	addq	$0,$25,$28		# cy add
+	ldq	$2,16($18)
+	addq	$4,$28,$20		# 1st main add
+	ldq	$3,24($18)
+	cmpult	$28,$25,$8		# compute cy from last add
+	ldq	$6,-16($17)
+	cmpult	$20,$28,$25		# compute cy from last add
+	ldq	$7,-8($17)
+	or	$8,$25,$25		# combine cy from the two adds
+	subq	$19,4,$19		# decr loop cnt
+	stq	$22,-16($16)
+	addq	$1,$25,$28		# cy add
+	stq	$23,-8($16)
+	addq	$5,$28,$21		# 2nd main add
+	addq	$18,32,$18		# update s2_ptr
+	cmpult	$28,$25,$8		# compute cy from last add
+	bge	$19,.Loop
+ # Finish software pipeline for 1st loop
+.Lend1:	cmpult	$21,$28,$25		# compute cy from last add
+	or	$8,$25,$25		# combine cy from the two adds
+	addq	$2,$25,$28		# cy add
+	addq	$28,$6,$22		# 3rd main add
+	cmpult	$28,$25,$8		# compute cy from last add
+	cmpult	$22,$28,$25		# compute cy from last add
+	stq	$20,0($16)
+	or	$8,$25,$25		# combine cy from the two adds
+	stq	$21,8($16)
+	addq	$3,$25,$28		# cy add
+	addq	$28,$7,$23		# 4th main add
+	cmpult	$28,$25,$8		# compute cy from last add
+	cmpult	$23,$28,$25		# compute cy from last add
+	or	$8,$25,$25		# combine cy from the two adds
+	addq	$16,32,$16		# update res_ptr
+	stq	$22,-16($16)
+	stq	$23,-8($16)
+.Lend2:	addq	$19,4,$19		# restore loop cnt
+	beq	$19,.Lret
+ # Start software pipeline for 2nd loop
+	ldq	$0,0($18)
+	ldq	$4,0($17)
+	subq	$19,1,$19
+	beq	$19,.Lend0
+ # 2nd loop handles remaining 1-3 limbs
+	.align	4
+.Loop0:	addq	$0,$25,$28		# cy add
+	ldq	$0,8($18)
+	addq	$4,$28,$20		# main add
+	ldq	$4,8($17)
+	addq	$18,8,$18
+	cmpult	$28,$25,$8		# compute cy from last add
+	addq	$17,8,$17
+	stq	$20,0($16)
+	cmpult	$20,$28,$25		# compute cy from last add
+	subq	$19,1,$19		# decr loop cnt
+	or	$8,$25,$25		# combine cy from the two adds
+	addq	$16,8,$16
+	bne	$19,.Loop0
+.Lend0:	addq	$0,$25,$28		# cy add
+	addq	$4,$28,$20		# main add
+	cmpult	$28,$25,$8		# compute cy from last add
+	cmpult	$20,$28,$25		# compute cy from last add
+	stq	$20,0($16)
+	or	$8,$25,$25		# combine cy from the two adds
+
+.Lret:	or	$25,$31,$0		# return cy
+	ret	$31,($26),1
+	.end	__mpn_add_n
diff --git a/mpn/alpha/ev5/lshift.s b/mpn/alpha/ev5/lshift.s
new file mode 100644
index 000000000..ced55b720
--- /dev/null
+++ b/mpn/alpha/ev5/lshift.s
@@ -0,0 +1,174 @@
+ # Alpha EV5 __mpn_lshift --
+
+ # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	r16
+ # s1_ptr	r17
+ # size		r18
+ # cnt		r19
+
+ # This code runs at 3.25 cycles/limb on the EV5.
+
+	.set	noreorder
+	.set	noat
+.text
+	.align	3
+	.globl	__mpn_lshift
+	.ent	__mpn_lshift
+__mpn_lshift:
+	.frame	$30,0,$26,0
+
+	s8addq	$18,$17,$17	# make r17 point at end of s1
+	ldq	$4,-8($17)	# load first limb
+	subq	$31,$19,$20
+	s8addq	$18,$16,$16	# make r16 point at end of RES
+	subq	$18,1,$18
+	and	$18,4-1,$28	# number of limbs in first loop
+	srl	$4,$20,$0	# compute function result
+
+	beq	$28,.L0
+	subq	$18,$28,$18
+
+	.align	3
+.Loop0:	ldq	$3,-16($17)
+	subq	$16,8,$16
+	sll	$4,$19,$5
+	subq	$17,8,$17
+	subq	$28,1,$28
+	srl	$3,$20,$6
+	or	$3,$3,$4
+	or	$5,$6,$8
+	stq	$8,0($16)
+	bne	$28,.Loop0
+
+.L0:	sll	$4,$19,$24
+	beq	$18,.Lend
+ # warm up phase 1
+	ldq	$1,-16($17)
+	subq	$18,4,$18
+	ldq	$2,-24($17)
+	ldq	$3,-32($17)
+	ldq	$4,-40($17)
+	beq	$18,.Lend1
+ # warm up phase 2
+	srl	$1,$20,$7
+	sll	$1,$19,$21
+	srl	$2,$20,$8
+	ldq	$1,-48($17)
+	sll	$2,$19,$22
+	ldq	$2,-56($17)
+	srl	$3,$20,$5
+	or	$7,$24,$7
+	sll	$3,$19,$23
+	or	$8,$21,$8
+	srl	$4,$20,$6
+	ldq	$3,-64($17)
+	sll	$4,$19,$24
+	ldq	$4,-72($17)
+	subq	$18,4,$18
+	beq	$18,.Lend2
+	.align  4
+ # main loop
+.Loop:	stq	$7,-8($16)
+	or	$5,$22,$5
+	stq	$8,-16($16)
+	or	$6,$23,$6
+
+	srl	$1,$20,$7
+	subq	$18,4,$18
+	sll	$1,$19,$21
+	unop	# ldq	$31,-96($17)
+
+	srl	$2,$20,$8
+	ldq	$1,-80($17)
+	sll	$2,$19,$22
+	ldq	$2,-88($17)
+
+	stq	$5,-24($16)
+	or	$7,$24,$7
+	stq	$6,-32($16)
+	or	$8,$21,$8
+
+	srl	$3,$20,$5
+	unop	# ldq	$31,-96($17)
+	sll	$3,$19,$23
+	subq	$16,32,$16
+
+	srl	$4,$20,$6
+	ldq	$3,-96($17)
+	sll	$4,$19,$24
+	ldq	$4,-104($17)
+
+	subq	$17,32,$17
+	bne	$18,.Loop
+ # cool down phase 2/1
+.Lend2:	stq	$7,-8($16)
+	or	$5,$22,$5
+	stq	$8,-16($16)
+	or	$6,$23,$6
+	srl	$1,$20,$7
+	sll	$1,$19,$21
+	srl	$2,$20,$8
+	sll	$2,$19,$22
+	stq	$5,-24($16)
+	or	$7,$24,$7
+	stq	$6,-32($16)
+	or	$8,$21,$8
+	srl	$3,$20,$5
+	sll	$3,$19,$23
+	srl	$4,$20,$6
+	sll	$4,$19,$24
+ # cool down phase 2/2
+	stq	$7,-40($16)
+	or	$5,$22,$5
+	stq	$8,-48($16)
+	or	$6,$23,$6
+	stq	$5,-56($16)
+	stq	$6,-64($16)
+ # cool down phase 2/3
+	stq	$24,-72($16)
+	ret	$31,($26),1
+
+ # cool down phase 1/1
+.Lend1:	srl	$1,$20,$7
+	sll	$1,$19,$21
+	srl	$2,$20,$8
+	sll	$2,$19,$22
+	srl	$3,$20,$5
+	or	$7,$24,$7
+	sll	$3,$19,$23
+	or	$8,$21,$8
+	srl	$4,$20,$6
+	sll	$4,$19,$24
+ # cool down phase 1/2
+	stq	$7,-8($16)
+	or	$5,$22,$5
+	stq	$8,-16($16)
+	or	$6,$23,$6
+	stq	$5,-24($16)
+	stq	$6,-32($16)
+	stq	$24,-40($16)
+	ret	$31,($26),1
+
+.Lend:	stq	$24,-8($16)
+	ret	$31,($26),1
+	.end	__mpn_lshift
diff --git a/mpn/alpha/ev5/rshift.s b/mpn/alpha/ev5/rshift.s
new file mode 100644
index 000000000..6e24fef96
--- /dev/null
+++ b/mpn/alpha/ev5/rshift.s
@@ -0,0 +1,172 @@
+ # Alpha EV5 __mpn_rshift --
+
+ # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	r16
+ # s1_ptr	r17
+ # size		r18
+ # cnt		r19
+
+ # This code runs at 3.25 cycles/limb on the EV5.
+
+	.set	noreorder
+	.set	noat
+.text
+	.align	3
+	.globl	__mpn_rshift
+	.ent	__mpn_rshift
+__mpn_rshift:
+	.frame	$30,0,$26,0
+
+	ldq	$4,0($17)	# load first limb
+	subq	$31,$19,$20
+	subq	$18,1,$18
+	and	$18,4-1,$28	# number of limbs in first loop
+	sll	$4,$20,$0	# compute function result
+
+	beq	$28,.L0
+	subq	$18,$28,$18
+
+	.align	3
+.Loop0:	ldq	$3,8($17)
+	addq	$16,8,$16
+	srl	$4,$19,$5
+	addq	$17,8,$17
+	subq	$28,1,$28
+	sll	$3,$20,$6
+	or	$3,$3,$4
+	or	$5,$6,$8
+	stq	$8,-8($16)
+	bne	$28,.Loop0
+
+.L0:	srl	$4,$19,$24
+	beq	$18,.Lend
+ # warm up phase 1
+	ldq	$1,8($17)
+	subq	$18,4,$18
+	ldq	$2,16($17)
+	ldq	$3,24($17)
+	ldq	$4,32($17)
+	beq	$18,.Lend1
+ # warm up phase 2
+	sll	$1,$20,$7
+	srl	$1,$19,$21
+	sll	$2,$20,$8
+	ldq	$1,40($17)
+	srl	$2,$19,$22
+	ldq	$2,48($17)
+	sll	$3,$20,$5
+	or	$7,$24,$7
+	srl	$3,$19,$23
+	or	$8,$21,$8
+	sll	$4,$20,$6
+	ldq	$3,56($17)
+	srl	$4,$19,$24
+	ldq	$4,64($17)
+	subq	$18,4,$18
+	beq	$18,.Lend2
+	.align  4
+ # main loop
+.Loop:	stq	$7,0($16)
+	or	$5,$22,$5
+	stq	$8,8($16)
+	or	$6,$23,$6
+
+	sll	$1,$20,$7
+	subq	$18,4,$18
+	srl	$1,$19,$21
+	unop	# ldq	$31,-96($17)
+
+	sll	$2,$20,$8
+	ldq	$1,72($17)
+	srl	$2,$19,$22
+	ldq	$2,80($17)
+
+	stq	$5,16($16)
+	or	$7,$24,$7
+	stq	$6,24($16)
+	or	$8,$21,$8
+
+	sll	$3,$20,$5
+	unop	# ldq	$31,-96($17)
+	srl	$3,$19,$23
+	addq	$16,32,$16
+
+	sll	$4,$20,$6
+	ldq	$3,88($17)
+	srl	$4,$19,$24
+	ldq	$4,96($17)
+
+	addq	$17,32,$17
+	bne	$18,.Loop
+ # cool down phase 2/1
+.Lend2:	stq	$7,0($16)
+	or	$5,$22,$5
+	stq	$8,8($16)
+	or	$6,$23,$6
+	sll	$1,$20,$7
+	srl	$1,$19,$21
+	sll	$2,$20,$8
+	srl	$2,$19,$22
+	stq	$5,16($16)
+	or	$7,$24,$7
+	stq	$6,24($16)
+	or	$8,$21,$8
+	sll	$3,$20,$5
+	srl	$3,$19,$23
+	sll	$4,$20,$6
+	srl	$4,$19,$24
+ # cool down phase 2/2
+	stq	$7,32($16)
+	or	$5,$22,$5
+	stq	$8,40($16)
+	or	$6,$23,$6
+	stq	$5,48($16)
+	stq	$6,56($16)
+ # cool down phase 2/3
+	stq	$24,64($16)
+	ret	$31,($26),1
+
+ # cool down phase 1/1
+.Lend1:	sll	$1,$20,$7
+	srl	$1,$19,$21
+	sll	$2,$20,$8
+	srl	$2,$19,$22
+	sll	$3,$20,$5
+	or	$7,$24,$7
+	srl	$3,$19,$23
+	or	$8,$21,$8
+	sll	$4,$20,$6
+	srl	$4,$19,$24
+ # cool down phase 1/2
+	stq	$7,0($16)
+	or	$5,$22,$5
+	stq	$8,8($16)
+	or	$6,$23,$6
+	stq	$5,16($16)
+	stq	$6,24($16)
+	stq	$24,32($16)
+	ret	$31,($26),1
+
+.Lend:	stq	$24,0($16)
+	ret	$31,($26),1
+	.end	__mpn_rshift
diff --git a/mpn/alpha/ev5/sub_n.s b/mpn/alpha/ev5/sub_n.s
new file mode 100644
index 000000000..6743af50b
--- /dev/null
+++ b/mpn/alpha/ev5/sub_n.s
@@ -0,0 +1,149 @@
+ # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$16
+ # s1_ptr	$17
+ # s2_ptr	$18
+ # size		$19
+
+	.set	noreorder
+	.set	noat
+.text
+	.align	3
+	.globl	__mpn_sub_n
+	.ent	__mpn_sub_n
+__mpn_sub_n:
+	.frame	$30,0,$26,0
+
+	or	$31,$31,$25		# clear cy
+	subq	$19,4,$19		# decr loop cnt
+	blt	$19,.Lend2		# if less than 4 limbs, goto 2nd loop
+ # Start software pipeline for 1st loop
+	ldq	$0,0($18)
+	ldq	$1,8($18)
+	ldq	$4,0($17)
+	ldq	$5,8($17)
+	addq	$17,32,$17		# update s1_ptr
+	ldq	$2,16($18)
+	subq	$4,$0,$20		# 1st main sub
+	ldq	$3,24($18)
+	subq	$19,4,$19		# decr loop cnt
+	ldq	$6,-16($17)
+	cmpult	$4,$20,$25		# compute cy from last sub
+	ldq	$7,-8($17)
+	addq	$1,$25,$28		# cy add
+	addq	$18,32,$18		# update s2_ptr
+	subq	$5,$28,$21		# 2nd main sub
+	cmpult	$28,$25,$8		# compute cy from last add
+	blt	$19,.Lend1		# if less than 4 limbs remain, jump
+ # 1st loop handles groups of 4 limbs in a software pipeline
+	.align	4
+.Loop:	cmpult	$5,$21,$25		# compute cy from last add
+	ldq	$0,0($18)
+	or	$8,$25,$25		# combine cy from the two adds
+	ldq	$1,8($18)
+	addq	$2,$25,$28		# cy add
+	ldq	$4,0($17)
+	subq	$6,$28,$22		# 3rd main sub
+	ldq	$5,8($17)
+	cmpult	$28,$25,$8		# compute cy from last add
+	cmpult	$6,$22,$25		# compute cy from last add
+	stq	$20,0($16)
+	or	$8,$25,$25		# combine cy from the two adds
+	stq	$21,8($16)
+	addq	$3,$25,$28		# cy add
+	subq	$7,$28,$23		# 4th main sub
+	cmpult	$28,$25,$8		# compute cy from last add
+	cmpult	$7,$23,$25		# compute cy from last add
+	addq	$17,32,$17		# update s1_ptr
+	or	$8,$25,$25		# combine cy from the two adds
+	addq	$16,32,$16		# update res_ptr
+	addq	$0,$25,$28		# cy add
+	ldq	$2,16($18)
+	subq	$4,$28,$20		# 1st main sub
+	ldq	$3,24($18)
+	cmpult	$28,$25,$8		# compute cy from last add
+	ldq	$6,-16($17)
+	cmpult	$4,$20,$25		# compute cy from last add
+	ldq	$7,-8($17)
+	or	$8,$25,$25		# combine cy from the two adds
+	subq	$19,4,$19		# decr loop cnt
+	stq	$22,-16($16)
+	addq	$1,$25,$28		# cy add
+	stq	$23,-8($16)
+	subq	$5,$28,$21		# 2nd main sub
+	addq	$18,32,$18		# update s2_ptr
+	cmpult	$28,$25,$8		# compute cy from last add
+	bge	$19,.Loop
+ # Finish software pipeline for 1st loop
+.Lend1:	cmpult	$5,$21,$25		# compute cy from last add
+	or	$8,$25,$25		# combine cy from the two adds
+	addq	$2,$25,$28		# cy add
+	subq	$6,$28,$22		# 3rd main sub
+	cmpult	$28,$25,$8		# compute cy from last add
+	cmpult	$6,$22,$25		# compute cy from last add
+	stq	$20,0($16)
+	or	$8,$25,$25		# combine cy from the two adds
+	stq	$21,8($16)
+	addq	$3,$25,$28		# cy add
+	subq	$7,$28,$23		# 4th main sub
+	cmpult	$28,$25,$8		# compute cy from last add
+	cmpult	$7,$23,$25		# compute cy from last add
+	or	$8,$25,$25		# combine cy from the two adds
+	addq	$16,32,$16		# update res_ptr
+	stq	$22,-16($16)
+	stq	$23,-8($16)
+.Lend2:	addq	$19,4,$19		# restore loop cnt
+	beq	$19,.Lret
+ # Start software pipeline for 2nd loop
+	ldq	$0,0($18)
+	ldq	$4,0($17)
+	subq	$19,1,$19
+	beq	$19,.Lend0
+ # 2nd loop handles remaining 1-3 limbs
+	.align	4
+.Loop0:	addq	$0,$25,$28		# cy add
+	ldq	$0,8($18)
+	subq	$4,$28,$20		# main sub
+	ldq	$1,8($17)
+	addq	$18,8,$18
+	cmpult	$28,$25,$8		# compute cy from last add
+	addq	$17,8,$17
+	stq	$20,0($16)
+	cmpult	$4,$20,$25		# compute cy from last add
+	subq	$19,1,$19		# decr loop cnt
+	or	$8,$25,$25		# combine cy from the two adds
+	addq	$16,8,$16
+	or	$1,$31,$4
+	bne	$19,.Loop0
+.Lend0:	addq	$0,$25,$28		# cy add
+	subq	$4,$28,$20		# main sub
+	cmpult	$28,$25,$8		# compute cy from last add
+	cmpult	$4,$20,$25		# compute cy from last add
+	stq	$20,0($16)
+	or	$8,$25,$25		# combine cy from the two adds
+
+.Lret:	or	$25,$31,$0		# return cy
+	ret	$31,($26),1
+	.end	__mpn_sub_n
diff --git a/mpn/alpha/gmp-mparam.h b/mpn/alpha/gmp-mparam.h
new file mode 100644
index 000000000..a3c66974d
--- /dev/null
+++ b/mpn/alpha/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
diff --git a/mpn/alpha/lshift.s b/mpn/alpha/lshift.s
new file mode 100644
index 000000000..13bd24a42
--- /dev/null
+++ b/mpn/alpha/lshift.s
@@ -0,0 +1,109 @@
+ # Alpha 21064 __mpn_lshift --
+
+ # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	r16
+ # s1_ptr	r17
+ # size		r18
+ # cnt		r19
+
+ # This code runs at 4.8 cycles/limb on the 21064.  With infinite unrolling,
+ # it would take 4 cycles/limb.  It should be possible to get down to 3
+ # cycles/limb since both ldq and stq can be paired with the other used
+ # instructions.  But there are many restrictions in the 21064 pipeline that
+ # makes it hard, if not impossible, to get down to 3 cycles/limb:
+
+ # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+ # 2. Only aligned instruction pairs can be paired.
+ # 3. The store buffer or silo might not be able to deal with the bandwidth.
+
+	.set	noreorder
+	.set	noat
+.text
+	.align	3
+	.globl	__mpn_lshift
+	.ent	__mpn_lshift
+__mpn_lshift:
+	.frame	$30,0,$26,0
+
+	s8addq	$18,$17,$17	# make r17 point at end of s1
+	ldq	$4,-8($17)	# load first limb
+	subq	$17,8,$17
+	subq	$31,$19,$7
+	s8addq	$18,$16,$16	# make r16 point at end of RES
+	subq	$18,1,$18
+	and	$18,4-1,$20	# number of limbs in first loop
+	srl	$4,$7,$0	# compute function result
+
+	beq	$20,.L0
+	subq	$18,$20,$18
+
+	.align	3
+.Loop0:
+	ldq	$3,-8($17)
+	subq	$16,8,$16
+	subq	$17,8,$17
+	subq	$20,1,$20
+	sll	$4,$19,$5
+	srl	$3,$7,$6
+	bis	$3,$3,$4
+	bis	$5,$6,$8
+	stq	$8,0($16)
+	bne	$20,.Loop0
+
+.L0:	beq	$18,.Lend
+
+	.align	3
+.Loop:	ldq	$3,-8($17)
+	subq	$16,32,$16
+	subq	$18,4,$18
+	sll	$4,$19,$5
+	srl	$3,$7,$6
+
+	ldq	$4,-16($17)
+	sll	$3,$19,$1
+	bis	$5,$6,$8
+	stq	$8,24($16)
+	srl	$4,$7,$2
+
+	ldq	$3,-24($17)
+	sll	$4,$19,$5
+	bis	$1,$2,$8
+	stq	$8,16($16)
+	srl	$3,$7,$6
+
+	ldq	$4,-32($17)
+	sll	$3,$19,$1
+	bis	$5,$6,$8
+	stq	$8,8($16)
+	srl	$4,$7,$2
+
+	subq	$17,32,$17
+	bis	$1,$2,$8
+	stq	$8,0($16)
+
+	bgt	$18,.Loop
+
+.Lend:	sll	$4,$19,$8
+	stq	$8,-8($16)
+	ret	$31,($26),1
+	.end	__mpn_lshift
diff --git a/mpn/alpha/mul_1.s b/mpn/alpha/mul_1.s
new file mode 100644
index 000000000..a1f5a94b9
--- /dev/null
+++ b/mpn/alpha/mul_1.s
@@ -0,0 +1,85 @@
+ # Alpha 21064 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+ # the result in a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	r16
+ # s1_ptr	r17
+ # size		r18
+ # s2_limb	r19
+
+ # This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5.
+
+ # To improve performance for long multiplications, we would use
+ # 'fetch' for S1 and 'fetch_m' for RES.  It's not obvious how to use
+ # these instructions without slowing down the general code: 1. We can
+ # only have two prefetches in operation at any time in the Alpha
+ # architecture.  2. There will seldom be any special alignment
+ # between RES_PTR and S1_PTR.  Maybe we can simply divide the current
+ # loop into an inner and outer loop, having the inner loop handle
+ # exactly one prefetch block?
+
+	.set	noreorder
+	.set	noat
+.text
+	.align	3
+	.globl	__mpn_mul_1
+	.ent	__mpn_mul_1 2
+__mpn_mul_1:
+	.frame	$30,0,$26
+
+	ldq	$2,0($17)	# $2 = s1_limb
+	subq	$18,1,$18	# size--
+	mulq	$2,$19,$3	# $3 = prod_low
+	bic	$31,$31,$4	# clear cy_limb
+	umulh	$2,$19,$0	# $0 = prod_high
+	beq	$18,Lend1	# jump if size was == 1
+	ldq	$2,8($17)	# $2 = s1_limb
+	subq	$18,1,$18	# size--
+	stq	$3,0($16)
+	beq	$18,Lend2	# jump if size was == 2
+
+	.align	3
+Loop:	mulq	$2,$19,$3	# $3 = prod_low
+	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
+	subq	$18,1,$18	# size--
+	umulh	$2,$19,$4	# $4 = cy_limb
+	ldq	$2,16($17)	# $2 = s1_limb
+	addq	$17,8,$17	# s1_ptr++
+	addq	$3,$0,$3	# $3 = cy_limb + prod_low
+	stq	$3,8($16)
+	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
+	addq	$16,8,$16	# res_ptr++
+	bne	$18,Loop
+
+Lend2:	mulq	$2,$19,$3	# $3 = prod_low
+	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
+	umulh	$2,$19,$4	# $4 = cy_limb
+	addq	$3,$0,$3	# $3 = cy_limb + prod_low
+	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
+	stq	$3,8($16)
+	addq	$4,$0,$0	# cy_limb = prod_high + cy
+	ret	$31,($26),1
+Lend1:	stq	$3,0($16)
+	ret	$31,($26),1
+
+	.end	__mpn_mul_1
diff --git a/mpn/alpha/rshift.s b/mpn/alpha/rshift.s
new file mode 100644
index 000000000..389054ab0
--- /dev/null
+++ b/mpn/alpha/rshift.s
@@ -0,0 +1,107 @@
+ # Alpha 21064 __mpn_rshift --
+
+ # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	r16
+ # s1_ptr	r17
+ # size		r18
+ # cnt		r19
+
+ # This code runs at 4.8 cycles/limb on the 21064.  With infinite unrolling,
+ # it would take 4 cycles/limb.  It should be possible to get down to 3
+ # cycles/limb since both ldq and stq can be paired with the other used
+ # instructions.  But there are many restrictions in the 21064 pipeline that
+ # makes it hard, if not impossible, to get down to 3 cycles/limb:
+
+ # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+ # 2. Only aligned instruction pairs can be paired.
+ # 3. The store buffer or silo might not be able to deal with the bandwidth.
+
+	.set	noreorder
+	.set	noat
+.text
+	.align	3
+	.globl	__mpn_rshift
+	.ent	__mpn_rshift
+__mpn_rshift:
+	.frame	$30,0,$26,0
+
+	ldq	$4,0($17)	# load first limb
+	addq	$17,8,$17
+	subq	$31,$19,$7
+	subq	$18,1,$18
+	and	$18,4-1,$20	# number of limbs in first loop
+	sll	$4,$7,$0	# compute function result
+
+	beq	$20,.L0
+	subq	$18,$20,$18
+
+	.align	3
+.Loop0:
+	ldq	$3,0($17)
+	addq	$16,8,$16
+	addq	$17,8,$17
+	subq	$20,1,$20
+	srl	$4,$19,$5
+	sll	$3,$7,$6
+	bis	$3,$3,$4
+	bis	$5,$6,$8
+	stq	$8,-8($16)
+	bne	$20,.Loop0
+
+.L0:	beq	$18,.Lend
+
+	.align	3
+.Loop:	ldq	$3,0($17)
+	addq	$16,32,$16
+	subq	$18,4,$18
+	srl	$4,$19,$5
+	sll	$3,$7,$6
+
+	ldq	$4,8($17)
+	srl	$3,$19,$1
+	bis	$5,$6,$8
+	stq	$8,-32($16)
+	sll	$4,$7,$2
+
+	ldq	$3,16($17)
+	srl	$4,$19,$5
+	bis	$1,$2,$8
+	stq	$8,-24($16)
+	sll	$3,$7,$6
+
+	ldq	$4,24($17)
+	srl	$3,$19,$1
+	bis	$5,$6,$8
+	stq	$8,-16($16)
+	sll	$4,$7,$2
+
+	addq	$17,32,$17
+	bis	$1,$2,$8
+	stq	$8,-8($16)
+
+	bgt	$18,.Loop
+
+.Lend:	srl	$4,$19,$8
+	stq	$8,0($16)
+	ret	$31,($26),1
+	.end	__mpn_rshift
diff --git a/mpn/alpha/sub_n.s b/mpn/alpha/sub_n.s
new file mode 100644
index 000000000..3c90c1169
--- /dev/null
+++ b/mpn/alpha/sub_n.s
@@ -0,0 +1,120 @@
+ # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$16
+ # s1_ptr	$17
+ # s2_ptr	$18
+ # size		$19
+
+	.set	noreorder
+	.set	noat
+.text
+	.align	3
+	.globl	__mpn_sub_n
+	.ent	__mpn_sub_n
+__mpn_sub_n:
+	.frame	$30,0,$26,0
+
+	ldq	$3,0($17)
+	ldq	$4,0($18)
+
+	subq	$19,1,$19
+	and	$19,4-1,$2	# number of limbs in first loop
+	bis	$31,$31,$0
+	beq	$2,.L0		# if multiple of 4 limbs, skip first loop
+
+	subq	$19,$2,$19
+
+.Loop0:	subq	$2,1,$2
+	ldq	$5,8($17)
+	addq	$4,$0,$4
+	ldq	$6,8($18)
+	cmpult	$4,$0,$1
+	subq	$3,$4,$4
+	cmpult	$3,$4,$0
+	stq	$4,0($16)
+	or	$0,$1,$0
+
+	addq	$17,8,$17
+	addq	$18,8,$18
+	bis	$5,$5,$3
+	bis	$6,$6,$4
+	addq	$16,8,$16
+	bne	$2,.Loop0
+
+.L0:	beq	$19,.Lend
+
+	.align	3
+.Loop:	subq	$19,4,$19
+
+	ldq	$5,8($17)
+	addq	$4,$0,$4
+	ldq	$6,8($18)
+	cmpult	$4,$0,$1
+	subq	$3,$4,$4
+	cmpult	$3,$4,$0
+	stq	$4,0($16)
+	or	$0,$1,$0
+
+	ldq	$3,16($17)
+	addq	$6,$0,$6
+	ldq	$4,16($18)
+	cmpult	$6,$0,$1
+	subq	$5,$6,$6
+	cmpult	$5,$6,$0
+	stq	$6,8($16)
+	or	$0,$1,$0
+
+	ldq	$5,24($17)
+	addq	$4,$0,$4
+	ldq	$6,24($18)
+	cmpult	$4,$0,$1
+	subq	$3,$4,$4
+	cmpult	$3,$4,$0
+	stq	$4,16($16)
+	or	$0,$1,$0
+
+	ldq	$3,32($17)
+	addq	$6,$0,$6
+	ldq	$4,32($18)
+	cmpult	$6,$0,$1
+	subq	$5,$6,$6
+	cmpult	$5,$6,$0
+	stq	$6,24($16)
+	or	$0,$1,$0
+
+	addq	$17,32,$17
+	addq	$18,32,$18
+	addq	$16,32,$16
+	bne	$19,.Loop
+
+.Lend:	addq	$4,$0,$4
+	cmpult	$4,$0,$1
+	subq	$3,$4,$4
+	cmpult	$3,$4,$0
+	stq	$4,0($16)
+	or	$0,$1,$0
+	ret	$31,($26),1
+
+	.end	__mpn_sub_n
diff --git a/mpn/alpha/submul_1.s b/mpn/alpha/submul_1.s
new file mode 100644
index 000000000..1ed0c6a8d
--- /dev/null
+++ b/mpn/alpha/submul_1.s
@@ -0,0 +1,92 @@
+ # Alpha 21064 __mpn_submul_1 -- Multiply a limb vector with a limb and
+ # subtract the result from a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	r16
+ # s1_ptr	r17
+ # size		r18
+ # s2_limb	r19
+
+ # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+
+	.set	noreorder
+	.set	noat
+.text
+	.align	3
+	.globl	__mpn_submul_1
+	.ent	__mpn_submul_1 2
+__mpn_submul_1:
+	.frame	$30,0,$26
+
+	ldq	$2,0($17)	# $2 = s1_limb
+	addq	$17,8,$17	# s1_ptr++
+	subq	$18,1,$18	# size--
+	mulq	$2,$19,$3	# $3 = prod_low
+	ldq	$5,0($16)	# $5 = *res_ptr
+	umulh	$2,$19,$0	# $0 = prod_high
+	beq	$18,.Lend1	# jump if size was == 1
+	ldq	$2,0($17)	# $2 = s1_limb
+	addq	$17,8,$17	# s1_ptr++
+	subq	$18,1,$18	# size--
+	subq	$5,$3,$3
+	cmpult	$5,$3,$4
+	stq	$3,0($16)
+	addq	$16,8,$16	# res_ptr++
+	beq	$18,.Lend2	# jump if size was == 2
+
+	.align	3
+.Loop:	mulq	$2,$19,$3	# $3 = prod_low
+	ldq	$5,0($16)	# $5 = *res_ptr
+	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
+	subq	$18,1,$18	# size--
+	umulh	$2,$19,$4	# $4 = cy_limb
+	ldq	$2,0($17)	# $2 = s1_limb
+	addq	$17,8,$17	# s1_ptr++
+	addq	$3,$0,$3	# $3 = cy_limb + prod_low
+	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
+	subq	$5,$3,$3
+	cmpult	$5,$3,$5
+	stq	$3,0($16)
+	addq	$16,8,$16	# res_ptr++
+	addq	$5,$0,$0	# combine carries
+	bne	$18,.Loop
+
+.Lend2:	mulq	$2,$19,$3	# $3 = prod_low
+	ldq	$5,0($16)	# $5 = *res_ptr
+	addq	$4,$0,$0	# cy_limb = cy_limb + 'cy'
+	umulh	$2,$19,$4	# $4 = cy_limb
+	addq	$3,$0,$3	# $3 = cy_limb + prod_low
+	cmpult	$3,$0,$0	# $0 = carry from (cy_limb + prod_low)
+	subq	$5,$3,$3
+	cmpult	$5,$3,$5
+	stq	$3,0($16)
+	addq	$5,$0,$0	# combine carries
+	addq	$4,$0,$0	# cy_limb = prod_high + cy
+	ret	$31,($26),1
+.Lend1:	subq	$5,$3,$3
+	cmpult	$5,$3,$5
+	stq	$3,0($16)
+	addq	$0,$5,$0
+	ret	$31,($26),1
+
+	.end	__mpn_submul_1
diff --git a/mpn/alpha/udiv_qrnnd.S b/mpn/alpha/udiv_qrnnd.S
new file mode 100644
index 000000000..d3d2cee93
--- /dev/null
+++ b/mpn/alpha/udiv_qrnnd.S
@@ -0,0 +1,151 @@
+ # Alpha 21064 __udiv_qrnnd
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+        .set noreorder
+        .set noat
+.text
+        .align	3
+        .globl	__udiv_qrnnd
+        .ent	__udiv_qrnnd
+__udiv_qrnnd:
+        .frame $30,0,$26,0
+        .prologue 0
+#define cnt	$2
+#define tmp	$3
+#define rem_ptr	$16
+#define n1	$17
+#define n0	$18
+#define d	$19
+#define qb	$20
+
+	ldiq	cnt,16
+	blt	d,.Largedivisor
+
+.Loop1:	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	d,n1,qb
+	subq	n1,d,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	d,n1,qb
+	subq	n1,d,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	d,n1,qb
+	subq	n1,d,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	d,n1,qb
+	subq	n1,d,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	subq	cnt,1,cnt
+	bgt	cnt,.Loop1
+	stq	n1,0(rem_ptr)
+	bis	$31,n0,$0
+	ret	$31,($26),1
+
+.Largedivisor:
+	and	n0,1,$4
+
+	srl	n0,1,n0
+	sll	n1,63,tmp
+	or	tmp,n0,n0
+	srl	n1,1,n1
+
+	and	d,1,$6
+	srl	d,1,$5
+	addq	$5,$6,$5
+
+.Loop2:	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	$5,n1,qb
+	subq	n1,$5,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	$5,n1,qb
+	subq	n1,$5,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	$5,n1,qb
+	subq	n1,$5,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	cmplt	n0,0,tmp
+	addq	n1,n1,n1
+	bis	n1,tmp,n1
+	addq	n0,n0,n0
+	cmpule	$5,n1,qb
+	subq	n1,$5,tmp
+	cmovne	qb,tmp,n1
+	bis	n0,qb,n0
+	subq	cnt,1,cnt
+	bgt	cnt,.Loop2
+
+	addq	n1,n1,n1
+	addq	$4,n1,n1
+	bne	$6,.LOdd
+	stq	n1,0(rem_ptr)
+	bis	$31,n0,$0
+	ret	$31,($26),1
+
+.LOdd:
+	/* q' in n0. r' in n1 */
+	addq	n1,n0,n1
+	cmpult	n1,n0,tmp	# tmp := carry from addq
+	beq	tmp,.LLp6
+	addq	n0,1,n0
+	subq	n1,d,n1
+.LLp6:	cmpult	n1,d,tmp
+	bne	tmp,.LLp7
+	addq	n0,1,n0
+	subq	n1,d,n1
+.LLp7:
+	stq	n1,0(rem_ptr)
+	bis	$31,n0,$0
+	ret	$31,($26),1
+
+	.end	__udiv_qrnnd
diff --git a/mpn/bsd.h b/mpn/bsd.h
new file mode 100644
index 000000000..8a0cbced2
--- /dev/null
+++ b/mpn/bsd.h
@@ -0,0 +1,5 @@
+#if __STDC__
+#define C_SYMBOL_NAME(name) _##name
+#else
+#define C_SYMBOL_NAME(name) _/**/name
+#endif
diff --git a/mpn/clipper/add_n.s b/mpn/clipper/add_n.s
new file mode 100644
index 000000000..40ed1bd32
--- /dev/null
+++ b/mpn/clipper/add_n.s
@@ -0,0 +1,26 @@
+.text
+	.align 16
+.globl ___mpn_add_n
+___mpn_add_n:
+	subq	$8,sp
+	storw	r6,(sp)
+	loadw	12(sp),r2
+	loadw	16(sp),r3
+	loadq	$0,r6		; clear carry-save register
+
+.Loop:	loadw	(r1),r4
+	loadw	(r2),r5
+	addwc	r6,r6		; restore carry from r6
+	addwc	r5,r4
+	storw	r4,(r0)
+	subwc	r6,r6		; save carry in r6
+	addq	$4,r0
+	addq	$4,r1
+	addq	$4,r2
+	subq	$1,r3
+	brne	.Loop
+
+	negw	r6,r0
+	loadw	(sp),r6
+	addq	$8,sp
+	ret	sp
diff --git a/mpn/clipper/mul_1.s b/mpn/clipper/mul_1.s
new file mode 100644
index 000000000..f20ece02a
--- /dev/null
+++ b/mpn/clipper/mul_1.s
@@ -0,0 +1,25 @@
+.text
+	.align	16
+.globl	___mpn_mul_1
+___mpn_mul_1:
+	subq	$8,sp
+	storw	r6,(sp)
+	loadw	12(sp),r2
+	loadw	16(sp),r3
+	loadq	$0,r6		; clear carry limb
+
+.Loop:	loadw	(r1),r4
+	mulwux	r3,r4
+	addw	r6,r4		; add old carry limb into low product limb
+	loadq	$0,r6
+	addwc	r5,r6		; propagate cy into high product limb
+	storw	r4,(r0)
+	addq	$4,r0
+	addq	$4,r1
+	subq	$1,r2
+	brne	.Loop
+
+	movw	r6,r0
+	loadw	0(sp),r6
+	addq	$8,sp
+	ret	sp
diff --git a/mpn/clipper/sub_n.s b/mpn/clipper/sub_n.s
new file mode 100644
index 000000000..6f47d47c9
--- /dev/null
+++ b/mpn/clipper/sub_n.s
@@ -0,0 +1,26 @@
+.text
+	.align 16
+.globl ___mpn_sub_n
+___mpn_sub_n:
+	subq	$8,sp
+	storw	r6,(sp)
+	loadw	12(sp),r2
+	loadw	16(sp),r3
+	loadq	$0,r6		; clear carry-save register
+
+.Loop:	loadw	(r1),r4
+	loadw	(r2),r5
+	addwc	r6,r6		; restore carry from r6
+	subwc	r5,r4
+	storw	r4,(r0)
+	subwc	r6,r6		; save carry in r6
+	addq	$4,r0
+	addq	$4,r1
+	addq	$4,r2
+	subq	$1,r3
+	brne	.Loop
+
+	negw	r6,r0
+	loadw	(sp),r6
+	addq	$8,sp
+	ret	sp
diff --git a/mpn/config/t-freebsd b/mpn/config/t-freebsd
new file mode 100644
index 000000000..ba02fa768
--- /dev/null
+++ b/mpn/config/t-freebsd
@@ -0,0 +1 @@
+SFLAGS=-DBROKEN_ALIGN
diff --git a/mpn/configure.in b/mpn/configure.in
new file mode 100644
index 000000000..187cf9e9d
--- /dev/null
+++ b/mpn/configure.in
@@ -0,0 +1,165 @@
+# This file is a shell script fragment that supplies the information
+# necessary for a configure script to process the program in
+# this directory.  For more information, look at ../configure.
+
+configdirs=
+srctrigger=powerpc32
+srcname="GNU Multi-Precision library/mpn"
+
+# per-host:
+
+# per-target:
+
+case "${target}" in
+  sparc9*-*-* | sparc64*-*-* | ultrasparc*-*-*) path="sparc64" ;;
+  sparc8*-*-* | microsparc*-*-*)
+	path="sparc32/v8 sparc"; ;;
+  supersparc*-*-*)
+	path="sparc32/v8/supersparc sparc32/v8 sparc"
+	extra_functions="udiv" ;;
+  sparc*-*-*) path="sparc32"
+	if [ x$floating_point = xno ]
+	  then extra_functions="udiv_nfp"
+	  else extra_functions="udiv_fp"
+	fi
+	;;
+  hppa7000*-*-*) path="hppa/hppa1_1 hppa"; extra_functions="udiv_qrnnd" ;;
+  hppa1.0*-*-*) path="hppa"; extra_functions="udiv_qrnnd" ;;
+  hppa*-*-*)					# assume pa7100
+    path="hppa/hppa1_1/pa7100 hppa/hppa1_1 hppa"
+    extra_functions="udiv_qrnnd" ;;
+  cray2-cray-unicos* | [xy]mp-cray-unicos* | [ctj]90-cray-unicos*)
+    path="cray" ;;
+  alphaev5-*-*) path="alpha/ev5 alpha"; extra_functions="udiv_qrnnd" ;;
+  alpha*-*-*) path="alpha"; extra_functions="udiv_qrnnd" ;;
+  am29000*-*-*) path="am29000" ;;
+  a29k*-*-*) path="am29000" ;;
+  i[34]86*-*-linuxaout* | i[34]86*-*-linuxoldld* | \
+  i[34]86*-*-*bsd*)			# x86 running BSD or Linux with a.out
+	echo '#define BSD_SYNTAX' >asm-syntax.h
+	echo '#include "'$srcdir'/x86/syntax.h"' >>asm-syntax.h
+	path="x86" ;;
+  i[56]86*-*-linuxaout* | pentium-*-linuxaout* | pentiumpro-*-linuxaout* | \
+  i[56]86*-*-linuxoldld* | pentium-*-linuxoldld* | pentiumpro-*-linuxoldld* | \
+  i[56]86*-*-*bsd* | pentium-*-*bsd* | pentiumpro-*-*bsd*)
+	echo '#define BSD_SYNTAX' >asm-syntax.h
+	echo '#include "'$srcdir'/x86/syntax.h"' >>asm-syntax.h
+	path="x86/pentium x86" ;;
+  i[34]86*-*-*)				# x86 with ELF/SysV format
+	echo '#define ELF_SYNTAX' >asm-syntax.h
+	echo '#include "'$srcdir'/x86/syntax.h"' >>asm-syntax.h
+	path="x86" ;;
+  i[56]86*-*-* | pentium-*-* | pentiumpro-*-*)	# x86 with ELF/SysV format
+	echo '#define ELF_SYNTAX' >asm-syntax.h
+	echo '#include "'$srcdir'/x86/syntax.h"' >>asm-syntax.h
+	path="x86/pentium x86" ;;
+  i960*-*-*) path="i960" ;;
+# Motorola 68k configurations.  Let m68k mean 68020-68040.
+# mc68000 or mc68060 configurations need to be specified explicitly
+  m680[234]0*-*-linuxaout* | m68k*-*-linuxaout*)
+        echo '#define MIT_SYNTAX' >asm-syntax.h
+	echo '#include "'$srcdir'/m68k/syntax.h"' >>asm-syntax.h
+	path="m68k/mc68020 m68k" ;;
+  m68060*-*-linuxaout*)
+        echo '#define MIT_SYNTAX' >asm-syntax.h
+	echo '#include "'$srcdir'/m68k/syntax.h"' >>asm-syntax.h
+	path="m68k" ;;
+  m680[234]0*-*-linux* | m68k*-*-linux*)
+	echo '#define ELF_SYNTAX' >asm-syntax.h
+	echo '#include "'$srcdir'/m68k/syntax.h"' >>asm-syntax.h
+	path="m68k/mc68020 m68k" ;;
+  m68060*-*-linux*)
+	echo '#define ELF_SYNTAX' >asm-syntax.h
+	echo '#include "'$srcdir'/m68k/syntax.h"' >>asm-syntax.h
+	path="m68k" ;;
+  m68000*-*-* | m68060*-*-*)
+        echo '#define MIT_SYNTAX' >asm-syntax.h
+	echo '#include "'$srcdir'/m68k/syntax.h"' >>asm-syntax.h
+	path="m68k/mc68000" ;;
+  m680[234]0*-*-* | m68k*-*-*)
+        echo '#define MIT_SYNTAX' >asm-syntax.h
+	echo '#include "'$srcdir'/m68k/syntax.h"' >>asm-syntax.h
+	path="m68k/mc68020 m68k" ;;
+  m88k*-*-* | m88k*-*-*) path="m88k" ;;
+  m88110*-*-*) path="m88k/mc88110 m88k" ;;
+  ns32k*-*-*) path="n32k" ;;
+  ppc601-*-*) path="power powerpc32" ;;
+  ppc620-*-* | powerpc64*-*-*) path="powerpc64" ;;
+  ppc60[234]*-*-* | powerpc*-*-*) path="powerpc32" ;;
+  pyramid-*-*) path="pyr" ;;
+  rs6000-*-* | power-*-* | power2-*-*)
+    path="power"; extra_functions="udiv_w_sdiv" ;;
+  sh-*-*) path="sh" ;;
+  sh2-*-*) path="sh/sh2 sh" ;;
+  mips[34]*-*-*) path="mips3" ;;
+  mips*-*-irix6*) path="mips3" ;;
+  mips*-*-*) path="mips2" ;;
+  vax*-*-*) path="vax"; extra_functions="udiv_w_sdiv" ;;
+  z8000x*-*-*) path="z8000x"; extra_functions="udiv_w_sdiv" ;;
+  z8000*-*-*) path="z8000"; extra_functions="udiv_w_sdiv" ;;
+  clipper*-*-*) path="clipper" ;;
+  *-*-*) ;;
+esac
+
+case "${target}" in
+  *-*-linuxaout* | *-*-linuxoldld*) config=bsd.h ;;
+  *-sysv* | *-solaris* | *-*-linux*) config="sysv.h" ;;
+  *) config="bsd.h" ;;
+esac
+
+case "${target}" in
+  i[3456]86*-*-freebsd* | pentium-*-freebsd* | pentiumpro-*-freebsd* | \
+  i[3456]86*-*-netbsd* | pentium-*-netbsd* | pentiumpro-*-netbsd*)
+	target_makefile_frag=config/t-freebsd ;;
+esac
+
+
+functions="${extra_functions} inlines add_n addmul_1 cmp divmod_1 \
+ divrem divrem_1 dump lshift mod_1 mul mul_1 mul_n random2 rshift sqrtrem \
+ sub_n submul_1 get_str set_str scan0 scan1 popcount hamdist gcd_1 \
+ pre_mod_1 perfsqr bdivmod gcd gcdext"
+
+path="$path generic"
+mpn_objects=
+
+for fn in $functions ; do
+  mpn_objects="$mpn_objects $fn.o"
+  for dir in $path ; do
+    rm -f $fn.[Ssc]
+    if test -f $srcdir/$dir/$fn.S ; then
+      files="$files $dir/$fn.S"
+      links="$links $fn.S"
+      break
+    elif test -f $srcdir/$dir/$fn.s ; then
+      files="$files $dir/$fn.s"
+      links="$links $fn.s"
+      break
+    elif test -f $srcdir/$dir/$fn.c ; then
+      files="$files $dir/$fn.c"
+      links="$links $fn.c"
+      break
+    fi
+  done
+done
+
+for dir in $path ; do
+  rm -f gmp-mparam.h
+  if test -f $srcdir/$dir/gmp-mparam.h ; then
+    files="$files $dir/gmp-mparam.h"
+    links="$links gmp-mparam.h"
+    break
+  fi
+done
+
+links="sysdep.h $links"
+files="$config $files"
+
+mpn_links=$links
+
+# post-target:
+
+sed <Makefile >Makefile.tmp \
+  -e "s/MPN_LINKS = .*/MPN_LINKS =${mpn_links}/" \
+  -e "s/MPN_OBJECTS = .*/MPN_OBJECTS =${mpn_objects}/"
+
+mv Makefile.tmp Makefile
diff --git a/mpn/cray/gmp-mparam.h b/mpn/cray/gmp-mparam.h
new file mode 100644
index 000000000..349c812d4
--- /dev/null
+++ b/mpn/cray/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 64
+#define BITS_PER_SHORTINT 32
+#define BITS_PER_CHAR 8
diff --git a/mpn/generic/add_n.c b/mpn/generic/add_n.c
new file mode 100644
index 000000000..9d71df110
--- /dev/null
+++ b/mpn/generic/add_n.c
@@ -0,0 +1,62 @@
+/* mpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+#if __STDC__
+mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     register mp_srcptr s2_ptr;
+     mp_size_t size;
+#endif
+{
+  register mp_limb_t x, y, cy;
+  register mp_size_t j;
+
+  /* The loop counter and index J goes from -SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  s1_ptr -= j;
+  s2_ptr -= j;
+  res_ptr -= j;
+
+  cy = 0;
+  do
+    {
+      y = s2_ptr[j];
+      x = s1_ptr[j];
+      y += cy;			/* add previous carry to one addend */
+      cy = (y < cy);		/* get out carry from that addition */
+      y = x + y;		/* add other addend */
+      cy = (y < x) + cy;	/* get out carry from that add, combine */
+      res_ptr[j] = y;
+    }
+  while (++j != 0);
+
+  return cy;
+}
diff --git a/mpn/generic/addmul_1.c b/mpn/generic/addmul_1.c
new file mode 100644
index 000000000..3a5e21400
--- /dev/null
+++ b/mpn/generic/addmul_1.c
@@ -0,0 +1,65 @@
+/* mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
+   by S2_LIMB, add the S1_SIZE least significant limbs of the product to the
+   limb vector pointed to by RES_PTR.  Return the most significant limb of
+   the product, adjusted for carry-out from the addition.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     mp_size_t s1_size;
+     register mp_limb_t s2_limb;
+{
+  register mp_limb_t cy_limb;
+  register mp_size_t j;
+  register mp_limb_t prod_high, prod_low;
+  register mp_limb_t x;
+
+  /* The loop counter and index J goes from -SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -s1_size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  res_ptr -= j;
+  s1_ptr -= j;
+
+  cy_limb = 0;
+  do
+    {
+      umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
+
+      prod_low += cy_limb;
+      cy_limb = (prod_low < cy_limb) + prod_high;
+
+      x = res_ptr[j];
+      prod_low = x + prod_low;
+      cy_limb += (prod_low < x);
+      res_ptr[j] = prod_low;
+    }
+  while (++j != 0);
+
+  return cy_limb;
+}
diff --git a/mpn/generic/bdivmod.c b/mpn/generic/bdivmod.c
new file mode 100644
index 000000000..ff4fb733b
--- /dev/null
+++ b/mpn/generic/bdivmod.c
@@ -0,0 +1,129 @@
+/*  mpn/bdivmod.c: mpn_bdivmod for computing U/V mod 2^d.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*  q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d).
+
+    Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and
+    returns the high d%BITS_PER_MP_LIMB bits of Q as the result.
+
+    Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up.  Since the
+    low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows
+    the limb vectors at qp to overwrite the low limbs at up, provided qp <= up.
+
+    Preconditions:
+    1.  V is odd.
+    2.  usize * BITS_PER_MP_LIMB >= d.
+    3.  If Q and U overlap, qp <= up.
+
+    Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
+
+    Funding for this work has been partially provided by Conselho Nacional
+    de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant 
+    301314194-2, and was done while I was a visiting reseacher in the Instituto
+    de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
+
+    References:
+        T. Jebelean, An algorithm for exact division, Journal of Symbolic 
+        Computation, v. 15, 1993, pp. 169-180.
+
+        K. Weber, The accelerated integer GCD algorithm, ACM Transactions on 
+        Mathematical Software, v. 21 (March), 1995, pp. 111-122.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+#if __STDC__
+mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize, 
+	     mp_srcptr vp, mp_size_t vsize, unsigned long int d)
+#else
+mpn_bdivmod (qp, up, usize, vp, vsize, d)
+     mp_ptr qp;
+     mp_ptr up;
+     mp_size_t usize;
+     mp_srcptr vp;
+     mp_size_t vsize;
+     unsigned long int d;
+#endif
+{
+  /*  Cache for v_inv is used to make mpn_accelgcd faster.  */
+  static mp_limb_t previous_low_vlimb = 0;
+  static mp_limb_t v_inv;                 /*  1/V mod 2^BITS_PER_MP_LIMB.  */
+
+  if (vp[0] != previous_low_vlimb)      /*   Cache miss; compute v_inv.  */
+    {
+      mp_limb_t v = previous_low_vlimb = vp[0];
+      mp_limb_t make_zero = 1;
+      mp_limb_t two_i = 1;
+      v_inv = 0;
+      do
+        {
+          while ((two_i & make_zero) == 0)
+            two_i <<= 1, v <<= 1;
+          v_inv += two_i;
+          make_zero -= v;
+        }
+      while (make_zero);
+    }
+
+  /*  Need faster computation for some common cases in mpn_accelgcd.  */
+  if (usize == 2 && vsize == 2 && 
+      (d == BITS_PER_MP_LIMB || d == 2*BITS_PER_MP_LIMB))
+    {
+      mp_limb_t hi, lo;
+      mp_limb_t q = up[0] * v_inv;
+      umul_ppmm (hi, lo, q, vp[0]);
+      up[0] = 0, up[1] -= hi + q*vp[1], qp[0] = q;
+      if (d == 2*BITS_PER_MP_LIMB)
+        q = up[1] * v_inv, up[1] = 0, qp[1] = q;
+      return 0;
+    }
+
+  /*  Main loop.  */
+  while (d >= BITS_PER_MP_LIMB)
+    {
+      mp_limb_t q = up[0] * v_inv;
+      mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
+      if (usize > vsize)
+        mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
+      d -= BITS_PER_MP_LIMB;
+      up += 1, usize -= 1;
+      *qp++ = q;
+    }
+
+  if (d)
+    {
+      mp_limb_t b;
+      mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1);
+      switch (q)
+        {
+          case 0:  return 0;
+          case 1:  b = mpn_sub_n (up, up, vp, MIN (usize, vsize));   break;
+          default: b = mpn_submul_1 (up, vp, MIN (usize, vsize), q); break;
+        }
+      if (usize > vsize)
+        mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
+      return q;
+    }
+
+  return 0;
+}
diff --git a/mpn/generic/cmp.c b/mpn/generic/cmp.c
new file mode 100644
index 000000000..4e9c60d86
--- /dev/null
+++ b/mpn/generic/cmp.c
@@ -0,0 +1,56 @@
+/* mpn_cmp -- Compare two low-level natural-number integers.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
+   There are no restrictions on the relative sizes of
+   the two arguments.
+   Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2.  */
+
+int
+#if __STDC__
+mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size)
+#else
+mpn_cmp (op1_ptr, op2_ptr, size)
+     mp_srcptr op1_ptr;
+     mp_srcptr op2_ptr;
+     mp_size_t size;
+#endif
+{
+  mp_size_t i;
+  mp_limb_t op1_word, op2_word;
+
+  for (i = size - 1; i >= 0; i--)
+    {
+      op1_word = op1_ptr[i];
+      op2_word = op2_ptr[i];
+      if (op1_word != op2_word)
+	goto diff;
+    }
+  return 0;
+ diff:
+  /* This can *not* be simplified to
+	op2_word - op2_word
+     since that expression might give signed overflow.  */
+  return (op1_word > op2_word) ? 1 : -1;
+}
diff --git a/mpn/generic/divmod_1.c b/mpn/generic/divmod_1.c
new file mode 100644
index 000000000..f93841f63
--- /dev/null
+++ b/mpn/generic/divmod_1.c
@@ -0,0 +1,208 @@
+/* mpn_divmod_1(quot_ptr, dividend_ptr, dividend_size, divisor_limb) --
+   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+   Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
+   Return the single-limb remainder.
+   There are no constraints on the value of the divisor.
+
+   QUOT_PTR and DIVIDEND_PTR might point to the same limb.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+/* FIXME: We should be using invert_limb (or invert_normalized_limb)
+   here (not udiv_qrnnd).  */
+
+mp_limb_t
+#if __STDC__
+mpn_divmod_1 (mp_ptr quot_ptr,
+	      mp_srcptr dividend_ptr, mp_size_t dividend_size,
+	      mp_limb_t divisor_limb)
+#else
+mpn_divmod_1 (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
+     mp_ptr quot_ptr;
+     mp_srcptr dividend_ptr;
+     mp_size_t dividend_size;
+     mp_limb_t divisor_limb;
+#endif
+{
+  mp_size_t i;
+  mp_limb_t n1, n0, r;
+  int dummy;
+
+  /* ??? Should this be handled at all?  Rely on callers?  */
+  if (dividend_size == 0)
+    return 0;
+
+  /* If multiplication is much faster than division, and the
+     dividend is large, pre-invert the divisor, and use
+     only multiplications in the inner loop.  */
+
+  /* This test should be read:
+       Does it ever help to use udiv_qrnnd_preinv?
+	 && Does what we save compensate for the inversion overhead?  */
+  if (UDIV_TIME > (2 * UMUL_TIME + 6)
+      && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME)
+    {
+      int normalization_steps;
+
+      count_leading_zeros (normalization_steps, divisor_limb);
+      if (normalization_steps != 0)
+	{
+	  mp_limb_t divisor_limb_inverted;
+
+	  divisor_limb <<= normalization_steps;
+
+	  /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+	     result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+	     most significant bit (with weight 2**N) implicit.  */
+
+	  /* Special case for DIVISOR_LIMB == 100...000.  */
+	  if (divisor_limb << 1 == 0)
+	    divisor_limb_inverted = ~(mp_limb_t) 0;
+	  else
+	    udiv_qrnnd (divisor_limb_inverted, dummy,
+			-divisor_limb, 0, divisor_limb);
+
+	  n1 = dividend_ptr[dividend_size - 1];
+	  r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+	  /* Possible optimization:
+	     if (r == 0
+	     && divisor_limb > ((n1 << normalization_steps)
+			     | (dividend_ptr[dividend_size - 2] >> ...)))
+	     ...one division less... */
+
+	  for (i = dividend_size - 2; i >= 0; i--)
+	    {
+	      n0 = dividend_ptr[i];
+	      udiv_qrnnd_preinv (quot_ptr[i + 1], r, r,
+				 ((n1 << normalization_steps)
+				  | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+				 divisor_limb, divisor_limb_inverted);
+	      n1 = n0;
+	    }
+	  udiv_qrnnd_preinv (quot_ptr[0], r, r,
+			     n1 << normalization_steps,
+			     divisor_limb, divisor_limb_inverted);
+	  return r >> normalization_steps;
+	}
+      else
+	{
+	  mp_limb_t divisor_limb_inverted;
+
+	  /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+	     result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+	     most significant bit (with weight 2**N) implicit.  */
+
+	  /* Special case for DIVISOR_LIMB == 100...000.  */
+	  if (divisor_limb << 1 == 0)
+	    divisor_limb_inverted = ~(mp_limb_t) 0;
+	  else
+	    udiv_qrnnd (divisor_limb_inverted, dummy,
+			-divisor_limb, 0, divisor_limb);
+
+	  i = dividend_size - 1;
+	  r = dividend_ptr[i];
+
+	  if (r >= divisor_limb)
+	    r = 0;
+	  else
+	    {
+	      quot_ptr[i] = 0;
+	      i--;
+	    }
+
+	  for (; i >= 0; i--)
+	    {
+	      n0 = dividend_ptr[i];
+	      udiv_qrnnd_preinv (quot_ptr[i], r, r,
+				 n0, divisor_limb, divisor_limb_inverted);
+	    }
+	  return r;
+	}
+    }
+  else
+    {
+      if (UDIV_NEEDS_NORMALIZATION)
+	{
+	  int normalization_steps;
+
+	  count_leading_zeros (normalization_steps, divisor_limb);
+	  if (normalization_steps != 0)
+	    {
+	      divisor_limb <<= normalization_steps;
+
+	      n1 = dividend_ptr[dividend_size - 1];
+	      r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+	      /* Possible optimization:
+		 if (r == 0
+		 && divisor_limb > ((n1 << normalization_steps)
+				 | (dividend_ptr[dividend_size - 2] >> ...)))
+		 ...one division less... */
+
+	      for (i = dividend_size - 2; i >= 0; i--)
+		{
+		  n0 = dividend_ptr[i];
+		  udiv_qrnnd (quot_ptr[i + 1], r, r,
+			      ((n1 << normalization_steps)
+			       | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+			      divisor_limb);
+		  n1 = n0;
+		}
+	      udiv_qrnnd (quot_ptr[0], r, r,
+			  n1 << normalization_steps,
+			  divisor_limb);
+	      return r >> normalization_steps;
+	    }
+	}
+      /* No normalization needed, either because udiv_qrnnd doesn't require
+	 it, or because DIVISOR_LIMB is already normalized.  */
+
+      i = dividend_size - 1;
+      r = dividend_ptr[i];
+
+      if (r >= divisor_limb)
+	r = 0;
+      else
+	{
+	  quot_ptr[i] = 0;
+	  i--;
+	}
+
+      for (; i >= 0; i--)
+	{
+	  n0 = dividend_ptr[i];
+	  udiv_qrnnd (quot_ptr[i], r, r, n0, divisor_limb);
+	}
+      return r;
+    }
+}
diff --git a/mpn/generic/divrem.c b/mpn/generic/divrem.c
new file mode 100644
index 000000000..1fe865a10
--- /dev/null
+++ b/mpn/generic/divrem.c
@@ -0,0 +1,245 @@
+/* mpn_divrem -- Divide natural numbers, producing both remainder and
+   quotient.
+
+Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
+   the NSIZE-DSIZE least significant quotient limbs at QP
+   and the DSIZE long remainder at NP.  If QEXTRA_LIMBS is
+   non-zero, generate that many fraction bits and append them after the
+   other quotient limbs.
+   Return the most significant limb of the quotient, this is always 0 or 1.
+
+   Preconditions:
+   0. NSIZE >= DSIZE.
+   1. The most significant bit of the divisor must be set.
+   2. QP must either not overlap with the input operands at all, or
+      QP + DSIZE >= NP must hold true.  (This means that it's
+      possible to put the quotient in the high part of NUM, right after the
+      remainder in NUM.
+   3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.  */
+
+mp_limb_t
+#if __STDC__
+mpn_divrem (mp_ptr qp, mp_size_t qextra_limbs,
+	    mp_ptr np, mp_size_t nsize,
+	    mp_srcptr dp, mp_size_t dsize)
+#else
+mpn_divrem (qp, qextra_limbs, np, nsize, dp, dsize)
+     mp_ptr qp;
+     mp_size_t qextra_limbs;
+     mp_ptr np;
+     mp_size_t nsize;
+     mp_srcptr dp;
+     mp_size_t dsize;
+#endif
+{
+  mp_limb_t most_significant_q_limb = 0;
+
+  switch (dsize)
+    {
+    case 0:
+      /* We are asked to divide by zero, so go ahead and do it!  (To make
+	 the compiler not remove this statement, return the value.)  */
+      return 1 / dsize;
+
+    case 1:
+      {
+	mp_size_t i;
+	mp_limb_t n1;
+	mp_limb_t d;
+
+	d = dp[0];
+	n1 = np[nsize - 1];
+
+	if (n1 >= d)
+	  {
+	    n1 -= d;
+	    most_significant_q_limb = 1;
+	  }
+
+	qp += qextra_limbs;
+	for (i = nsize - 2; i >= 0; i--)
+	  udiv_qrnnd (qp[i], n1, n1, np[i], d);
+	qp -= qextra_limbs;
+
+	for (i = qextra_limbs - 1; i >= 0; i--)
+	  udiv_qrnnd (qp[i], n1, n1, 0, d);
+
+	np[0] = n1;
+      }
+      break;
+
+    case 2:
+      {
+	mp_size_t i;
+	mp_limb_t n1, n0, n2;
+	mp_limb_t d1, d0;
+
+	np += nsize - 2;
+	d1 = dp[1];
+	d0 = dp[0];
+	n1 = np[1];
+	n0 = np[0];
+
+	if (n1 >= d1 && (n1 > d1 || n0 >= d0))
+	  {
+	    sub_ddmmss (n1, n0, n1, n0, d1, d0);
+	    most_significant_q_limb = 1;
+	  }
+
+	for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--)
+	  {
+	    mp_limb_t q;
+	    mp_limb_t r;
+
+	    if (i >= qextra_limbs)
+	      np--;
+	    else
+	      np[0] = 0;
+
+	    if (n1 == d1)
+	      {
+		/* Q should be either 111..111 or 111..110.  Need special
+		   treatment of this rare case as normal division would
+		   give overflow.  */
+		q = ~(mp_limb_t) 0;
+
+		r = n0 + d1;
+		if (r < d1)	/* Carry in the addition? */
+		  {
+		    add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
+		    qp[i] = q;
+		    continue;
+		  }
+		n1 = d0 - (d0 != 0);
+		n0 = -d0;
+	      }
+	    else
+	      {
+		udiv_qrnnd (q, r, n1, n0, d1);
+		umul_ppmm (n1, n0, d0, q);
+	      }
+
+	    n2 = np[0];
+	  q_test:
+	    if (n1 > r || (n1 == r && n0 > n2))
+	      {
+		/* The estimated Q was too large.  */
+		q--;
+
+		sub_ddmmss (n1, n0, n1, n0, 0, d0);
+		r += d1;
+		if (r >= d1)	/* If not carry, test Q again.  */
+		  goto q_test;
+	      }
+
+	    qp[i] = q;
+	    sub_ddmmss (n1, n0, r, n2, n1, n0);
+	  }
+	np[1] = n1;
+	np[0] = n0;
+      }
+      break;
+
+    default:
+      {
+	mp_size_t i;
+	mp_limb_t dX, d1, n0;
+
+	np += nsize - dsize;
+	dX = dp[dsize - 1];
+	d1 = dp[dsize - 2];
+	n0 = np[dsize - 1];
+
+	if (n0 >= dX)
+	  {
+	    if (n0 > dX || mpn_cmp (np, dp, dsize - 1) >= 0)
+	      {
+		mpn_sub_n (np, np, dp, dsize);
+		n0 = np[dsize - 1];
+		most_significant_q_limb = 1;
+	      }
+	  }
+
+	for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--)
+	  {
+	    mp_limb_t q;
+	    mp_limb_t n1, n2;
+	    mp_limb_t cy_limb;
+
+	    if (i >= qextra_limbs)
+	      {
+		np--;
+		n2 = np[dsize];
+	      }
+	    else
+	      {
+		n2 = np[dsize - 1];
+		MPN_COPY_DECR (np + 1, np, dsize);
+		np[0] = 0;
+	      }
+
+	    if (n0 == dX)
+	      /* This might over-estimate q, but it's probably not worth
+		 the extra code here to find out.  */
+	      q = ~(mp_limb_t) 0;
+	    else
+	      {
+		mp_limb_t r;
+
+		udiv_qrnnd (q, r, n0, np[dsize - 1], dX);
+		umul_ppmm (n1, n0, d1, q);
+
+		while (n1 > r || (n1 == r && n0 > np[dsize - 2]))
+		  {
+		    q--;
+		    r += dX;
+		    if (r < dX)	/* I.e. "carry in previous addition?"  */
+		      break;
+		    n1 -= n0 < d1;
+		    n0 -= d1;
+		  }
+	      }
+
+	    /* Possible optimization: We already have (q * n0) and (1 * n1)
+	       after the calculation of q.  Taking advantage of that, we
+	       could make this loop make two iterations less.  */
+
+	    cy_limb = mpn_submul_1 (np, dp, dsize, q);
+
+	    if (n2 != cy_limb)
+	      {
+		mpn_add_n (np, np, dp, dsize);
+		q--;
+	      }
+
+	    qp[i] = q;
+	    n0 = np[dsize - 1];
+	  }
+      }
+    }
+
+  return most_significant_q_limb;
+}
diff --git a/mpn/generic/divrem_1.c b/mpn/generic/divrem_1.c
new file mode 100644
index 000000000..d21326738
--- /dev/null
+++ b/mpn/generic/divrem_1.c
@@ -0,0 +1,58 @@
+/* mpn_divrem_1(quot_ptr, qsize, dividend_ptr, dividend_size, divisor_limb) --
+   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+   Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
+   Return the single-limb remainder.
+   There are no constraints on the value of the divisor.
+
+   QUOT_PTR and DIVIDEND_PTR might point to the same limb.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+#if __STDC__
+mpn_divrem_1 (mp_ptr qp, mp_size_t qsize,
+	      mp_srcptr dividend_ptr, mp_size_t dividend_size,
+	      mp_limb_t divisor_limb)
+#else
+mpn_divrem_1 (qp, qsize, dividend_ptr, dividend_size, divisor_limb)
+     mp_ptr qp;
+     mp_size_t qsize;
+     mp_srcptr dividend_ptr;
+     mp_size_t dividend_size;
+     mp_limb_t divisor_limb;
+#endif
+{
+  mp_limb_t rlimb;
+  long i;
+
+  /* Develop integer part of quotient.  */
+  rlimb = mpn_divmod_1 (qp + qsize, dividend_ptr, dividend_size, divisor_limb);
+
+  if (qsize != 0)
+    {
+      for (i = qsize - 1; i >= 0; i--)
+	udiv_qrnnd (qp[i], rlimb, rlimb, 0, divisor_limb);
+    }
+  return rlimb;
+}
diff --git a/mpn/generic/dump.c b/mpn/generic/dump.c
new file mode 100644
index 000000000..a5831c4cc
--- /dev/null
+++ b/mpn/generic/dump.c
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+mpn_dump (ptr, size)
+     mp_srcptr ptr;
+     mp_size_t size;
+{
+  if (size == 0)
+    printf ("0\n");
+  {
+    while (size)
+      {
+	size--;
+	printf ("%0*lX", (int) (2 * BYTES_PER_MP_LIMB), ptr[size]);
+      }
+    printf ("\n");
+  }
+}
diff --git a/mpn/generic/gcd.c b/mpn/generic/gcd.c
new file mode 100644
index 000000000..4a364e526
--- /dev/null
+++ b/mpn/generic/gcd.c
@@ -0,0 +1,402 @@
+/*  mpn/gcd.c: mpn_gcd for gcd of two odd integers.
+
+Copyright (C) 1991, 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*  Integer greatest common divisor of two unsigned integers, using
+    the accelerated algorithm (see reference below).
+
+    mp_size_t mpn_gcd (vp, vsize, up, usize).
+ 
+    Preconditions [U = (up, usize) and V = (vp, vsize)]:
+
+    1.  V is odd.
+    2.  numbits(U) >= numbits(V).
+
+    Both U and V are destroyed by the operation.  The result is left at vp,
+    and its size is returned.
+
+    Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
+
+    Funding for this work has been partially provided by Conselho Nacional
+    de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant 
+    301314194-2, and was done while I was a visiting reseacher in the Instituto
+    de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
+
+    Refer to
+        K. Weber, The accelerated integer GCD algorithm, ACM Transactions on 
+        Mathematical Software, v. 21 (March), 1995, pp. 111-122.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/*  If MIN (usize, vsize) > ACCEL_THRESHOLD, then the accelerated algorithm is
+    used, otherwise the binary algorithm is used.  This may be adjusted for
+    different architectures.  */
+#ifndef ACCEL_THRESHOLD
+#define ACCEL_THRESHOLD 4
+#endif
+
+/*  When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
+    algorithm reduces using the bmod operation.  Otherwise, the k-ary reduction
+    is used.  0 <= BMOD_THRESHOLD < BITS_PER_MP_LIMB.  */
+enum
+  {
+    BMOD_THRESHOLD = BITS_PER_MP_LIMB/2,
+  };
+
+#define SIGN_BIT  (~(~(mp_limb_t)0 >> 1))
+
+
+#define SWAP_LIMB(UL, VL) do{mp_limb_t __l=(UL);(UL)=(VL);(VL)=__l;}while(0)
+#define SWAP_PTR(UP, VP) do{mp_ptr __p=(UP);(UP)=(VP);(VP)=__p;}while(0)
+#define SWAP_SZ(US, VS) do{mp_size_t __s=(US);(US)=(VS);(VS)=__s;}while(0)
+#define SWAP_MPN(UP, US, VP, VS) do{SWAP_PTR(UP,VP);SWAP_SZ(US,VS);}while(0)
+
+/*  Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
+    Both U and V must be odd.  */
+static __gmp_inline mp_size_t
+#if __STDC__
+gcd_2 (mp_ptr vp, mp_srcptr up)
+#else
+gcd_2 (vp, up)
+     mp_ptr vp;
+     mp_srcptr up;
+#endif
+{
+  mp_limb_t u0, u1, v0, v1;
+  mp_size_t vsize;
+
+  u0 = up[0], u1 = up[1], v0 = vp[0], v1 = vp[1];
+
+  while (u1 != v1 && u0 != v0)
+    {
+      unsigned long int r;
+      if (u1 > v1)
+	{
+	  u1 -= v1 + (u0 < v0), u0 -= v0;
+	  count_trailing_zeros (r, u0);
+	  u0 = u1 << (BITS_PER_MP_LIMB - r) | u0 >> r;
+	  u1 >>= r;
+	}
+      else  /*  u1 < v1.  */
+	{
+	  v1 -= u1 + (v0 < u0), v0 -= u0;
+	  count_trailing_zeros (r, v0);
+	  v0 = v1 << (BITS_PER_MP_LIMB - r) | v0 >> r;
+	  v1 >>= r;
+	}
+    }
+
+  vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);
+
+  /*  If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
+  if (u1 == v1 && u0 == v0)
+    return vsize;
+
+  v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0;
+  vp[0] = mpn_gcd_1 (vp, vsize, v0);
+
+  return 1;
+}
+
+/*  The function find_a finds 0 < N < 2^BITS_PER_MP_LIMB such that there exists
+    0 < |D| < 2^BITS_PER_MP_LIMB, and N == D * C mod 2^(2*BITS_PER_MP_LIMB).
+    In the reference article, D was computed along with N, but it is better to
+    compute D separately as D <-- N / C mod 2^(BITS_PER_MP_LIMB + 1), treating
+    the result as a twos' complement signed integer.
+
+    Initialize N1 to C mod 2^(2*BITS_PER_MP_LIMB).  According to the reference
+    article, N2 should be initialized to 2^(2*BITS_PER_MP_LIMB), but we use
+    2^(2*BITS_PER_MP_LIMB) - N1 to start the calculations within double 
+    precision.  If N2 > N1 initially, the first iteration of the while loop 
+    will swap them.  In all other situations, N1 >= N2 is maintained.   */
+
+static __gmp_inline mp_limb_t
+#if __STDC__
+find_a (mp_srcptr cp)
+#else
+find_a (cp)
+     mp_srcptr cp;
+#endif
+{
+  unsigned long int leading_zero_bits = 0;
+
+  mp_limb_t n1_l = cp[0];         /*  N1 == n1_h * 2^BITS_PER_MP_LIMB + n1_l.  */
+  mp_limb_t n1_h = cp[1];
+
+  mp_limb_t n2_l = -n1_l;         /*  N2 == n2_h * 2^BITS_PER_MP_LIMB + n2_l.  */
+  mp_limb_t n2_h = ~n1_h;
+
+  /*  Main loop.  */
+  while (n2_h)                  /*  While N2 >= 2^BITS_PER_MP_LIMB.  */
+    {
+      /*  N1 <-- N1 % N2.  */
+      if ((SIGN_BIT >> leading_zero_bits & n2_h) == 0)
+        {
+          unsigned long int i;
+          count_leading_zeros (i, n2_h);
+          i -= leading_zero_bits, leading_zero_bits += i;
+          n2_h = n2_h<<i | n2_l>>(BITS_PER_MP_LIMB - i), n2_l <<= i;
+          do
+            {
+              if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
+                n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
+              n2_l = n2_l>>1 | n2_h<<(BITS_PER_MP_LIMB - 1), n2_h >>= 1;
+              i -= 1;
+            }
+          while (i);
+        }
+      if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
+        n1_h -= n2_h + (n1_l < n2_l), n1_l -= n2_l;
+
+      SWAP_LIMB (n1_h, n2_h); 
+      SWAP_LIMB (n1_l, n2_l);
+    }
+
+  return n2_l;
+}
+
+mp_size_t
+#if __STDC__
+mpn_gcd (mp_ptr gp, mp_ptr vp, mp_size_t vsize, mp_ptr up, mp_size_t usize)
+#else
+mpn_gcd (gp, vp, vsize, up, usize)
+     mp_ptr gp;
+     mp_ptr vp;
+     mp_size_t vsize;
+     mp_ptr up;
+     mp_size_t usize;
+#endif
+{
+  mp_ptr orig_vp = vp;
+  mp_size_t orig_vsize = vsize;
+  int binary_gcd_ctr;		/*  Number of times binary gcd will execute.  */
+  TMP_DECL (marker);
+
+  TMP_MARK (marker);
+
+  /*  Use accelerated algorithm if vsize is over ACCEL_THRESHOLD.
+      Two EXTRA limbs for U and V are required for kary reduction.  */
+  if (vsize > ACCEL_THRESHOLD)
+    {
+      unsigned long int vbitsize, d;
+      mp_ptr orig_up = up;
+      mp_size_t orig_usize = usize;
+      mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);
+
+      MPN_COPY (anchor_up, orig_up, usize);
+      up = anchor_up;
+
+      count_leading_zeros (d, up[usize-1]);
+      d = usize * BITS_PER_MP_LIMB - d;
+      count_leading_zeros (vbitsize, vp[vsize-1]);
+      vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
+      d = d - vbitsize + 1;
+
+      /*  Use bmod reduction to quickly discover whether V divides U.  */
+      up[usize++] = 0;                  	/*  Insert leading zero.  */
+      mpn_bdivmod (up, up, usize, vp, vsize, d);
+
+      /*  Now skip U/V mod 2^d and any low zero limbs.  */
+      d /= BITS_PER_MP_LIMB, up += d, usize -= d;
+      while (usize != 0 && up[0] == 0)
+	up++, usize--;
+
+      if (usize == 0)				/*  GCD == ORIG_V.  */
+	goto done;
+
+      vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
+      MPN_COPY (vp, orig_vp, vsize);
+
+      do 					/*  Main loop.  */
+	{
+	  if (up[usize-1] & SIGN_BIT)		/*  U < 0; take twos' compl. */
+	    {
+	      mp_size_t i;
+	      anchor_up[0] = -up[0];
+	      for (i = 1; i < usize; i++)
+		anchor_up[i] = ~up[i];
+	      up = anchor_up;
+	    }
+
+	  MPN_NORMALIZE_NOT_ZERO (up, usize);
+
+	  if ((up[0] & 1) == 0)			/*  Result even; remove twos. */
+	    {
+	      unsigned long int r;
+	      count_trailing_zeros (r, up[0]);
+	      mpn_rshift (anchor_up, up, usize, r);
+	      usize -= (anchor_up[usize-1] == 0);
+	    }
+	  else if (anchor_up != up)
+	    MPN_COPY (anchor_up, up, usize);
+
+	  SWAP_MPN (anchor_up, usize, vp, vsize);
+	  up = anchor_up;
+
+	  if (vsize <= 2)		/*  Kary can't handle < 2 limbs and  */
+	    break;			/*  isn't efficient for == 2 limbs.  */
+
+	  d = vbitsize;
+	  count_leading_zeros (vbitsize, vp[vsize-1]);
+	  vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
+	  d = d - vbitsize + 1;
+
+	  if (d > BMOD_THRESHOLD)       /*  Bmod reduction.  */
+	    {
+	      up[usize++] = 0;
+	      mpn_bdivmod (up, up, usize, vp, vsize, d);
+	      d /= BITS_PER_MP_LIMB, up += d, usize -= d;
+	    }
+	  else                          /*  Kary reduction.  */
+	    {
+	      mp_limb_t bp[2], cp[2];
+
+	      /*  C <-- V/U mod 2^(2*BITS_PER_MP_LIMB).  */
+	      cp[0] = vp[0], cp[1] = vp[1];
+	      mpn_bdivmod (cp, cp, 2, up, 2, 2*BITS_PER_MP_LIMB);
+
+	      /*  U <-- find_a (C)  *  U.  */
+	      up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
+	      usize++;
+
+	      /*  B <-- A/C == U/V mod 2^(BITS_PER_MP_LIMB + 1).
+		  bp[0] <-- U/V mod 2^BITS_PER_MP_LIMB and
+		  bp[1] <-- ( (U - bp[0] * V)/2^BITS_PER_MP_LIMB ) / V mod 2. */
+	      bp[0] = up[0], bp[1] = up[1];
+	      mpn_bdivmod (bp, bp, 2, vp, 2, BITS_PER_MP_LIMB);
+	      bp[1] &= 1;	/*  Since V is odd, division is unnecessary.  */
+
+	      up[usize++] = 0;
+	      if (bp[1])	/*  B < 0: U <-- U + (-B)  * V.  */
+		{
+		   mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0]);
+		   mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
+		}
+	      else		/*  B >= 0:  U <-- U - B * V.  */
+		{
+		  mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
+		  mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
+		}
+
+	      up += 2, usize -= 2;  /*  At least two low limbs are zero.  */
+	    }
+
+	  /*  Must remove low zero limbs before complementing.  */
+	  while (usize != 0 && up[0] == 0)
+	    up++, usize--;
+	}
+      while (usize);
+
+      /*  Compute GCD (ORIG_V, GCD (ORIG_U, V)).  Binary will execute twice.  */
+      up = orig_up, usize = orig_usize;
+      binary_gcd_ctr = 2;
+    }
+  else
+    binary_gcd_ctr = 1;
+
+  /*  Finish up with the binary algorithm.  Executes once or twice.  */
+  for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
+    {
+      if (usize > 2)		/*  First make U close to V in size.  */
+	{
+	  unsigned long int vbitsize, d;
+	  count_leading_zeros (d, up[usize-1]);
+	  d = usize * BITS_PER_MP_LIMB - d;
+	  count_leading_zeros (vbitsize, vp[vsize-1]);
+	  vbitsize = vsize * BITS_PER_MP_LIMB - vbitsize;
+	  d = d - vbitsize - 1;
+	  if (d != -(unsigned long int)1 && d > 2)
+	    {
+	      mpn_bdivmod (up, up, usize, vp, vsize, d);  /*  Result > 0.  */
+	      d /= (unsigned long int)BITS_PER_MP_LIMB, up += d, usize -= d;
+	    }
+	}
+
+      /*  Start binary GCD.  */
+      do
+	{
+	  mp_size_t zeros;
+
+	  /*  Make sure U is odd.  */
+	  MPN_NORMALIZE (up, usize);
+	  while (up[0] == 0)
+	    up += 1, usize -= 1;
+	  if ((up[0] & 1) == 0)
+	    {
+	      unsigned long int r;
+	      count_trailing_zeros (r, up[0]);
+	      mpn_rshift (up, up, usize, r);
+	      usize -= (up[usize-1] == 0);
+	    }
+
+	  /*  Keep usize >= vsize.  */
+	  if (usize < vsize)
+	    SWAP_MPN (up, usize, vp, vsize);
+
+	  if (usize <= 2)               		/*  Double precision. */
+	    {
+              if (vsize == 1)
+		vp[0] = mpn_gcd_1 (up, usize, vp[0]);
+	      else
+		vsize = gcd_2 (vp, up);
+              break;					/*  Binary GCD done.  */
+	    }
+
+	  /*  Count number of low zero limbs of U - V.  */
+	  for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
+	    continue;					
+
+	  /*  If U < V, swap U and V; in any case, subtract V from U.  */
+	  if (zeros == vsize)				/*  Subtract done.  */
+	    up += zeros, usize -= zeros;
+	  else if (usize == vsize)
+	    {
+	      mp_size_t size = vsize;
+	      do
+		size--;
+	      while (up[size] == vp[size]);
+	      if (up[size] < vp[size])          	/*  usize == vsize.  */
+		SWAP_PTR (up, vp);
+	      up += zeros, usize = size + 1 - zeros;
+	      mpn_sub_n (up, up, vp + zeros, usize);
+	    }
+	  else
+	    {
+	      mp_size_t size = vsize - zeros;
+	      up += zeros, usize -= zeros; 
+	      if (mpn_sub_n (up, up, vp + zeros, size))       
+		{
+		  while (up[size] == 0)         	/*  Propagate borrow. */
+		    up[size++] = -(mp_limb_t)1;
+		  up[size] -= 1;
+		}
+	    }
+	}
+      while (usize);					/*  End binary GCD.  */
+    }
+
+done:
+  if (vp != gp)
+    MPN_COPY (gp, vp, vsize);
+  TMP_FREE (marker);
+  return vsize;
+}
diff --git a/mpn/generic/gcd_1.c b/mpn/generic/gcd_1.c
new file mode 100644
index 000000000..ebcdfb591
--- /dev/null
+++ b/mpn/generic/gcd_1.c
@@ -0,0 +1,73 @@
+/* mpn_gcd_1 --
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Does not work for U == 0 or V == 0.  It would be tough to make it work for
+   V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t.  */
+
+mp_limb_t
+mpn_gcd_1 (up, size, vlimb)
+     mp_srcptr up;
+     mp_size_t size;
+     mp_limb_t vlimb;
+{
+  mp_limb_t ulimb;
+  unsigned long int u_low_zero_bits, v_low_zero_bits;
+
+  if (size > 1)
+    {
+      ulimb = mpn_mod_1 (up, size, vlimb);
+      if (ulimb == 0)
+	return vlimb;
+    }
+  else
+    ulimb = up[0];
+
+  /*  Need to eliminate low zero bits.  */
+  count_trailing_zeros (u_low_zero_bits, ulimb);
+  ulimb >>= u_low_zero_bits;
+
+  count_trailing_zeros (v_low_zero_bits, vlimb);
+  vlimb >>= v_low_zero_bits;
+
+  while (ulimb != vlimb)
+    {
+      if (ulimb > vlimb)
+	{
+	  ulimb -= vlimb;
+	  do
+	    ulimb >>= 1;
+	  while ((ulimb & 1) == 0);
+	}
+      else /*  vlimb > ulimb.  */
+	{
+	  vlimb -= ulimb;
+	  do
+	    vlimb >>= 1;
+	  while ((vlimb & 1) == 0);
+	}
+    }
+
+  return  ulimb << MIN (u_low_zero_bits, v_low_zero_bits);
+}
diff --git a/mpn/generic/gcdext.c b/mpn/generic/gcdext.c
new file mode 100644
index 000000000..f54b7b785
--- /dev/null
+++ b/mpn/generic/gcdext.c
@@ -0,0 +1,422 @@
+/* mpn_gcdext -- Extended Greatest Common Divisor.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef EXTEND
+#define EXTEND 1
+#endif
+
+#if STAT
+int arr[BITS_PER_MP_LIMB];
+#endif
+
+#define SGN(A) (((A) < 0) ? -1 : ((A) > 0))
+
+/* Idea 1: After we have performed a full division, don't shift operands back,
+	   but instead account for the extra factors-of-2 thus introduced.
+   Idea 2: Simple generalization to use divide-and-conquer would give us an
+	   algorithm that runs faster than O(n^2).
+   Idea 3: The input numbers need less space as the computation progresses,
+	   while the s0 and s1 variables need more space.  To save space, we
+	   could make them share space, and have the latter variables grow
+	   into the former.  */
+
+/* Precondition: U >= V.  */
+
+mp_size_t
+#if EXTEND
+mpn_gcdext (mp_ptr gp, mp_ptr s0p,
+	    mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
+#else
+mpn_gcd (mp_ptr gp,
+	 mp_ptr up, mp_size_t size, mp_ptr vp, mp_size_t vsize)
+#endif
+{
+  mp_limb_t uh, vh;
+  mp_limb_signed_t A, B, C, D;
+  int cnt;
+  mp_ptr tp, wp;
+#if RECORD
+  mp_limb_signed_t min = 0, max = 0;
+#endif
+#if EXTEND
+  mp_ptr s1p;
+  mp_ptr orig_s0p = s0p;
+  mp_size_t ssize, orig_size = size;
+  TMP_DECL (mark);
+
+  TMP_MARK (mark);
+
+  tp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
+  wp = (mp_ptr) TMP_ALLOC ((size + 1) * BYTES_PER_MP_LIMB);
+  s1p = (mp_ptr) TMP_ALLOC (size * BYTES_PER_MP_LIMB);
+
+  MPN_ZERO (s0p, size);
+  MPN_ZERO (s1p, size);
+
+  s0p[0] = 1;
+  s1p[0] = 0;
+  ssize = 1;
+#endif
+
+  if (size > vsize)
+    {
+      /* Normalize V (and shift up U the same amount).  */
+      count_leading_zeros (cnt, vp[vsize - 1]);
+      if (cnt != 0)
+	{
+	  mp_limb_t cy;
+	  mpn_lshift (vp, vp, vsize, cnt);
+	  cy = mpn_lshift (up, up, size, cnt);
+	  up[size] = cy;
+	  size += cy != 0;
+	}
+
+      mpn_divmod (up + vsize, up, size, vp, vsize);
+#if EXTEND
+      /* This is really what it boils down to in this case... */
+      s0p[0] = 0;
+      s1p[0] = 1;
+#endif
+      size = vsize;
+      if (cnt != 0)
+	{
+	  mpn_rshift (up, up, size, cnt);
+	  mpn_rshift (vp, vp, size, cnt);
+	}
+      {
+	mp_ptr xp;
+	xp = up; up = vp; vp = xp;
+      }
+    }
+
+  for (;;)
+    {
+      /* Figure out exact size of V.  */
+      vsize = size;
+      MPN_NORMALIZE (vp, vsize);
+      if (vsize <= 1)
+	break;
+
+      /* Make UH be the most significant limb of U, and make VH be
+	 corresponding bits from V.  */
+      uh = up[size - 1];
+      vh = vp[size - 1];
+      count_leading_zeros (cnt, uh);
+      if (cnt != 0)
+	{
+	  uh = (uh << cnt) | (up[size - 2] >> (BITS_PER_MP_LIMB - cnt));
+	  vh = (vh << cnt) | (vp[size - 2] >> (BITS_PER_MP_LIMB - cnt));
+	}
+
+#if 0
+      /* For now, only handle BITS_PER_MP_LIMB-1 bits.  This makes
+	 room for sign bit.  */
+      uh >>= 1;
+      vh >>= 1;
+#endif
+      A = 1;
+      B = 0;
+      C = 0;
+      D = 1;
+
+      for (;;)
+	{
+	  mp_limb_signed_t q, T;
+	  if (vh + C == 0 || vh + D == 0)
+	    break;
+
+	  q = (uh + A) / (vh + C);
+	  if (q != (uh + B) / (vh + D))
+	    break;
+
+	  T = A - q * C;
+	  A = C;
+	  C = T;
+	  T = B - q * D;
+	  B = D;
+	  D = T;
+	  T = uh - q * vh;
+	  uh = vh;
+	  vh = T;
+	}
+
+#if RECORD
+      min = MIN (A, min);  min = MIN (B, min);
+      min = MIN (C, min);  min = MIN (D, min);
+      max = MAX (A, max);  max = MAX (B, max);
+      max = MAX (C, max);  max = MAX (D, max);
+#endif
+
+      if (B == 0)
+	{
+	  mp_limb_t qh;
+	  mp_size_t i;
+
+	  /* This is quite rare.  I.e., optimize something else!  */
+
+	  /* Normalize V (and shift up U the same amount).  */
+	  count_leading_zeros (cnt, vp[vsize - 1]);
+	  if (cnt != 0)
+	    {
+	      mp_limb_t cy;
+	      mpn_lshift (vp, vp, vsize, cnt);
+	      cy = mpn_lshift (up, up, size, cnt);
+	      up[size] = cy;
+	      size += cy != 0;
+	    }
+
+	  qh = mpn_divmod (up + vsize, up, size, vp, vsize);
+#if EXTEND
+	  MPN_COPY (tp, s0p, ssize);
+	  for (i = 0; i < size - vsize; i++)
+	    {
+	      mp_limb_t cy;
+	      cy = mpn_addmul_1 (tp + i, s1p, ssize, up[vsize + i]);
+	      if (cy != 0)
+		tp[ssize++] = cy;
+	    }
+	  if (qh != 0)
+	    {
+	      mp_limb_t cy;
+	      abort ();
+	      /* XXX since qh == 1, mpn_addmul_1 is overkill */
+	      cy = mpn_addmul_1 (tp + size - vsize, s1p, ssize, qh);
+	      if (cy != 0)
+		tp[ssize++] = cy;
+      	    }
+#if 0
+	  MPN_COPY (s0p, s1p, ssize); /* should be old ssize, kind of */
+	  MPN_COPY (s1p, tp, ssize);
+#else
+	  {
+	    mp_ptr xp;
+	    xp = s0p; s0p = s1p; s1p = xp;
+	    xp = s1p; s1p = tp; tp = xp;
+	  }
+#endif
+#endif
+	  size = vsize;
+	  if (cnt != 0)
+	    {
+	      mpn_rshift (up, up, size, cnt);
+	      mpn_rshift (vp, vp, size, cnt);
+	    }
+
+	  {
+	    mp_ptr xp;
+	    xp = up; up = vp; vp = xp;
+	  }
+	  MPN_NORMALIZE (up, size);
+	}
+      else
+	{
+	  /* T = U*A + V*B
+	     W = U*C + V*D
+	     U = T
+	     V = W	   */
+
+	  if (SGN(A) == SGN(B))	/* should be different sign */
+	    abort ();
+	  if (SGN(C) == SGN(D))	/* should be different sign */
+	    abort ();
+#if STAT
+	  { mp_limb_t x;
+	    x = ABS (A) | ABS (B) | ABS (C) | ABS (D);
+	    count_leading_zeros (cnt, x);
+	    arr[BITS_PER_MP_LIMB - cnt]++; }
+#endif
+	  if (A == 0)
+	    {
+	      if (B != 1) abort ();
+	      MPN_COPY (tp, vp, size);
+	    }
+	  else
+	    {
+	      if (A < 0)
+		{
+		  mpn_mul_1 (tp, vp, size, B);
+		  mpn_submul_1 (tp, up, size, -A);
+		}
+	      else
+		{
+		  mpn_mul_1 (tp, up, size, A);
+		  mpn_submul_1 (tp, vp, size, -B);
+		}
+	    }
+	  if (C < 0)
+	    {
+	      mpn_mul_1 (wp, vp, size, D);
+	      mpn_submul_1 (wp, up, size, -C);
+	    }
+	  else
+	    {
+	      mpn_mul_1 (wp, up, size, C);
+	      mpn_submul_1 (wp, vp, size, -D);
+	    }
+
+	  {
+	    mp_ptr xp;
+	    xp = tp; tp = up; up = xp;
+	    xp = wp; wp = vp; vp = xp;
+	  }
+
+#if EXTEND
+	  { mp_limb_t cy;
+	  MPN_ZERO (tp, orig_size);
+	  if (A == 0)
+	    {
+	      if (B != 1) abort ();
+	      MPN_COPY (tp, s1p, ssize);
+	    }
+	  else
+	    {
+	      if (A < 0)
+		{
+		  cy = mpn_mul_1 (tp, s1p, ssize, B);
+		  cy += mpn_addmul_1 (tp, s0p, ssize, -A);
+		}
+	      else
+		{
+		  cy = mpn_mul_1 (tp, s0p, ssize, A);
+		  cy += mpn_addmul_1 (tp, s1p, ssize, -B);
+		}
+	      if (cy != 0)
+		tp[ssize++] = cy;
+	    }
+	  MPN_ZERO (wp, orig_size);
+	  if (C < 0)
+	    {
+	      cy = mpn_mul_1 (wp, s1p, ssize, D);
+	      cy += mpn_addmul_1 (wp, s0p, ssize, -C);
+	    }
+	  else
+	    {
+	      cy = mpn_mul_1 (wp, s0p, ssize, C);
+	      cy += mpn_addmul_1 (wp, s1p, ssize, -D);
+	    }
+	  if (cy != 0)
+	    wp[ssize++] = cy;
+	  }
+	  {
+	    mp_ptr xp;
+	    xp = tp; tp = s0p; s0p = xp;
+	    xp = wp; wp = s1p; s1p = xp;
+	  }
+#endif
+#if 0	/* Is it a win to remove multiple zeros here? */
+	  MPN_NORMALIZE (up, size);
+#else
+	  if (up[size - 1] == 0)
+	    size--;
+#endif
+	}
+    }
+
+#if RECORD
+  printf ("min: %ld\n", min);
+  printf ("max: %ld\n", max);
+#endif
+
+  if (vsize == 0)
+    {
+      if (gp != up)
+	MPN_COPY (gp, up, size);
+#if EXTEND
+      if (orig_s0p != s0p)
+	MPN_COPY (orig_s0p, s0p, ssize);
+#endif
+      TMP_FREE (mark);
+      return size;
+    }
+  else
+    {
+      mp_limb_t vl, ul, t;
+#if EXTEND
+      mp_limb_t cy;
+      mp_size_t i;
+#endif
+      vl = vp[0];
+#if EXTEND
+      t = mpn_divmod_1 (wp, up, size, vl);
+      MPN_COPY (tp, s0p, ssize);
+      for (i = 0; i < size; i++)
+	{
+	  cy = mpn_addmul_1 (tp + i, s1p, ssize, wp[i]);
+	  if (cy != 0)
+	    tp[ssize++] = cy;
+	}
+#if 0
+      MPN_COPY (s0p, s1p, ssize);
+      MPN_COPY (s1p, tp, ssize);
+#else
+      {
+	mp_ptr xp;
+	xp = s0p; s0p = s1p; s1p = xp;
+	xp = s1p; s1p = tp; tp = xp;
+      }
+#endif
+#else
+      t = mpn_mod_1 (up, size, vl);
+#endif
+      ul = vl;
+      vl = t;
+      while (vl != 0)
+	{
+	  mp_limb_t t;
+#if EXTEND
+	  mp_limb_t q, cy;
+	  q = ul / vl;
+	  t = ul - q*vl;
+	  
+	  MPN_COPY (tp, s0p, ssize);
+	  cy = mpn_addmul_1 (tp, s1p, ssize, q);
+	  if (cy != 0)
+	    tp[ssize++] = cy;
+#if 0
+	  MPN_COPY (s0p, s1p, ssize);
+	  MPN_COPY (s1p, tp, ssize);
+#else
+	  {
+	    mp_ptr xp;
+	    xp = s0p; s0p = s1p; s1p = xp;
+	    xp = s1p; s1p = tp; tp = xp;
+	  }
+#endif
+
+#else
+	  t = ul % vl;
+#endif
+	  ul = vl;
+	  vl = t;
+	}
+      gp[0] = ul;
+#if EXTEND
+      if (orig_s0p != s0p)
+	MPN_COPY (orig_s0p, s0p, ssize);
+#endif
+      TMP_FREE (mark);
+      return 1;
+    }
+}
diff --git a/mpn/generic/get_str.c b/mpn/generic/get_str.c
new file mode 100644
index 000000000..0e7fc60ef
--- /dev/null
+++ b/mpn/generic/get_str.c
@@ -0,0 +1,211 @@
+/* mpn_get_str -- Convert a MSIZE long limb vector pointed to by MPTR
+   to a printable string in STR in base BASE.
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Convert the limb vector pointed to by MPTR and MSIZE long to a
+   char array, using base BASE for the result array.  Store the
+   result in the character array STR.  STR must point to an array with
+   space for the largest possible number represented by a MSIZE long
+   limb vector + 1 extra character.
+
+   The result is NOT in Ascii, to convert it to printable format, add
+   '0' or 'A' depending on the base and range.
+
+   Return the number of digits in the result string.
+   This may include some leading zeros.
+
+   The limb vector pointed to by MPTR is clobbered.  */
+
+size_t
+mpn_get_str (str, base, mptr, msize)
+     unsigned char *str;
+     int base;
+     mp_ptr mptr;
+     mp_size_t msize;
+{
+  mp_limb_t big_base;
+#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME
+  int normalization_steps;
+#endif
+#if UDIV_TIME > 2 * UMUL_TIME
+  mp_limb_t big_base_inverted;
+#endif
+  unsigned int dig_per_u;
+  mp_size_t out_len;
+  register unsigned char *s;
+
+  big_base = __mp_bases[base].big_base;
+
+  s = str;
+
+  /* Special case zero, as the code below doesn't handle it.  */
+  if (msize == 0)
+    {
+      s[0] = 0;
+      return 1;
+    }
+
+  if ((base & (base - 1)) == 0)
+    {
+      /* The base is a power of 2.  Make conversion from most
+	 significant side.  */
+      mp_limb_t n1, n0;
+      register int bits_per_digit = big_base;
+      register int x;
+      register int bit_pos;
+      register int i;
+
+      n1 = mptr[msize - 1];
+      count_leading_zeros (x, n1);
+
+	/* BIT_POS should be R when input ends in least sign. nibble,
+	   R + bits_per_digit * n when input ends in n:th least significant
+	   nibble. */
+
+      {
+	int bits;
+
+	bits = BITS_PER_MP_LIMB * msize - x;
+	x = bits % bits_per_digit;
+	if (x != 0)
+	  bits += bits_per_digit - x;
+	bit_pos = bits - (msize - 1) * BITS_PER_MP_LIMB;
+      }
+
+      /* Fast loop for bit output.  */
+      i = msize - 1;
+      for (;;)
+	{
+	  bit_pos -= bits_per_digit;
+	  while (bit_pos >= 0)
+	    {
+	      *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
+	      bit_pos -= bits_per_digit;
+	    }
+	  i--;
+	  if (i < 0)
+	    break;
+	  n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
+	  n1 = mptr[i];
+	  bit_pos += BITS_PER_MP_LIMB;
+	  *s++ = n0 | (n1 >> bit_pos);
+	}
+
+      *s = 0;
+
+      return s - str;
+    }
+  else
+    {
+      /* General case.  The base is not a power of 2.  Make conversion
+	 from least significant end.  */
+
+      /* If udiv_qrnnd only handles divisors with the most significant bit
+	 set, prepare BIG_BASE for being a divisor by shifting it to the
+	 left exactly enough to set the most significant bit.  */
+#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME
+      count_leading_zeros (normalization_steps, big_base);
+      big_base <<= normalization_steps;
+#if UDIV_TIME > 2 * UMUL_TIME
+      /* Get the fixed-point approximation to 1/(BIG_BASE << NORMALIZATION_STEPS).  */
+      big_base_inverted = __mp_bases[base].big_base_inverted;
+#endif
+#endif
+
+      dig_per_u = __mp_bases[base].chars_per_limb;
+      out_len = ((size_t) msize * BITS_PER_MP_LIMB
+		 * __mp_bases[base].chars_per_bit_exactly) + 1;
+      s += out_len;
+
+      while (msize != 0)
+	{
+	  int i;
+	  mp_limb_t n0, n1;
+
+#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME
+	  /* If we shifted BIG_BASE above, shift the dividend too, to get
+	     the right quotient.  We need to do this every loop,
+	     since the intermediate quotients are OK, but the quotient from
+	     one turn in the loop is going to be the dividend in the
+	     next turn, and the dividend needs to be up-shifted.  */
+	  if (normalization_steps != 0)
+	    {
+	      n0 = mpn_lshift (mptr, mptr, msize, normalization_steps);
+
+	      /* If the shifting gave a carry out limb, store it and
+		 increase the length.  */
+	      if (n0 != 0)
+		{
+		  mptr[msize] = n0;
+		  msize++;
+		}
+	    }
+#endif
+
+	  /* Divide the number at TP with BIG_BASE to get a quotient and a
+	     remainder.  The remainder is our new digit in base BIG_BASE.  */
+	  i = msize - 1;
+	  n1 = mptr[i];
+
+	  if (n1 >= big_base)
+	    n1 = 0;
+	  else
+	    {
+	      msize--;
+	      i--;
+	    }
+
+	  for (; i >= 0; i--)
+	    {
+	      n0 = mptr[i];
+#if UDIV_TIME > 2 * UMUL_TIME
+	      udiv_qrnnd_preinv (mptr[i], n1, n1, n0, big_base, big_base_inverted);
+#else
+	      udiv_qrnnd (mptr[i], n1, n1, n0, big_base);
+#endif
+	    }
+
+#if UDIV_NEEDS_NORMALIZATION || UDIV_TIME > 2 * UMUL_TIME
+	  /* If we shifted above (at previous UDIV_NEEDS_NORMALIZATION tests)
+	     the remainder will be up-shifted here.  Compensate.  */
+	  n1 >>= normalization_steps;
+#endif
+
+	  /* Convert N1 from BIG_BASE to a string of digits in BASE
+	     using single precision operations.  */
+	  for (i = dig_per_u - 1; i >= 0; i--)
+	    {
+	      *--s = n1 % base;
+	      n1 /= base;
+	      if (n1 == 0 && msize == 0)
+		break;
+	    }
+	}
+
+      while (s != str)
+	*--s = 0;
+      return out_len;
+    }
+}
diff --git a/mpn/generic/gmp-mparam.h b/mpn/generic/gmp-mparam.h
new file mode 100644
index 000000000..7c885575b
--- /dev/null
+++ b/mpn/generic/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
diff --git a/mpn/generic/hamdist.c b/mpn/generic/hamdist.c
new file mode 100644
index 000000000..2190b636f
--- /dev/null
+++ b/mpn/generic/hamdist.c
@@ -0,0 +1,88 @@
+/* mpn_hamdist --
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if defined __GNUC__
+#if defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
+#define popc_limb(a) \
+  ({									\
+    DItype __res;							\
+    asm ("popc %1,%0" : "=r" (__res) : "rI" (a));			\
+    __res;								\
+  })
+#endif
+#endif
+
+#ifndef popc_limb
+
+/* Cool population count of a mp_limb_t.
+   You have to figure out how this works, I won't tell you!  */
+
+static inline unsigned int
+popc_limb (x)
+     mp_limb_t x;
+{
+#if BITS_PER_MP_LIMB == 64
+  /* We have to go into some trouble to define these constants.
+     (For mp_limb_t being `long long'.)  */
+  mp_limb_t cnst;
+  cnst = 0x55555555L | ((mp_limb_t) 0x55555555L << BITS_PER_MP_LIMB/2);
+  x = ((x & ~cnst) >> 1) + (x & cnst);
+  cnst = 0x33333333L | ((mp_limb_t) 0x33333333L << BITS_PER_MP_LIMB/2);
+  x = ((x & ~cnst) >> 2) + (x & cnst);
+  cnst = 0x0f0f0f0fL | ((mp_limb_t) 0x0f0f0f0fL << BITS_PER_MP_LIMB/2);
+  x = ((x >> 4) + x) & cnst;
+  x = ((x >> 8) + x);
+  x = ((x >> 16) + x);
+  x = ((x >> 32) + x) & 0xff;
+#endif
+#if BITS_PER_MP_LIMB == 32
+  x = ((x >> 1) & 0x55555555L) + (x & 0x55555555L);
+  x = ((x >> 2) & 0x33333333L) + (x & 0x33333333L);
+  x = ((x >> 4) + x) & 0x0f0f0f0fL;
+  x = ((x >> 8) + x);
+  x = ((x >> 16) + x) & 0xff;
+#endif
+  return x;
+}
+#endif
+
+unsigned long int
+#if __STDC__
+mpn_hamdist (mp_srcptr up, mp_srcptr vp, mp_size_t size)
+#else
+mpn_hamdist (up, vp, size)
+     register mp_srcptr up;
+     register mp_srcptr vp;
+     register mp_size_t size;
+#endif
+{
+  unsigned long int hamdist;
+  mp_size_t i;
+
+  hamdist = 0;
+  for (i = 0; i < size; i++)
+    hamdist += popc_limb (up[i] ^ vp[i]);
+
+  return hamdist;
+}
diff --git a/mpn/generic/inlines.c b/mpn/generic/inlines.c
new file mode 100644
index 000000000..dca305e6e
--- /dev/null
+++ b/mpn/generic/inlines.c
@@ -0,0 +1,3 @@
+#define _FORCE_INLINES
+#define _EXTERN_INLINE /* empty */
+#include "gmp.h"
diff --git a/mpn/generic/lshift.c b/mpn/generic/lshift.c
new file mode 100644
index 000000000..e244bc52f
--- /dev/null
+++ b/mpn/generic/lshift.c
@@ -0,0 +1,87 @@
+/* mpn_lshift -- Shift left low level.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
+   and store the USIZE least significant digits of the result at WP.
+   Return the bits shifted out from the most significant digit.
+
+   Argument constraints:
+   1. 0 < CNT < BITS_PER_MP_LIMB
+   2. If the result is to be written over the input, WP must be >= UP.
+*/
+
+mp_limb_t
+#if __STDC__
+mpn_lshift (register mp_ptr wp,
+	    register mp_srcptr up, mp_size_t usize,
+	    register unsigned int cnt)
+#else
+mpn_lshift (wp, up, usize, cnt)
+     register mp_ptr wp;
+     register mp_srcptr up;
+     mp_size_t usize;
+     register unsigned int cnt;
+#endif
+{
+  register mp_limb_t high_limb, low_limb;
+  register unsigned sh_1, sh_2;
+  register mp_size_t i;
+  mp_limb_t retval;
+
+#ifdef DEBUG
+  if (usize == 0 || cnt == 0)
+    abort ();
+#endif
+
+  sh_1 = cnt;
+#if 0
+  if (sh_1 == 0)
+    {
+      if (wp != up)
+	{
+	  /* Copy from high end to low end, to allow specified input/output
+	     overlapping.  */
+	  for (i = usize - 1; i >= 0; i--)
+	    wp[i] = up[i];
+	}
+      return 0;
+    }
+#endif
+
+  wp += 1;
+  sh_2 = BITS_PER_MP_LIMB - sh_1;
+  i = usize - 1;
+  low_limb = up[i];
+  retval = low_limb >> sh_2;
+  high_limb = low_limb;
+  while (--i >= 0)
+    {
+      low_limb = up[i];
+      wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
+      high_limb = low_limb;
+    }
+  wp[i] = high_limb << sh_1;
+
+  return retval;
+}
diff --git a/mpn/generic/mod_1.c b/mpn/generic/mod_1.c
new file mode 100644
index 000000000..314d11b30
--- /dev/null
+++ b/mpn/generic/mod_1.c
@@ -0,0 +1,197 @@
+/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
+   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+   Return the single-limb remainder.
+   There are no constraints on the value of the divisor.
+
+Copyright (C) 1991, 1993, 1994, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+/* FIXME: We should be using invert_limb (or invert_normalized_limb)
+   here (not udiv_qrnnd).  */
+
+mp_limb_t
+#if __STDC__
+mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size,
+	   mp_limb_t divisor_limb)
+#else
+mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb)
+     mp_srcptr dividend_ptr;
+     mp_size_t dividend_size;
+     mp_limb_t divisor_limb;
+#endif
+{
+  mp_size_t i;
+  mp_limb_t n1, n0, r;
+  int dummy;
+
+  /* Botch: Should this be handled at all?  Rely on callers?  */
+  if (dividend_size == 0)
+    return 0;
+
+  /* If multiplication is much faster than division, and the
+     dividend is large, pre-invert the divisor, and use
+     only multiplications in the inner loop.  */
+
+  /* This test should be read:
+       Does it ever help to use udiv_qrnnd_preinv?
+	 && Does what we save compensate for the inversion overhead?  */
+  if (UDIV_TIME > (2 * UMUL_TIME + 6)
+      && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME)
+    {
+      int normalization_steps;
+
+      count_leading_zeros (normalization_steps, divisor_limb);
+      if (normalization_steps != 0)
+	{
+	  mp_limb_t divisor_limb_inverted;
+
+	  divisor_limb <<= normalization_steps;
+
+	  /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+	     result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+	     most significant bit (with weight 2**N) implicit.  */
+
+	  /* Special case for DIVISOR_LIMB == 100...000.  */
+	  if (divisor_limb << 1 == 0)
+	    divisor_limb_inverted = ~(mp_limb_t) 0;
+	  else
+	    udiv_qrnnd (divisor_limb_inverted, dummy,
+			-divisor_limb, 0, divisor_limb);
+
+	  n1 = dividend_ptr[dividend_size - 1];
+	  r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+	  /* Possible optimization:
+	     if (r == 0
+	     && divisor_limb > ((n1 << normalization_steps)
+			     | (dividend_ptr[dividend_size - 2] >> ...)))
+	     ...one division less... */
+
+	  for (i = dividend_size - 2; i >= 0; i--)
+	    {
+	      n0 = dividend_ptr[i];
+	      udiv_qrnnd_preinv (dummy, r, r,
+				 ((n1 << normalization_steps)
+				  | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+				 divisor_limb, divisor_limb_inverted);
+	      n1 = n0;
+	    }
+	  udiv_qrnnd_preinv (dummy, r, r,
+			     n1 << normalization_steps,
+			     divisor_limb, divisor_limb_inverted);
+	  return r >> normalization_steps;
+	}
+      else
+	{
+	  mp_limb_t divisor_limb_inverted;
+
+	  /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+	     result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+	     most significant bit (with weight 2**N) implicit.  */
+
+	  /* Special case for DIVISOR_LIMB == 100...000.  */
+	  if (divisor_limb << 1 == 0)
+	    divisor_limb_inverted = ~(mp_limb_t) 0;
+	  else
+	    udiv_qrnnd (divisor_limb_inverted, dummy,
+			-divisor_limb, 0, divisor_limb);
+
+	  i = dividend_size - 1;
+	  r = dividend_ptr[i];
+
+	  if (r >= divisor_limb)
+	    r = 0;
+	  else
+	    i--;
+
+	  for (; i >= 0; i--)
+	    {
+	      n0 = dividend_ptr[i];
+	      udiv_qrnnd_preinv (dummy, r, r,
+				 n0, divisor_limb, divisor_limb_inverted);
+	    }
+	  return r;
+	}
+    }
+  else
+    {
+      if (UDIV_NEEDS_NORMALIZATION)
+	{
+	  int normalization_steps;
+
+	  count_leading_zeros (normalization_steps, divisor_limb);
+	  if (normalization_steps != 0)
+	    {
+	      divisor_limb <<= normalization_steps;
+
+	      n1 = dividend_ptr[dividend_size - 1];
+	      r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);
+
+	      /* Possible optimization:
+		 if (r == 0
+		 && divisor_limb > ((n1 << normalization_steps)
+				 | (dividend_ptr[dividend_size - 2] >> ...)))
+		 ...one division less... */
+
+	      for (i = dividend_size - 2; i >= 0; i--)
+		{
+		  n0 = dividend_ptr[i];
+		  udiv_qrnnd (dummy, r, r,
+			      ((n1 << normalization_steps)
+			       | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
+			      divisor_limb);
+		  n1 = n0;
+		}
+	      udiv_qrnnd (dummy, r, r,
+			  n1 << normalization_steps,
+			  divisor_limb);
+	      return r >> normalization_steps;
+	    }
+	}
+      /* No normalization needed, either because udiv_qrnnd doesn't require
+	 it, or because DIVISOR_LIMB is already normalized.  */
+
+      i = dividend_size - 1;
+      r = dividend_ptr[i];
+
+      if (r >= divisor_limb)
+	r = 0;
+      else
+	i--;
+
+      for (; i >= 0; i--)
+	{
+	  n0 = dividend_ptr[i];
+	  udiv_qrnnd (dummy, r, r, n0, divisor_limb);
+	}
+      return r;
+    }
+}
diff --git a/mpn/generic/mul.c b/mpn/generic/mul.c
new file mode 100644
index 000000000..dcf8cb4da
--- /dev/null
+++ b/mpn/generic/mul.c
@@ -0,0 +1,152 @@
+/* mpn_mul -- Multiply two natural numbers.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
+   and v (pointed to by VP, with VSIZE limbs), and store the result at
+   PRODP.  USIZE + VSIZE limbs are always stored, but if the input
+   operands are normalized.  Return the most significant limb of the
+   result.
+
+   NOTE: The space pointed to by PRODP is overwritten before finished
+   with U and V, so overlap is an error.
+
+   Argument constraints:
+   1. USIZE >= VSIZE.
+   2. PRODP != UP and PRODP != VP, i.e. the destination
+      must be distinct from the multiplier and the multiplicand.  */
+
+/* If KARATSUBA_THRESHOLD is not already defined, define it to a
+   value which is good on most machines.  */
+#ifndef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 32
+#endif
+
+mp_limb_t
+#if __STDC__
+mpn_mul (mp_ptr prodp,
+	 mp_srcptr up, mp_size_t usize,
+	 mp_srcptr vp, mp_size_t vsize)
+#else
+mpn_mul (prodp, up, usize, vp, vsize)
+     mp_ptr prodp;
+     mp_srcptr up;
+     mp_size_t usize;
+     mp_srcptr vp;
+     mp_size_t vsize;
+#endif
+{
+  mp_ptr prod_endp = prodp + usize + vsize - 1;
+  mp_limb_t cy;
+  mp_ptr tspace;
+  TMP_DECL (marker);
+
+  if (vsize < KARATSUBA_THRESHOLD)
+    {
+      /* Handle simple cases with traditional multiplication.
+
+	 This is the most critical code of the entire function.  All
+	 multiplies rely on this, both small and huge.  Small ones arrive
+	 here immediately.  Huge ones arrive here as this is the base case
+	 for Karatsuba's recursive algorithm below.  */
+      mp_size_t i;
+      mp_limb_t cy_limb;
+      mp_limb_t v_limb;
+
+      if (vsize == 0)
+	return 0;
+
+      /* Multiply by the first limb in V separately, as the result can be
+	 stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+      v_limb = vp[0];
+      if (v_limb <= 1)
+	{
+	  if (v_limb == 1)
+	    MPN_COPY (prodp, up, usize);
+	  else
+	    MPN_ZERO (prodp, usize);
+	  cy_limb = 0;
+	}
+      else
+	cy_limb = mpn_mul_1 (prodp, up, usize, v_limb);
+
+      prodp[usize] = cy_limb;
+      prodp++;
+
+      /* For each iteration in the outer loop, multiply one limb from
+	 U with one limb from V, and add it to PROD.  */
+      for (i = 1; i < vsize; i++)
+	{
+	  v_limb = vp[i];
+	  if (v_limb <= 1)
+	    {
+	      cy_limb = 0;
+	      if (v_limb == 1)
+		cy_limb = mpn_add_n (prodp, prodp, up, usize);
+	    }
+	  else
+	    cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb);
+
+	  prodp[usize] = cy_limb;
+	  prodp++;
+	}
+      return cy_limb;
+    }
+
+  TMP_MARK (marker);
+
+  tspace = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB);
+  MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace);
+
+  prodp += vsize;
+  up += vsize;
+  usize -= vsize;
+  if (usize >= vsize)
+    {
+      mp_ptr tp = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB);
+      do
+	{
+	  MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace);
+	  cy = mpn_add_n (prodp, prodp, tp, vsize);
+	  mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
+	  prodp += vsize;
+	  up += vsize;
+	  usize -= vsize;
+	}
+      while (usize >= vsize);
+    }
+
+  /* True: usize < vsize.  */
+
+  /* Make life simple: Recurse.  */
+
+  if (usize != 0)
+    {
+      mpn_mul (tspace, vp, vsize, up, usize);
+      cy = mpn_add_n (prodp, prodp, tspace, vsize);
+      mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
+    }
+
+  TMP_FREE (marker);
+  return *prod_endp;
+}
diff --git a/mpn/generic/mul_1.c b/mpn/generic/mul_1.c
new file mode 100644
index 000000000..2de680a64
--- /dev/null
+++ b/mpn/generic/mul_1.c
@@ -0,0 +1,59 @@
+/* mpn_mul_1 -- Multiply a limb vector with a single limb and
+   store the product in a second limb vector.
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     mp_size_t s1_size;
+     register mp_limb_t s2_limb;
+{
+  register mp_limb_t cy_limb;
+  register mp_size_t j;
+  register mp_limb_t prod_high, prod_low;
+
+  /* The loop counter and index J goes from -S1_SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -s1_size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  s1_ptr -= j;
+  res_ptr -= j;
+
+  cy_limb = 0;
+  do
+    {
+      umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
+
+      prod_low += cy_limb;
+      cy_limb = (prod_low < cy_limb) + prod_high;
+
+      res_ptr[j] = prod_low;
+    }
+  while (++j != 0);
+
+  return cy_limb;
+}
diff --git a/mpn/generic/mul_n.c b/mpn/generic/mul_n.c
new file mode 100644
index 000000000..b38e8ad17
--- /dev/null
+++ b/mpn/generic/mul_n.c
@@ -0,0 +1,401 @@
+/* mpn_mul_n -- Multiply two natural numbers of length n.
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
+   both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
+   always stored.  Return the most significant limb.
+
+   Argument constraints:
+   1. PRODP != UP and PRODP != VP, i.e. the destination
+      must be distinct from the multiplier and the multiplicand.  */
+
+/* If KARATSUBA_THRESHOLD is not already defined, define it to a
+   value which is good on most machines.  */
+#ifndef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 32
+#endif
+
+/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
+#if KARATSUBA_THRESHOLD < 2
+#undef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 2
+#endif
+
+/* Handle simple cases with traditional multiplication.
+
+   This is the most critical code of multiplication.  All multiplies rely
+   on this, both small and huge.  Small ones arrive here immediately.  Huge
+   ones arrive here as this is the base case for Karatsuba's recursive
+   algorithm below.  */
+
+void
+#if __STDC__
+impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+#else
+impn_mul_n_basecase (prodp, up, vp, size)
+     mp_ptr prodp;
+     mp_srcptr up;
+     mp_srcptr vp;
+     mp_size_t size;
+#endif
+{
+  mp_size_t i;
+  mp_limb_t cy_limb;
+  mp_limb_t v_limb;
+
+  /* Multiply by the first limb in V separately, as the result can be
+     stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+  v_limb = vp[0];
+  if (v_limb <= 1)
+    {
+      if (v_limb == 1)
+	MPN_COPY (prodp, up, size);
+      else
+	MPN_ZERO (prodp, size);
+      cy_limb = 0;
+    }
+  else
+    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
+
+  prodp[size] = cy_limb;
+  prodp++;
+
+  /* For each iteration in the outer loop, multiply one limb from
+     U with one limb from V, and add it to PROD.  */
+  for (i = 1; i < size; i++)
+    {
+      v_limb = vp[i];
+      if (v_limb <= 1)
+	{
+	  cy_limb = 0;
+	  if (v_limb == 1)
+	    cy_limb = mpn_add_n (prodp, prodp, up, size);
+	}
+      else
+	cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
+
+      prodp[size] = cy_limb;
+      prodp++;
+    }
+}
+
+void
+#if __STDC__
+impn_mul_n (mp_ptr prodp,
+	     mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
+#else
+impn_mul_n (prodp, up, vp, size, tspace)
+     mp_ptr prodp;
+     mp_srcptr up;
+     mp_srcptr vp;
+     mp_size_t size;
+     mp_ptr tspace;
+#endif
+{
+  if ((size & 1) != 0)
+    {
+      /* The size is odd, the code code below doesn't handle that.
+	 Multiply the least significant (size - 1) limbs with a recursive
+	 call, and handle the most significant limb of S1 and S2
+	 separately.  */
+      /* A slightly faster way to do this would be to make the Karatsuba
+	 code below behave as if the size were even, and let it check for
+	 odd size in the end.  I.e., in essence move this code to the end.
+	 Doing so would save us a recursive call, and potentially make the
+	 stack grow a lot less.  */
+
+      mp_size_t esize = size - 1;	/* even size */
+      mp_limb_t cy_limb;
+
+      MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
+      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
+      prodp[esize + esize] = cy_limb;
+      cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
+
+      prodp[esize + size] = cy_limb;
+    }
+  else
+    {
+      /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
+
+	 Split U in two pieces, U1 and U0, such that
+	 U = U0 + U1*(B**n),
+	 and V in V1 and V0, such that
+	 V = V0 + V1*(B**n).
+
+	 UV is then computed recursively using the identity
+
+		2n   n          n                     n
+	 UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
+			1 1        1  0   0  1              0 0
+
+	 Where B = 2**BITS_PER_MP_LIMB.  */
+
+      mp_size_t hsize = size >> 1;
+      mp_limb_t cy;
+      int negflg;
+
+      /*** Product H.	 ________________  ________________
+			|_____U1 x V1____||____U0 x V0_____|  */
+      /* Put result in upper part of PROD and pass low part of TSPACE
+	 as new TSPACE.  */
+      MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
+
+      /*** Product M.	 ________________
+			|_(U1-U0)(V0-V1)_|  */
+      if (mpn_cmp (up + hsize, up, hsize) >= 0)
+	{
+	  mpn_sub_n (prodp, up + hsize, up, hsize);
+	  negflg = 0;
+	}
+      else
+	{
+	  mpn_sub_n (prodp, up, up + hsize, hsize);
+	  negflg = 1;
+	}
+      if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
+	{
+	  mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
+	  negflg ^= 1;
+	}
+      else
+	{
+	  mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
+	  /* No change of NEGFLG.  */
+	}
+      /* Read temporary operands from low part of PROD.
+	 Put result in low part of TSPACE using upper part of TSPACE
+	 as new TSPACE.  */
+      MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
+
+      /*** Add/copy product H.  */
+      MPN_COPY (prodp + hsize, prodp + size, hsize);
+      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
+
+      /*** Add product M (if NEGFLG M is a negative number).  */
+      if (negflg)
+	cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
+      else
+	cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
+
+      /*** Product L.	 ________________  ________________
+			|________________||____U0 x V0_____|  */
+      /* Read temporary operands from low part of PROD.
+	 Put result in low part of TSPACE using upper part of TSPACE
+	 as new TSPACE.  */
+      MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
+
+      /*** Add/copy Product L (twice).  */
+
+      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
+      if (cy)
+	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
+
+      MPN_COPY (prodp, tspace, hsize);
+      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
+      if (cy)
+	mpn_add_1 (prodp + size, prodp + size, size, 1);
+    }
+}
+
+void
+#if __STDC__
+impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
+#else
+impn_sqr_n_basecase (prodp, up, size)
+     mp_ptr prodp;
+     mp_srcptr up;
+     mp_size_t size;
+#endif
+{
+  mp_size_t i;
+  mp_limb_t cy_limb;
+  mp_limb_t v_limb;
+
+  /* Multiply by the first limb in V separately, as the result can be
+     stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+  v_limb = up[0];
+  if (v_limb <= 1)
+    {
+      if (v_limb == 1)
+	MPN_COPY (prodp, up, size);
+      else
+	MPN_ZERO (prodp, size);
+      cy_limb = 0;
+    }
+  else
+    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
+
+  prodp[size] = cy_limb;
+  prodp++;
+
+  /* For each iteration in the outer loop, multiply one limb from
+     U with one limb from V, and add it to PROD.  */
+  for (i = 1; i < size; i++)
+    {
+      v_limb = up[i];
+      if (v_limb <= 1)
+	{
+	  cy_limb = 0;
+	  if (v_limb == 1)
+	    cy_limb = mpn_add_n (prodp, prodp, up, size);
+	}
+      else
+	cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
+
+      prodp[size] = cy_limb;
+      prodp++;
+    }
+}
+
+void
+#if __STDC__
+impn_sqr_n (mp_ptr prodp,
+	     mp_srcptr up, mp_size_t size, mp_ptr tspace)
+#else
+impn_sqr_n (prodp, up, size, tspace)
+     mp_ptr prodp;
+     mp_srcptr up;
+     mp_size_t size;
+     mp_ptr tspace;
+#endif
+{
+  if ((size & 1) != 0)
+    {
+      /* The size is odd, the code code below doesn't handle that.
+	 Multiply the least significant (size - 1) limbs with a recursive
+	 call, and handle the most significant limb of S1 and S2
+	 separately.  */
+      /* A slightly faster way to do this would be to make the Karatsuba
+	 code below behave as if the size were even, and let it check for
+	 odd size in the end.  I.e., in essence move this code to the end.
+	 Doing so would save us a recursive call, and potentially make the
+	 stack grow a lot less.  */
+
+      mp_size_t esize = size - 1;	/* even size */
+      mp_limb_t cy_limb;
+
+      MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
+      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
+      prodp[esize + esize] = cy_limb;
+      cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
+
+      prodp[esize + size] = cy_limb;
+    }
+  else
+    {
+      mp_size_t hsize = size >> 1;
+      mp_limb_t cy;
+
+      /*** Product H.	 ________________  ________________
+			|_____U1 x U1____||____U0 x U0_____|  */
+      /* Put result in upper part of PROD and pass low part of TSPACE
+	 as new TSPACE.  */
+      MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
+
+      /*** Product M.	 ________________
+			|_(U1-U0)(U0-U1)_|  */
+      if (mpn_cmp (up + hsize, up, hsize) >= 0)
+	{
+	  mpn_sub_n (prodp, up + hsize, up, hsize);
+	}
+      else
+	{
+	  mpn_sub_n (prodp, up, up + hsize, hsize);
+	}
+
+      /* Read temporary operands from low part of PROD.
+	 Put result in low part of TSPACE using upper part of TSPACE
+	 as new TSPACE.  */
+      MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);
+
+      /*** Add/copy product H.  */
+      MPN_COPY (prodp + hsize, prodp + size, hsize);
+      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
+
+      /*** Add product M (if NEGFLG M is a negative number).  */
+      cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
+
+      /*** Product L.	 ________________  ________________
+			|________________||____U0 x U0_____|  */
+      /* Read temporary operands from low part of PROD.
+	 Put result in low part of TSPACE using upper part of TSPACE
+	 as new TSPACE.  */
+      MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
+
+      /*** Add/copy Product L (twice).  */
+
+      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
+      if (cy)
+	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
+
+      MPN_COPY (prodp, tspace, hsize);
+      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
+      if (cy)
+	mpn_add_1 (prodp + size, prodp + size, size, 1);
+    }
+}
+
+/* This should be made into an inline function in gmp.h.  */
+inline void
+#if __STDC__
+mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+#else
+mpn_mul_n (prodp, up, vp, size)
+     mp_ptr prodp;
+     mp_srcptr up;
+     mp_srcptr vp;
+     mp_size_t size;
+#endif
+{
+  TMP_DECL (marker);
+  TMP_MARK (marker);
+  if (up == vp)
+    {
+      if (size < KARATSUBA_THRESHOLD)
+	{
+	  impn_sqr_n_basecase (prodp, up, size);
+	}
+      else
+	{
+	  mp_ptr tspace;
+	  tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
+	  impn_sqr_n (prodp, up, size, tspace);
+	}
+    }
+  else
+    {
+      if (size < KARATSUBA_THRESHOLD)
+	{
+	  impn_mul_n_basecase (prodp, up, vp, size);
+	}
+      else
+	{
+	  mp_ptr tspace;
+	  tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
+	  impn_mul_n (prodp, up, vp, size, tspace);
+	}
+    }
+  TMP_FREE (marker);
+}
diff --git a/mpn/generic/perfsqr.c b/mpn/generic/perfsqr.c
new file mode 100644
index 000000000..5a6e2afe2
--- /dev/null
+++ b/mpn/generic/perfsqr.c
@@ -0,0 +1,138 @@
+/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
+   zero otherwise.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+#if BITS_PER_MP_LIMB == 32
+#define PP 0xC0CFD797L		/* 3 x 5 x 7 x 11 x 13 x ... x 29 */
+#define PP_INVERTED 0x53E5645CL
+#endif
+
+#if BITS_PER_MP_LIMB == 64
+#define PP 0xE221F97C30E94E1DL	/* 3 x 5 x 7 x 11 x 13 x ... x 53 */
+#define PP_INVERTED 0x21CFE6CFC938B36BL
+#endif
+
+/* sq_res_0x100[x mod 0x100] == 1 iff x mod 0x100 is a quadratic residue
+   modulo 0x100.  */
+static unsigned char const sq_res_0x100[0x100] =
+{
+  1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+  0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+  1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+  0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+  0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+  0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+  0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+  0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
+};
+
+int
+#if __STDC__
+mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
+#else
+mpn_perfect_square_p (up, usize)
+     mp_srcptr up;
+     mp_size_t usize;
+#endif
+{
+  mp_limb_t rem;
+  mp_ptr root_ptr;
+  int res;
+  TMP_DECL (marker);
+
+  /* The first test excludes 55/64 (85.9%) of the perfect square candidates
+     in O(1) time.  */
+  if ((sq_res_0x100[(unsigned int) up[0] % 0x100] & 1) == 0)
+    return 0;
+
+#if defined (PP)
+  /* The second test excludes 30652543/30808063 (99.5%) of the remaining
+     perfect square candidates in O(n) time.  */
+
+  /* Firstly, compute REM = A mod PP.  */
+  if (UDIV_TIME > (2 * UMUL_TIME + 6))
+    rem = mpn_preinv_mod_1 (up, usize, (mp_limb_t) PP, (mp_limb_t) PP_INVERTED);
+  else
+    rem = mpn_mod_1 (up, usize, (mp_limb_t) PP);
+
+  /* Now decide if REM is a quadratic residue modulo the factors in PP.  */
+
+  /* If A is just a few limbs, computing the square root does not take long
+     time, so things might run faster if we limit this loop according to the
+     size of A.  */
+
+#if BITS_PER_MP_LIMB == 64
+  if (((0x12DD703303AED3L >> rem % 53) & 1) == 0)
+    return 0;
+  if (((0x4351B2753DFL >> rem % 47) & 1) == 0)
+    return 0;
+  if (((0x35883A3EE53L >> rem % 43) & 1) == 0)
+    return 0;
+  if (((0x1B382B50737L >> rem % 41) & 1) == 0)
+    return 0;
+  if (((0x165E211E9BL >> rem % 37) & 1) == 0)
+    return 0;
+  if (((0x121D47B7L >> rem % 31) & 1) == 0)
+    return 0;
+#endif
+  if (((0x13D122F3L >> rem % 29) & 1) == 0)
+    return 0;
+  if (((0x5335FL >> rem % 23) & 1) == 0)
+    return 0;
+  if (((0x30AF3L >> rem % 19) & 1) == 0)
+    return 0;
+  if (((0x1A317L >> rem % 17) & 1) == 0)
+    return 0;
+  if (((0x161BL >> rem % 13) & 1) == 0)
+    return 0;
+  if (((0x23BL >> rem % 11) & 1) == 0)
+    return 0;
+  if (((0x017L >> rem % 7) & 1) == 0)
+    return 0;
+  if (((0x13L >> rem % 5) & 1) == 0)
+    return 0;
+  if (((0x3L >> rem % 3) & 1) == 0)
+    return 0;
+#endif
+
+  TMP_MARK (marker);
+
+  /* For the third and last test, we finally compute the square root,
+     to make sure we've really got a perfect square.  */
+  root_ptr = (mp_ptr) TMP_ALLOC ((usize + 1) / 2 * BYTES_PER_MP_LIMB);
+
+  /* Iff mpn_sqrtrem returns zero, the square is perfect.  */
+  res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);
+  TMP_FREE (marker);
+  return res;
+}
diff --git a/mpn/generic/popcount.c b/mpn/generic/popcount.c
new file mode 100644
index 000000000..c48573a47
--- /dev/null
+++ b/mpn/generic/popcount.c
@@ -0,0 +1,87 @@
+/* popcount.c
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if defined __GNUC__
+#if defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
+#define popc_limb(a) \
+  ({									\
+    DItype __res;							\
+    asm ("popc %1,%0" : "=r" (__res) : "rI" (a));			\
+    __res;								\
+  })
+#endif
+#endif
+
+#ifndef popc_limb
+
+/* Cool population count of a mp_limb_t.
+   You have to figure out how this works, I won't tell you!  */
+
+static inline unsigned int
+popc_limb (x)
+     mp_limb_t x;
+{
+#if BITS_PER_MP_LIMB == 64
+  /* We have to go into some trouble to define these constants.
+     (For mp_limb_t being `long long'.)  */
+  mp_limb_t cnst;
+  cnst = 0x55555555L | ((mp_limb_t) 0x55555555L << BITS_PER_MP_LIMB/2);
+  x = ((x & ~cnst) >> 1) + (x & cnst);
+  cnst = 0x33333333L | ((mp_limb_t) 0x33333333L << BITS_PER_MP_LIMB/2);
+  x = ((x & ~cnst) >> 2) + (x & cnst);
+  cnst = 0x0f0f0f0fL | ((mp_limb_t) 0x0f0f0f0fL << BITS_PER_MP_LIMB/2);
+  x = ((x >> 4) + x) & cnst;
+  x = ((x >> 8) + x);
+  x = ((x >> 16) + x);
+  x = ((x >> 32) + x) & 0xff;
+#endif
+#if BITS_PER_MP_LIMB == 32
+  x = ((x >> 1) & 0x55555555L) + (x & 0x55555555L);
+  x = ((x >> 2) & 0x33333333L) + (x & 0x33333333L);
+  x = ((x >> 4) + x) & 0x0f0f0f0fL;
+  x = ((x >> 8) + x);
+  x = ((x >> 16) + x) & 0xff;
+#endif
+  return x;
+}
+#endif
+
+unsigned long int
+#if __STDC__
+mpn_popcount (register mp_srcptr p, register mp_size_t size)
+#else
+mpn_popcount (p, size)
+     register mp_srcptr p;
+     register mp_size_t size;
+#endif
+{
+  unsigned long int popcnt;
+  mp_size_t i;
+
+  popcnt = 0;
+  for (i = 0; i < size; i++)
+    popcnt += popc_limb (p[i]);
+
+  return popcnt;
+}
diff --git a/mpn/generic/pre_mod_1.c b/mpn/generic/pre_mod_1.c
new file mode 100644
index 000000000..92d413b13
--- /dev/null
+++ b/mpn/generic/pre_mod_1.c
@@ -0,0 +1,69 @@
+/* mpn_preinv_mod_1 (dividend_ptr, dividend_size, divisor_limb,
+		       divisor_limb_inverted) --
+   Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by the normalized DIVISOR_LIMB.
+   DIVISOR_LIMB_INVERTED should be 2^(2*BITS_PER_MP_LIMB) / DIVISOR_LIMB +
+   - 2^BITS_PER_MP_LIMB.
+   Return the single-limb remainder.
+
+Copyright (C) 1991, 1993, 1994, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+
+mp_limb_t
+#if __STDC__
+mpn_preinv_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size,
+		  mp_limb_t divisor_limb, mp_limb_t divisor_limb_inverted)
+#else
+mpn_preinv_mod_1 (dividend_ptr, dividend_size, divisor_limb, divisor_limb_inverted)
+     mp_srcptr dividend_ptr;
+     mp_size_t dividend_size;
+     mp_limb_t divisor_limb;
+     mp_limb_t divisor_limb_inverted;
+#endif
+{
+  mp_size_t i;
+  mp_limb_t n0, r;
+  int dummy;
+
+  i = dividend_size - 1;
+  r = dividend_ptr[i];
+
+  if (r >= divisor_limb)
+    r = 0;
+  else
+    i--;
+
+  for (; i >= 0; i--)
+    {
+      n0 = dividend_ptr[i];
+      udiv_qrnnd_preinv (dummy, r, r, n0, divisor_limb, divisor_limb_inverted);
+    }
+  return r;
+}
diff --git a/mpn/generic/random2.c b/mpn/generic/random2.c
new file mode 100644
index 000000000..e4217a374
--- /dev/null
+++ b/mpn/generic/random2.c
@@ -0,0 +1,93 @@
+/* mpn_random2 -- Generate random numbers with relatively long strings
+   of ones and zeroes.  Suitable for border testing.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if defined (__hpux) || defined (__alpha__)  || defined (__svr4__) || defined (__SVR4)
+/* HPUX lacks random().  DEC OSF/1 1.2 random() returns a double.  */
+long mrand48 ();
+static inline long
+random ()
+{
+  return mrand48 ();
+}
+#else
+long random ();
+#endif
+
+/* It's a bit tricky to get this right, so please test the code well
+   if you hack with it.  Some early versions of the function produced
+   random numbers with the leading limb == 0, and some versions never
+   made the most significant bit set.  */
+
+void
+mpn_random2 (res_ptr, size)
+     mp_ptr res_ptr;
+     mp_size_t size;
+{
+  int n_bits;
+  int bit_pos;
+  mp_size_t limb_pos;
+  unsigned int ran;
+  mp_limb_t limb;
+
+  limb = 0;
+
+  /* Start off in a random bit position in the most significant limb.  */
+  bit_pos = random () & (BITS_PER_MP_LIMB - 1);
+
+  /* Least significant bit of RAN chooses string of ones/string of zeroes.
+     Make most significant limb be non-zero by setting bit 0 of RAN.  */
+  ran = random () | 1;
+
+  for (limb_pos = size - 1; limb_pos >= 0; )
+    {
+      n_bits = (ran >> 1) % BITS_PER_MP_LIMB + 1;
+      if ((ran & 1) != 0)
+	{
+	  /* Generate a string of ones.  */
+	  if (n_bits >= bit_pos)
+	    {
+	      res_ptr[limb_pos--] = limb | ((((mp_limb_t) 2) << bit_pos) - 1);
+	      bit_pos += BITS_PER_MP_LIMB;
+	      limb = (~(mp_limb_t) 0) << (bit_pos - n_bits);
+	    }
+	  else
+	    {
+	      limb |= ((((mp_limb_t) 1) << n_bits) - 1) << (bit_pos - n_bits + 1);
+	    }
+	}
+      else
+	{
+	  /* Generate a string of zeroes.  */
+	  if (n_bits >= bit_pos)
+	    {
+	      res_ptr[limb_pos--] = limb;
+	      limb = 0;
+	      bit_pos += BITS_PER_MP_LIMB;
+	    }
+	}
+      bit_pos -= n_bits;
+      ran = random ();
+    }
+}
diff --git a/mpn/generic/rshift.c b/mpn/generic/rshift.c
new file mode 100644
index 000000000..804f9be58
--- /dev/null
+++ b/mpn/generic/rshift.c
@@ -0,0 +1,88 @@
+/* mpn_rshift -- Shift right a low-level natural-number integer.
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
+   and store the USIZE least significant limbs of the result at WP.
+   The bits shifted out to the right are returned.
+
+   Argument constraints:
+   1. 0 < CNT < BITS_PER_MP_LIMB
+   2. If the result is to be written over the input, WP must be <= UP.
+*/
+
+mp_limb_t
+#if __STDC__
+mpn_rshift (register mp_ptr wp,
+	    register mp_srcptr up, mp_size_t usize,
+	    register unsigned int cnt)
+#else
+mpn_rshift (wp, up, usize, cnt)
+     register mp_ptr wp;
+     register mp_srcptr up;
+     mp_size_t usize;
+     register unsigned int cnt;
+#endif
+{
+  register mp_limb_t high_limb, low_limb;
+  register unsigned sh_1, sh_2;
+  register mp_size_t i;
+  mp_limb_t retval;
+
+#ifdef DEBUG
+  if (usize == 0 || cnt == 0)
+    abort ();
+#endif
+
+  sh_1 = cnt;
+
+#if 0
+  if (sh_1 == 0)
+    {
+      if (wp != up)
+	{
+	  /* Copy from low end to high end, to allow specified input/output
+	     overlapping.  */
+	  for (i = 0; i < usize; i++)
+	    wp[i] = up[i];
+	}
+      return usize;
+    }
+#endif
+
+  wp -= 1;
+  sh_2 = BITS_PER_MP_LIMB - sh_1;
+  high_limb = up[0];
+  retval = high_limb << sh_2;
+  low_limb = high_limb;
+
+  for (i = 1; i < usize; i++)
+    {
+      high_limb = up[i];
+      wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
+      low_limb = high_limb;
+    }
+  wp[i] = low_limb >> sh_1;
+
+  return retval;
+}
diff --git a/mpn/generic/scan0.c b/mpn/generic/scan0.c
new file mode 100644
index 000000000..84b197ce2
--- /dev/null
+++ b/mpn/generic/scan0.c
@@ -0,0 +1,62 @@
+/* mpn_scan0 -- Scan from a given bit position for the next clear bit.
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Design issues:
+   1. What if starting_bit is not within U?  Caller's problem?
+   2. Bit index should be 'unsigned'?
+
+   Argument constraints:
+   1. U must sooner ot later have a limb != 1.
+ */
+
+unsigned long int
+#if __STDC__
+mpn_scan0 (register mp_srcptr up,
+	   register unsigned long int starting_bit)
+#else
+mpn_scan0 (up, starting_bit)
+     register mp_srcptr up;
+     register unsigned long int starting_bit;
+#endif
+{
+  mp_size_t starting_word;
+  mp_limb_t alimb;
+  int cnt;
+  mp_srcptr p;
+
+  /* Start at the word implied by STARTING_BIT.  */
+  starting_word = starting_bit / BITS_PER_MP_LIMB;
+  p = up + starting_word;
+  alimb = ~*p++;
+
+  /* Mask off any bits before STARTING_BIT in the first limb.  */
+  alimb &= - (mp_limb_t) 1 << (starting_bit % BITS_PER_MP_LIMB);
+
+  while (alimb == 0)
+    alimb = ~*p++;
+
+  count_leading_zeros (cnt, alimb & -alimb);
+  return (p - up) * BITS_PER_MP_LIMB - 1 - cnt;
+}
diff --git a/mpn/generic/scan1.c b/mpn/generic/scan1.c
new file mode 100644
index 000000000..c95d090da
--- /dev/null
+++ b/mpn/generic/scan1.c
@@ -0,0 +1,62 @@
+/* mpn_scan1 -- Scan from a given bit position for the next set bit.
+
+Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Design issues:
+   1. What if starting_bit is not within U?  Caller's problem?
+   2. Bit index should be 'unsigned'?
+
+   Argument constraints:
+   1. U must sooner ot later have a limb != 0.
+ */
+
+unsigned long int
+#if __STDC__
+mpn_scan1 (register mp_srcptr up,
+	   register unsigned long int starting_bit)
+#else
+mpn_scan1 (up, starting_bit)
+     register mp_srcptr up;
+     register unsigned long int starting_bit;
+#endif
+{
+  mp_size_t starting_word;
+  mp_limb_t alimb;
+  int cnt;
+  mp_srcptr p;
+
+  /* Start at the word implied by STARTING_BIT.  */
+  starting_word = starting_bit / BITS_PER_MP_LIMB;
+  p = up + starting_word;
+  alimb = *p++;
+
+  /* Mask off any bits before STARTING_BIT in the first limb.  */
+  alimb &= - (mp_limb_t) 1 << (starting_bit % BITS_PER_MP_LIMB);
+
+  while (alimb == 0)
+    alimb = *p++;
+
+  count_leading_zeros (cnt, alimb & -alimb);
+  return (p - up) * BITS_PER_MP_LIMB - 1 - cnt;
+}
diff --git a/mpn/generic/set_str.c b/mpn/generic/set_str.c
new file mode 100644
index 000000000..154b4b096
--- /dev/null
+++ b/mpn/generic/set_str.c
@@ -0,0 +1,154 @@
+/* mpn_set_str (mp_ptr res_ptr, const char *str, size_t str_len, int base)
+   -- Convert a STR_LEN long base BASE byte string pointed to by STR to a
+   limb vector pointed to by RES_PTR.  Return the number of limbs in
+   RES_PTR.
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_size_t
+mpn_set_str (xp, str, str_len, base)
+     mp_ptr xp;
+     const unsigned char *str;
+     size_t str_len;
+     int base;
+{
+  mp_size_t size;
+  mp_limb_t big_base;
+  int indigits_per_limb;
+  mp_limb_t res_digit;
+
+  big_base = __mp_bases[base].big_base;
+  indigits_per_limb = __mp_bases[base].chars_per_limb;
+
+/*   size = str_len / indigits_per_limb + 1;  */
+
+  size = 0;
+
+  if ((base & (base - 1)) == 0)
+    {
+      /* The base is a power of 2.  Read the input string from
+	 least to most significant character/digit.  */
+
+      const unsigned char *s;
+      int next_bitpos;
+      int bits_per_indigit = big_base;
+
+      res_digit = 0;
+      next_bitpos = 0;
+
+      for (s = str + str_len - 1; s >= str; s--)
+	{
+	  int inp_digit = *s;
+
+	  res_digit |= (mp_limb_t) inp_digit << next_bitpos;
+	  next_bitpos += bits_per_indigit;
+	  if (next_bitpos >= BITS_PER_MP_LIMB)
+	    {
+	      xp[size++] = res_digit;
+	      next_bitpos -= BITS_PER_MP_LIMB;
+	      res_digit = inp_digit >> (bits_per_indigit - next_bitpos);
+	    }
+	}
+
+      if (res_digit != 0)
+	xp[size++] = res_digit;
+    }
+  else
+    {
+      /* General case.  The base is not a power of 2.  */
+
+      size_t i;
+      int j;
+      mp_limb_t cy_limb;
+
+      for (i = indigits_per_limb; i < str_len; i += indigits_per_limb)
+	{
+	  res_digit = *str++;
+	  if (base == 10)
+	    { /* This is a common case.
+		 Help the compiler to avoid multiplication.  */
+	      for (j = 1; j < indigits_per_limb; j++)
+		res_digit = res_digit * 10 + *str++;
+	    }
+	  else
+	    {
+	      for (j = 1; j < indigits_per_limb; j++)
+		res_digit = res_digit * base + *str++;
+	    }
+
+	  if (size == 0)
+	    {
+	      if (res_digit != 0)
+		{
+		  xp[0] = res_digit;
+		  size = 1;
+		}
+	    }
+	  else
+	    {
+	      cy_limb = mpn_mul_1 (xp, xp, size, big_base);
+	      cy_limb += mpn_add_1 (xp, xp, size, res_digit);
+	      if (cy_limb != 0)
+		xp[size++] = cy_limb;
+	    }
+	}
+
+      big_base = base;
+      res_digit = *str++;
+      if (base == 10)
+	{ /* This is a common case.
+	     Help the compiler to avoid multiplication.  */
+	  for (j = 1; j < str_len - (i - indigits_per_limb); j++)
+	    {
+	      res_digit = res_digit * 10 + *str++;
+	      big_base *= 10;
+	    }
+	}
+      else
+	{
+	  for (j = 1; j < str_len - (i - indigits_per_limb); j++)
+	    {
+	      res_digit = res_digit * base + *str++;
+	      big_base *= base;
+	    }
+	}
+      
+      if (size == 0)
+	{
+	  if (res_digit != 0)
+	    {
+	      xp[0] = res_digit;
+	      size = 1;
+	    }
+	}
+      else
+	{
+	  cy_limb = mpn_mul_1 (xp, xp, size, big_base);
+	  cy_limb += mpn_add_1 (xp, xp, size, res_digit);
+	  if (cy_limb != 0)
+	    xp[size++] = cy_limb;
+	}
+    }
+
+  return size;
+}
diff --git a/mpn/generic/sqrtrem.c b/mpn/generic/sqrtrem.c
new file mode 100644
index 000000000..539480d37
--- /dev/null
+++ b/mpn/generic/sqrtrem.c
@@ -0,0 +1,498 @@
+/* mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
+
+   Write the square root of {OP_PTR, OP_SIZE} at ROOT_PTR.
+   Write the remainder at REM_PTR, if REM_PTR != NULL.
+   Return the size of the remainder.
+   (The size of the root is always half of the size of the operand.)
+
+   OP_PTR and ROOT_PTR may not point to the same object.
+   OP_PTR and REM_PTR may point to the same object.
+
+   If REM_PTR is NULL, only the root is computed and the return value of
+   the function is 0 if OP is a perfect square, and *any* non-zero number
+   otherwise.
+
+Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* This code is just correct if "unsigned char" has at least 8 bits.  It
+   doesn't help to use CHAR_BIT from limits.h, as the real problem is
+   the static arrays.  */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Square root algorithm:
+
+   1. Shift OP (the input) to the left an even number of bits s.t. there
+      are an even number of words and either (or both) of the most
+      significant bits are set.  This way, sqrt(OP) has exactly half as
+      many words as OP, and has its most significant bit set.
+
+   2. Get a 9-bit approximation to sqrt(OP) using the pre-computed tables.
+      This approximation is used for the first single-precision
+      iterations of Newton's method, yielding a full-word approximation
+      to sqrt(OP).
+
+   3. Perform multiple-precision Newton iteration until we have the
+      exact result.  Only about half of the input operand is used in
+      this calculation, as the square root is perfectly determinable
+      from just the higher half of a number.  */
+
+/* Define this macro for IEEE P854 machines with a fast sqrt instruction.  */
+#if defined __GNUC__ && ! defined __SOFT_FLOAT
+
+#if defined __sparc__
+#define SQRT(a) \
+  ({									\
+    double __sqrt_res;							\
+    asm ("fsqrtd %1,%0" : "=f" (__sqrt_res) : "f" (a));			\
+    __sqrt_res;								\
+  })
+#endif
+
+#if defined __HAVE_68881__
+#define SQRT(a) \
+  ({									\
+    double __sqrt_res;							\
+    asm ("fsqrtx %1,%0" : "=f" (__sqrt_res) : "f" (a));			\
+    __sqrt_res;								\
+  })
+#endif
+
+#if defined __hppa
+#define SQRT(a) \
+  ({									\
+    double __sqrt_res;							\
+    asm ("fsqrt,dbl %1,%0" : "=fx" (__sqrt_res) : "fx" (a));		\
+    __sqrt_res;								\
+  })
+#endif
+
+#if defined _ARCH_PWR2
+#define SQRT(a) \
+  ({									\
+    double __sqrt_res;							\
+    asm ("fsqrt %0,%1" : "=f" (__sqrt_res) : "f" (a));			\
+    __sqrt_res;								\
+  })
+#endif
+
+#endif
+
+#ifndef SQRT
+
+/* Tables for initial approximation of the square root.  These are
+   indexed with bits 1-8 of the operand for which the square root is
+   calculated, where bit 0 is the most significant non-zero bit.  I.e.
+   the most significant one-bit is not used, since that per definition
+   is one.  Likewise, the tables don't return the highest bit of the
+   result.  That bit must be inserted by or:ing the returned value with
+   0x100.  This way, we get a 9-bit approximation from 8-bit tables!  */
+
+/* Table to be used for operands with an even total number of bits.
+   (Exactly as in the decimal system there are similarities between the
+   square root of numbers with the same initial digits and an even
+   difference in the total number of digits.  Consider the square root
+   of 1, 10, 100, 1000, ...)  */
+static unsigned char even_approx_tab[256] =
+{
+  0x6a, 0x6a, 0x6b, 0x6c, 0x6c, 0x6d, 0x6e, 0x6e,
+  0x6f, 0x70, 0x71, 0x71, 0x72, 0x73, 0x73, 0x74,
+  0x75, 0x75, 0x76, 0x77, 0x77, 0x78, 0x79, 0x79,
+  0x7a, 0x7b, 0x7b, 0x7c, 0x7d, 0x7d, 0x7e, 0x7f,
+  0x80, 0x80, 0x81, 0x81, 0x82, 0x83, 0x83, 0x84,
+  0x85, 0x85, 0x86, 0x87, 0x87, 0x88, 0x89, 0x89,
+  0x8a, 0x8b, 0x8b, 0x8c, 0x8d, 0x8d, 0x8e, 0x8f,
+  0x8f, 0x90, 0x90, 0x91, 0x92, 0x92, 0x93, 0x94,
+  0x94, 0x95, 0x96, 0x96, 0x97, 0x97, 0x98, 0x99,
+  0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9c, 0x9d, 0x9e,
+  0x9e, 0x9f, 0xa0, 0xa0, 0xa1, 0xa1, 0xa2, 0xa3,
+  0xa3, 0xa4, 0xa4, 0xa5, 0xa6, 0xa6, 0xa7, 0xa7,
+  0xa8, 0xa9, 0xa9, 0xaa, 0xaa, 0xab, 0xac, 0xac,
+  0xad, 0xad, 0xae, 0xaf, 0xaf, 0xb0, 0xb0, 0xb1,
+  0xb2, 0xb2, 0xb3, 0xb3, 0xb4, 0xb5, 0xb5, 0xb6,
+  0xb6, 0xb7, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xba,
+  0xbb, 0xbb, 0xbc, 0xbd, 0xbd, 0xbe, 0xbe, 0xbf,
+  0xc0, 0xc0, 0xc1, 0xc1, 0xc2, 0xc2, 0xc3, 0xc3,
+  0xc4, 0xc5, 0xc5, 0xc6, 0xc6, 0xc7, 0xc7, 0xc8,
+  0xc9, 0xc9, 0xca, 0xca, 0xcb, 0xcb, 0xcc, 0xcc,
+  0xcd, 0xce, 0xce, 0xcf, 0xcf, 0xd0, 0xd0, 0xd1,
+  0xd1, 0xd2, 0xd3, 0xd3, 0xd4, 0xd4, 0xd5, 0xd5,
+  0xd6, 0xd6, 0xd7, 0xd7, 0xd8, 0xd9, 0xd9, 0xda,
+  0xda, 0xdb, 0xdb, 0xdc, 0xdc, 0xdd, 0xdd, 0xde,
+  0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe1, 0xe2, 0xe2,
+  0xe3, 0xe3, 0xe4, 0xe4, 0xe5, 0xe5, 0xe6, 0xe6,
+  0xe7, 0xe7, 0xe8, 0xe8, 0xe9, 0xea, 0xea, 0xeb,
+  0xeb, 0xec, 0xec, 0xed, 0xed, 0xee, 0xee, 0xef,
+  0xef, 0xf0, 0xf0, 0xf1, 0xf1, 0xf2, 0xf2, 0xf3,
+  0xf3, 0xf4, 0xf4, 0xf5, 0xf5, 0xf6, 0xf6, 0xf7,
+  0xf7, 0xf8, 0xf8, 0xf9, 0xf9, 0xfa, 0xfa, 0xfb,
+  0xfb, 0xfc, 0xfc, 0xfd, 0xfd, 0xfe, 0xfe, 0xff,
+};
+
+/* Table to be used for operands with an odd total number of bits.
+   (Further comments before previous table.)  */
+static unsigned char odd_approx_tab[256] =
+{
+  0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03,
+  0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07,
+  0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b,
+  0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f,
+  0x0f, 0x10, 0x10, 0x10, 0x11, 0x11, 0x12, 0x12,
+  0x13, 0x13, 0x14, 0x14, 0x15, 0x15, 0x16, 0x16,
+  0x16, 0x17, 0x17, 0x18, 0x18, 0x19, 0x19, 0x1a,
+  0x1a, 0x1b, 0x1b, 0x1b, 0x1c, 0x1c, 0x1d, 0x1d,
+  0x1e, 0x1e, 0x1f, 0x1f, 0x20, 0x20, 0x20, 0x21,
+  0x21, 0x22, 0x22, 0x23, 0x23, 0x23, 0x24, 0x24,
+  0x25, 0x25, 0x26, 0x26, 0x27, 0x27, 0x27, 0x28,
+  0x28, 0x29, 0x29, 0x2a, 0x2a, 0x2a, 0x2b, 0x2b,
+  0x2c, 0x2c, 0x2d, 0x2d, 0x2d, 0x2e, 0x2e, 0x2f,
+  0x2f, 0x30, 0x30, 0x30, 0x31, 0x31, 0x32, 0x32,
+  0x32, 0x33, 0x33, 0x34, 0x34, 0x35, 0x35, 0x35,
+  0x36, 0x36, 0x37, 0x37, 0x37, 0x38, 0x38, 0x39,
+  0x39, 0x39, 0x3a, 0x3a, 0x3b, 0x3b, 0x3b, 0x3c,
+  0x3c, 0x3d, 0x3d, 0x3d, 0x3e, 0x3e, 0x3f, 0x3f,
+  0x40, 0x40, 0x40, 0x41, 0x41, 0x41, 0x42, 0x42,
+  0x43, 0x43, 0x43, 0x44, 0x44, 0x45, 0x45, 0x45,
+  0x46, 0x46, 0x47, 0x47, 0x47, 0x48, 0x48, 0x49,
+  0x49, 0x49, 0x4a, 0x4a, 0x4b, 0x4b, 0x4b, 0x4c,
+  0x4c, 0x4c, 0x4d, 0x4d, 0x4e, 0x4e, 0x4e, 0x4f,
+  0x4f, 0x50, 0x50, 0x50, 0x51, 0x51, 0x51, 0x52,
+  0x52, 0x53, 0x53, 0x53, 0x54, 0x54, 0x54, 0x55,
+  0x55, 0x56, 0x56, 0x56, 0x57, 0x57, 0x57, 0x58,
+  0x58, 0x59, 0x59, 0x59, 0x5a, 0x5a, 0x5a, 0x5b,
+  0x5b, 0x5b, 0x5c, 0x5c, 0x5d, 0x5d, 0x5d, 0x5e,
+  0x5e, 0x5e, 0x5f, 0x5f, 0x60, 0x60, 0x60, 0x61,
+  0x61, 0x61, 0x62, 0x62, 0x62, 0x63, 0x63, 0x63,
+  0x64, 0x64, 0x65, 0x65, 0x65, 0x66, 0x66, 0x66,
+  0x67, 0x67, 0x67, 0x68, 0x68, 0x68, 0x69, 0x69,
+};
+#endif
+
+
+mp_size_t
+#if __STDC__
+mpn_sqrtrem (mp_ptr root_ptr, mp_ptr rem_ptr, mp_srcptr op_ptr, mp_size_t op_size)
+#else
+mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size)
+     mp_ptr root_ptr;
+     mp_ptr rem_ptr;
+     mp_srcptr op_ptr;
+     mp_size_t op_size;
+#endif
+{
+  /* R (root result) */
+  mp_ptr rp;			/* Pointer to least significant word */
+  mp_size_t rsize;		/* The size in words */
+
+  /* T (OP shifted to the left a.k.a. normalized) */
+  mp_ptr tp;			/* Pointer to least significant word */
+  mp_size_t tsize;		/* The size in words */
+  mp_ptr t_end_ptr;		/* Pointer right beyond most sign. word */
+  mp_limb_t t_high0, t_high1;	/* The two most significant words */
+
+  /* TT (temporary for numerator/remainder) */
+  mp_ptr ttp;			/* Pointer to least significant word */
+
+  /* X (temporary for quotient in main loop) */
+  mp_ptr xp;			/* Pointer to least significant word */
+  mp_size_t xsize;		/* The size in words */
+
+  unsigned cnt;
+  mp_limb_t initial_approx;	/* Initially made approximation */
+  mp_size_t tsizes[BITS_PER_MP_LIMB];	/* Successive calculation precisions */
+  mp_size_t tmp;
+  mp_size_t i;
+
+  mp_limb_t cy_limb;
+  TMP_DECL (marker);
+
+  /* If OP is zero, both results are zero.  */
+  if (op_size == 0)
+    return 0;
+
+  count_leading_zeros (cnt, op_ptr[op_size - 1]);
+  tsize = op_size;
+  if ((tsize & 1) != 0)
+    {
+      cnt += BITS_PER_MP_LIMB;
+      tsize++;
+    }
+
+  rsize = tsize / 2;
+  rp = root_ptr;
+
+  TMP_MARK (marker);
+
+  /* Shift OP an even number of bits into T, such that either the most or
+     the second most significant bit is set, and such that the number of
+     words in T becomes even.  This way, the number of words in R=sqrt(OP)
+     is exactly half as many as in OP, and the most significant bit of R
+     is set.
+
+     Also, the initial approximation is simplified by this up-shifted OP.
+
+     Finally, the Newtonian iteration which is the main part of this
+     program performs division by R.  The fast division routine expects
+     the divisor to be "normalized" in exactly the sense of having the
+     most significant bit set.  */
+
+  tp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+
+  if ((cnt & ~1) % BITS_PER_MP_LIMB != 0)
+    t_high0 = mpn_lshift (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size,
+			  (cnt & ~1) % BITS_PER_MP_LIMB);
+  else
+    MPN_COPY (tp + cnt / BITS_PER_MP_LIMB, op_ptr, op_size);
+
+  if (cnt >= BITS_PER_MP_LIMB)
+    tp[0] = 0;
+
+  t_high0 = tp[tsize - 1];
+  t_high1 = tp[tsize - 2];	/* Never stray.  TSIZE is >= 2.  */
+
+/* Is there a fast sqrt instruction defined for this machine?  */
+#ifdef SQRT
+  {
+    initial_approx = SQRT (t_high0 * 2.0
+			   * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1))
+			   + t_high1);
+    /* If t_high0,,t_high1 is big, the result in INITIAL_APPROX might have
+       become incorrect due to overflow in the conversion from double to
+       mp_limb_t above.  It will typically be zero in that case, but might be
+       a small number on some machines.  The most significant bit of
+       INITIAL_APPROX should be set, so that bit is a good overflow
+       indication.  */
+    if ((mp_limb_signed_t) initial_approx >= 0)
+      initial_approx = ~(mp_limb_t)0;
+  }
+#else
+  /* Get a 9 bit approximation from the tables.  The tables expect to
+     be indexed with the 8 high bits right below the highest bit.
+     Also, the highest result bit is not returned by the tables, and
+     must be or:ed into the result.  The scheme gives 9 bits of start
+     approximation with just 256-entry 8 bit tables.  */
+
+  if ((cnt & 1) == 0)
+    {
+      /* The most sign bit of t_high0 is set.  */
+      initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 1);
+      initial_approx &= 0xff;
+      initial_approx = even_approx_tab[initial_approx];
+    }
+  else
+    {
+      /* The most significant bit of T_HIGH0 is unset,
+	 the second most significant is set.  */
+      initial_approx = t_high0 >> (BITS_PER_MP_LIMB - 8 - 2);
+      initial_approx &= 0xff;
+      initial_approx = odd_approx_tab[initial_approx];
+    }
+  initial_approx |= 0x100;
+  initial_approx <<= BITS_PER_MP_LIMB - 8 - 1;
+
+  /* Perform small precision Newtonian iterations to get a full word
+     approximation.  For small operands, these iteration will make the
+     entire job.  */
+  if (t_high0 == ~(mp_limb_t)0)
+    initial_approx = t_high0;
+  else
+    {
+      mp_limb_t quot;
+
+      if (t_high0 >= initial_approx)
+	initial_approx = t_high0 + 1;
+
+      /* First get about 18 bits with pure C arithmetics.  */
+      quot = t_high0 / (initial_approx >> BITS_PER_MP_LIMB/2) << BITS_PER_MP_LIMB/2;
+      initial_approx = (initial_approx + quot) / 2;
+      initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
+
+      /* Now get a full word by one (or for > 36 bit machines) several
+	 iterations.  */
+      for (i = 16; i < BITS_PER_MP_LIMB; i <<= 1)
+	{
+	  mp_limb_t ignored_remainder;
+
+	  udiv_qrnnd (quot, ignored_remainder,
+		      t_high0, t_high1, initial_approx);
+	  initial_approx = (initial_approx + quot) / 2;
+	  initial_approx |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
+	}
+    }
+#endif
+
+  rp[0] = initial_approx;
+  rsize = 1;
+
+#ifdef DEBUG
+	  printf ("\n\nT = ");
+	  mpn_dump (tp, tsize);
+#endif
+
+  if (tsize > 2)
+    {
+      /* Determine the successive precisions to use in the iteration.  We
+	 minimize the precisions, beginning with the highest (i.e. last
+	 iteration) to the lowest (i.e. first iteration).  */
+
+      xp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+      ttp = (mp_ptr) TMP_ALLOC (tsize * BYTES_PER_MP_LIMB);
+
+      t_end_ptr = tp + tsize;
+
+      tmp = tsize / 2;
+      for (i = 0;; i++)
+	{
+	  tsize = (tmp + 1) / 2;
+	  if (tmp == tsize)
+	    break;
+	  tsizes[i] = tsize + tmp;
+	  tmp = tsize;
+	}
+
+      /* Main Newton iteration loop.  For big arguments, most of the
+	 time is spent here.  */
+
+      /* It is possible to do a great optimization here.  The successive
+	 divisors in the mpn_divmod call below has more and more leading
+	 words equal to its predecessor.  Therefore the beginning of
+	 each division will repeat the same work as did the last
+	 division.  If we could guarantee that the leading words of two
+	 consecutive divisors are the same (i.e. in this case, a later
+	 divisor has just more digits at the end) it would be a simple
+	 matter of just using the old remainder of the last division in
+	 a subsequent division, to take care of this optimization.  This
+	 idea would surely make a difference even for small arguments.  */
+
+      /* Loop invariants:
+
+	 R <= shiftdown_to_same_size(floor(sqrt(OP))) < R + 1.
+	 X - 1 < shiftdown_to_same_size(floor(sqrt(OP))) <= X.
+	 R <= shiftdown_to_same_size(X).  */
+
+      while (--i >= 0)
+	{
+	  mp_limb_t cy;
+#ifdef DEBUG
+	  mp_limb_t old_least_sign_r = rp[0];
+	  mp_size_t old_rsize = rsize;
+
+	  printf ("R = ");
+	  mpn_dump (rp, rsize);
+#endif
+	  tsize = tsizes[i];
+
+	  /* Need to copy the numerator into temporary space, as
+	     mpn_divmod overwrites its numerator argument with the
+	     remainder (which we currently ignore).  */
+	  MPN_COPY (ttp, t_end_ptr - tsize, tsize);
+	  cy = mpn_divmod (xp, ttp, tsize, rp, rsize);
+	  xsize = tsize - rsize;
+
+#ifdef DEBUG
+	  printf ("X =%d ", cy);
+	  mpn_dump (xp, xsize);
+#endif
+
+	  /* Add X and R with the most significant limbs aligned,
+	     temporarily ignoring at least one limb at the low end of X.  */
+	  tmp = xsize - rsize;
+	  cy += mpn_add_n (xp + tmp, rp, xp + tmp, rsize);
+
+	  /* If T begins with more than 2 x BITS_PER_MP_LIMB of ones, we get
+	     intermediate roots that'd need an extra bit.  We don't want to
+	     handle that since it would make the subsequent divisor
+	     non-normalized, so round such roots down to be only ones in the
+	     current precision.  */
+	  if (cy == 2)
+	    {
+	      mp_size_t j;
+	      for (j = xsize; j >= 0; j--)
+		xp[j] = ~(mp_limb_t)0;
+	    }
+
+	  /* Divide X by 2 and put the result in R.  This is the new
+	     approximation.  Shift in the carry from the addition.  */
+	  mpn_rshift (rp, xp, xsize, 1);
+	  rp[xsize - 1] |= ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1));
+	  rsize = xsize;
+#ifdef DEBUG
+	  if (old_least_sign_r != rp[rsize - old_rsize])
+	    printf (">>>>>>>> %d: %0*lX, %0*lX <<<<<<<<\n",
+		    i, 2 * BYTES_PER_MP_LIMB, old_least_sign_r,
+		    2 * BYTES_PER_MP_LIMB, rp[rsize - old_rsize]);
+#endif
+	}
+    }
+
+#ifdef DEBUG
+  printf ("(final) R = ");
+  mpn_dump (rp, rsize);
+#endif
+
+  /* We computed the square root of OP * 2**(2*floor(cnt/2)).
+     This has resulted in R being 2**floor(cnt/2) to large.
+     Shift it down here to fix that.  */
+  if (cnt / 2 != 0)
+    {
+      mpn_rshift (rp, rp, rsize, cnt/2);
+      rsize -= rp[rsize - 1] == 0;
+    }
+
+  /* Calculate the remainder.  */
+  mpn_mul_n (tp, rp, rp, rsize);
+  tsize = rsize + rsize;
+  tsize -= tp[tsize - 1] == 0;
+  if (op_size < tsize
+      || (op_size == tsize && mpn_cmp (op_ptr, tp, op_size) < 0))
+    {
+      /* R is too large.  Decrement it.  */
+
+      /* These operations can't overflow.  */
+      cy_limb  = mpn_sub_n (tp, tp, rp, rsize);
+      cy_limb += mpn_sub_n (tp, tp, rp, rsize);
+      mpn_sub_1 (tp + rsize, tp + rsize, tsize - rsize, cy_limb);
+      mpn_add_1 (tp, tp, tsize, (mp_limb_t) 1);
+
+      mpn_sub_1 (rp, rp, rsize, (mp_limb_t) 1);
+
+#ifdef DEBUG
+      printf ("(adjusted) R = ");
+      mpn_dump (rp, rsize);
+#endif
+    }
+
+  if (rem_ptr != NULL)
+    {
+      cy_limb = mpn_sub (rem_ptr, op_ptr, op_size, tp, tsize);
+      MPN_NORMALIZE (rem_ptr, op_size);
+      TMP_FREE (marker);
+      return op_size;
+    }
+  else
+    {
+      int res;
+      res = op_size != tsize || mpn_cmp (op_ptr, tp, op_size);
+      TMP_FREE (marker);
+      return res;
+    }
+}
diff --git a/mpn/generic/sub_n.c b/mpn/generic/sub_n.c
new file mode 100644
index 000000000..9d4b21675
--- /dev/null
+++ b/mpn/generic/sub_n.c
@@ -0,0 +1,62 @@
+/* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+mp_limb_t
+#if __STDC__
+mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     register mp_srcptr s2_ptr;
+     mp_size_t size;
+#endif
+{
+  register mp_limb_t x, y, cy;
+  register mp_size_t j;
+
+  /* The loop counter and index J goes from -SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  s1_ptr -= j;
+  s2_ptr -= j;
+  res_ptr -= j;
+
+  cy = 0;
+  do
+    {
+      y = s2_ptr[j];
+      x = s1_ptr[j];
+      y += cy;			/* add previous carry to subtrahend */
+      cy = (y < cy);		/* get out carry from that addition */
+      y = x - y;		/* main subtract */
+      cy = (y > x) + cy;	/* get out carry from the subtract, combine */
+      res_ptr[j] = y;
+    }
+  while (++j != 0);
+
+  return cy;
+}
diff --git a/mpn/generic/submul_1.c b/mpn/generic/submul_1.c
new file mode 100644
index 000000000..b144283bd
--- /dev/null
+++ b/mpn/generic/submul_1.c
@@ -0,0 +1,65 @@
+/* mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
+   by S2_LIMB, subtract the S1_SIZE least significant limbs of the product
+   from the limb vector pointed to by RES_PTR.  Return the most significant
+   limb of the product, adjusted for carry-out from the subtraction.
+
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     mp_size_t s1_size;
+     register mp_limb_t s2_limb;
+{
+  register mp_limb_t cy_limb;
+  register mp_size_t j;
+  register mp_limb_t prod_high, prod_low;
+  register mp_limb_t x;
+
+  /* The loop counter and index J goes from -SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -s1_size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  res_ptr -= j;
+  s1_ptr -= j;
+
+  cy_limb = 0;
+  do
+    {
+      umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
+
+      prod_low += cy_limb;
+      cy_limb = (prod_low < cy_limb) + prod_high;
+
+      x = res_ptr[j];
+      prod_low = x - prod_low;
+      cy_limb += (prod_low > x);
+      res_ptr[j] = prod_low;
+    }
+  while (++j != 0);
+
+  return cy_limb;
+}
diff --git a/mpn/generic/udiv_w_sdiv.c b/mpn/generic/udiv_w_sdiv.c
new file mode 100644
index 000000000..d9e71b78a
--- /dev/null
+++ b/mpn/generic/udiv_w_sdiv.c
@@ -0,0 +1,125 @@
+/* mpn_udiv_w_sdiv -- implement udiv_qrnnd on machines with only signed
+   division.
+
+   Contributed by Peter L. Montgomery.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+mp_limb_t
+mpn_udiv_w_sdiv (rp, a1, a0, d)
+     mp_limb_t *rp, a1, a0, d;
+{
+  mp_limb_t q, r;
+  mp_limb_t c0, c1, b1;
+
+  if ((mp_limb_signed_t) d >= 0)
+    {
+      if (a1 < d - a1 - (a0 >> (BITS_PER_MP_LIMB - 1)))
+	{
+	  /* dividend, divisor, and quotient are nonnegative */
+	  sdiv_qrnnd (q, r, a1, a0, d);
+	}
+      else
+	{
+	  /* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */
+	  sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (BITS_PER_MP_LIMB - 1));
+	  /* Divide (c1*2^32 + c0) by d */
+	  sdiv_qrnnd (q, r, c1, c0, d);
+	  /* Add 2^31 to quotient */
+	  q += (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
+	}
+    }
+  else
+    {
+      b1 = d >> 1;			/* d/2, between 2^30 and 2^31 - 1 */
+      c1 = a1 >> 1;			/* A/2 */
+      c0 = (a1 << (BITS_PER_MP_LIMB - 1)) + (a0 >> 1);
+
+      if (a1 < b1)			/* A < 2^32*b1, so A/2 < 2^31*b1 */
+	{
+	  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+	  r = 2*r + (a0 & 1);		/* Remainder from A/(2*b1) */
+	  if ((d & 1) != 0)
+	    {
+	      if (r >= q)
+		r = r - q;
+	      else if (q - r <= d)
+		{
+		  r = r - q + d;
+		  q--;
+		}
+	      else
+		{
+		  r = r - q + 2*d;
+		  q -= 2;
+		}
+	    }
+	}
+      else if (c1 < b1)			/* So 2^31 <= (A/2)/b1 < 2^32 */
+	{
+	  c1 = (b1 - 1) - c1;
+	  c0 = ~c0;			/* logical NOT */
+
+	  sdiv_qrnnd (q, r, c1, c0, b1); /* (A/2) / (d/2) */
+
+	  q = ~q;			/* (A/2)/b1 */
+	  r = (b1 - 1) - r;
+
+	  r = 2*r + (a0 & 1);		/* A/(2*b1) */
+
+	  if ((d & 1) != 0)
+	    {
+	      if (r >= q)
+		r = r - q;
+	      else if (q - r <= d)
+		{
+		  r = r - q + d;
+		  q--;
+		}
+	      else
+		{
+		  r = r - q + 2*d;
+		  q -= 2;
+		}
+	    }
+	}
+      else				/* Implies c1 = b1 */
+	{				/* Hence a1 = d - 1 = 2*b1 - 1 */
+	  if (a0 >= -d)
+	    {
+	      q = -1;
+	      r = a0 + d;
+	    }
+	  else
+	    {
+	      q = -2;
+	      r = a0 + 2*d;
+	    }
+	}
+    }
+
+  *rp = r;
+  return q;
+}
diff --git a/mpn/hppa/README b/mpn/hppa/README
new file mode 100644
index 000000000..5a2d5fd97
--- /dev/null
+++ b/mpn/hppa/README
@@ -0,0 +1,84 @@
+This directory contains mpn functions for various HP PA-RISC chips.  Code
+that runs faster on the PA7100 and later implementations, is in the pa7100
+directory.
+
+RELEVANT OPTIMIZATION ISSUES
+
+  Load and Store timing
+
+On the PA7000 no memory instructions can issue the two cycles after a store.
+For the PA7100, this is reduced to one cycle.
+
+The PA7100 has a lookup-free cache, so it helps to schedule loads and the
+dependent instruction really far from each other.
+
+STATUS
+
+1. mpn_mul_1 could be improved to 6.5 cycles/limb on the PA7100, using the
+   instructions bwlow (but some sw pipelining is needed to avoid the
+   xmpyu-fstds delay):
+
+	fldds	s1_ptr
+
+	xmpyu
+	fstds	N(%r30)
+	xmpyu
+	fstds	N(%r30)
+
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+
+	addc
+	stws	res_ptr
+	addc
+	stws	res_ptr
+
+	addib	Loop
+
+2. mpn_addmul_1 could be improved from the current 10 to 7.5 cycles/limb
+   (asymptotically) on the PA7100, using the instructions below.  With proper
+   sw pipelining and the unrolling level below, the speed becomes 8
+   cycles/limb.
+
+	fldds	s1_ptr
+	fldds	s1_ptr
+
+	xmpyu
+	fstds	N(%r30)
+	xmpyu
+	fstds	N(%r30)
+	xmpyu
+	fstds	N(%r30)
+	xmpyu
+	fstds	N(%r30)
+
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	ldws	N(%r30)
+	addc
+	addc
+	addc
+	addc
+	addc	%r0,%r0,cy-limb
+
+	ldws	res_ptr
+	ldws	res_ptr
+	ldws	res_ptr
+	ldws	res_ptr
+	add
+	stws	res_ptr
+	addc
+	stws	res_ptr
+	addc
+	stws	res_ptr
+	addc
+	stws	res_ptr
+
+	addib
diff --git a/mpn/hppa/add_n.s b/mpn/hppa/add_n.s
new file mode 100644
index 000000000..b4a142836
--- /dev/null
+++ b/mpn/hppa/add_n.s
@@ -0,0 +1,58 @@
+; HP-PA  __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	gr26
+; s1_ptr	gr25
+; s2_ptr	gr24
+; size		gr23
+
+; One might want to unroll this as for other processors, but it turns
+; out that the data cache contention after a store makes such
+; unrolling useless.  We can't come under 5 cycles/limb anyway.
+
+	.code
+	.export		__mpn_add_n
+__mpn_add_n
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+
+	addib,=		-1,%r23,L$end	; check for (SIZE == 1)
+	 add		%r20,%r19,%r28	; add first limbs ignoring cy
+
+L$loop	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addib,<>	-1,%r23,L$loop
+	 addc		%r20,%r19,%r28
+
+L$end	stws		%r28,0(0,%r26)
+	bv		0(%r2)
+	 addc		%r0,%r0,%r28
+
+	.exit
+	.procend
diff --git a/mpn/hppa/hppa1_1/addmul_1.s b/mpn/hppa/hppa1_1/addmul_1.s
new file mode 100644
index 000000000..0fdcb3cb2
--- /dev/null
+++ b/mpn/hppa/hppa1_1/addmul_1.s
@@ -0,0 +1,102 @@
+; HP-PA-1.1 __mpn_addmul_1 -- Multiply a limb vector with a limb and
+; add the result to a second limb vector.
+
+; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	r26
+; s1_ptr	r25
+; size		r24
+; s2_limb	r23
+
+; This runs at 11 cycles/limb on a PA7000.  With the used instructions, it
+; can not become faster due to data cache contention after a store.  On the
+; PA7100 it runs at 10 cycles/limb, and that can not be improved either,
+; since only the xmpyu does not need the integer pipeline, so the only
+; dual-issue we will get are addc+xmpyu.  Unrolling could gain a cycle/limb
+; on the PA7100.
+
+; There are some ideas described in mul_1.s that applies to this code too.
+
+	.code
+	.export		__mpn_addmul_1
+__mpn_addmul_1
+	.proc
+	.callinfo	frame=64,no_calls
+	.entry
+
+	ldo		64(%r30),%r30
+	fldws,ma	4(%r25),%fr5
+	stw		%r23,-16(%r30)		; move s2_limb ...
+	addib,=		-1,%r24,L$just_one_limb
+	 fldws		-16(%r30),%fr4		; ... into fr4
+	add		%r0,%r0,%r0		; clear carry
+	xmpyu		%fr4,%fr5,%fr6
+	fldws,ma	4(%r25),%fr7
+	fstds		%fr6,-16(%r30)
+	xmpyu		%fr4,%fr7,%fr8
+	ldw		-12(%r30),%r19		; least significant limb in product
+	ldw		-16(%r30),%r28
+
+	fstds		%fr8,-16(%r30)
+	addib,=		-1,%r24,L$end
+	 ldw		-12(%r30),%r1
+
+; Main loop
+L$loop	ldws		0(%r26),%r29
+	fldws,ma	4(%r25),%fr5
+	add		%r29,%r19,%r19
+	stws,ma		%r19,4(%r26)
+	addc		%r28,%r1,%r19
+	xmpyu		%fr4,%fr5,%fr6
+	ldw		-16(%r30),%r28
+	fstds		%fr6,-16(%r30)
+	addc		%r0,%r28,%r28
+	addib,<>	-1,%r24,L$loop
+	 ldw		-12(%r30),%r1
+
+L$end	ldw		0(%r26),%r29
+	add		%r29,%r19,%r19
+	stws,ma		%r19,4(%r26)
+	addc		%r28,%r1,%r19
+	ldw		-16(%r30),%r28
+	ldws		0(%r26),%r29
+	addc		%r0,%r28,%r28
+	add		%r29,%r19,%r19
+	stws,ma		%r19,4(%r26)
+	addc		%r0,%r28,%r28
+	bv		0(%r2)
+	 ldo		-64(%r30),%r30
+
+L$just_one_limb
+	xmpyu		%fr4,%fr5,%fr6
+	ldw		0(%r26),%r29
+	fstds		%fr6,-16(%r30)
+	ldw		-12(%r30),%r1
+	ldw		-16(%r30),%r28
+	add		%r29,%r1,%r19
+	stw		%r19,0(%r26)
+	addc		%r0,%r28,%r28
+	bv		0(%r2)
+	 ldo		-64(%r30),%r30
+
+	.exit
+	.procend
diff --git a/mpn/hppa/hppa1_1/mul_1.s b/mpn/hppa/hppa1_1/mul_1.s
new file mode 100644
index 000000000..cdd0c1d7f
--- /dev/null
+++ b/mpn/hppa/hppa1_1/mul_1.s
@@ -0,0 +1,98 @@
+; HP-PA-1.1 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+; the result in a second limb vector.
+
+; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	r26
+; s1_ptr	r25
+; size		r24
+; s2_limb	r23
+
+; This runs at 9 cycles/limb on a PA7000.  With the used instructions, it can
+; not become faster due to data cache contention after a store.  On the
+; PA7100 it runs at 7 cycles/limb, and that can not be improved either, since
+; only the xmpyu does not need the integer pipeline, so the only dual-issue
+; we will get are addc+xmpyu.  Unrolling would not help either CPU.
+
+; We could use fldds to read two limbs at a time from the S1 array, and that
+; could bring down the times to 8.5 and 6.5 cycles/limb for the PA7000 and
+; PA7100, respectively.  We don't do that since it does not seem worth the
+; (alignment) troubles...
+
+; At least the PA7100 is rumored to be able to deal with cache-misses
+; without stalling instruction issue.  If this is true, and the cache is
+; actually also lockup-free, we should use a deeper software pipeline, and
+; load from S1 very early!  (The loads and stores to -12(sp) will surely be
+; in the cache.)
+
+	.code
+	.export		__mpn_mul_1
+__mpn_mul_1
+	.proc
+	.callinfo	frame=64,no_calls
+	.entry
+
+	ldo		64(%r30),%r30
+	fldws,ma	4(%r25),%fr5
+	stw		%r23,-16(%r30)		; move s2_limb ...
+	addib,=		-1,%r24,L$just_one_limb
+	 fldws		-16(%r30),%fr4		; ... into fr4
+	add		%r0,%r0,%r0		; clear carry
+	xmpyu		%fr4,%fr5,%fr6
+	fldws,ma	4(%r25),%fr7
+	fstds	 	%fr6,-16(%r30)
+	xmpyu		%fr4,%fr7,%fr8
+	ldw		-12(%r30),%r19		; least significant limb in product
+	ldw		-16(%r30),%r28
+
+	fstds		%fr8,-16(%r30)
+	addib,=		-1,%r24,L$end
+	 ldw		-12(%r30),%r1
+
+; Main loop
+L$loop	fldws,ma	4(%r25),%fr5
+	stws,ma		%r19,4(%r26)
+	addc		%r28,%r1,%r19
+	xmpyu		%fr4,%fr5,%fr6
+	ldw		-16(%r30),%r28
+	fstds		%fr6,-16(%r30)
+	addib,<>	-1,%r24,L$loop
+	 ldw		-12(%r30),%r1
+
+L$end	stws,ma		%r19,4(%r26)
+	addc		%r28,%r1,%r19
+	ldw		-16(%r30),%r28
+	stws,ma		%r19,4(%r26)
+	addc		%r0,%r28,%r28
+	bv		0(%r2)
+	 ldo		-64(%r30),%r30
+
+L$just_one_limb
+	xmpyu		%fr4,%fr5,%fr6
+	fstds		%fr6,-16(%r30)
+	ldw		-16(%r30),%r28
+	ldo		-64(%r30),%r30
+	bv		0(%r2)
+	 fstws		%fr6R,0(%r26)
+
+	.exit
+	.procend
diff --git a/mpn/hppa/hppa1_1/pa7100/add_n.s b/mpn/hppa/hppa1_1/pa7100/add_n.s
new file mode 100644
index 000000000..21fe16154
--- /dev/null
+++ b/mpn/hppa/hppa1_1/pa7100/add_n.s
@@ -0,0 +1,75 @@
+; HP-PA  __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+; This is optimized for the PA7100, where is runs at 4.25 cycles/limb
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	gr26
+; s1_ptr	gr25
+; s2_ptr	gr24
+; size		gr23
+
+	.code
+	.export		__mpn_add_n
+__mpn_add_n
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+
+	addib,<=	-5,%r23,L$rest
+	 add		%r20,%r19,%r28	; add first limbs ignoring cy
+
+L$loop	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addc		%r20,%r19,%r28
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addc		%r20,%r19,%r28
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addc		%r20,%r19,%r28
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addib,>		-4,%r23,L$loop
+	addc		%r20,%r19,%r28
+
+L$rest	addib,=		4,%r23,L$end
+	nop
+L$eloop	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addib,>		-1,%r23,L$eloop
+	addc		%r20,%r19,%r28
+
+L$end	stws		%r28,0(0,%r26)
+	bv		0(%r2)
+	 addc		%r0,%r0,%r28
+
+	.exit
+	.procend
diff --git a/mpn/hppa/hppa1_1/pa7100/addmul_1.S b/mpn/hppa/hppa1_1/pa7100/addmul_1.S
new file mode 100644
index 000000000..eb1d12bf6
--- /dev/null
+++ b/mpn/hppa/hppa1_1/pa7100/addmul_1.S
@@ -0,0 +1,189 @@
+; HP-PA 7100/7200 __mpn_addmul_1 -- Multiply a limb vector with a limb and
+; add the result to a second limb vector.
+
+; Copyright (C) 1995 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define res_ptr	%r26
+#define s1_ptr	%r25
+#define size	%r24
+#define s2_limb	%r23
+
+#define cylimb	%r28
+#define s0	%r19
+#define s1	%r20
+#define s2	%r3
+#define s3	%r4
+#define lo0	%r21
+#define lo1	%r5
+#define lo2	%r6
+#define lo3	%r7
+#define hi0	%r22
+#define hi1	%r23				/* safe to reuse */
+#define hi2	%r29
+#define hi3	%r1
+
+	.code
+	.export		__mpn_addmul_1
+__mpn_addmul_1
+	.proc
+	.callinfo	frame=128,no_calls
+	.entry
+
+	ldo	128(%r30),%r30
+	stws	s2_limb,-16(%r30)
+	add	 %r0,%r0,cylimb			; clear cy and cylimb
+	addib,<	-4,size,L$few_limbs
+	fldws	-16(%r30),%fr31R
+
+	ldo	-112(%r30),%r31
+	stw	%r3,-96(%r30)
+	stw	%r4,-92(%r30)
+	stw	%r5,-88(%r30)
+	stw	%r6,-84(%r30)
+	stw	%r7,-80(%r30)
+
+	bb,>=,n	 s1_ptr,29,L$0
+
+	fldws,ma 4(s1_ptr),%fr4
+	ldws	 0(res_ptr),s0
+	xmpyu	 %fr4,%fr31R,%fr5
+	fstds	 %fr5,-16(%r31)
+	ldws	-16(%r31),cylimb
+	ldws	-12(%r31),lo0
+	add	 s0,lo0,s0
+	addib,< -1,size,L$few_limbs
+	stws,ma	 s0,4(res_ptr)
+
+; start software pipeline ----------------------------------------------------
+L$0	fldds,ma 8(s1_ptr),%fr4
+	fldds,ma 8(s1_ptr),%fr8
+
+	xmpyu	 %fr4L,%fr31R,%fr5
+	xmpyu	 %fr4R,%fr31R,%fr6
+	xmpyu	 %fr8L,%fr31R,%fr9
+	xmpyu	 %fr8R,%fr31R,%fr10
+
+	fstds	 %fr5,-16(%r31)
+	fstds	 %fr6,-8(%r31)
+	fstds	 %fr9,0(%r31)
+	fstds	 %fr10,8(%r31)
+
+	ldws   -16(%r31),hi0
+	ldws   -12(%r31),lo0
+	ldws	-8(%r31),hi1
+	ldws	-4(%r31),lo1
+	ldws	 0(%r31),hi2
+	ldws	 4(%r31),lo2
+	ldws	 8(%r31),hi3
+	ldws	12(%r31),lo3
+
+	addc	 lo0,cylimb,lo0
+	addc	 lo1,hi0,lo1
+	addc	 lo2,hi1,lo2
+	addc	 lo3,hi2,lo3
+
+	addib,<	 -4,size,L$end
+	addc	 %r0,hi3,cylimb			; propagate carry into cylimb
+; main loop ------------------------------------------------------------------
+L$loop	fldds,ma 8(s1_ptr),%fr4
+	fldds,ma 8(s1_ptr),%fr8
+
+	ldws	 0(res_ptr),s0
+	xmpyu	 %fr4L,%fr31R,%fr5
+	ldws	 4(res_ptr),s1
+	xmpyu	 %fr4R,%fr31R,%fr6
+	ldws	 8(res_ptr),s2
+	xmpyu	 %fr8L,%fr31R,%fr9
+	ldws	12(res_ptr),s3
+	xmpyu	 %fr8R,%fr31R,%fr10
+
+	fstds	 %fr5,-16(%r31)
+	add	 s0,lo0,s0
+	fstds	 %fr6,-8(%r31)
+	addc	 s1,lo1,s1
+	fstds	 %fr9,0(%r31)
+	addc	 s2,lo2,s2
+	fstds	 %fr10,8(%r31)
+	addc	 s3,lo3,s3
+
+	ldws   -16(%r31),hi0
+	ldws   -12(%r31),lo0
+	ldws	-8(%r31),hi1
+	ldws	-4(%r31),lo1
+	ldws	 0(%r31),hi2
+	ldws	 4(%r31),lo2
+	ldws	 8(%r31),hi3
+	ldws	12(%r31),lo3
+
+	addc	 lo0,cylimb,lo0
+	stws,ma	 s0,4(res_ptr)
+	addc	 lo1,hi0,lo1
+	stws,ma	 s1,4(res_ptr)
+	addc	 lo2,hi1,lo2
+	stws,ma	 s2,4(res_ptr)
+	addc	 lo3,hi2,lo3
+	stws,ma	 s3,4(res_ptr)
+
+	addib,>= -4,size,L$loop
+	addc	 %r0,hi3,cylimb			; propagate carry into cylimb
+; finish software pipeline ---------------------------------------------------
+L$end	ldws	 0(res_ptr),s0
+	ldws	 4(res_ptr),s1
+	ldws	 8(res_ptr),s2
+	ldws	12(res_ptr),s3
+
+	add	 s0,lo0,s0
+	stws,ma	 s0,4(res_ptr)
+	addc	 s1,lo1,s1
+	stws,ma	 s1,4(res_ptr)
+	addc	 s2,lo2,s2
+	stws,ma	 s2,4(res_ptr)
+	addc	 s3,lo3,s3
+	stws,ma	 s3,4(res_ptr)
+
+; restore callee-saves registers ---------------------------------------------
+	ldw	-96(%r30),%r3
+	ldw	-92(%r30),%r4
+	ldw	-88(%r30),%r5
+	ldw	-84(%r30),%r6
+	ldw	-80(%r30),%r7
+
+L$few_limbs
+	addib,=,n 4,size,L$ret
+L$loop2	fldws,ma 4(s1_ptr),%fr4
+	ldws	 0(res_ptr),s0
+	xmpyu	 %fr4,%fr31R,%fr5
+	fstds	 %fr5,-16(%r30)
+	ldws	-16(%r30),hi0
+	ldws	-12(%r30),lo0
+	addc	 lo0,cylimb,lo0
+	addc	 %r0,hi0,cylimb
+	add	 s0,lo0,s0
+	stws,ma	 s0,4(res_ptr)
+	addib,<> -1,size,L$loop2
+	nop
+
+L$ret	addc	 %r0,cylimb,cylimb
+	bv	 0(%r2)
+	ldo	 -128(%r30),%r30
+
+	.exit
+	.procend
diff --git a/mpn/hppa/hppa1_1/pa7100/lshift.s b/mpn/hppa/hppa1_1/pa7100/lshift.s
new file mode 100644
index 000000000..4c74a505a
--- /dev/null
+++ b/mpn/hppa/hppa1_1/pa7100/lshift.s
@@ -0,0 +1,83 @@
+; HP-PA  __mpn_lshift --
+; This is optimized for the PA7100, where is runs at 3.25 cycles/limb
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	gr26
+; s_ptr		gr25
+; size		gr24
+; cnt		gr23
+
+	.code
+	.export		__mpn_lshift
+__mpn_lshift
+	.proc
+	.callinfo	frame=64,no_calls
+	.entry
+
+	sh2add		%r24,%r25,%r25
+	sh2add		%r24,%r26,%r26
+	ldws,mb		-4(0,%r25),%r22
+	subi		32,%r23,%r1
+	mtsar		%r1
+	addib,=		-1,%r24,L$0004
+	vshd		%r0,%r22,%r28		; compute carry out limb
+	ldws,mb		-4(0,%r25),%r29
+	addib,<=	-5,%r24,L$rest
+	vshd		%r22,%r29,%r20
+
+L$loop	ldws,mb		-4(0,%r25),%r22
+	stws,mb		%r20,-4(0,%r26)
+	vshd		%r29,%r22,%r20
+	ldws,mb		-4(0,%r25),%r29
+	stws,mb		%r20,-4(0,%r26)
+	vshd		%r22,%r29,%r20
+	ldws,mb		-4(0,%r25),%r22
+	stws,mb		%r20,-4(0,%r26)
+	vshd		%r29,%r22,%r20
+	ldws,mb		-4(0,%r25),%r29
+	stws,mb		%r20,-4(0,%r26)
+	addib,>		-4,%r24,L$loop
+	vshd		%r22,%r29,%r20
+
+L$rest	addib,=		4,%r24,L$end1
+	nop
+L$eloop	ldws,mb		-4(0,%r25),%r22
+	stws,mb		%r20,-4(0,%r26)
+	addib,<=	-1,%r24,L$end2
+	vshd		%r29,%r22,%r20
+	ldws,mb		-4(0,%r25),%r29
+	stws,mb		%r20,-4(0,%r26)
+	addib,>		-1,%r24,L$eloop
+	vshd		%r22,%r29,%r20
+
+L$end1	stws,mb		%r20,-4(0,%r26)
+	vshd		%r29,%r0,%r20
+	bv		0(%r2)
+	stw		%r20,-4(0,%r26)
+L$end2	stws,mb		%r20,-4(0,%r26)
+L$0004	vshd		%r22,%r0,%r20
+	bv		0(%r2)
+	stw		%r20,-4(0,%r26)
+
+	.exit
+	.procend
diff --git a/mpn/hppa/hppa1_1/pa7100/rshift.s b/mpn/hppa/hppa1_1/pa7100/rshift.s
new file mode 100644
index 000000000..845418c53
--- /dev/null
+++ b/mpn/hppa/hppa1_1/pa7100/rshift.s
@@ -0,0 +1,80 @@
+; HP-PA  __mpn_rshift --
+; This is optimized for the PA7100, where is runs at 3.25 cycles/limb
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	gr26
+; s_ptr		gr25
+; size		gr24
+; cnt		gr23
+
+	.code
+	.export		__mpn_rshift
+__mpn_rshift
+	.proc
+	.callinfo	frame=64,no_calls
+	.entry
+
+	ldws,ma		4(0,%r25),%r22
+	mtsar		%r23
+	addib,=		-1,%r24,L$0004
+	vshd		%r22,%r0,%r28		; compute carry out limb
+	ldws,ma		4(0,%r25),%r29
+	addib,<=	-5,%r24,L$rest
+	vshd		%r29,%r22,%r20
+
+L$loop	ldws,ma		4(0,%r25),%r22
+	stws,ma		%r20,4(0,%r26)
+	vshd		%r22,%r29,%r20
+	ldws,ma		4(0,%r25),%r29
+	stws,ma		%r20,4(0,%r26)
+	vshd		%r29,%r22,%r20
+	ldws,ma		4(0,%r25),%r22
+	stws,ma		%r20,4(0,%r26)
+	vshd		%r22,%r29,%r20
+	ldws,ma		4(0,%r25),%r29
+	stws,ma		%r20,4(0,%r26)
+	addib,>		-4,%r24,L$loop
+	vshd		%r29,%r22,%r20
+
+L$rest	addib,=		4,%r24,L$end1
+	nop
+L$eloop	ldws,ma		4(0,%r25),%r22
+	stws,ma		%r20,4(0,%r26)
+	addib,<=	-1,%r24,L$end2
+	vshd		%r22,%r29,%r20
+	ldws,ma		4(0,%r25),%r29
+	stws,ma		%r20,4(0,%r26)
+	addib,>		-1,%r24,L$eloop
+	vshd		%r29,%r22,%r20
+
+L$end1	stws,ma		%r20,4(0,%r26)
+	vshd		%r0,%r29,%r20
+	bv		0(%r2)
+	stw		%r20,0(0,%r26)
+L$end2	stws,ma		%r20,4(0,%r26)
+L$0004	vshd		%r0,%r22,%r20
+	bv		0(%r2)
+	stw		%r20,0(0,%r26)
+
+	.exit
+	.procend
diff --git a/mpn/hppa/hppa1_1/pa7100/sub_n.s b/mpn/hppa/hppa1_1/pa7100/sub_n.s
new file mode 100644
index 000000000..1e1ebcf91
--- /dev/null
+++ b/mpn/hppa/hppa1_1/pa7100/sub_n.s
@@ -0,0 +1,76 @@
+; HP-PA  __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+; This is optimized for the PA7100, where is runs at 4.25 cycles/limb
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	gr26
+; s1_ptr	gr25
+; s2_ptr	gr24
+; size		gr23
+
+	.code
+	.export		__mpn_sub_n
+__mpn_sub_n
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+
+	addib,<=	-5,%r23,L$rest
+	 sub		%r20,%r19,%r28	; subtract first limbs ignoring cy
+
+L$loop	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	subb		%r20,%r19,%r28
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	subb		%r20,%r19,%r28
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	subb		%r20,%r19,%r28
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addib,>		-4,%r23,L$loop
+	subb		%r20,%r19,%r28
+
+L$rest	addib,=		4,%r23,L$end
+	nop
+L$eloop	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addib,>		-1,%r23,L$eloop
+	subb		%r20,%r19,%r28
+
+L$end	stws		%r28,0(0,%r26)
+	addc		%r0,%r0,%r28
+	bv		0(%r2)
+	 subi		1,%r28,%r28
+
+	.exit
+	.procend
diff --git a/mpn/hppa/hppa1_1/pa7100/submul_1.S b/mpn/hppa/hppa1_1/pa7100/submul_1.S
new file mode 100644
index 000000000..a71176e68
--- /dev/null
+++ b/mpn/hppa/hppa1_1/pa7100/submul_1.S
@@ -0,0 +1,195 @@
+; HP-PA 7100/7200 __mpn_submul_1 -- Multiply a limb vector with a limb and
+; subtract the result from a second limb vector.
+
+; Copyright (C) 1995 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+; INPUT PARAMETERS
+#define res_ptr	%r26
+#define s1_ptr	%r25
+#define size	%r24
+#define s2_limb	%r23
+
+#define cylimb	%r28
+#define s0	%r19
+#define s1	%r20
+#define s2	%r3
+#define s3	%r4
+#define lo0	%r21
+#define lo1	%r5
+#define lo2	%r6
+#define lo3	%r7
+#define hi0	%r22
+#define hi1	%r23				/* safe to reuse */
+#define hi2	%r29
+#define hi3	%r1
+
+	.code
+	.export		__mpn_submul_1
+__mpn_submul_1
+	.proc
+	.callinfo	frame=128,no_calls
+	.entry
+
+	ldo	128(%r30),%r30
+	stws	s2_limb,-16(%r30)
+	add	 %r0,%r0,cylimb			; clear cy and cylimb
+	addib,<	-4,size,L$few_limbs
+	fldws	-16(%r30),%fr31R
+
+	ldo	-112(%r30),%r31
+	stw	%r3,-96(%r30)
+	stw	%r4,-92(%r30)
+	stw	%r5,-88(%r30)
+	stw	%r6,-84(%r30)
+	stw	%r7,-80(%r30)
+
+	bb,>=,n	 s1_ptr,29,L$0
+
+	fldws,ma 4(s1_ptr),%fr4
+	ldws	 0(res_ptr),s0
+	xmpyu	 %fr4,%fr31R,%fr5
+	fstds	 %fr5,-16(%r31)
+	ldws	-16(%r31),cylimb
+	ldws	-12(%r31),lo0
+	sub	 s0,lo0,s0
+	add	 s0,lo0,%r0			; invert cy
+	addib,< -1,size,L$few_limbs
+	stws,ma	 s0,4(res_ptr)
+
+; start software pipeline ----------------------------------------------------
+L$0	fldds,ma 8(s1_ptr),%fr4
+	fldds,ma 8(s1_ptr),%fr8
+
+	xmpyu	 %fr4L,%fr31R,%fr5
+	xmpyu	 %fr4R,%fr31R,%fr6
+	xmpyu	 %fr8L,%fr31R,%fr9
+	xmpyu	 %fr8R,%fr31R,%fr10
+
+	fstds	 %fr5,-16(%r31)
+	fstds	 %fr6,-8(%r31)
+	fstds	 %fr9,0(%r31)
+	fstds	 %fr10,8(%r31)
+
+	ldws   -16(%r31),hi0
+	ldws   -12(%r31),lo0
+	ldws	-8(%r31),hi1
+	ldws	-4(%r31),lo1
+	ldws	 0(%r31),hi2
+	ldws	 4(%r31),lo2
+	ldws	 8(%r31),hi3
+	ldws	12(%r31),lo3
+
+	addc	 lo0,cylimb,lo0
+	addc	 lo1,hi0,lo1
+	addc	 lo2,hi1,lo2
+	addc	 lo3,hi2,lo3
+
+	addib,<	 -4,size,L$end
+	addc	 %r0,hi3,cylimb			; propagate carry into cylimb
+; main loop ------------------------------------------------------------------
+L$loop	fldds,ma 8(s1_ptr),%fr4
+	fldds,ma 8(s1_ptr),%fr8
+
+	ldws	 0(res_ptr),s0
+	xmpyu	 %fr4L,%fr31R,%fr5
+	ldws	 4(res_ptr),s1
+	xmpyu	 %fr4R,%fr31R,%fr6
+	ldws	 8(res_ptr),s2
+	xmpyu	 %fr8L,%fr31R,%fr9
+	ldws	12(res_ptr),s3
+	xmpyu	 %fr8R,%fr31R,%fr10
+
+	fstds	 %fr5,-16(%r31)
+	sub	 s0,lo0,s0
+	fstds	 %fr6,-8(%r31)
+	subb	 s1,lo1,s1
+	fstds	 %fr9,0(%r31)
+	subb	 s2,lo2,s2
+	fstds	 %fr10,8(%r31)
+	subb	 s3,lo3,s3
+	subb	 %r0,%r0,lo0			; these two insns ...
+	add	 lo0,lo0,%r0			; ... just invert cy
+
+	ldws   -16(%r31),hi0
+	ldws   -12(%r31),lo0
+	ldws	-8(%r31),hi1
+	ldws	-4(%r31),lo1
+	ldws	 0(%r31),hi2
+	ldws	 4(%r31),lo2
+	ldws	 8(%r31),hi3
+	ldws	12(%r31),lo3
+
+	addc	 lo0,cylimb,lo0
+	stws,ma	 s0,4(res_ptr)
+	addc	 lo1,hi0,lo1
+	stws,ma	 s1,4(res_ptr)
+	addc	 lo2,hi1,lo2
+	stws,ma	 s2,4(res_ptr)
+	addc	 lo3,hi2,lo3
+	stws,ma	 s3,4(res_ptr)
+
+	addib,>= -4,size,L$loop
+	addc	 %r0,hi3,cylimb			; propagate carry into cylimb
+; finish software pipeline ---------------------------------------------------
+L$end	ldws	 0(res_ptr),s0
+	ldws	 4(res_ptr),s1
+	ldws	 8(res_ptr),s2
+	ldws	12(res_ptr),s3
+
+	sub	 s0,lo0,s0
+	stws,ma	 s0,4(res_ptr)
+	subb	 s1,lo1,s1
+	stws,ma	 s1,4(res_ptr)
+	subb	 s2,lo2,s2
+	stws,ma	 s2,4(res_ptr)
+	subb	 s3,lo3,s3
+	stws,ma	 s3,4(res_ptr)
+	subb	 %r0,%r0,lo0			; these two insns ...
+	add	 lo0,lo0,%r0			; ... invert cy
+
+; restore callee-saves registers ---------------------------------------------
+	ldw	-96(%r30),%r3
+	ldw	-92(%r30),%r4
+	ldw	-88(%r30),%r5
+	ldw	-84(%r30),%r6
+	ldw	-80(%r30),%r7
+
+L$few_limbs
+	addib,=,n 4,size,L$ret
+L$loop2	fldws,ma 4(s1_ptr),%fr4
+	ldws	 0(res_ptr),s0
+	xmpyu	 %fr4,%fr31R,%fr5
+	fstds	 %fr5,-16(%r30)
+	ldws	-16(%r30),hi0
+	ldws	-12(%r30),lo0
+	addc	 lo0,cylimb,lo0
+	addc	 %r0,hi0,cylimb
+	sub	 s0,lo0,s0
+	add	 s0,lo0,%r0			; invert cy
+	stws,ma	 s0,4(res_ptr)
+	addib,<> -1,size,L$loop2
+	nop
+
+L$ret	addc	 %r0,cylimb,cylimb
+	bv	 0(%r2)
+	ldo	 -128(%r30),%r30
+
+	.exit
+	.procend
diff --git a/mpn/hppa/hppa1_1/submul_1.s b/mpn/hppa/hppa1_1/submul_1.s
new file mode 100644
index 000000000..a4a385467
--- /dev/null
+++ b/mpn/hppa/hppa1_1/submul_1.s
@@ -0,0 +1,111 @@
+; HP-PA-1.1 __mpn_submul_1 -- Multiply a limb vector with a limb and
+; subtract the result from a second limb vector.
+
+; Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	r26
+; s1_ptr	r25
+; size		r24
+; s2_limb	r23
+
+; This runs at 12 cycles/limb on a PA7000.  With the used instructions, it
+; can not become faster due to data cache contention after a store.  On the
+; PA7100 it runs at 11 cycles/limb, and that can not be improved either,
+; since only the xmpyu does not need the integer pipeline, so the only
+; dual-issue we will get are addc+xmpyu.  Unrolling could gain a cycle/limb
+; on the PA7100.
+
+; There are some ideas described in mul_1.s that applies to this code too.
+
+; It seems possible to make this run as fast as __mpn_addmul_1, if we use
+; 	sub,>>=	%r29,%r19,%r22
+;	addi	1,%r28,%r28
+; but that requires reworking the hairy software pipeline...
+
+	.code
+	.export		__mpn_submul_1
+__mpn_submul_1
+	.proc
+	.callinfo	frame=64,no_calls
+	.entry
+
+	ldo		64(%r30),%r30
+	fldws,ma	4(%r25),%fr5
+	stw		%r23,-16(%r30)		; move s2_limb ...
+	addib,=		-1,%r24,L$just_one_limb
+	 fldws		-16(%r30),%fr4		; ... into fr4
+	add		%r0,%r0,%r0		; clear carry
+	xmpyu		%fr4,%fr5,%fr6
+	fldws,ma	4(%r25),%fr7
+	fstds		%fr6,-16(%r30)
+	xmpyu		%fr4,%fr7,%fr8
+	ldw		-12(%r30),%r19		; least significant limb in product
+	ldw		-16(%r30),%r28
+
+	fstds		%fr8,-16(%r30)
+	addib,=		-1,%r24,L$end
+	 ldw		-12(%r30),%r1
+
+; Main loop
+L$loop	ldws		0(%r26),%r29
+	fldws,ma	4(%r25),%fr5
+	sub		%r29,%r19,%r22
+	add		%r22,%r19,%r0
+	stws,ma		%r22,4(%r26)
+	addc		%r28,%r1,%r19
+	xmpyu		%fr4,%fr5,%fr6
+	ldw		-16(%r30),%r28
+	fstds		%fr6,-16(%r30)
+	addc		%r0,%r28,%r28
+	addib,<>	-1,%r24,L$loop
+	 ldw		-12(%r30),%r1
+
+L$end	ldw		0(%r26),%r29
+	sub		%r29,%r19,%r22
+	add		%r22,%r19,%r0
+	stws,ma		%r22,4(%r26)
+	addc		%r28,%r1,%r19
+	ldw		-16(%r30),%r28
+	ldws		0(%r26),%r29
+	addc		%r0,%r28,%r28
+	sub		%r29,%r19,%r22
+	add		%r22,%r19,%r0
+	stws,ma		%r22,4(%r26)
+	addc		%r0,%r28,%r28
+	bv		0(%r2)
+	 ldo		-64(%r30),%r30
+
+L$just_one_limb
+	xmpyu		%fr4,%fr5,%fr6
+	ldw		0(%r26),%r29
+	fstds		%fr6,-16(%r30)
+	ldw		-12(%r30),%r1
+	ldw		-16(%r30),%r28
+	sub		%r29,%r1,%r22
+	add		%r22,%r1,%r0
+	stw		%r22,0(%r26)
+	addc		%r0,%r28,%r28
+	bv		0(%r2)
+	 ldo		-64(%r30),%r30
+
+	.exit
+	.procend
diff --git a/mpn/hppa/hppa1_1/udiv_qrnnd.s b/mpn/hppa/hppa1_1/udiv_qrnnd.s
new file mode 100644
index 000000000..bf7dc70cd
--- /dev/null
+++ b/mpn/hppa/hppa1_1/udiv_qrnnd.s
@@ -0,0 +1,75 @@
+; HP-PA  __udiv_qrnnd division support, used from longlong.h.
+; This version runs fast on PA 7000 and later.
+
+; Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; rem_ptr	gr26
+; n1		gr25
+; n0		gr24
+; d		gr23
+
+	.code
+L$0000	.word		0x43f00000
+	.word		0x0
+	.export		__udiv_qrnnd
+__udiv_qrnnd
+	.proc
+	.callinfo	frame=64,no_calls
+	.entry
+	ldo		64(%r30),%r30
+
+	stws		%r25,-16(0,%r30)	; n_hi
+	stws		%r24,-12(0,%r30)	; n_lo
+	ldil		L'L$0000,%r19
+	ldo		R'L$0000(%r19),%r19
+	fldds		-16(0,%r30),%fr5
+	stws		%r23,-12(0,%r30)
+	comib,<=	0,%r25,L$1
+	fcnvxf,dbl,dbl	%fr5,%fr5
+	fldds		0(0,%r19),%fr4
+	fadd,dbl	%fr4,%fr5,%fr5
+L$1
+	fcpy,sgl	%fr0,%fr6L
+	fldws		-12(0,%r30),%fr6R
+	fcnvxf,dbl,dbl	%fr6,%fr4
+
+	fdiv,dbl	%fr5,%fr4,%fr5
+
+	fcnvfx,dbl,dbl	%fr5,%fr4
+	fstws		%fr4R,-16(%r30)
+	xmpyu		%fr4R,%fr6R,%fr6
+	ldws		-16(%r30),%r28
+	fstds		%fr6,-16(0,%r30)
+	ldws		-12(0,%r30),%r21
+	ldws		-16(0,%r30),%r20
+	sub		%r24,%r21,%r22
+	subb		%r25,%r20,%r19
+	comib,=		0,%r19,L$2
+	ldo		-64(%r30),%r30
+
+	add		%r22,%r23,%r22
+	ldo		-1(%r28),%r28
+L$2	bv		0(%r2)
+	stws		%r22,0(0,%r26)
+
+	.exit
+	.procend
diff --git a/mpn/hppa/lshift.s b/mpn/hppa/lshift.s
new file mode 100644
index 000000000..abac6ec20
--- /dev/null
+++ b/mpn/hppa/lshift.s
@@ -0,0 +1,66 @@
+; HP-PA  __mpn_lshift --
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	gr26
+; s_ptr		gr25
+; size		gr24
+; cnt		gr23
+
+	.code
+	.export		__mpn_lshift
+__mpn_lshift
+	.proc
+	.callinfo	frame=64,no_calls
+	.entry
+
+	sh2add		%r24,%r25,%r25
+	sh2add		%r24,%r26,%r26
+	ldws,mb		-4(0,%r25),%r22
+	subi		32,%r23,%r1
+	mtsar		%r1
+	addib,=		-1,%r24,L$0004
+	vshd		%r0,%r22,%r28		; compute carry out limb
+	ldws,mb		-4(0,%r25),%r29
+	addib,=		-1,%r24,L$0002
+	vshd		%r22,%r29,%r20
+
+L$loop	ldws,mb		-4(0,%r25),%r22
+	stws,mb		%r20,-4(0,%r26)
+	addib,=		-1,%r24,L$0003
+	vshd		%r29,%r22,%r20
+	ldws,mb		-4(0,%r25),%r29
+	stws,mb		%r20,-4(0,%r26)
+	addib,<>	-1,%r24,L$loop
+	vshd		%r22,%r29,%r20
+
+L$0002	stws,mb		%r20,-4(0,%r26)
+	vshd		%r29,%r0,%r20
+	bv		0(%r2)
+	stw		%r20,-4(0,%r26)
+L$0003	stws,mb		%r20,-4(0,%r26)
+L$0004	vshd		%r22,%r0,%r20
+	bv		0(%r2)
+	stw		%r20,-4(0,%r26)
+
+	.exit
+	.procend
diff --git a/mpn/hppa/rshift.s b/mpn/hppa/rshift.s
new file mode 100644
index 000000000..c1480e5ab
--- /dev/null
+++ b/mpn/hppa/rshift.s
@@ -0,0 +1,63 @@
+; HP-PA  __mpn_rshift -- 
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	gr26
+; s_ptr		gr25
+; size		gr24
+; cnt		gr23
+
+	.code
+	.export		__mpn_rshift
+__mpn_rshift
+	.proc
+	.callinfo	frame=64,no_calls
+	.entry
+
+	ldws,ma		4(0,%r25),%r22
+	mtsar		%r23
+	addib,=		-1,%r24,L$0004
+	vshd		%r22,%r0,%r28		; compute carry out limb
+	ldws,ma		4(0,%r25),%r29
+	addib,=		-1,%r24,L$0002
+	vshd		%r29,%r22,%r20
+
+L$loop	ldws,ma		4(0,%r25),%r22
+	stws,ma		%r20,4(0,%r26)
+	addib,=		-1,%r24,L$0003
+	vshd		%r22,%r29,%r20
+	ldws,ma		4(0,%r25),%r29
+	stws,ma		%r20,4(0,%r26)
+	addib,<>	-1,%r24,L$loop
+	vshd		%r29,%r22,%r20
+
+L$0002	stws,ma		%r20,4(0,%r26)
+	vshd		%r0,%r29,%r20
+	bv		0(%r2)
+	stw		%r20,0(0,%r26)
+L$0003	stws,ma		%r20,4(0,%r26)
+L$0004	vshd		%r0,%r22,%r20
+	bv		0(%r2)
+	stw		%r20,0(0,%r26)
+
+	.exit
+	.procend
diff --git a/mpn/hppa/sub_n.s b/mpn/hppa/sub_n.s
new file mode 100644
index 000000000..04fa3e1e3
--- /dev/null
+++ b/mpn/hppa/sub_n.s
@@ -0,0 +1,59 @@
+; HP-PA  __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	gr26
+; s1_ptr	gr25
+; s2_ptr	gr24
+; size		gr23
+
+; One might want to unroll this as for other processors, but it turns
+; out that the data cache contention after a store makes such
+; unrolling useless.  We can't come under 5 cycles/limb anyway.
+
+	.code
+	.export		__mpn_sub_n
+__mpn_sub_n
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+
+	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+
+	addib,=		-1,%r23,L$end	; check for (SIZE == 1)
+	 sub		%r20,%r19,%r28	; subtract first limbs ignoring cy
+
+L$loop	ldws,ma		4(0,%r25),%r20
+	ldws,ma		4(0,%r24),%r19
+	stws,ma		%r28,4(0,%r26)
+	addib,<>	-1,%r23,L$loop
+	 subb		%r20,%r19,%r28
+
+L$end	stws		%r28,0(0,%r26)
+	addc		%r0,%r0,%r28
+	bv		0(%r2)
+	 subi		1,%r28,%r28
+
+	.exit
+	.procend
diff --git a/mpn/hppa/udiv_qrnnd.s b/mpn/hppa/udiv_qrnnd.s
new file mode 100644
index 000000000..9b45eb40d
--- /dev/null
+++ b/mpn/hppa/udiv_qrnnd.s
@@ -0,0 +1,286 @@
+; HP-PA  __udiv_qrnnd division support, used from longlong.h.
+; This version runs fast on pre-PA7000 CPUs.
+
+; Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; rem_ptr	gr26
+; n1		gr25
+; n0		gr24
+; d		gr23
+
+; The code size is a bit excessive.  We could merge the last two ds;addc
+; sequences by simply moving the "bb,< Odd" instruction down.  The only
+; trouble is the FFFFFFFF code that would need some hacking.
+
+	.code
+	.export		__udiv_qrnnd
+__udiv_qrnnd
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+
+	comb,<		%r23,0,L$largedivisor
+	 sub		%r0,%r23,%r1		; clear cy as side-effect
+	ds		%r0,%r1,%r0
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r23,%r25
+	addc		%r24,%r24,%r28
+	ds		%r25,%r23,%r25
+	comclr,>=	%r25,%r0,%r0
+	addl		%r25,%r23,%r25
+	stws		%r25,0(0,%r26)
+	bv		0(%r2)
+	 addc		%r28,%r28,%r28
+
+L$largedivisor
+	extru		%r24,31,1,%r19		; r19 = n0 & 1
+	bb,<		%r23,31,L$odd
+	 extru		%r23,30,31,%r22		; r22 = d >> 1
+	shd		%r25,%r24,1,%r24	; r24 = new n0
+	extru		%r25,30,31,%r25		; r25 = new n1
+	sub		%r0,%r22,%r21
+	ds		%r0,%r21,%r0
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	comclr,>=	%r25,%r0,%r0
+	addl		%r25,%r22,%r25
+	sh1addl		%r25,%r19,%r25
+	stws		%r25,0(0,%r26)
+	bv		0(%r2)
+	 addc		%r24,%r24,%r28
+
+L$odd	addib,sv,n	1,%r22,L$FF..		; r22 = (d / 2 + 1)
+	shd		%r25,%r24,1,%r24	; r24 = new n0
+	extru		%r25,30,31,%r25		; r25 = new n1
+	sub		%r0,%r22,%r21
+	ds		%r0,%r21,%r0
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r24
+	ds		%r25,%r22,%r25
+	addc		%r24,%r24,%r28
+	comclr,>=	%r25,%r0,%r0
+	addl		%r25,%r22,%r25
+	sh1addl		%r25,%r19,%r25
+; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
+	add,nuv		%r28,%r25,%r25
+	addl		%r25,%r1,%r25
+	addc		%r0,%r28,%r28
+	sub,<<		%r25,%r23,%r0
+	addl		%r25,%r1,%r25
+	stws		%r25,0(0,%r26)
+	bv		0(%r2)
+	 addc		%r0,%r28,%r28
+
+; This is just a special case of the code above.
+; We come here when d == 0xFFFFFFFF
+L$FF..	add,uv		%r25,%r24,%r24
+	sub,<<		%r24,%r23,%r0
+	ldo		1(%r24),%r24
+	stws		%r24,0(0,%r26)
+	bv		0(%r2)
+	 addc		%r0,%r25,%r28
+
+	.exit
+	.procend
diff --git a/mpn/i960/README b/mpn/i960/README
new file mode 100644
index 000000000..d68a0a83e
--- /dev/null
+++ b/mpn/i960/README
@@ -0,0 +1,9 @@
+This directory contains mpn functions for Intel i960 processors.
+
+RELEVANT OPTIMIZATION ISSUES
+
+The code in this directory is not well optimized.
+
+STATUS
+
+The code in this directory has not been tested.
diff --git a/mpn/i960/add_n.s b/mpn/i960/add_n.s
new file mode 100644
index 000000000..6031f6d4c
--- /dev/null
+++ b/mpn/i960/add_n.s
@@ -0,0 +1,21 @@
+.text
+	.align 4
+	.globl ___mpn_add_n
+___mpn_add_n:
+	mov	0,g6		# clear carry-save register
+	cmpo	1,0		# clear cy
+
+Loop:	subo	1,g3,g3		# update loop counter
+	ld	(g1),g5		# load from s1_ptr
+	addo	4,g1,g1		# s1_ptr++
+	ld	(g2),g4		# load from s2_ptr
+	addo	4,g2,g2		# s2_ptr++
+	cmpo	g6,1		# restore cy from g6, relies on cy being 0
+	addc	g4,g5,g4	# main add
+	subc	0,0,g6		# save cy in g6
+	st	g4,(g0)		# store result to res_ptr
+	addo	4,g0,g0		# res_ptr++
+	cmpobne	0,g3,Loop	# when branch is taken, clears C bit
+
+	mov	g6,g0
+	ret
diff --git a/mpn/i960/addmul_1.s b/mpn/i960/addmul_1.s
new file mode 100644
index 000000000..1a3de95e5
--- /dev/null
+++ b/mpn/i960/addmul_1.s
@@ -0,0 +1,26 @@
+.text
+	.align	4
+	.globl	___mpn_mul_1
+___mpn_mul_1:
+	subo	g2,0,g2
+	shlo	2,g2,g4
+	subo	g4,g1,g1
+	subo	g4,g0,g13
+	mov	0,g0
+
+	cmpo	1,0		# clear C bit on AC.cc
+
+Loop:	ld	(g1)[g2*4],g5
+	emul	g3,g5,g6
+	ld	(g13)[g2*4],g5
+
+	addc	g0,g6,g6	# relies on that C bit is clear
+	addc	0,g7,g7
+	addc	g5,g6,g6	# relies on that C bit is clear
+	st	g6,(g13)[g2*4]
+	addc	0,g7,g0
+
+	addo	g2,1,g2
+	cmpobne	0,g2,Loop	# when branch is taken, clears C bit
+
+	ret
diff --git a/mpn/i960/mul_1.s b/mpn/i960/mul_1.s
new file mode 100644
index 000000000..e75ea42d3
--- /dev/null
+++ b/mpn/i960/mul_1.s
@@ -0,0 +1,23 @@
+.text
+	.align	4
+	.globl	___mpn_mul_1
+___mpn_mul_1:
+	subo	g2,0,g2
+	shlo	2,g2,g4
+	subo	g4,g1,g1
+	subo	g4,g0,g13
+	mov	0,g0
+
+	cmpo	1,0		# clear C bit on AC.cc
+
+Loop:	ld	(g1)[g2*4],g5
+	emul	g3,g5,g6
+
+	addc	g0,g6,g6	# relies on that C bit is clear
+	st	g6,(g13)[g2*4]
+	addc	0,g7,g0
+
+	addo	g2,1,g2
+	cmpobne	0,g2,Loop	# when branch is taken, clears C bit
+
+	ret
diff --git a/mpn/i960/sub_n.s b/mpn/i960/sub_n.s
new file mode 100644
index 000000000..13ebbfa9f
--- /dev/null
+++ b/mpn/i960/sub_n.s
@@ -0,0 +1,21 @@
+.text
+	.align 4
+	.globl ___mpn_sub_n
+___mpn_sub_n:
+	mov	1,g6		# set carry-save register
+	cmpo	1,0		# clear cy
+
+Loop:	subo	1,g3,g3		# update loop counter
+	ld	(g1),g5		# load from s1_ptr
+	addo	4,g1,g1		# s1_ptr++
+	ld	(g2),g4		# load from s2_ptr
+	addo	4,g2,g2		# s2_ptr++
+	cmpo	g6,1		# restore cy from g6, relies on cy being 0
+	subc	g4,g5,g4	# main subtract
+	subc	0,0,g6		# save cy in g6
+	st	g4,(g0)		# store result to res_ptr
+	addo	4,g0,g0		# res_ptr++
+	cmpobne	0,g3,Loop	# when branch is taken, cy will be 0
+
+	mov	g6,g0
+	ret
diff --git a/mpn/m68k/add_n.S b/mpn/m68k/add_n.S
new file mode 100644
index 000000000..7ca5b95bc
--- /dev/null
+++ b/mpn/m68k/add_n.S
@@ -0,0 +1,80 @@
+/* mc68020 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+   sum in a third limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s1_ptr	(sp + 8)
+  s2_ptr	(sp + 16)
+  size		(sp + 12)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_add_n)
+
+C_SYMBOL_NAME(__mpn_add_n:)
+PROLOG(__mpn_add_n)
+/* Save used registers on the stack.  */
+	movel	R(d2),MEM_PREDEC(sp)
+	movel	R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers.  Better use movem?  */
+	movel	MEM_DISP(sp,12),R(a2)
+	movel	MEM_DISP(sp,16),R(a0)
+	movel	MEM_DISP(sp,20),R(a1)
+	movel	MEM_DISP(sp,24),R(d2)
+
+	eorw	#1,R(d2)
+	lsrl	#1,R(d2)
+	bcc	L(L1)
+	subql	#1,R(d2)	/* clears cy as side effect */
+
+L(Loop:)
+	movel	MEM_POSTINC(a0),R(d0)
+	movel	MEM_POSTINC(a1),R(d1)
+	addxl	R(d1),R(d0)
+	movel	R(d0),MEM_POSTINC(a2)
+L(L1:)	movel	MEM_POSTINC(a0),R(d0)
+	movel	MEM_POSTINC(a1),R(d1)
+	addxl	R(d1),R(d0)
+	movel	R(d0),MEM_POSTINC(a2)
+
+	dbf	R(d2),L(Loop)		/* loop until 16 lsb of %4 == -1 */
+	subxl	R(d0),R(d0)	/* d0 <= -cy; save cy as 0 or -1 in d0 */
+	subl	#0x10000,R(d2)
+	bcs	L(L2)
+	addl	R(d0),R(d0)	/* restore cy */
+	bra	L(Loop)
+
+L(L2:)
+	negl	R(d0)
+
+/* Restore used registers from stack frame.  */
+	movel	MEM_POSTINC(sp),R(a2)
+	movel	MEM_POSTINC(sp),R(d2)
+
+	rts
+EPILOG(__mpn_add_n)
diff --git a/mpn/m68k/lshift.S b/mpn/m68k/lshift.S
new file mode 100644
index 000000000..77184d6ee
--- /dev/null
+++ b/mpn/m68k/lshift.S
@@ -0,0 +1,151 @@
+/* mc68020 __mpn_lshift -- Shift left a low-level natural-number integer.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s_ptr		(sp + 8)
+  s_size	(sp + 16)
+  cnt		(sp + 12)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_lshift)
+
+C_SYMBOL_NAME(__mpn_lshift:)
+PROLOG(__mpn_lshift)
+
+/* Save used registers on the stack.  */
+	moveml	R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers.  */
+	movel	MEM_DISP(sp,28),R(res_ptr)
+	movel	MEM_DISP(sp,32),R(s_ptr)
+	movel	MEM_DISP(sp,36),R(s_size)
+	movel	MEM_DISP(sp,40),R(cnt)
+
+	moveql	#1,R(d5)
+	cmpl	R(d5),R(cnt)
+	bne	L(Lnormal)
+	cmpl	R(s_ptr),R(res_ptr)
+	bls	L(Lspecial)		/* jump if s_ptr >= res_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+	lea	MEM_INDX1(s_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+	movel	R(s_size),R(d0)
+	asll	#2,R(d0)
+	lea	MEM_INDX(s_ptr,d0,l),R(a2)
+#endif
+	cmpl	R(res_ptr),R(a2)
+	bls	L(Lspecial)		/* jump if res_ptr >= s_ptr + s_size */
+
+L(Lnormal:)
+	moveql	#32,R(d5)
+	subl	R(cnt),R(d5)
+
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+	lea	MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+	lea	MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68000 */
+	movel	R(s_size),R(d0)
+	asll	#2,R(d0)
+	addl	R(s_size),R(s_ptr)
+	addl	R(s_size),R(res_ptr)
+#endif
+	movel	MEM_PREDEC(s_ptr),R(d2)
+	movel	R(d2),R(d0)
+	lsrl	R(d5),R(d0)		/* compute carry limb */
+
+	lsll	R(cnt),R(d2)
+	movel	R(d2),R(d1)
+	subql	#1,R(s_size)
+	beq	L(Lend)
+	lsrl	#1,R(s_size)
+	bcs	L(L1)
+	subql	#1,R(s_size)
+
+L(Loop:)
+	movel	MEM_PREDEC(s_ptr),R(d2)
+	movel	R(d2),R(d3)
+	lsrl	R(d5),R(d3)
+	orl	R(d3),R(d1)
+	movel	R(d1),MEM_PREDEC(res_ptr)
+	lsll	R(cnt),R(d2)
+L(L1:)
+	movel	MEM_PREDEC(s_ptr),R(d1)
+	movel	R(d1),R(d3)
+	lsrl	R(d5),R(d3)
+	orl	R(d3),R(d2)
+	movel	R(d2),MEM_PREDEC(res_ptr)
+	lsll	R(cnt),R(d1)
+
+	dbf	R(s_size),L(Loop)
+	subl	#0x10000,R(s_size)
+	bcc	L(Loop)
+
+L(Lend:)
+	movel	R(d1),MEM_PREDEC(res_ptr) /* store least significant limb */
+
+/* Restore used registers from stack frame.  */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+	rts
+
+/* We loop from least significant end of the arrays, which is only
+   permissable if the source and destination don't overlap, since the
+   function is documented to work for overlapping source and destination.  */
+
+L(Lspecial:)
+	clrl	R(d0)			/* initialize carry */
+	eorw	#1,R(s_size)
+	lsrl	#1,R(s_size)
+	bcc	L(LL1)
+	subql	#1,R(s_size)
+
+L(LLoop:)
+	movel	MEM_POSTINC(s_ptr),R(d2)
+	addxl	R(d2),R(d2)
+	movel	R(d2),MEM_POSTINC(res_ptr)
+L(LL1:)
+	movel	MEM_POSTINC(s_ptr),R(d2)
+	addxl	R(d2),R(d2)
+	movel	R(d2),MEM_POSTINC(res_ptr)
+
+	dbf	R(s_size),L(LLoop)
+	addxl	R(d0),R(d0)		/* save cy in lsb */
+	subl	#0x10000,R(s_size)
+	bcs	L(LLend)
+	lsrl	#1,R(d0)		/* restore cy */
+	bra	L(LLoop)
+
+L(LLend:)
+/* Restore used registers from stack frame.  */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+	rts
+EPILOG(__mpn_lshift)
diff --git a/mpn/m68k/mc68020/addmul_1.S b/mpn/m68k/mc68020/addmul_1.S
new file mode 100644
index 000000000..4b99c21f8
--- /dev/null
+++ b/mpn/m68k/mc68020/addmul_1.S
@@ -0,0 +1,84 @@
+/* mc68020 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+   the result to a second limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s1_ptr	(sp + 8)
+  s1_size	(sp + 12)
+  s2_limb	(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_addmul_1)
+
+C_SYMBOL_NAME(__mpn_addmul_1:)
+PROLOG(__mpn_addmul_1)
+
+#define res_ptr a0
+#define s1_ptr a1
+#define s1_size d2
+#define s2_limb d4
+
+/* Save used registers on the stack.  */
+	moveml	R(d2)-R(d5),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers.  Better use movem?  */
+	movel	MEM_DISP(sp,20),R(res_ptr)
+	movel	MEM_DISP(sp,24),R(s1_ptr)
+	movel	MEM_DISP(sp,28),R(s1_size)
+	movel	MEM_DISP(sp,32),R(s2_limb)
+
+	eorw	#1,R(s1_size)
+	clrl	R(d1)
+	clrl	R(d5)
+	lsrl	#1,R(s1_size)
+	bcc	L(L1)
+	subql	#1,R(s1_size)
+	subl	R(d0),R(d0)		/* (d0,cy) <= (0,0) */
+
+L(Loop:)
+	movel	MEM_POSTINC(s1_ptr),R(d3)
+	mulul	R(s2_limb),R(d1):R(d3)
+	addxl	R(d0),R(d3)
+	addxl	R(d5),R(d1)
+	addl	R(d3),MEM_POSTINC(res_ptr)
+L(L1:)	movel	MEM_POSTINC(s1_ptr),R(d3)
+	mulul	R(s2_limb),R(d0):R(d3)
+	addxl	R(d1),R(d3)
+	addxl	R(d5),R(d0)
+	addl	R(d3),MEM_POSTINC(res_ptr)
+
+	dbf	R(s1_size),L(Loop)
+	addxl	R(d5),R(d0)
+	subl	#0x10000,R(s1_size)
+	bcc	L(Loop)
+
+/* Restore used registers from stack frame.  */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d5)
+
+	rts
+EPILOG(__mpn_addmul_1)
diff --git a/mpn/m68k/mc68020/mul_1.S b/mpn/m68k/mc68020/mul_1.S
new file mode 100644
index 000000000..ef7d93721
--- /dev/null
+++ b/mpn/m68k/mc68020/mul_1.S
@@ -0,0 +1,91 @@
+/* mc68020 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+   the result in a second limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s1_ptr	(sp + 8)
+  s1_size	(sp + 12)
+  s2_limb	(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_mul_1)
+
+C_SYMBOL_NAME(__mpn_mul_1:)
+PROLOG(__mpn_mul_1)
+
+#define res_ptr a0
+#define s1_ptr a1
+#define s1_size d2
+#define s2_limb d4
+
+/* Save used registers on the stack.  */
+	moveml	R(d2)-R(d4),MEM_PREDEC(sp)
+#if 0
+	movel	R(d2),MEM_PREDEC(sp)
+	movel	R(d3),MEM_PREDEC(sp)
+	movel	R(d4),MEM_PREDEC(sp)
+#endif
+
+/* Copy the arguments to registers.  Better use movem?  */
+	movel	MEM_DISP(sp,16),R(res_ptr)
+	movel	MEM_DISP(sp,20),R(s1_ptr)
+	movel	MEM_DISP(sp,24),R(s1_size)
+	movel	MEM_DISP(sp,28),R(s2_limb)
+
+	eorw	#1,R(s1_size)
+	clrl	R(d1)
+	lsrl	#1,R(s1_size)
+	bcc	L(L1)
+	subql	#1,R(s1_size)
+	subl	R(d0),R(d0)	/* (d0,cy) <= (0,0) */
+
+L(Loop:)
+	movel	MEM_POSTINC(s1_ptr),R(d3)
+	mulul	R(s2_limb),R(d1):R(d3)
+	addxl	R(d0),R(d3)
+	movel	R(d3),MEM_POSTINC(res_ptr)
+L(L1:)	movel	MEM_POSTINC(s1_ptr),R(d3)
+	mulul	R(s2_limb),R(d0):R(d3)
+	addxl	R(d1),R(d3)
+	movel	R(d3),MEM_POSTINC(res_ptr)
+
+	dbf	R(s1_size),L(Loop)
+	clrl	R(d3)
+	addxl	R(d3),R(d0)
+	subl	#0x10000,R(s1_size)
+	bcc	L(Loop)
+
+/* Restore used registers from stack frame.  */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d4)
+#if 0
+	movel	MEM_POSTINC(sp),R(d4)
+	movel	MEM_POSTINC(sp),R(d3)
+	movel	MEM_POSTINC(sp),R(d2)
+#endif
+	rts
+EPILOG(__mpn_mul_1)
diff --git a/mpn/m68k/mc68020/submul_1.S b/mpn/m68k/mc68020/submul_1.S
new file mode 100644
index 000000000..9770c6cd6
--- /dev/null
+++ b/mpn/m68k/mc68020/submul_1.S
@@ -0,0 +1,84 @@
+/* mc68020 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+   the result from a second limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s1_ptr	(sp + 8)
+  s1_size	(sp + 12)
+  s2_limb	(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_submul_1)
+
+C_SYMBOL_NAME(__mpn_submul_1:)
+PROLOG(__mpn_submul_1)
+
+#define res_ptr a0
+#define s1_ptr a1
+#define s1_size d2
+#define s2_limb d4
+
+/* Save used registers on the stack.  */
+	moveml	R(d2)-R(d5),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers.  Better use movem?  */
+	movel	MEM_DISP(sp,20),R(res_ptr)
+	movel	MEM_DISP(sp,24),R(s1_ptr)
+	movel	MEM_DISP(sp,28),R(s1_size)
+	movel	MEM_DISP(sp,32),R(s2_limb)
+
+	eorw	#1,R(s1_size)
+	clrl	R(d1)
+	clrl	R(d5)
+	lsrl	#1,R(s1_size)
+	bcc	L(L1)
+	subql	#1,R(s1_size)
+	subl	R(d0),R(d0)	/* (d0,cy) <= (0,0) */
+
+L(Loop:)
+	movel	MEM_POSTINC(s1_ptr),R(d3)
+	mulul	R(s2_limb),R(d1):R(d3)
+	addxl	R(d0),R(d3)
+	addxl	R(d5),R(d1)
+	subl	R(d3),MEM_POSTINC(res_ptr)
+L(L1:)	movel	MEM_POSTINC(s1_ptr),R(d3)
+	mulul	R(s2_limb),R(d0):R(d3)
+	addxl	R(d1),R(d3)
+	addxl	R(d5),R(d0)
+	subl	R(d3),MEM_POSTINC(res_ptr)
+
+	dbf	R(s1_size),L(Loop)
+	addxl	R(d5),R(d0)
+	subl	#0x10000,R(s1_size)
+	bcc	L(Loop)
+
+/* Restore used registers from stack frame.  */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d5)
+
+	rts
+EPILOG(__mpn_submul_1)
diff --git a/mpn/m68k/rshift.S b/mpn/m68k/rshift.S
new file mode 100644
index 000000000..01dde0ab5
--- /dev/null
+++ b/mpn/m68k/rshift.S
@@ -0,0 +1,150 @@
+/* mc68020 __mpn_rshift -- Shift right a low-level natural-number integer.
+
+Copyright (C) 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s_ptr		(sp + 8)
+  s_size	(sp + 16)
+  cnt		(sp + 12)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_rshift)
+
+C_SYMBOL_NAME(__mpn_rshift:)
+PROLOG(__mpn_rshift)
+/* Save used registers on the stack.  */
+	moveml	R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers.  */
+	movel	MEM_DISP(sp,28),R(res_ptr)
+	movel	MEM_DISP(sp,32),R(s_ptr)
+	movel	MEM_DISP(sp,36),R(s_size)
+	movel	MEM_DISP(sp,40),R(cnt)
+
+	moveql	#1,R(d5)
+	cmpl	R(d5),R(cnt)
+	bne	L(Lnormal)
+	cmpl	R(res_ptr),R(s_ptr)
+	bls	L(Lspecial)		/* jump if res_ptr >= s_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+	lea	MEM_INDX1(res_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+	movel	R(s_size),R(d0)
+	asll	#2,R(d0)
+	lea	MEM_INDX(res_ptr,d0,l),R(a2)
+#endif
+	cmpl	R(s_ptr),R(a2)
+	bls	L(Lspecial)		/* jump if s_ptr >= res_ptr + s_size */
+
+L(Lnormal:)
+	moveql	#32,R(d5)
+	subl	R(cnt),R(d5)
+	movel	MEM_POSTINC(s_ptr),R(d2)
+	movel	R(d2),R(d0)
+	lsll	R(d5),R(d0)		/* compute carry limb */
+   
+	lsrl	R(cnt),R(d2)
+	movel	R(d2),R(d1)
+	subql	#1,R(s_size)
+	beq	L(Lend)
+	lsrl	#1,R(s_size)
+	bcs	L(L1)
+	subql	#1,R(s_size)
+
+L(Loop:)
+	movel	MEM_POSTINC(s_ptr),R(d2)
+	movel	R(d2),R(d3)
+	lsll	R(d5),R(d3)
+	orl	R(d3),R(d1)
+	movel	R(d1),MEM_POSTINC(res_ptr)
+	lsrl	R(cnt),R(d2)
+L(L1:)
+	movel	MEM_POSTINC(s_ptr),R(d1)
+	movel	R(d1),R(d3)
+	lsll	R(d5),R(d3)
+	orl	R(d3),R(d2)
+	movel	R(d2),MEM_POSTINC(res_ptr)
+	lsrl	R(cnt),R(d1)
+
+	dbf	R(s_size),L(Loop)
+	subl	#0x10000,R(s_size)
+	bcc	L(Loop)
+
+L(Lend:)
+	movel	R(d1),MEM(res_ptr) /* store most significant limb */
+
+/* Restore used registers from stack frame.  */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+	rts
+
+/* We loop from most significant end of the arrays, which is only
+   permissable if the source and destination don't overlap, since the
+   function is documented to work for overlapping source and destination.  */
+
+L(Lspecial:)
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+	lea	MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+	lea	MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68000 */
+	movel	R(s_size),R(d0)
+	asll	#2,R(d0)
+	addl	R(s_size),R(s_ptr)
+	addl	R(s_size),R(res_ptr)
+#endif
+
+	clrl	R(d0)			/* initialize carry */
+	eorw	#1,R(s_size)
+	lsrl	#1,R(s_size)
+	bcc	L(LL1)
+	subql	#1,R(s_size)
+
+L(LLoop:)
+	movel	MEM_PREDEC(s_ptr),R(d2)
+	roxrl	#1,R(d2)
+	movel	R(d2),MEM_PREDEC(res_ptr)
+L(LL1:)
+	movel	MEM_PREDEC(s_ptr),R(d2)
+	roxrl	#1,R(d2)
+	movel	R(d2),MEM_PREDEC(res_ptr)
+
+	dbf	R(s_size),L(LLoop)
+	roxrl	#1,R(d0)		/* save cy in msb */
+	subl	#0x10000,R(s_size)
+	bcs	L(LLend)
+	addl	R(d0),R(d0)		/* restore cy */
+	bra	L(LLoop)
+
+L(LLend:)
+/* Restore used registers from stack frame.  */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+	rts
+EPILOG(__mpn_rshift)
diff --git a/mpn/m68k/sub_n.S b/mpn/m68k/sub_n.S
new file mode 100644
index 000000000..f94b0c728
--- /dev/null
+++ b/mpn/m68k/sub_n.S
@@ -0,0 +1,80 @@
+/* mc68020 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+   store difference in a third limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s1_ptr	(sp + 8)
+  s2_ptr	(sp + 16)
+  size		(sp + 12)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(__mpn_sub_n)
+
+C_SYMBOL_NAME(__mpn_sub_n:)
+PROLOG(__mpn_sub_n)
+/* Save used registers on the stack.  */
+	movel	R(d2),MEM_PREDEC(sp)
+	movel	R(a2),MEM_PREDEC(sp)
+
+/* Copy the arguments to registers.  Better use movem?  */
+	movel	MEM_DISP(sp,12),R(a2)
+	movel	MEM_DISP(sp,16),R(a0)
+	movel	MEM_DISP(sp,20),R(a1)
+	movel	MEM_DISP(sp,24),R(d2)
+
+	eorw	#1,R(d2)
+	lsrl	#1,R(d2)
+	bcc	L(L1)
+	subql	#1,R(d2)	/* clears cy as side effect */
+
+L(Loop:)
+	movel	MEM_POSTINC(a0),R(d0)
+	movel	MEM_POSTINC(a1),R(d1)
+	subxl	R(d1),R(d0)
+	movel	R(d0),MEM_POSTINC(a2)
+L(L1:)	movel	MEM_POSTINC(a0),R(d0)
+	movel	MEM_POSTINC(a1),R(d1)
+	subxl	R(d1),R(d0)
+	movel	R(d0),MEM_POSTINC(a2)
+
+	dbf	R(d2),L(Loop)		/* loop until 16 lsb of %4 == -1 */
+	subxl	R(d0),R(d0)	/* d0 <= -cy; save cy as 0 or -1 in d0 */
+	subl	#0x10000,R(d2)
+	bcs	L(L2)
+	addl	R(d0),R(d0)	/* restore cy */
+	bra	L(Loop)
+
+L(L2:)
+	negl	R(d0)
+
+/* Restore used registers from stack frame.  */
+	movel	MEM_POSTINC(sp),R(a2)
+	movel	MEM_POSTINC(sp),R(d2)
+
+	rts
+EPILOG(__mpn_sub_n)
diff --git a/mpn/m68k/syntax.h b/mpn/m68k/syntax.h
new file mode 100644
index 000000000..9d6f3522b
--- /dev/null
+++ b/mpn/m68k/syntax.h
@@ -0,0 +1,177 @@
+/* asm.h -- Definitions for 68k syntax variations.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.  */
+
+#undef ALIGN
+
+#ifdef MIT_SYNTAX
+#define PROLOG(name)
+#define EPILOG(name)
+#define R(r)r
+#define MEM(base)base@
+#define MEM_DISP(base,displacement)base@(displacement)
+#define MEM_INDX(base,idx,size_suffix)base@(idx:size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)base@(idx:size_suffix:scale)
+#define MEM_PREDEC(memory_base)memory_base@-
+#define MEM_POSTINC(memory_base)memory_base@+
+#define L(label) label
+#define TEXT .text
+#define ALIGN .even
+#define GLOBL .globl
+#define moveql moveq
+/* Use variable sized opcodes.  */
+#define bcc jcc
+#define bcs jcs
+#define bls jls
+#define beq jeq
+#define bne jne
+#define bra jra
+#endif
+
+#ifdef SONY_SYNTAX
+#define PROLOG(name)
+#define EPILOG(name)
+#define R(r)r
+#define MEM(base)(base)
+#define MEM_DISP(base,displacement)(displacement,base)
+#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale)
+#define MEM_PREDEC(memory_base)-(memory_base)
+#define MEM_POSTINC(memory_base)(memory_base)+
+#define L(label) label
+#define TEXT .text
+#define ALIGN .even
+#define GLOBL .globl
+#endif
+
+#ifdef MOTOROLA_SYNTAX
+#define PROLOG(name)
+#define EPILOG(name)
+#define R(r)r
+#define MEM(base)(base)
+#define MEM_DISP(base,displacement)(displacement,base)
+#define MEM_INDX(base,idx,size_suffix)(base,idx.size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)(base,idx.size_suffix*scale)
+#define MEM_PREDEC(memory_base)-(memory_base)
+#define MEM_POSTINC(memory_base)(memory_base)+
+#define L(label) label
+#define TEXT
+#define ALIGN
+#define GLOBL XDEF
+#define lea LEA
+#define movel MOVE.L
+#define moveml MOVEM.L
+#define moveql MOVEQ.L
+#define cmpl CMP.L
+#define orl OR.L
+#define clrl CLR.L
+#define eorw EOR.W
+#define lsrl LSR.L
+#define lsll LSL.L
+#define roxrl ROXR.L
+#define roxll ROXL.L
+#define addl ADD.L
+#define addxl ADDX.L
+#define addql ADDQ.L
+#define subl SUB.L
+#define subxl SUBX.L
+#define subql SUBQ.L
+#define negl NEG.L
+#define mulul MULU.L
+#define bcc BCC
+#define bcs BCS
+#define bls BLS
+#define beq BEQ
+#define bne BNE
+#define bra BRA
+#define dbf DBF
+#define rts RTS
+#define d0 D0
+#define d1 D1
+#define d2 D2
+#define d3 D3
+#define d4 D4
+#define d5 D5
+#define d6 D6
+#define d7 D7
+#define a0 A0
+#define a1 A1
+#define a2 A2
+#define a3 A3
+#define a4 A4
+#define a5 A5
+#define a6 A6
+#define a7 A7
+#define sp SP
+#endif
+
+#ifdef ELF_SYNTAX
+#define PROLOG(name) .type name,@function
+#define EPILOG(name) .size name,.-name
+#define MEM(base)(R(base))
+#define MEM_DISP(base,displacement)(displacement,R(base))
+#define MEM_PREDEC(memory_base)-(R(memory_base))
+#define MEM_POSTINC(memory_base)(R(memory_base))+
+#ifdef __STDC__
+#define R_(r)%##r
+#define R(r)R_(r)
+#define MEM_INDX_(base,idx,size_suffix)(R(base),R(idx##.##size_suffix))
+#define MEM_INDX(base,idx,size_suffix)MEM_INDX_(base,idx,size_suffix)
+#define MEM_INDX1_(base,idx,size_suffix,scale)(R(base),R(idx##.##size_suffix*scale))
+#define MEM_INDX1(base,idx,size_suffix,scale)MEM_INDX1_(base,idx,size_suffix,scale)
+#define L(label) .##label
+#else
+#define R(r)%/**/r
+#define MEM_INDX(base,idx,size_suffix)(R(base),R(idx).size_suffix)
+#define MEM_INDX1(base,idx,size_suffix,scale)(R(base),R(idx).size_suffix*scale)
+#define L(label) ./**/label
+#endif
+#define TEXT .text
+#define ALIGN .align 2
+#define GLOBL .globl
+#define bcc jbcc
+#define bcs jbcs
+#define bls jbls
+#define beq jbeq
+#define bne jbne
+#define bra jbra
+#endif
+
+#if defined (SONY_SYNTAX) || defined (ELF_SYNTAX)
+#define movel move.l
+#define moveml movem.l
+#define moveql moveq.l
+#define cmpl cmp.l
+#define orl or.l
+#define clrl clr.l
+#define eorw eor.w
+#define lsrl lsr.l
+#define lsll lsl.l
+#define roxrl roxr.l
+#define roxll roxl.l
+#define addl add.l
+#define addxl addx.l
+#define addql addq.l
+#define subl sub.l
+#define subxl subx.l
+#define subql subq.l
+#define negl neg.l
+#define mulul mulu.l
+#endif
diff --git a/mpn/m88k/add_n.s b/mpn/m88k/add_n.s
new file mode 100644
index 000000000..1b09ccef8
--- /dev/null
+++ b/mpn/m88k/add_n.s
@@ -0,0 +1,104 @@
+; mc88100 __mpn_add -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	r2
+; s1_ptr	r3
+; s2_ptr	r4
+; size		r5
+
+; This code has been optimized to run one instruction per clock, avoiding
+; load stalls and writeback contention.  As a result, the instruction
+; order is not always natural.
+
+; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
+; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
+
+	text
+	align	 16
+	global	 ___mpn_add_n
+___mpn_add_n:
+	ld	r6,r3,0			; read first limb from s1_ptr
+	extu	r10,r5,3
+	ld	r7,r4,0			; read first limb from s2_ptr
+
+	subu.co	r5,r0,r5		; (clear carry as side effect)
+	mak	r5,r5,3<4>
+	bcnd	eq0,r5,Lzero
+
+	or	r12,r0,lo16(Lbase)
+	or.u	r12,r12,hi16(Lbase)
+	addu	r12,r12,r5		; r12 is address for entering in loop
+
+	extu	r5,r5,2			; divide by 4
+	subu	r2,r2,r5		; adjust res_ptr
+	subu	r3,r3,r5		; adjust s1_ptr
+	subu	r4,r4,r5		; adjust s2_ptr
+
+	or	r8,r6,r0
+
+	jmp.n	r12
+	 or	r9,r7,r0
+
+Loop:	addu	r3,r3,32
+	st	r8,r2,28
+	addu	r4,r4,32
+	ld	r6,r3,0
+	addu	r2,r2,32
+	ld	r7,r4,0
+Lzero:	subu	r10,r10,1		; add 0 + 8r limbs (adj loop cnt)
+Lbase:	ld	r8,r3,4
+	addu.cio r6,r6,r7
+	ld	r9,r4,4
+	st	r6,r2,0
+	ld	r6,r3,8			; add 7 + 8r limbs
+	addu.cio r8,r8,r9
+	ld	r7,r4,8
+	st	r8,r2,4
+	ld	r8,r3,12		; add 6 + 8r limbs
+	addu.cio r6,r6,r7
+	ld	r9,r4,12
+	st	r6,r2,8
+	ld	r6,r3,16		; add 5 + 8r limbs
+	addu.cio r8,r8,r9
+	ld	r7,r4,16
+	st	r8,r2,12
+	ld	r8,r3,20		; add 4 + 8r limbs
+	addu.cio r6,r6,r7
+	ld	r9,r4,20
+	st	r6,r2,16
+	ld	r6,r3,24		; add 3 + 8r limbs
+	addu.cio r8,r8,r9
+	ld	r7,r4,24
+	st	r8,r2,20
+	ld	r8,r3,28		; add 2 + 8r limbs
+	addu.cio r6,r6,r7
+	ld	r9,r4,28
+	st	r6,r2,24
+	bcnd.n	ne0,r10,Loop		; add 1 + 8r limbs
+	 addu.cio r8,r8,r9
+
+	st	r8,r2,28		; store most significant limb
+
+	jmp.n	 r1
+	 addu.ci r2,r0,r0		; return carry-out from most sign. limb
diff --git a/mpn/m88k/mc88110/add_n.S b/mpn/m88k/mc88110/add_n.S
new file mode 100644
index 000000000..39a44e557
--- /dev/null
+++ b/mpn/m88k/mc88110/add_n.S
@@ -0,0 +1,200 @@
+; mc88110 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+; sum in a third limb vector.
+
+; Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+#define res_ptr	r2
+#define s1_ptr	r3
+#define s2_ptr	r4
+#define size	r5
+
+#include "sysdep.h"
+
+	text
+	align	16
+	global	C_SYMBOL_NAME(__mpn_add_n)
+C_SYMBOL_NAME(__mpn_add_n):
+	addu.co	 r0,r0,r0		; clear cy flag
+	xor	 r12,s2_ptr,res_ptr
+	bb1	 2,r12,L1
+; **  V1a  **
+L0:	bb0	 2,res_ptr,L_v1		; branch if res_ptr is aligned?
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+	ld	 r10,s1_ptr,0
+	addu	 s1_ptr,s1_ptr,4
+	ld	 r8,s2_ptr,0
+	addu	 s2_ptr,s2_ptr,4
+	subu	 size,size,1
+	addu.co	 r6,r10,r8
+	st	 r6,res_ptr,0
+	addu	 res_ptr,res_ptr,4
+L_v1:	cmp	 r12,size,2
+	bb1	 lt,r12,Lend2
+
+	ld	 r10,s1_ptr,0
+	ld	 r12,s1_ptr,4
+	ld.d	 r8,s2_ptr,0
+	subu	 size,size,10
+	bcnd	 lt0,size,Lfin1
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+	align	 8
+Loop1:	subu	 size,size,8
+	addu.cio r6,r10,r8
+	ld	 r10,s1_ptr,8
+	addu.cio r7,r12,r9
+	ld	 r12,s1_ptr,12
+	ld.d	 r8,s2_ptr,8
+	st.d	 r6,res_ptr,0
+	addu.cio r6,r10,r8
+	ld	 r10,s1_ptr,16
+	addu.cio r7,r12,r9
+	ld	 r12,s1_ptr,20
+	ld.d	 r8,s2_ptr,16
+	st.d	 r6,res_ptr,8
+	addu.cio r6,r10,r8
+	ld	 r10,s1_ptr,24
+	addu.cio r7,r12,r9
+	ld	 r12,s1_ptr,28
+	ld.d	 r8,s2_ptr,24
+	st.d	 r6,res_ptr,16
+	addu.cio r6,r10,r8
+	ld	 r10,s1_ptr,32
+	addu.cio r7,r12,r9
+	ld	 r12,s1_ptr,36
+	addu	 s1_ptr,s1_ptr,32
+	ld.d	 r8,s2_ptr,32
+	addu	 s2_ptr,s2_ptr,32
+	st.d	 r6,res_ptr,24
+	addu	 res_ptr,res_ptr,32
+	bcnd	 ge0,size,Loop1
+
+Lfin1:	addu	 size,size,8-2
+	bcnd	 lt0,size,Lend1
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1:	addu.cio r6,r10,r8
+	ld	 r10,s1_ptr,8
+	addu.cio r7,r12,r9
+	ld	 r12,s1_ptr,12
+	ld.d	 r8,s2_ptr,8
+	st.d	 r6,res_ptr,0
+	subu	 size,size,2
+	addu	 s1_ptr,s1_ptr,8
+	addu	 s2_ptr,s2_ptr,8
+	addu	 res_ptr,res_ptr,8
+	bcnd	 ge0,size,Loope1
+Lend1:	addu.cio r6,r10,r8
+	addu.cio r7,r12,r9
+	st.d	 r6,res_ptr,0
+
+	bb0	 0,size,Lret1
+/* Add last limb */
+	ld	 r10,s1_ptr,8
+	ld	 r8,s2_ptr,8
+	addu.cio r6,r10,r8
+	st	 r6,res_ptr,8
+
+Lret1:	jmp.n	 r1
+	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb
+
+L1:	xor	 r12,s1_ptr,res_ptr
+	bb1	 2,r12,L2
+; **  V1b  **
+	or	 r12,r0,s2_ptr
+	or	 s2_ptr,r0,s1_ptr
+	or	 s1_ptr,r0,r12
+	br	 L0
+
+; **  V2  **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
+   things can be aligned (that we care about) we now know that the alignment
+   of s1_ptr and s2_ptr are the same.  */
+
+L2:	cmp	 r12,size,1
+	bb1	 eq,r12,Ljone
+	bb0	 2,s1_ptr,L_v2		; branch if s1_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+	ld	 r10,s1_ptr,0
+	addu	 s1_ptr,s1_ptr,4
+	ld	 r8,s2_ptr,0
+	addu	 s2_ptr,s2_ptr,4
+	subu	 size,size,1
+	addu.co	 r6,r10,r8
+	st	 r6,res_ptr,0
+	addu	 res_ptr,res_ptr,4
+
+L_v2:	subu	 size,size,8
+	bcnd	 lt0,size,Lfin2
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+	align	 8
+Loop2:	subu	 size,size,8
+	ld.d	 r8,s1_ptr,0
+	ld.d	 r6,s2_ptr,0
+	addu.cio r8,r8,r6
+	st	 r8,res_ptr,0
+	addu.cio r9,r9,r7
+	st	 r9,res_ptr,4
+	ld.d	 r8,s1_ptr,8
+	ld.d	 r6,s2_ptr,8
+	addu.cio r8,r8,r6
+	st	 r8,res_ptr,8
+	addu.cio r9,r9,r7
+	st	 r9,res_ptr,12
+	ld.d	 r8,s1_ptr,16
+	ld.d	 r6,s2_ptr,16
+	addu.cio r8,r8,r6
+	st	 r8,res_ptr,16
+	addu.cio r9,r9,r7
+	st	 r9,res_ptr,20
+	ld.d	 r8,s1_ptr,24
+	ld.d	 r6,s2_ptr,24
+	addu.cio r8,r8,r6
+	st	 r8,res_ptr,24
+	addu.cio r9,r9,r7
+	st	 r9,res_ptr,28
+	addu	 s1_ptr,s1_ptr,32
+	addu	 s2_ptr,s2_ptr,32
+	addu	 res_ptr,res_ptr,32
+	bcnd	 ge0,size,Loop2
+
+Lfin2:	addu	 size,size,8-2
+	bcnd	 lt0,size,Lend2
+Loope2:	ld.d	 r8,s1_ptr,0
+	ld.d	 r6,s2_ptr,0
+	addu.cio r8,r8,r6
+	st	 r8,res_ptr,0
+	addu.cio r9,r9,r7
+	st	 r9,res_ptr,4
+	subu	 size,size,2
+	addu	 s1_ptr,s1_ptr,8
+	addu	 s2_ptr,s2_ptr,8
+	addu	 res_ptr,res_ptr,8
+	bcnd	 ge0,size,Loope2
+Lend2:	bb0	 0,size,Lret2
+/* Add last limb */
+Ljone:	ld	 r10,s1_ptr,0
+	ld	 r8,s2_ptr,0
+	addu.cio r6,r10,r8
+	st	 r6,res_ptr,0
+
+Lret2:	jmp.n	 r1
+	addu.ci	 r2,r0,r0		; return carry-out from most sign. limb
diff --git a/mpn/m88k/mc88110/addmul_1.s b/mpn/m88k/mc88110/addmul_1.s
new file mode 100644
index 000000000..2bd6f21af
--- /dev/null
+++ b/mpn/m88k/mc88110/addmul_1.s
@@ -0,0 +1,61 @@
+; mc88110 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright (C) 1996 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	r2
+; s1_ptr	r3
+; size		r4
+; s2_limb	r5
+
+	text
+	align	16
+	global	___mpn_addmul_1
+___mpn_addmul_1:
+	lda	 r3,r3[r4]
+	lda	 r8,r2[r4]		; RES_PTR in r8 since r2 is retval
+	subu	 r4,r0,r4
+	addu.co	 r2,r0,r0		; r2 = cy = 0
+
+	ld	 r6,r3[r4]
+	addu	 r4,r4,1
+	subu	 r8,r8,4
+	bcnd.n	 eq0,r4,Lend
+	 mulu.d	 r10,r6,r5
+
+Loop:	ld	 r7,r8[r4]
+	ld	 r6,r3[r4]
+	addu.cio r9,r11,r2
+	addu.ci	 r2,r10,r0
+	addu.co	 r9,r9,r7
+	st	 r9,r8[r4]
+	addu	 r4,r4,1
+	mulu.d	 r10,r6,r5
+	bcnd	 ne0,r4,Loop
+
+Lend:	ld	 r7,r8,0
+	addu.cio r9,r11,r2
+	addu.ci	 r2,r10,r0
+	addu.co	 r9,r9,r7
+	st	 r9,r8,0
+	jmp.n	 r1
+	 addu.ci r2,r2,r0
diff --git a/mpn/m88k/mc88110/mul_1.s b/mpn/m88k/mc88110/mul_1.s
new file mode 100644
index 000000000..151890060
--- /dev/null
+++ b/mpn/m88k/mc88110/mul_1.s
@@ -0,0 +1,59 @@
+; mc88110 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	r2
+; s1_ptr	r3
+; size		r4
+; s2_limb	r5
+
+	text
+	align	16
+	global	___mpn_mul_1
+___mpn_mul_1:
+	; Make S1_PTR and RES_PTR point at the end of their blocks
+	; and negate SIZE.
+	lda	 r3,r3[r4]
+	lda	 r8,r2[r4]		; RES_PTR in r8 since r2 is retval
+	subu	 r4,r0,r4
+
+	addu.co	 r2,r0,r0		; r2 = cy = 0
+
+	ld	 r6,r3[r4]
+	addu	 r4,r4,1
+	mulu.d	 r10,r6,r5
+	bcnd.n	 eq0,r4,Lend
+	 subu	 r8,r8,8
+
+Loop:	ld	 r6,r3[r4]
+	addu.cio r9,r11,r2
+	or	 r2,r10,r0		; could be avoided if unrolled
+	addu	 r4,r4,1
+	mulu.d	 r10,r6,r5
+	bcnd.n	 ne0,r4,Loop
+	 st	 r9,r8[r4]
+
+Lend:	addu.cio r9,r11,r2
+	st	 r9,r8,4
+	jmp.n	 r1
+	 addu.ci r2,r10,r0
diff --git a/mpn/m88k/mc88110/sub_n.S b/mpn/m88k/mc88110/sub_n.S
new file mode 100644
index 000000000..685f024fd
--- /dev/null
+++ b/mpn/m88k/mc88110/sub_n.S
@@ -0,0 +1,276 @@
+; mc88110 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+#define res_ptr	r2
+#define s1_ptr	r3
+#define s2_ptr	r4
+#define size	r5
+
+#include "sysdep.h"
+
+	text
+	align	16
+	global	C_SYMBOL_NAME(__mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n):
+	subu.co	 r0,r0,r0		; set cy flag
+	xor	 r12,s2_ptr,res_ptr
+	bb1	 2,r12,L1
+; **  V1a  **
+L0:	bb0	 2,res_ptr,L_v1		; branch if res_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+	ld	 r10,s1_ptr,0
+	addu	 s1_ptr,s1_ptr,4
+	ld	 r8,s2_ptr,0
+	addu	 s2_ptr,s2_ptr,4
+	subu	 size,size,1
+	subu.co	 r6,r10,r8
+	st	 r6,res_ptr,0
+	addu	 res_ptr,res_ptr,4
+L_v1:	cmp	 r12,size,2
+	bb1	 lt,r12,Lend2
+
+	ld	 r10,s1_ptr,0
+	ld	 r12,s1_ptr,4
+	ld.d	 r8,s2_ptr,0
+	subu	 size,size,10
+	bcnd	 lt0,size,Lfin1
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+	align	 8
+Loop1:	subu	 size,size,8
+	subu.cio r6,r10,r8
+	ld	 r10,s1_ptr,8
+	subu.cio r7,r12,r9
+	ld	 r12,s1_ptr,12
+	ld.d	 r8,s2_ptr,8
+	st.d	 r6,res_ptr,0
+	subu.cio r6,r10,r8
+	ld	 r10,s1_ptr,16
+	subu.cio r7,r12,r9
+	ld	 r12,s1_ptr,20
+	ld.d	 r8,s2_ptr,16
+	st.d	 r6,res_ptr,8
+	subu.cio r6,r10,r8
+	ld	 r10,s1_ptr,24
+	subu.cio r7,r12,r9
+	ld	 r12,s1_ptr,28
+	ld.d	 r8,s2_ptr,24
+	st.d	 r6,res_ptr,16
+	subu.cio r6,r10,r8
+	ld	 r10,s1_ptr,32
+	subu.cio r7,r12,r9
+	ld	 r12,s1_ptr,36
+	addu	 s1_ptr,s1_ptr,32
+	ld.d	 r8,s2_ptr,32
+	addu	 s2_ptr,s2_ptr,32
+	st.d	 r6,res_ptr,24
+	addu	 res_ptr,res_ptr,32
+	bcnd	 ge0,size,Loop1
+
+Lfin1:	addu	 size,size,8-2
+	bcnd	 lt0,size,Lend1
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1:	subu.cio r6,r10,r8
+	ld	 r10,s1_ptr,8
+	subu.cio r7,r12,r9
+	ld	 r12,s1_ptr,12
+	ld.d	 r8,s2_ptr,8
+	st.d	 r6,res_ptr,0
+	subu	 size,size,2
+	addu	 s1_ptr,s1_ptr,8
+	addu	 s2_ptr,s2_ptr,8
+	addu	 res_ptr,res_ptr,8
+	bcnd	 ge0,size,Loope1
+Lend1:	subu.cio r6,r10,r8
+	subu.cio r7,r12,r9
+	st.d	 r6,res_ptr,0
+
+	bb0	 0,size,Lret1
+/* Add last limb */
+	ld	 r10,s1_ptr,8
+	ld	 r8,s2_ptr,8
+	subu.cio r6,r10,r8
+	st	 r6,res_ptr,8
+
+Lret1:	addu.ci r2,r0,r0		; return carry-out from most sign. limb
+	jmp.n	 r1
+	 xor	r2,r2,1
+
+L1:	xor	 r12,s1_ptr,res_ptr
+	bb1	 2,r12,L2
+; **  V1b  **
+	bb0	 2,res_ptr,L_v1b	; branch if res_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s1_ptr */
+	ld	 r10,s2_ptr,0
+	addu	 s2_ptr,s2_ptr,4
+	ld	 r8,s1_ptr,0
+	addu	 s1_ptr,s1_ptr,4
+	subu	 size,size,1
+	subu.co	 r6,r8,r10
+	st	 r6,res_ptr,0
+	addu	 res_ptr,res_ptr,4
+L_v1b:	cmp	 r12,size,2
+	bb1	 lt,r12,Lend2
+
+	ld	 r10,s2_ptr,0
+	ld	 r12,s2_ptr,4
+	ld.d	 r8,s1_ptr,0
+	subu	 size,size,10
+	bcnd	 lt0,size,Lfin1b
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+	align	 8
+Loop1b:	subu	 size,size,8
+	subu.cio r6,r8,r10
+	ld	 r10,s2_ptr,8
+	subu.cio r7,r9,r12
+	ld	 r12,s2_ptr,12
+	ld.d	 r8,s1_ptr,8
+	st.d	 r6,res_ptr,0
+	subu.cio r6,r8,r10
+	ld	 r10,s2_ptr,16
+	subu.cio r7,r9,r12
+	ld	 r12,s2_ptr,20
+	ld.d	 r8,s1_ptr,16
+	st.d	 r6,res_ptr,8
+	subu.cio r6,r8,r10
+	ld	 r10,s2_ptr,24
+	subu.cio r7,r9,r12
+	ld	 r12,s2_ptr,28
+	ld.d	 r8,s1_ptr,24
+	st.d	 r6,res_ptr,16
+	subu.cio r6,r8,r10
+	ld	 r10,s2_ptr,32
+	subu.cio r7,r9,r12
+	ld	 r12,s2_ptr,36
+	addu	 s2_ptr,s2_ptr,32
+	ld.d	 r8,s1_ptr,32
+	addu	 s1_ptr,s1_ptr,32
+	st.d	 r6,res_ptr,24
+	addu	 res_ptr,res_ptr,32
+	bcnd	 ge0,size,Loop1b
+
+Lfin1b:	addu	 size,size,8-2
+	bcnd	 lt0,size,Lend1b
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1b:subu.cio r6,r8,r10
+	ld	 r10,s2_ptr,8
+	subu.cio r7,r9,r12
+	ld	 r12,s2_ptr,12
+	ld.d	 r8,s1_ptr,8
+	st.d	 r6,res_ptr,0
+	subu	 size,size,2
+	addu	 s1_ptr,s1_ptr,8
+	addu	 s2_ptr,s2_ptr,8
+	addu	 res_ptr,res_ptr,8
+	bcnd	 ge0,size,Loope1b
+Lend1b:	subu.cio r6,r8,r10
+	subu.cio r7,r9,r12
+	st.d	 r6,res_ptr,0
+
+	bb0	 0,size,Lret1b
+/* Add last limb */
+	ld	 r10,s2_ptr,8
+	ld	 r8,s1_ptr,8
+	subu.cio r6,r8,r10
+	st	 r6,res_ptr,8
+
+Lret1b:	addu.ci r2,r0,r0		; return carry-out from most sign. limb
+	jmp.n	 r1
+	 xor	r2,r2,1
+
+; **  V2  **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
+   things can be aligned (that we care about) we now know that the alignment
+   of s1_ptr and s2_ptr are the same.  */
+
+L2:	cmp	 r12,size,1
+	bb1	 eq,r12,Ljone
+	bb0	 2,s1_ptr,L_v2		; branch if s1_ptr is aligned
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+	ld	 r10,s1_ptr,0
+	addu	 s1_ptr,s1_ptr,4
+	ld	 r8,s2_ptr,0
+	addu	 s2_ptr,s2_ptr,4
+	subu	 size,size,1
+	subu.co	 r6,r10,r8
+	st	 r6,res_ptr,0
+	addu	 res_ptr,res_ptr,4
+
+L_v2:	subu	 size,size,8
+	bcnd	 lt0,size,Lfin2
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+	align	 8
+Loop2:	subu	 size,size,8
+	ld.d	 r8,s1_ptr,0
+	ld.d	 r6,s2_ptr,0
+	subu.cio r8,r8,r6
+	st	 r8,res_ptr,0
+	subu.cio r9,r9,r7
+	st	 r9,res_ptr,4
+	ld.d	 r8,s1_ptr,8
+	ld.d	 r6,s2_ptr,8
+	subu.cio r8,r8,r6
+	st	 r8,res_ptr,8
+	subu.cio r9,r9,r7
+	st	 r9,res_ptr,12
+	ld.d	 r8,s1_ptr,16
+	ld.d	 r6,s2_ptr,16
+	subu.cio r8,r8,r6
+	st	 r8,res_ptr,16
+	subu.cio r9,r9,r7
+	st	 r9,res_ptr,20
+	ld.d	 r8,s1_ptr,24
+	ld.d	 r6,s2_ptr,24
+	subu.cio r8,r8,r6
+	st	 r8,res_ptr,24
+	subu.cio r9,r9,r7
+	st	 r9,res_ptr,28
+	addu	 s1_ptr,s1_ptr,32
+	addu	 s2_ptr,s2_ptr,32
+	addu	 res_ptr,res_ptr,32
+	bcnd	 ge0,size,Loop2
+
+Lfin2:	addu	 size,size,8-2
+	bcnd	 lt0,size,Lend2
+Loope2:	ld.d	 r8,s1_ptr,0
+	ld.d	 r6,s2_ptr,0
+	subu.cio r8,r8,r6
+	st	 r8,res_ptr,0
+	subu.cio r9,r9,r7
+	st	 r9,res_ptr,4
+	subu	 size,size,2
+	addu	 s1_ptr,s1_ptr,8
+	addu	 s2_ptr,s2_ptr,8
+	addu	 res_ptr,res_ptr,8
+	bcnd	 ge0,size,Loope2
+Lend2:	bb0	 0,size,Lret2
+/* Add last limb */
+Ljone:	ld	 r10,s1_ptr,0
+	ld	 r8,s2_ptr,0
+	subu.cio r6,r10,r8
+	st	 r6,res_ptr,0
+
+Lret2:	addu.ci r2,r0,r0		; return carry-out from most sign. limb
+	jmp.n	 r1
+	 xor	r2,r2,1
diff --git a/mpn/m88k/mul_1.s b/mpn/m88k/mul_1.s
new file mode 100644
index 000000000..26626bf95
--- /dev/null
+++ b/mpn/m88k/mul_1.s
@@ -0,0 +1,127 @@
+; mc88100 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+; store the product in a second limb vector.
+
+; Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	r2
+; s1_ptr	r3
+; size		r4
+; s2_limb	r5
+
+; Common overhead is about 11 cycles/invocation.
+
+; The speed for S2_LIMB >= 0x10000 is approximately 21 cycles/limb.  (The
+; pipeline stalls 2 cycles due to WB contention.)
+
+; The speed for S2_LIMB < 0x10000 is approximately 16 cycles/limb.  (The
+; pipeline stalls 2 cycles due to WB contention and 1 cycle due to latency.)
+
+; To enhance speed:
+; 1. Unroll main loop 4-8 times.
+; 2. Schedule code to avoid WB contention.  It might be tempting to move the
+;    ld instruction in the loops down to save 2 cycles (less WB contention),
+;    but that looses because the ultimate value will be read from outside
+;    the allocated space.  But if we handle the ultimate multiplication in
+;    the tail, we can do this.
+; 3. Make the multiplication with less instructions.  I think the code for
+;    (S2_LIMB >= 0x10000) is not minimal.
+; With these techniques the (S2_LIMB >= 0x10000) case would run in 17 or
+; less cycles/limb; the (S2_LIMB < 0x10000) case would run in 11
+; cycles/limb.  (Assuming infinite unrolling.)
+
+	text
+	align	 16
+	global	 ___mpn_mul_1
+___mpn_mul_1:
+
+	; Make S1_PTR and RES_PTR point at the end of their blocks
+	; and negate SIZE.
+	lda	 r3,r3[r4]
+	lda	 r6,r2[r4]	; RES_PTR in r6 since r2 is retval
+	subu	 r4,r0,r4
+
+	addu.co	 r2,r0,r0	; r2 = cy = 0
+	ld	 r9,r3[r4]
+	mask	 r7,r5,0xffff	; r7 = lo(S2_LIMB)
+	extu	 r8,r5,16	; r8 = hi(S2_LIMB)
+	bcnd.n	 eq0,r8,Lsmall	; jump if (hi(S2_LIMB) == 0)
+	 subu	 r6,r6,4
+
+; General code for any value of S2_LIMB.
+
+	; Make a stack frame and save r25 and r26
+	subu	 r31,r31,16
+	st.d	 r25,r31,8
+
+	; Enter the loop in the middle
+	br.n	L1
+	addu	 r4,r4,1
+
+Loop:	ld	 r9,r3[r4]
+	st	 r26,r6[r4]
+; bcnd	ne0,r0,0		; bubble
+	addu	 r4,r4,1
+L1:	mul	 r26,r9,r5	; low word of product	mul_1	WB ld
+	mask	 r12,r9,0xffff	; r12 = lo(s1_limb)	mask_1
+	mul	 r11,r12,r7	; r11 =  prod_0		mul_2	WB mask_1
+	mul	 r10,r12,r8	; r10 = prod_1a		mul_3
+	extu	 r13,r9,16	; r13 = hi(s1_limb)	extu_1	WB mul_1
+	mul	 r12,r13,r7	; r12 = prod_1b		mul_4	WB extu_1
+	mul	 r25,r13,r8	; r25  = prod_2		mul_5	WB mul_2
+	extu	 r11,r11,16	; r11 = hi(prod_0)	extu_2	WB mul_3
+	addu	 r10,r10,r11	;			addu_1	WB extu_2
+; bcnd	ne0,r0,0		; bubble			WB addu_1
+	addu.co	 r10,r10,r12	;				WB mul_4
+	mask.u	 r10,r10,0xffff	; move the 16 most significant bits...
+	addu.ci	 r10,r10,r0	; ...to the low half of the word...
+	rot	 r10,r10,16	; ...and put carry in pos 16.
+	addu.co	 r26,r26,r2	; add old carry limb
+	bcnd.n	 ne0,r4,Loop
+	 addu.ci r2,r25,r10	; compute new carry limb
+
+	st	 r26,r6[r4]
+	ld.d	 r25,r31,8
+	jmp.n	 r1
+	 addu	 r31,r31,16
+
+; Fast code for S2_LIMB < 0x10000
+Lsmall:
+	; Enter the loop in the middle
+	br.n	SL1
+	addu	 r4,r4,1
+
+SLoop:	ld	 r9,r3[r4]	;
+	st	 r8,r6[r4]	;
+	addu	 r4,r4,1	;
+SL1:	mul	 r8,r9,r5	; low word of product
+	mask	 r12,r9,0xffff	; r12 = lo(s1_limb)
+	extu	 r13,r9,16	; r13 = hi(s1_limb)
+	mul	 r11,r12,r7	; r11 =  prod_0
+	mul	 r12,r13,r7	; r12 = prod_1b
+	addu.cio r8,r8,r2	; add old carry limb
+	extu	 r10,r11,16	; r11 = hi(prod_0)
+	addu	 r10,r10,r12	;
+	bcnd.n	 ne0,r4,SLoop
+	extu	 r2,r10,16	; r2 = new carry limb
+
+	jmp.n	 r1
+	st	 r8,r6[r4]
diff --git a/mpn/m88k/sub_n.s b/mpn/m88k/sub_n.s
new file mode 100644
index 000000000..7dfffc980
--- /dev/null
+++ b/mpn/m88k/sub_n.s
@@ -0,0 +1,106 @@
+; mc88100 __mpn_sub -- Subtract two limb vectors of the same length > 0 and
+; store difference in a third limb vector.
+
+; Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+; This file is part of the GNU MP Library.
+
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Library General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or (at your
+; option) any later version.
+
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+; License for more details.
+
+; You should have received a copy of the GNU Library General Public License
+; along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+; MA 02111-1307, USA.
+
+
+; INPUT PARAMETERS
+; res_ptr	r2
+; s1_ptr	r3
+; s2_ptr	r4
+; size		r5
+
+; This code has been optimized to run one instruction per clock, avoiding
+; load stalls and writeback contention.  As a result, the instruction
+; order is not always natural.
+
+; The speed is about 4.6 clocks/limb + 18 clocks/limb-vector on an 88100,
+; but on the 88110, it seems to run much slower, 6.6 clocks/limb.
+
+	text
+	align	 16
+	global	 ___mpn_sub_n
+___mpn_sub_n:
+	ld	r6,r3,0			; read first limb from s1_ptr
+	extu	r10,r5,3
+	ld	r7,r4,0			; read first limb from s2_ptr
+
+	subu	r5,r0,r5
+	mak	r5,r5,3<4>
+	bcnd.n	eq0,r5,Lzero
+	subu.co	r0,r0,r0		; initialize carry
+
+	or	r12,r0,lo16(Lbase)
+	or.u	r12,r12,hi16(Lbase)
+	addu	r12,r12,r5		; r12 is address for entering in loop
+
+	extu	r5,r5,2			; divide by 4
+	subu	r2,r2,r5		; adjust res_ptr
+	subu	r3,r3,r5		; adjust s1_ptr
+	subu	r4,r4,r5		; adjust s2_ptr
+
+	or	r8,r6,r0
+
+	jmp.n	r12
+	 or	r9,r7,r0
+
+Loop:	addu	r3,r3,32
+	st	r8,r2,28
+	addu	r4,r4,32
+	ld	r6,r3,0
+	addu	r2,r2,32
+	ld	r7,r4,0
+Lzero:	subu	r10,r10,1		; subtract 0 + 8r limbs (adj loop cnt)
+Lbase:	ld	r8,r3,4
+	subu.cio r6,r6,r7
+	ld	r9,r4,4
+	st	r6,r2,0
+	ld	r6,r3,8			; subtract 7 + 8r limbs
+	subu.cio r8,r8,r9
+	ld	r7,r4,8
+	st	r8,r2,4
+	ld	r8,r3,12		; subtract 6 + 8r limbs
+	subu.cio r6,r6,r7
+	ld	r9,r4,12
+	st	r6,r2,8
+	ld	r6,r3,16		; subtract 5 + 8r limbs
+	subu.cio r8,r8,r9
+	ld	r7,r4,16
+	st	r8,r2,12
+	ld	r8,r3,20		; subtract 4 + 8r limbs
+	subu.cio r6,r6,r7
+	ld	r9,r4,20
+	st	r6,r2,16
+	ld	r6,r3,24		; subtract 3 + 8r limbs
+	subu.cio r8,r8,r9
+	ld	r7,r4,24
+	st	r8,r2,20
+	ld	r8,r3,28		; subtract 2 + 8r limbs
+	subu.cio r6,r6,r7
+	ld	r9,r4,28
+	st	r6,r2,24
+	bcnd.n	ne0,r10,Loop		; subtract 1 + 8r limbs
+	 subu.cio r8,r8,r9
+
+	st	r8,r2,28		; store most significant limb
+
+	addu.ci r2,r0,r0		; return carry-out from most sign. limb
+	jmp.n	 r1
+	 xor	r2,r2,1
diff --git a/mpn/mips2/add_n.s b/mpn/mips2/add_n.s
new file mode 100644
index 000000000..f5525cec4
--- /dev/null
+++ b/mpn/mips2/add_n.s
@@ -0,0 +1,120 @@
+ # MIPS2 __mpn_add_n -- Add two limb vectors of the same length > 0 and
+ # store sum in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # s1_ptr	$5
+ # s2_ptr	$6
+ # size		$7
+
+	.text
+	.align	2
+	.globl	__mpn_add_n
+	.ent	__mpn_add_n
+__mpn_add_n:
+	.set	noreorder
+	.set	nomacro
+
+	lw	$10,0($5)
+	lw	$11,0($6)
+
+	addiu	$7,$7,-1
+	and	$9,$7,4-1	# number of limbs in first loop
+	beq	$9,$0,.L0	# if multiple of 4 limbs, skip first loop
+	 move	$2,$0
+
+	subu	$7,$7,$9
+
+.Loop0:	addiu	$9,$9,-1
+	lw	$12,4($5)
+	addu	$11,$11,$2
+	lw	$13,4($6)
+	sltu	$8,$11,$2
+	addu	$11,$10,$11
+	sltu	$2,$11,$10
+	sw	$11,0($4)
+	or	$2,$2,$8
+
+	addiu	$5,$5,4
+	addiu	$6,$6,4
+	move	$10,$12
+	move	$11,$13
+	bne	$9,$0,.Loop0
+	 addiu	$4,$4,4
+
+.L0:	beq	$7,$0,.Lend
+	 nop
+
+.Loop:	addiu	$7,$7,-4
+
+	lw	$12,4($5)
+	addu	$11,$11,$2
+	lw	$13,4($6)
+	sltu	$8,$11,$2
+	addu	$11,$10,$11
+	sltu	$2,$11,$10
+	sw	$11,0($4)
+	or	$2,$2,$8
+
+	lw	$10,8($5)
+	addu	$13,$13,$2
+	lw	$11,8($6)
+	sltu	$8,$13,$2
+	addu	$13,$12,$13
+	sltu	$2,$13,$12
+	sw	$13,4($4)
+	or	$2,$2,$8
+
+	lw	$12,12($5)
+	addu	$11,$11,$2
+	lw	$13,12($6)
+	sltu	$8,$11,$2
+	addu	$11,$10,$11
+	sltu	$2,$11,$10
+	sw	$11,8($4)
+	or	$2,$2,$8
+
+	lw	$10,16($5)
+	addu	$13,$13,$2
+	lw	$11,16($6)
+	sltu	$8,$13,$2
+	addu	$13,$12,$13
+	sltu	$2,$13,$12
+	sw	$13,12($4)
+	or	$2,$2,$8
+
+	addiu	$5,$5,16
+	addiu	$6,$6,16
+
+	bne	$7,$0,.Loop
+	 addiu	$4,$4,16
+
+.Lend:	addu	$11,$11,$2
+	sltu	$8,$11,$2
+	addu	$11,$10,$11
+	sltu	$2,$11,$10
+	sw	$11,0($4)
+	j	$31
+	or	$2,$2,$8
+
+	.end	__mpn_add_n
diff --git a/mpn/mips2/addmul_1.s b/mpn/mips2/addmul_1.s
new file mode 100644
index 000000000..6145771e3
--- /dev/null
+++ b/mpn/mips2/addmul_1.s
@@ -0,0 +1,97 @@
+ # MIPS __mpn_addmul_1 -- Multiply a limb vector with a single limb and
+ # add the product to a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # s1_ptr	$5
+ # size		$6
+ # s2_limb	$7
+
+	.text
+	.align	 4
+	.globl	 __mpn_addmul_1
+	.ent	__mpn_addmul_1
+__mpn_addmul_1:
+	.set    noreorder
+	.set    nomacro
+
+ # warm up phase 0
+	lw	$8,0($5)
+
+ # warm up phase 1
+	addiu	$5,$5,4
+	multu	$8,$7
+
+	addiu	$6,$6,-1
+	beq	$6,$0,$LC0
+	 move	$2,$0		# zero cy2
+
+	addiu	$6,$6,-1
+	beq	$6,$0,$LC1
+	lw	$8,0($5)	# load new s1 limb as early as possible
+
+Loop:	lw	$10,0($4)
+	mflo	$3
+	mfhi	$9
+	addiu	$5,$5,4
+	addu	$3,$3,$2	# add old carry limb to low product limb
+	multu	$8,$7
+	lw	$8,0($5)	# load new s1 limb as early as possible
+	addiu	$6,$6,-1	# decrement loop counter
+	sltu	$2,$3,$2	# carry from previous addition -> $2
+	addu	$3,$10,$3
+	sltu	$10,$3,$10
+	addu	$2,$2,$10
+	sw	$3,0($4)
+	addiu	$4,$4,4
+	bne	$6,$0,Loop
+	 addu	$2,$9,$2	# add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1:	lw	$10,0($4)
+	mflo	$3
+	mfhi	$9
+	addu	$3,$3,$2
+	sltu	$2,$3,$2
+	multu	$8,$7
+	addu	$3,$10,$3
+	sltu	$10,$3,$10
+	addu	$2,$2,$10
+	sw	$3,0($4)
+	addiu	$4,$4,4
+	addu	$2,$9,$2	# add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0:	lw	$10,0($4)
+	mflo	$3
+	mfhi	$9
+	addu	$3,$3,$2
+	sltu	$2,$3,$2
+	addu	$3,$10,$3
+	sltu	$10,$3,$10
+	addu	$2,$2,$10
+	sw	$3,0($4)
+	j	$31
+	addu	$2,$9,$2	# add high product limb and carry from addition
+
+	.end	__mpn_addmul_1
diff --git a/mpn/mips2/lshift.s b/mpn/mips2/lshift.s
new file mode 100644
index 000000000..ee92d7916
--- /dev/null
+++ b/mpn/mips2/lshift.s
@@ -0,0 +1,95 @@
+ # MIPS2 __mpn_lshift --
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # src_ptr	$5
+ # size		$6
+ # cnt		$7
+
+	.text
+	.align	2
+	.globl	__mpn_lshift
+	.ent	__mpn_lshift
+__mpn_lshift:
+	.set	noreorder
+	.set	nomacro
+
+	sll	$2,$6,2
+	addu	$5,$5,$2	# make r5 point at end of src
+	lw	$10,-4($5)	# load first limb
+	subu	$13,$0,$7
+	addu	$4,$4,$2	# make r4 point at end of res
+	addiu	$6,$6,-1
+	and	$9,$6,4-1	# number of limbs in first loop
+	beq	$9,$0,.L0	# if multiple of 4 limbs, skip first loop
+	 srl	$2,$10,$13	# compute function result
+
+	subu	$6,$6,$9
+
+.Loop0:	lw	$3,-8($5)
+	addiu	$4,$4,-4
+	addiu	$5,$5,-4
+	addiu	$9,$9,-1
+	sll	$11,$10,$7
+	srl	$12,$3,$13
+	move	$10,$3
+	or	$8,$11,$12
+	bne	$9,$0,.Loop0
+	 sw	$8,0($4)
+
+.L0:	beq	$6,$0,.Lend
+	 nop
+
+.Loop:	lw	$3,-8($5)
+	addiu	$4,$4,-16
+	addiu	$6,$6,-4
+	sll	$11,$10,$7
+	srl	$12,$3,$13
+
+	lw	$10,-12($5)
+	sll	$14,$3,$7
+	or	$8,$11,$12
+	sw	$8,12($4)
+	srl	$9,$10,$13
+
+	lw	$3,-16($5)
+	sll	$11,$10,$7
+	or	$8,$14,$9
+	sw	$8,8($4)
+	srl	$12,$3,$13
+
+	lw	$10,-20($5)
+	sll	$14,$3,$7
+	or	$8,$11,$12
+	sw	$8,4($4)
+	srl	$9,$10,$13
+
+	addiu	$5,$5,-16
+	or	$8,$14,$9
+	bgtz	$6,.Loop
+	 sw	$8,0($4)
+
+.Lend:	sll	$8,$10,$7
+	j	$31
+	sw	$8,-4($4)
+	.end	__mpn_lshift
diff --git a/mpn/mips2/mul_1.s b/mpn/mips2/mul_1.s
new file mode 100644
index 000000000..d006fa122
--- /dev/null
+++ b/mpn/mips2/mul_1.s
@@ -0,0 +1,85 @@
+ # MIPS __mpn_mul_1 -- Multiply a limb vector with a single limb and
+ # store the product in a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # s1_ptr	$5
+ # size		$6
+ # s2_limb	$7
+
+	.text
+	.align	 4
+	.globl	 __mpn_mul_1
+	.ent	__mpn_mul_1
+__mpn_mul_1:
+	.set    noreorder
+	.set    nomacro
+
+ # warm up phase 0
+	lw	$8,0($5)
+
+ # warm up phase 1
+	addiu	$5,$5,4
+	multu	$8,$7
+
+	addiu	$6,$6,-1
+	beq	$6,$0,$LC0
+	 move	$2,$0		# zero cy2
+
+	addiu	$6,$6,-1
+	beq	$6,$0,$LC1
+	lw	$8,0($5)	# load new s1 limb as early as possible
+
+Loop:	mflo	$10
+	mfhi	$9
+	addiu	$5,$5,4
+	addu	$10,$10,$2	# add old carry limb to low product limb
+	multu	$8,$7
+	lw	$8,0($5)	# load new s1 limb as early as possible
+	addiu	$6,$6,-1	# decrement loop counter
+	sltu	$2,$10,$2	# carry from previous addition -> $2
+	sw	$10,0($4)
+	addiu	$4,$4,4
+	bne	$6,$0,Loop
+	 addu	$2,$9,$2	# add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1:	mflo	$10
+	mfhi	$9
+	addu	$10,$10,$2
+	sltu	$2,$10,$2
+	multu	$8,$7
+	sw	$10,0($4)
+	addiu	$4,$4,4
+	addu	$2,$9,$2	# add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0:	mflo	$10
+	mfhi	$9
+	addu	$10,$10,$2
+	sltu	$2,$10,$2
+	sw	$10,0($4)
+	j	$31
+	addu	$2,$9,$2	# add high product limb and carry from addition
+
+	.end	__mpn_mul_1
diff --git a/mpn/mips2/rshift.s b/mpn/mips2/rshift.s
new file mode 100644
index 000000000..a8beb4057
--- /dev/null
+++ b/mpn/mips2/rshift.s
@@ -0,0 +1,92 @@
+ # MIPS2 __mpn_rshift --
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # src_ptr	$5
+ # size		$6
+ # cnt		$7
+
+	.text
+	.align	2
+	.globl	__mpn_rshift
+	.ent	__mpn_rshift
+__mpn_rshift:
+	.set	noreorder
+	.set	nomacro
+
+	lw	$10,0($5)	# load first limb
+	subu	$13,$0,$7
+	addiu	$6,$6,-1
+	and	$9,$6,4-1	# number of limbs in first loop
+	beq	$9,$0,.L0	# if multiple of 4 limbs, skip first loop
+	 sll	$2,$10,$13	# compute function result
+
+	subu	$6,$6,$9
+
+.Loop0:	lw	$3,4($5)
+	addiu	$4,$4,4
+	addiu	$5,$5,4
+	addiu	$9,$9,-1
+	srl	$11,$10,$7
+	sll	$12,$3,$13
+	move	$10,$3
+	or	$8,$11,$12
+	bne	$9,$0,.Loop0
+	 sw	$8,-4($4)
+
+.L0:	beq	$6,$0,.Lend
+	 nop
+
+.Loop:	lw	$3,4($5)
+	addiu	$4,$4,16
+	addiu	$6,$6,-4
+	srl	$11,$10,$7
+	sll	$12,$3,$13
+
+	lw	$10,8($5)
+	srl	$14,$3,$7
+	or	$8,$11,$12
+	sw	$8,-16($4)
+	sll	$9,$10,$13
+
+	lw	$3,12($5)
+	srl	$11,$10,$7
+	or	$8,$14,$9
+	sw	$8,-12($4)
+	sll	$12,$3,$13
+
+	lw	$10,16($5)
+	srl	$14,$3,$7
+	or	$8,$11,$12
+	sw	$8,-8($4)
+	sll	$9,$10,$13
+
+	addiu	$5,$5,16
+	or	$8,$14,$9
+	bgtz	$6,.Loop
+	 sw	$8,-4($4)
+
+.Lend:	srl	$8,$10,$7
+	j	$31
+	sw	$8,0($4)
+	.end	__mpn_rshift
diff --git a/mpn/mips2/sub_n.s b/mpn/mips2/sub_n.s
new file mode 100644
index 000000000..3368ef29d
--- /dev/null
+++ b/mpn/mips2/sub_n.s
@@ -0,0 +1,120 @@
+ # MIPS2 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # s1_ptr	$5
+ # s2_ptr	$6
+ # size		$7
+
+	.text
+	.align	2
+	.globl	__mpn_sub_n
+	.ent	__mpn_sub_n
+__mpn_sub_n:
+	.set	noreorder
+	.set	nomacro
+
+	lw	$10,0($5)
+	lw	$11,0($6)
+
+	addiu	$7,$7,-1
+	and	$9,$7,4-1	# number of limbs in first loop
+	beq	$9,$0,.L0	# if multiple of 4 limbs, skip first loop
+	 move	$2,$0
+
+	subu	$7,$7,$9
+
+.Loop0:	addiu	$9,$9,-1
+	lw	$12,4($5)
+	addu	$11,$11,$2
+	lw	$13,4($6)
+	sltu	$8,$11,$2
+	subu	$11,$10,$11
+	sltu	$2,$10,$11
+	sw	$11,0($4)
+	or	$2,$2,$8
+
+	addiu	$5,$5,4
+	addiu	$6,$6,4
+	move	$10,$12
+	move	$11,$13
+	bne	$9,$0,.Loop0
+	 addiu	$4,$4,4
+
+.L0:	beq	$7,$0,.Lend
+	 nop
+
+.Loop:	addiu	$7,$7,-4
+
+	lw	$12,4($5)
+	addu	$11,$11,$2
+	lw	$13,4($6)
+	sltu	$8,$11,$2
+	subu	$11,$10,$11
+	sltu	$2,$10,$11
+	sw	$11,0($4)
+	or	$2,$2,$8
+
+	lw	$10,8($5)
+	addu	$13,$13,$2
+	lw	$11,8($6)
+	sltu	$8,$13,$2
+	subu	$13,$12,$13
+	sltu	$2,$12,$13
+	sw	$13,4($4)
+	or	$2,$2,$8
+
+	lw	$12,12($5)
+	addu	$11,$11,$2
+	lw	$13,12($6)
+	sltu	$8,$11,$2
+	subu	$11,$10,$11
+	sltu	$2,$10,$11
+	sw	$11,8($4)
+	or	$2,$2,$8
+
+	lw	$10,16($5)
+	addu	$13,$13,$2
+	lw	$11,16($6)
+	sltu	$8,$13,$2
+	subu	$13,$12,$13
+	sltu	$2,$12,$13
+	sw	$13,12($4)
+	or	$2,$2,$8
+
+	addiu	$5,$5,16
+	addiu	$6,$6,16
+
+	bne	$7,$0,.Loop
+	 addiu	$4,$4,16
+
+.Lend:	addu	$11,$11,$2
+	sltu	$8,$11,$2
+	subu	$11,$10,$11
+	sltu	$2,$10,$11
+	sw	$11,0($4)
+	j	$31
+	or	$2,$2,$8
+
+	.end	__mpn_sub_n
diff --git a/mpn/mips2/submul_1.s b/mpn/mips2/submul_1.s
new file mode 100644
index 000000000..1324b6609
--- /dev/null
+++ b/mpn/mips2/submul_1.s
@@ -0,0 +1,97 @@
+ # MIPS __mpn_submul_1 -- Multiply a limb vector with a single limb and
+ # subtract the product from a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # s1_ptr	$5
+ # size		$6
+ # s2_limb	$7
+
+	.text
+	.align	 4
+	.globl	 __mpn_submul_1
+	.ent	__mpn_submul_1
+__mpn_submul_1:
+	.set    noreorder
+	.set    nomacro
+
+ # warm up phase 0
+	lw	$8,0($5)
+
+ # warm up phase 1
+	addiu	$5,$5,4
+	multu	$8,$7
+
+	addiu	$6,$6,-1
+	beq	$6,$0,$LC0
+	 move	$2,$0		# zero cy2
+
+	addiu	$6,$6,-1
+	beq	$6,$0,$LC1
+	lw	$8,0($5)	# load new s1 limb as early as possible
+
+Loop:	lw	$10,0($4)
+	mflo	$3
+	mfhi	$9
+	addiu	$5,$5,4
+	addu	$3,$3,$2	# add old carry limb to low product limb
+	multu	$8,$7
+	lw	$8,0($5)	# load new s1 limb as early as possible
+	addiu	$6,$6,-1	# decrement loop counter
+	sltu	$2,$3,$2	# carry from previous addition -> $2
+	subu	$3,$10,$3
+	sgtu	$10,$3,$10
+	addu	$2,$2,$10
+	sw	$3,0($4)
+	addiu	$4,$4,4
+	bne	$6,$0,Loop
+	 addu	$2,$9,$2	# add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1:	lw	$10,0($4)
+	mflo	$3
+	mfhi	$9
+	addu	$3,$3,$2
+	sltu	$2,$3,$2
+	multu	$8,$7
+	subu	$3,$10,$3
+	sgtu	$10,$3,$10
+	addu	$2,$2,$10
+	sw	$3,0($4)
+	addiu	$4,$4,4
+	addu	$2,$9,$2	# add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0:	lw	$10,0($4)
+	mflo	$3
+	mfhi	$9
+	addu	$3,$3,$2
+	sltu	$2,$3,$2
+	subu	$3,$10,$3
+	sgtu	$10,$3,$10
+	addu	$2,$2,$10
+	sw	$3,0($4)
+	j	$31
+	addu	$2,$9,$2	# add high product limb and carry from addition
+
+	.end	__mpn_submul_1
diff --git a/mpn/mips3/README b/mpn/mips3/README
new file mode 100644
index 000000000..e94b2c746
--- /dev/null
+++ b/mpn/mips3/README
@@ -0,0 +1,23 @@
+This directory contains mpn functions optimized for MIPS3.  Example of
+processors that implement MIPS3 are R4000, R4400, R4600, R4700, and R8000.
+
+RELEVANT OPTIMIZATION ISSUES
+
+1. On the R4000 and R4400, branches, both the plain and the "likely" ones,
+   take 3 cycles to execute.  (The fastest possible loop will take 4 cycles,
+   because of the delay insn.)
+
+   On the R4600, branches takes a single cycle
+
+   On the R8000, branches often take no noticable cycles, as they are
+   executed in a separate function unit..
+
+2. The R4000 and R4400 have a load latency of 4 cycles.
+
+3. On the R4000 and R4400, multiplies take a data-dependent number of
+   cycles, contrary to the SGI documentation.  There seem to be 3 or 4
+   possible latencies.
+
+STATUS
+
+Good...
diff --git a/mpn/mips3/add_n.s b/mpn/mips3/add_n.s
new file mode 100644
index 000000000..996a449eb
--- /dev/null
+++ b/mpn/mips3/add_n.s
@@ -0,0 +1,120 @@
+ # MIPS3 __mpn_add_n -- Add two limb vectors of the same length > 0 and
+ # store sum in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # s1_ptr	$5
+ # s2_ptr	$6
+ # size		$7
+
+	.text
+	.align	2
+	.globl	__mpn_add_n
+	.ent	__mpn_add_n
+__mpn_add_n:
+	.set	noreorder
+	.set	nomacro
+
+	ld	$10,0($5)
+	ld	$11,0($6)
+
+	daddiu	$7,$7,-1
+	and	$9,$7,4-1	# number of limbs in first loop
+	beq	$9,$0,.L0	# if multiple of 4 limbs, skip first loop
+	 move	$2,$0
+
+	dsubu	$7,$7,$9
+
+.Loop0:	daddiu	$9,$9,-1
+	ld	$12,8($5)
+	daddu	$11,$11,$2
+	ld	$13,8($6)
+	sltu	$8,$11,$2
+	daddu	$11,$10,$11
+	sltu	$2,$11,$10
+	sd	$11,0($4)
+	or	$2,$2,$8
+
+	daddiu	$5,$5,8
+	daddiu	$6,$6,8
+	move	$10,$12
+	move	$11,$13
+	bne	$9,$0,.Loop0
+	 daddiu	$4,$4,8
+
+.L0:	beq	$7,$0,.Lend
+	 nop
+
+.Loop:	daddiu	$7,$7,-4
+
+	ld	$12,8($5)
+	daddu	$11,$11,$2
+	ld	$13,8($6)
+	sltu	$8,$11,$2
+	daddu	$11,$10,$11
+	sltu	$2,$11,$10
+	sd	$11,0($4)
+	or	$2,$2,$8
+
+	ld	$10,16($5)
+	daddu	$13,$13,$2
+	ld	$11,16($6)
+	sltu	$8,$13,$2
+	daddu	$13,$12,$13
+	sltu	$2,$13,$12
+	sd	$13,8($4)
+	or	$2,$2,$8
+
+	ld	$12,24($5)
+	daddu	$11,$11,$2
+	ld	$13,24($6)
+	sltu	$8,$11,$2
+	daddu	$11,$10,$11
+	sltu	$2,$11,$10
+	sd	$11,16($4)
+	or	$2,$2,$8
+
+	ld	$10,32($5)
+	daddu	$13,$13,$2
+	ld	$11,32($6)
+	sltu	$8,$13,$2
+	daddu	$13,$12,$13
+	sltu	$2,$13,$12
+	sd	$13,24($4)
+	or	$2,$2,$8
+
+	daddiu	$5,$5,32
+	daddiu	$6,$6,32
+
+	bne	$7,$0,.Loop
+	 daddiu	$4,$4,32
+
+.Lend:	daddu	$11,$11,$2
+	sltu	$8,$11,$2
+	daddu	$11,$10,$11
+	sltu	$2,$11,$10
+	sd	$11,0($4)
+	j	$31
+	or	$2,$2,$8
+
+	.end	__mpn_add_n
diff --git a/mpn/mips3/addmul_1.s b/mpn/mips3/addmul_1.s
new file mode 100644
index 000000000..cd75c1801
--- /dev/null
+++ b/mpn/mips3/addmul_1.s
@@ -0,0 +1,97 @@
+ # MIPS3 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
+ # add the product to a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # s1_ptr	$5
+ # size		$6
+ # s2_limb	$7
+
+	.text
+	.align	4
+	.globl	__mpn_addmul_1
+	.ent	__mpn_addmul_1
+__mpn_addmul_1:
+	.set    noreorder
+	.set    nomacro
+
+ # warm up phase 0
+	ld	$8,0($5)
+
+ # warm up phase 1
+	daddiu	$5,$5,8
+	dmultu	$8,$7
+
+	daddiu	$6,$6,-1
+	beq	$6,$0,$LC0
+	 move	$2,$0		# zero cy2
+
+	daddiu	$6,$6,-1
+	beq	$6,$0,$LC1
+	ld	$8,0($5)	# load new s1 limb as early as possible
+
+Loop:	ld	$10,0($4)
+	mflo	$3
+	mfhi	$9
+	daddiu	$5,$5,8
+	daddu	$3,$3,$2	# add old carry limb to low product limb
+	dmultu	$8,$7
+	ld	$8,0($5)	# load new s1 limb as early as possible
+	daddiu	$6,$6,-1	# decrement loop counter
+	sltu	$2,$3,$2	# carry from previous addition -> $2
+	daddu	$3,$10,$3
+	sltu	$10,$3,$10
+	daddu	$2,$2,$10
+	sd	$3,0($4)
+	daddiu	$4,$4,8
+	bne	$6,$0,Loop
+	 daddu	$2,$9,$2	# add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1:	ld	$10,0($4)
+	mflo	$3
+	mfhi	$9
+	daddu	$3,$3,$2
+	sltu	$2,$3,$2
+	dmultu	$8,$7
+	daddu	$3,$10,$3
+	sltu	$10,$3,$10
+	daddu	$2,$2,$10
+	sd	$3,0($4)
+	daddiu	$4,$4,8
+	daddu	$2,$9,$2	# add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0:	ld	$10,0($4)
+	mflo	$3
+	mfhi	$9
+	daddu	$3,$3,$2
+	sltu	$2,$3,$2
+	daddu	$3,$10,$3
+	sltu	$10,$3,$10
+	daddu	$2,$2,$10
+	sd	$3,0($4)
+	j	$31
+	daddu	$2,$9,$2	# add high product limb and carry from addition
+
+	.end	__mpn_addmul_1
diff --git a/mpn/mips3/gmp-mparam.h b/mpn/mips3/gmp-mparam.h
new file mode 100644
index 000000000..f3df7ff6e
--- /dev/null
+++ b/mpn/mips3/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
diff --git a/mpn/mips3/lshift.s b/mpn/mips3/lshift.s
new file mode 100644
index 000000000..324a6020c
--- /dev/null
+++ b/mpn/mips3/lshift.s
@@ -0,0 +1,95 @@
+ # MIPS3 __mpn_lshift --
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # src_ptr	$5
+ # size		$6
+ # cnt		$7
+
+	.text
+	.align	2
+	.globl	__mpn_lshift
+	.ent	__mpn_lshift
+__mpn_lshift:
+	.set	noreorder
+	.set	nomacro
+
+	dsll	$2,$6,3
+	daddu	$5,$5,$2	# make r5 point at end of src
+	ld	$10,-8($5)	# load first limb
+	dsubu	$13,$0,$7
+	daddu	$4,$4,$2	# make r4 point at end of res
+	daddiu	$6,$6,-1
+	and	$9,$6,4-1	# number of limbs in first loop
+	beq	$9,$0,.L0	# if multiple of 4 limbs, skip first loop
+	 dsrl	$2,$10,$13	# compute function result
+
+	dsubu	$6,$6,$9
+
+.Loop0:	ld	$3,-16($5)
+	daddiu	$4,$4,-8
+	daddiu	$5,$5,-8
+	daddiu	$9,$9,-1
+	dsll	$11,$10,$7
+	dsrl	$12,$3,$13
+	move	$10,$3
+	or	$8,$11,$12
+	bne	$9,$0,.Loop0
+	 sd	$8,0($4)
+
+.L0:	beq	$6,$0,.Lend
+	 nop
+
+.Loop:	ld	$3,-16($5)
+	daddiu	$4,$4,-32
+	daddiu	$6,$6,-4
+	dsll	$11,$10,$7
+	dsrl	$12,$3,$13
+
+	ld	$10,-24($5)
+	dsll	$14,$3,$7
+	or	$8,$11,$12
+	sd	$8,24($4)
+	dsrl	$9,$10,$13
+
+	ld	$3,-32($5)
+	dsll	$11,$10,$7
+	or	$8,$14,$9
+	sd	$8,16($4)
+	dsrl	$12,$3,$13
+
+	ld	$10,-40($5)
+	dsll	$14,$3,$7
+	or	$8,$11,$12
+	sd	$8,8($4)
+	dsrl	$9,$10,$13
+
+	daddiu	$5,$5,-32
+	or	$8,$14,$9
+	bgtz	$6,.Loop
+	 sd	$8,0($4)
+
+.Lend:	dsll	$8,$10,$7
+	j	$31
+	sd	$8,-8($4)
+	.end	__mpn_lshift
diff --git a/mpn/mips3/mul_1.s b/mpn/mips3/mul_1.s
new file mode 100644
index 000000000..281d0574a
--- /dev/null
+++ b/mpn/mips3/mul_1.s
@@ -0,0 +1,85 @@
+ # MIPS3 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+ # store the product in a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # s1_ptr	$5
+ # size		$6
+ # s2_limb	$7
+
+	.text
+	.align	4
+	.globl	__mpn_mul_1
+	.ent	__mpn_mul_1
+__mpn_mul_1:
+	.set    noreorder
+	.set    nomacro
+
+ # warm up phase 0
+	ld	$8,0($5)
+
+ # warm up phase 1
+	daddiu	$5,$5,8
+	dmultu	$8,$7
+
+	daddiu	$6,$6,-1
+	beq	$6,$0,$LC0
+	 move	$2,$0		# zero cy2
+
+	daddiu	$6,$6,-1
+	beq	$6,$0,$LC1
+	ld	$8,0($5)	# load new s1 limb as early as possible
+
+Loop:	mflo	$10
+	mfhi	$9
+	daddiu	$5,$5,8
+	daddu	$10,$10,$2	# add old carry limb to low product limb
+	dmultu	$8,$7
+	ld	$8,0($5)	# load new s1 limb as early as possible
+	daddiu	$6,$6,-1	# decrement loop counter
+	sltu	$2,$10,$2	# carry from previous addition -> $2
+	sd	$10,0($4)
+	daddiu	$4,$4,8
+	bne	$6,$0,Loop
+	 daddu	$2,$9,$2	# add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1:	mflo	$10
+	mfhi	$9
+	daddu	$10,$10,$2
+	sltu	$2,$10,$2
+	dmultu	$8,$7
+	sd	$10,0($4)
+	daddiu	$4,$4,8
+	daddu	$2,$9,$2	# add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0:	mflo	$10
+	mfhi	$9
+	daddu	$10,$10,$2
+	sltu	$2,$10,$2
+	sd	$10,0($4)
+	j	$31
+	daddu	$2,$9,$2	# add high product limb and carry from addition
+
+	.end	__mpn_mul_1
diff --git a/mpn/mips3/rshift.s b/mpn/mips3/rshift.s
new file mode 100644
index 000000000..9920e1a9e
--- /dev/null
+++ b/mpn/mips3/rshift.s
@@ -0,0 +1,92 @@
+ # MIPS3 __mpn_rshift --
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # src_ptr	$5
+ # size		$6
+ # cnt		$7
+
+	.text
+	.align	2
+	.globl	__mpn_rshift
+	.ent	__mpn_rshift
+__mpn_rshift:
+	.set	noreorder
+	.set	nomacro
+
+	ld	$10,0($5)	# load first limb
+	dsubu	$13,$0,$7
+	daddiu	$6,$6,-1
+	and	$9,$6,4-1	# number of limbs in first loop
+	beq	$9,$0,.L0	# if multiple of 4 limbs, skip first loop
+	 dsll	$2,$10,$13	# compute function result
+
+	dsubu	$6,$6,$9
+
+.Loop0:	ld	$3,8($5)
+	daddiu	$4,$4,8
+	daddiu	$5,$5,8
+	daddiu	$9,$9,-1
+	dsrl	$11,$10,$7
+	dsll	$12,$3,$13
+	move	$10,$3
+	or	$8,$11,$12
+	bne	$9,$0,.Loop0
+	 sd	$8,-8($4)
+
+.L0:	beq	$6,$0,.Lend
+	 nop
+
+.Loop:	ld	$3,8($5)
+	daddiu	$4,$4,32
+	daddiu	$6,$6,-4
+	dsrl	$11,$10,$7
+	dsll	$12,$3,$13
+
+	ld	$10,16($5)
+	dsrl	$14,$3,$7
+	or	$8,$11,$12
+	sd	$8,-32($4)
+	dsll	$9,$10,$13
+
+	ld	$3,24($5)
+	dsrl	$11,$10,$7
+	or	$8,$14,$9
+	sd	$8,-24($4)
+	dsll	$12,$3,$13
+
+	ld	$10,32($5)
+	dsrl	$14,$3,$7
+	or	$8,$11,$12
+	sd	$8,-16($4)
+	dsll	$9,$10,$13
+
+	daddiu	$5,$5,32
+	or	$8,$14,$9
+	bgtz	$6,.Loop
+	 sd	$8,-8($4)
+
+.Lend:	dsrl	$8,$10,$7
+	j	$31
+	sd	$8,0($4)
+	.end	__mpn_rshift
diff --git a/mpn/mips3/sub_n.s b/mpn/mips3/sub_n.s
new file mode 100644
index 000000000..56c77d8bc
--- /dev/null
+++ b/mpn/mips3/sub_n.s
@@ -0,0 +1,120 @@
+ # MIPS3 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # s1_ptr	$5
+ # s2_ptr	$6
+ # size		$7
+
+	.text
+	.align	2
+	.globl	__mpn_sub_n
+	.ent	__mpn_sub_n
+__mpn_sub_n:
+	.set	noreorder
+	.set	nomacro
+
+	ld	$10,0($5)
+	ld	$11,0($6)
+
+	daddiu	$7,$7,-1
+	and	$9,$7,4-1	# number of limbs in first loop
+	beq	$9,$0,.L0	# if multiple of 4 limbs, skip first loop
+	 move	$2,$0
+
+	dsubu	$7,$7,$9
+
+.Loop0:	daddiu	$9,$9,-1
+	ld	$12,8($5)
+	daddu	$11,$11,$2
+	ld	$13,8($6)
+	sltu	$8,$11,$2
+	dsubu	$11,$10,$11
+	sltu	$2,$10,$11
+	sd	$11,0($4)
+	or	$2,$2,$8
+
+	daddiu	$5,$5,8
+	daddiu	$6,$6,8
+	move	$10,$12
+	move	$11,$13
+	bne	$9,$0,.Loop0
+	 daddiu	$4,$4,8
+
+.L0:	beq	$7,$0,.Lend
+	 nop
+
+.Loop:	daddiu	$7,$7,-4
+
+	ld	$12,8($5)
+	daddu	$11,$11,$2
+	ld	$13,8($6)
+	sltu	$8,$11,$2
+	dsubu	$11,$10,$11
+	sltu	$2,$10,$11
+	sd	$11,0($4)
+	or	$2,$2,$8
+
+	ld	$10,16($5)
+	daddu	$13,$13,$2
+	ld	$11,16($6)
+	sltu	$8,$13,$2
+	dsubu	$13,$12,$13
+	sltu	$2,$12,$13
+	sd	$13,8($4)
+	or	$2,$2,$8
+
+	ld	$12,24($5)
+	daddu	$11,$11,$2
+	ld	$13,24($6)
+	sltu	$8,$11,$2
+	dsubu	$11,$10,$11
+	sltu	$2,$10,$11
+	sd	$11,16($4)
+	or	$2,$2,$8
+
+	ld	$10,32($5)
+	daddu	$13,$13,$2
+	ld	$11,32($6)
+	sltu	$8,$13,$2
+	dsubu	$13,$12,$13
+	sltu	$2,$12,$13
+	sd	$13,24($4)
+	or	$2,$2,$8
+
+	daddiu	$5,$5,32
+	daddiu	$6,$6,32
+
+	bne	$7,$0,.Loop
+	 daddiu	$4,$4,32
+
+.Lend:	daddu	$11,$11,$2
+	sltu	$8,$11,$2
+	dsubu	$11,$10,$11
+	sltu	$2,$10,$11
+	sd	$11,0($4)
+	j	$31
+	or	$2,$2,$8
+
+	.end	__mpn_sub_n
diff --git a/mpn/mips3/submul_1.s b/mpn/mips3/submul_1.s
new file mode 100644
index 000000000..a9c9fa251
--- /dev/null
+++ b/mpn/mips3/submul_1.s
@@ -0,0 +1,97 @@
+ # MIPS3 __mpn_submul_1 -- Multiply a limb vector with a single limb and
+ # subtract the product from a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr	$4
+ # s1_ptr	$5
+ # size		$6
+ # s2_limb	$7
+
+	.text
+	.align	4
+	.globl	__mpn_submul_1
+	.ent	__mpn_submul_1
+__mpn_submul_1:
+	.set    noreorder
+	.set    nomacro
+
+ # warm up phase 0
+	ld	$8,0($5)
+
+ # warm up phase 1
+	daddiu	$5,$5,8
+	dmultu	$8,$7
+
+	daddiu	$6,$6,-1
+	beq	$6,$0,$LC0
+	 move	$2,$0		# zero cy2
+
+	daddiu	$6,$6,-1
+	beq	$6,$0,$LC1
+	ld	$8,0($5)	# load new s1 limb as early as possible
+
+Loop:	ld	$10,0($4)
+	mflo	$3
+	mfhi	$9
+	daddiu	$5,$5,8
+	daddu	$3,$3,$2	# add old carry limb to low product limb
+	dmultu	$8,$7
+	ld	$8,0($5)	# load new s1 limb as early as possible
+	daddiu	$6,$6,-1	# decrement loop counter
+	sltu	$2,$3,$2	# carry from previous addition -> $2
+	dsubu	$3,$10,$3
+	sgtu	$10,$3,$10
+	daddu	$2,$2,$10
+	sd	$3,0($4)
+	daddiu	$4,$4,8
+	bne	$6,$0,Loop
+	 daddu	$2,$9,$2	# add high product limb and carry from addition
+
+ # cool down phase 1
+$LC1:	ld	$10,0($4)
+	mflo	$3
+	mfhi	$9
+	daddu	$3,$3,$2
+	sltu	$2,$3,$2
+	dmultu	$8,$7
+	dsubu	$3,$10,$3
+	sgtu	$10,$3,$10
+	daddu	$2,$2,$10
+	sd	$3,0($4)
+	daddiu	$4,$4,8
+	daddu	$2,$9,$2	# add high product limb and carry from addition
+
+ # cool down phase 0
+$LC0:	ld	$10,0($4)
+	mflo	$3
+	mfhi	$9
+	daddu	$3,$3,$2
+	sltu	$2,$3,$2
+	dsubu	$3,$10,$3
+	sgtu	$10,$3,$10
+	daddu	$2,$2,$10
+	sd	$3,0($4)
+	j	$31
+	daddu	$2,$9,$2	# add high product limb and carry from addition
+
+	.end	__mpn_submul_1
diff --git a/mpn/mp_bases.c b/mpn/mp_bases.c
new file mode 100644
index 000000000..bbe39b029
--- /dev/null
+++ b/mpn/mp_bases.c
@@ -0,0 +1,549 @@
+/* __mp_bases -- Structure for conversion between internal binary
+   format and strings in base 2..255.  The fields are explained in
+   gmp-impl.h.
+
+
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if BITS_PER_MP_LIMB == 32
+const struct bases __mp_bases[256] =
+{
+  /*  0 */ {0, 0.0, 0, 0},
+  /*  1 */ {0, 1e38, 0, 0},
+  /*  2 */ {32, 1.00000000, 0x1, 0x0},
+  /*  3 */ {20, 0.63092975, 0xcfd41b91, 0x3b563c24},
+  /*  4 */ {16, 0.50000000, 0x2, 0x0},
+  /*  5 */ {13, 0.43067656, 0x48c27395, 0xc25c2684},
+  /*  6 */ {12, 0.38685281, 0x81bf1000, 0xf91bd1b6},
+  /*  7 */ {11, 0.35620719, 0x75db9c97, 0x1607a2cb},
+  /*  8 */ {10, 0.33333333, 0x3, 0x0},
+  /*  9 */ {10, 0.31546488, 0xcfd41b91, 0x3b563c24},
+  /* 10 */ {9, 0.30103000, 0x3b9aca00, 0x12e0be82},
+  /* 11 */ {9, 0.28906483, 0x8c8b6d2b, 0xd24cde04},
+  /* 12 */ {8, 0.27894295, 0x19a10000, 0x3fa39ab5},
+  /* 13 */ {8, 0.27023815, 0x309f1021, 0x50f8ac5f},
+  /* 14 */ {8, 0.26264954, 0x57f6c100, 0x74843b1e},
+  /* 15 */ {8, 0.25595802, 0x98c29b81, 0xad0326c2},
+  /* 16 */ {8, 0.25000000, 0x4, 0x0},
+  /* 17 */ {7, 0.24465054, 0x18754571, 0x4ef0b6bd},
+  /* 18 */ {7, 0.23981247, 0x247dbc80, 0xc0fc48a1},
+  /* 19 */ {7, 0.23540891, 0x3547667b, 0x33838942},
+  /* 20 */ {7, 0.23137821, 0x4c4b4000, 0xad7f29ab},
+  /* 21 */ {7, 0.22767025, 0x6b5a6e1d, 0x313c3d15},
+  /* 22 */ {7, 0.22424382, 0x94ace180, 0xb8cca9e0},
+  /* 23 */ {7, 0.22106473, 0xcaf18367, 0x42ed6de9},
+  /* 24 */ {6, 0.21810429, 0xb640000, 0x67980e0b},
+  /* 25 */ {6, 0.21533828, 0xe8d4a51, 0x19799812},
+  /* 26 */ {6, 0.21274605, 0x1269ae40, 0xbce85396},
+  /* 27 */ {6, 0.21030992, 0x17179149, 0x62c103a9},
+  /* 28 */ {6, 0.20801460, 0x1cb91000, 0x1d353d43},
+  /* 29 */ {6, 0.20584683, 0x23744899, 0xce1decea},
+  /* 30 */ {6, 0.20379505, 0x2b73a840, 0x790fc511},
+  /* 31 */ {6, 0.20184909, 0x34e63b41, 0x35b865a0},
+  /* 32 */ {6, 0.20000000, 0x5, 0x0},
+  /* 33 */ {6, 0.19823986, 0x4cfa3cc1, 0xa9aed1b3},
+  /* 34 */ {6, 0.19656163, 0x5c13d840, 0x63dfc229},
+  /* 35 */ {6, 0.19495902, 0x6d91b519, 0x2b0fee30},
+  /* 36 */ {6, 0.19342640, 0x81bf1000, 0xf91bd1b6},
+  /* 37 */ {6, 0.19195872, 0x98ede0c9, 0xac89c3a9},
+  /* 38 */ {6, 0.19055141, 0xb3773e40, 0x6d2c32fe},
+  /* 39 */ {6, 0.18920036, 0xd1bbc4d1, 0x387907c9},
+  /* 40 */ {6, 0.18790182, 0xf4240000, 0xc6f7a0b},
+  /* 41 */ {5, 0.18665241, 0x6e7d349, 0x28928154},
+  /* 42 */ {5, 0.18544902, 0x7ca30a0, 0x6e8629d},
+  /* 43 */ {5, 0.18428883, 0x8c32bbb, 0xd373dca0},
+  /* 44 */ {5, 0.18316925, 0x9d46c00, 0xa0b17895},
+  /* 45 */ {5, 0.18208790, 0xaffacfd, 0x746811a5},
+  /* 46 */ {5, 0.18104260, 0xc46bee0, 0x4da6500f},
+  /* 47 */ {5, 0.18003133, 0xdab86ef, 0x2ba23582},
+  /* 48 */ {5, 0.17905223, 0xf300000, 0xdb20a88},
+  /* 49 */ {5, 0.17810359, 0x10d63af1, 0xe68d5ce4},
+  /* 50 */ {5, 0.17718382, 0x12a05f20, 0xb7cdfd9d},
+  /* 51 */ {5, 0.17629143, 0x1490aae3, 0x8e583933},
+  /* 52 */ {5, 0.17542506, 0x16a97400, 0x697cc3ea},
+  /* 53 */ {5, 0.17458343, 0x18ed2825, 0x48a5ca6c},
+  /* 54 */ {5, 0.17376534, 0x1b5e4d60, 0x2b52db16},
+  /* 55 */ {5, 0.17296969, 0x1dff8297, 0x111586a6},
+  /* 56 */ {5, 0.17219543, 0x20d38000, 0xf31d2b36},
+  /* 57 */ {5, 0.17144160, 0x23dd1799, 0xc8d76d19},
+  /* 58 */ {5, 0.17070728, 0x271f35a0, 0xa2cb1eb4},
+  /* 59 */ {5, 0.16999162, 0x2a9ce10b, 0x807c3ec3},
+  /* 60 */ {5, 0.16929381, 0x2e593c00, 0x617ec8bf},
+  /* 61 */ {5, 0.16861310, 0x3257844d, 0x45746cbe},
+  /* 62 */ {5, 0.16794878, 0x369b13e0, 0x2c0aa273},
+  /* 63 */ {5, 0.16730018, 0x3b27613f, 0x14f90805},
+  /* 64 */ {5, 0.16666667, 0x6, 0x0},
+  /* 65 */ {5, 0.16604765, 0x4528a141, 0xd9cf0829},
+  /* 66 */ {5, 0.16544255, 0x4aa51420, 0xb6fc4841},
+  /* 67 */ {5, 0.16485086, 0x50794633, 0x973054cb},
+  /* 68 */ {5, 0.16427205, 0x56a94400, 0x7a1dbe4b},
+  /* 69 */ {5, 0.16370566, 0x5d393975, 0x5f7fcd7f},
+  /* 70 */ {5, 0.16315122, 0x642d7260, 0x47196c84},
+  /* 71 */ {5, 0.16260831, 0x6b8a5ae7, 0x30b43635},
+  /* 72 */ {5, 0.16207652, 0x73548000, 0x1c1fa5f6},
+  /* 73 */ {5, 0.16155547, 0x7b908fe9, 0x930634a},
+  /* 74 */ {5, 0.16104477, 0x84435aa0, 0xef7f4a3c},
+  /* 75 */ {5, 0.16054409, 0x8d71d25b, 0xcf5552d2},
+  /* 76 */ {5, 0.16005307, 0x97210c00, 0xb1a47c8e},
+  /* 77 */ {5, 0.15957142, 0xa1563f9d, 0x9634b43e},
+  /* 78 */ {5, 0.15909881, 0xac16c8e0, 0x7cd3817d},
+  /* 79 */ {5, 0.15863496, 0xb768278f, 0x65536761},
+  /* 80 */ {5, 0.15817959, 0xc3500000, 0x4f8b588e},
+  /* 81 */ {5, 0.15773244, 0xcfd41b91, 0x3b563c24},
+  /* 82 */ {5, 0.15729325, 0xdcfa6920, 0x28928154},
+  /* 83 */ {5, 0.15686177, 0xeac8fd83, 0x1721bfb0},
+  /* 84 */ {5, 0.15643779, 0xf9461400, 0x6e8629d},
+  /* 85 */ {4, 0.15602107, 0x31c84b1, 0x491cc17c},
+  /* 86 */ {4, 0.15561139, 0x342ab10, 0x3a11d83b},
+  /* 87 */ {4, 0.15520856, 0x36a2c21, 0x2be074cd},
+  /* 88 */ {4, 0.15481238, 0x3931000, 0x1e7a02e7},
+  /* 89 */ {4, 0.15442266, 0x3bd5ee1, 0x11d10edd},
+  /* 90 */ {4, 0.15403922, 0x3e92110, 0x5d92c68},
+  /* 91 */ {4, 0.15366189, 0x4165ef1, 0xf50dbfb2},
+  /* 92 */ {4, 0.15329049, 0x4452100, 0xdf9f1316},
+  /* 93 */ {4, 0.15292487, 0x4756fd1, 0xcb52a684},
+  /* 94 */ {4, 0.15256487, 0x4a75410, 0xb8163e97},
+  /* 95 */ {4, 0.15221035, 0x4dad681, 0xa5d8f269},
+  /* 96 */ {4, 0.15186115, 0x5100000, 0x948b0fcd},
+  /* 97 */ {4, 0.15151715, 0x546d981, 0x841e0215},
+  /* 98 */ {4, 0.15117821, 0x57f6c10, 0x74843b1e},
+  /* 99 */ {4, 0.15084420, 0x5b9c0d1, 0x65b11e6e},
+  /* 100 */ {4, 0.15051500, 0x5f5e100, 0x5798ee23},
+  /* 101 */ {4, 0.15019048, 0x633d5f1, 0x4a30b99b},
+  /* 102 */ {4, 0.14987054, 0x673a910, 0x3d6e4d94},
+  /* 103 */ {4, 0.14955506, 0x6b563e1, 0x314825b0},
+  /* 104 */ {4, 0.14924394, 0x6f91000, 0x25b55f2e},
+  /* 105 */ {4, 0.14893706, 0x73eb721, 0x1aadaccb},
+  /* 106 */ {4, 0.14863434, 0x7866310, 0x10294ba2},
+  /* 107 */ {4, 0.14833567, 0x7d01db1, 0x620f8f6},
+  /* 108 */ {4, 0.14804096, 0x81bf100, 0xf91bd1b6},
+  /* 109 */ {4, 0.14775011, 0x869e711, 0xe6d37b2a},
+  /* 110 */ {4, 0.14746305, 0x8ba0a10, 0xd55cff6e},
+  /* 111 */ {4, 0.14717969, 0x90c6441, 0xc4ad2db2},
+  /* 112 */ {4, 0.14689994, 0x9610000, 0xb4b985cf},
+  /* 113 */ {4, 0.14662372, 0x9b7e7c1, 0xa5782bef},
+  /* 114 */ {4, 0.14635096, 0xa112610, 0x96dfdd2a},
+  /* 115 */ {4, 0.14608158, 0xa6cc591, 0x88e7e509},
+  /* 116 */ {4, 0.14581551, 0xacad100, 0x7b8813d3},
+  /* 117 */ {4, 0.14555268, 0xb2b5331, 0x6eb8b595},
+  /* 118 */ {4, 0.14529302, 0xb8e5710, 0x627289db},
+  /* 119 */ {4, 0.14503647, 0xbf3e7a1, 0x56aebc07},
+  /* 120 */ {4, 0.14478295, 0xc5c1000, 0x4b66dc33},
+  /* 121 */ {4, 0.14453241, 0xcc6db61, 0x4094d8a3},
+  /* 122 */ {4, 0.14428479, 0xd345510, 0x3632f7a5},
+  /* 123 */ {4, 0.14404003, 0xda48871, 0x2c3bd1f0},
+  /* 124 */ {4, 0.14379807, 0xe178100, 0x22aa4d5f},
+  /* 125 */ {4, 0.14355885, 0xe8d4a51, 0x19799812},
+  /* 126 */ {4, 0.14332233, 0xf05f010, 0x10a523e5},
+  /* 127 */ {4, 0.14308844, 0xf817e01, 0x828a237},
+  /* 128 */ {4, 0.14285714, 0x7, 0x0},
+  /* 129 */ {4, 0.14262838, 0x10818201, 0xf04ec452},
+  /* 130 */ {4, 0.14240211, 0x11061010, 0xe136444a},
+  /* 131 */ {4, 0.14217828, 0x118db651, 0xd2af9589},
+  /* 132 */ {4, 0.14195685, 0x12188100, 0xc4b42a83},
+  /* 133 */ {4, 0.14173777, 0x12a67c71, 0xb73dccf5},
+  /* 134 */ {4, 0.14152100, 0x1337b510, 0xaa4698c5},
+  /* 135 */ {4, 0.14130649, 0x13cc3761, 0x9dc8f729},
+  /* 136 */ {4, 0.14109421, 0x14641000, 0x91bf9a30},
+  /* 137 */ {4, 0.14088412, 0x14ff4ba1, 0x86257887},
+  /* 138 */ {4, 0.14067617, 0x159df710, 0x7af5c98c},
+  /* 139 */ {4, 0.14047033, 0x16401f31, 0x702c01a0},
+  /* 140 */ {4, 0.14026656, 0x16e5d100, 0x65c3ceb1},
+  /* 141 */ {4, 0.14006482, 0x178f1991, 0x5bb91502},
+  /* 142 */ {4, 0.13986509, 0x183c0610, 0x5207ec23},
+  /* 143 */ {4, 0.13966731, 0x18eca3c1, 0x48ac9c19},
+  /* 144 */ {4, 0.13947147, 0x19a10000, 0x3fa39ab5},
+  /* 145 */ {4, 0.13927753, 0x1a592841, 0x36e98912},
+  /* 146 */ {4, 0.13908545, 0x1b152a10, 0x2e7b3140},
+  /* 147 */ {4, 0.13889521, 0x1bd51311, 0x2655840b},
+  /* 148 */ {4, 0.13870677, 0x1c98f100, 0x1e7596ea},
+  /* 149 */ {4, 0.13852011, 0x1d60d1b1, 0x16d8a20d},
+  /* 150 */ {4, 0.13833519, 0x1e2cc310, 0xf7bfe87},
+  /* 151 */ {4, 0.13815199, 0x1efcd321, 0x85d2492},
+  /* 152 */ {4, 0.13797047, 0x1fd11000, 0x179a9f4},
+  /* 153 */ {4, 0.13779062, 0x20a987e1, 0xf59e80eb},
+  /* 154 */ {4, 0.13761241, 0x21864910, 0xe8b768db},
+  /* 155 */ {4, 0.13743580, 0x226761f1, 0xdc39d6d5},
+  /* 156 */ {4, 0.13726078, 0x234ce100, 0xd021c5d1},
+  /* 157 */ {4, 0.13708732, 0x2436d4d1, 0xc46b5e37},
+  /* 158 */ {4, 0.13691539, 0x25254c10, 0xb912f39c},
+  /* 159 */ {4, 0.13674498, 0x26185581, 0xae150294},
+  /* 160 */ {4, 0.13657605, 0x27100000, 0xa36e2eb1},
+  /* 161 */ {4, 0.13640859, 0x280c5a81, 0x991b4094},
+  /* 162 */ {4, 0.13624257, 0x290d7410, 0x8f19241e},
+  /* 163 */ {4, 0.13607797, 0x2a135bd1, 0x8564e6b7},
+  /* 164 */ {4, 0.13591477, 0x2b1e2100, 0x7bfbb5b4},
+  /* 165 */ {4, 0.13575295, 0x2c2dd2f1, 0x72dadcc8},
+  /* 166 */ {4, 0.13559250, 0x2d428110, 0x69ffc498},
+  /* 167 */ {4, 0.13543338, 0x2e5c3ae1, 0x6167f154},
+  /* 168 */ {4, 0.13527558, 0x2f7b1000, 0x5911016e},
+  /* 169 */ {4, 0.13511908, 0x309f1021, 0x50f8ac5f},
+  /* 170 */ {4, 0.13496386, 0x31c84b10, 0x491cc17c},
+  /* 171 */ {4, 0.13480991, 0x32f6d0b1, 0x417b26d8},
+  /* 172 */ {4, 0.13465720, 0x342ab100, 0x3a11d83b},
+  /* 173 */ {4, 0.13450572, 0x3563fc11, 0x32dee622},
+  /* 174 */ {4, 0.13435545, 0x36a2c210, 0x2be074cd},
+  /* 175 */ {4, 0.13420637, 0x37e71341, 0x2514bb58},
+  /* 176 */ {4, 0.13405847, 0x39310000, 0x1e7a02e7},
+  /* 177 */ {4, 0.13391173, 0x3a8098c1, 0x180ea5d0},
+  /* 178 */ {4, 0.13376614, 0x3bd5ee10, 0x11d10edd},
+  /* 179 */ {4, 0.13362168, 0x3d311091, 0xbbfb88e},
+  /* 180 */ {4, 0.13347832, 0x3e921100, 0x5d92c68},
+  /* 181 */ {4, 0.13333607, 0x3ff90031, 0x1c024c},
+  /* 182 */ {4, 0.13319491, 0x4165ef10, 0xf50dbfb2},
+  /* 183 */ {4, 0.13305481, 0x42d8eea1, 0xea30efa3},
+  /* 184 */ {4, 0.13291577, 0x44521000, 0xdf9f1316},
+  /* 185 */ {4, 0.13277777, 0x45d16461, 0xd555c0c9},
+  /* 186 */ {4, 0.13264079, 0x4756fd10, 0xcb52a684},
+  /* 187 */ {4, 0.13250483, 0x48e2eb71, 0xc193881f},
+  /* 188 */ {4, 0.13236988, 0x4a754100, 0xb8163e97},
+  /* 189 */ {4, 0.13223591, 0x4c0e0f51, 0xaed8b724},
+  /* 190 */ {4, 0.13210292, 0x4dad6810, 0xa5d8f269},
+  /* 191 */ {4, 0.13197089, 0x4f535d01, 0x9d15039d},
+  /* 192 */ {4, 0.13183981, 0x51000000, 0x948b0fcd},
+  /* 193 */ {4, 0.13170967, 0x52b36301, 0x8c394d1d},
+  /* 194 */ {4, 0.13158046, 0x546d9810, 0x841e0215},
+  /* 195 */ {4, 0.13145216, 0x562eb151, 0x7c3784f8},
+  /* 196 */ {4, 0.13132477, 0x57f6c100, 0x74843b1e},
+  /* 197 */ {4, 0.13119827, 0x59c5d971, 0x6d02985d},
+  /* 198 */ {4, 0.13107265, 0x5b9c0d10, 0x65b11e6e},
+  /* 199 */ {4, 0.13094791, 0x5d796e61, 0x5e8e5c64},
+  /* 200 */ {4, 0.13082402, 0x5f5e1000, 0x5798ee23},
+  /* 201 */ {4, 0.13070099, 0x614a04a1, 0x50cf7bde},
+  /* 202 */ {4, 0.13057879, 0x633d5f10, 0x4a30b99b},
+  /* 203 */ {4, 0.13045743, 0x65383231, 0x43bb66bd},
+  /* 204 */ {4, 0.13033688, 0x673a9100, 0x3d6e4d94},
+  /* 205 */ {4, 0.13021715, 0x69448e91, 0x374842ee},
+  /* 206 */ {4, 0.13009822, 0x6b563e10, 0x314825b0},
+  /* 207 */ {4, 0.12998007, 0x6d6fb2c1, 0x2b6cde75},
+  /* 208 */ {4, 0.12986271, 0x6f910000, 0x25b55f2e},
+  /* 209 */ {4, 0.12974613, 0x71ba3941, 0x2020a2c5},
+  /* 210 */ {4, 0.12963031, 0x73eb7210, 0x1aadaccb},
+  /* 211 */ {4, 0.12951524, 0x7624be11, 0x155b891f},
+  /* 212 */ {4, 0.12940092, 0x78663100, 0x10294ba2},
+  /* 213 */ {4, 0.12928734, 0x7aafdeb1, 0xb160fe9},
+  /* 214 */ {4, 0.12917448, 0x7d01db10, 0x620f8f6},
+  /* 215 */ {4, 0.12906235, 0x7f5c3a21, 0x14930ef},
+  /* 216 */ {4, 0.12895094, 0x81bf1000, 0xf91bd1b6},
+  /* 217 */ {4, 0.12884022, 0x842a70e1, 0xefdcb0c7},
+  /* 218 */ {4, 0.12873021, 0x869e7110, 0xe6d37b2a},
+  /* 219 */ {4, 0.12862089, 0x891b24f1, 0xddfeb94a},
+  /* 220 */ {4, 0.12851224, 0x8ba0a100, 0xd55cff6e},
+  /* 221 */ {4, 0.12840428, 0x8e2ef9d1, 0xcceced50},
+  /* 222 */ {4, 0.12829698, 0x90c64410, 0xc4ad2db2},
+  /* 223 */ {4, 0.12819034, 0x93669481, 0xbc9c75f9},
+  /* 224 */ {4, 0.12808435, 0x96100000, 0xb4b985cf},
+  /* 225 */ {4, 0.12797901, 0x98c29b81, 0xad0326c2},
+  /* 226 */ {4, 0.12787431, 0x9b7e7c10, 0xa5782bef},
+  /* 227 */ {4, 0.12777024, 0x9e43b6d1, 0x9e1771a9},
+  /* 228 */ {4, 0.12766680, 0xa1126100, 0x96dfdd2a},
+  /* 229 */ {4, 0.12756398, 0xa3ea8ff1, 0x8fd05c41},
+  /* 230 */ {4, 0.12746176, 0xa6cc5910, 0x88e7e509},
+  /* 231 */ {4, 0.12736016, 0xa9b7d1e1, 0x8225759d},
+  /* 232 */ {4, 0.12725915, 0xacad1000, 0x7b8813d3},
+  /* 233 */ {4, 0.12715874, 0xafac2921, 0x750eccf9},
+  /* 234 */ {4, 0.12705891, 0xb2b53310, 0x6eb8b595},
+  /* 235 */ {4, 0.12695967, 0xb5c843b1, 0x6884e923},
+  /* 236 */ {4, 0.12686100, 0xb8e57100, 0x627289db},
+  /* 237 */ {4, 0.12676290, 0xbc0cd111, 0x5c80c07b},
+  /* 238 */ {4, 0.12666537, 0xbf3e7a10, 0x56aebc07},
+  /* 239 */ {4, 0.12656839, 0xc27a8241, 0x50fbb19b},
+  /* 240 */ {4, 0.12647197, 0xc5c10000, 0x4b66dc33},
+  /* 241 */ {4, 0.12637609, 0xc91209c1, 0x45ef7c7c},
+  /* 242 */ {4, 0.12628075, 0xcc6db610, 0x4094d8a3},
+  /* 243 */ {4, 0.12618595, 0xcfd41b91, 0x3b563c24},
+  /* 244 */ {4, 0.12609168, 0xd3455100, 0x3632f7a5},
+  /* 245 */ {4, 0.12599794, 0xd6c16d31, 0x312a60c3},
+  /* 246 */ {4, 0.12590471, 0xda488710, 0x2c3bd1f0},
+  /* 247 */ {4, 0.12581200, 0xdddab5a1, 0x2766aa45},
+  /* 248 */ {4, 0.12571980, 0xe1781000, 0x22aa4d5f},
+  /* 249 */ {4, 0.12562811, 0xe520ad61, 0x1e06233c},
+  /* 250 */ {4, 0.12553692, 0xe8d4a510, 0x19799812},
+  /* 251 */ {4, 0.12544622, 0xec940e71, 0x15041c33},
+  /* 252 */ {4, 0.12535601, 0xf05f0100, 0x10a523e5},
+  /* 253 */ {4, 0.12526629, 0xf4359451, 0xc5c2749},
+  /* 254 */ {4, 0.12517705, 0xf817e010, 0x828a237},
+  /* 255 */ {4, 0.12508829, 0xfc05fc01, 0x40a1423},
+};
+#endif
+#if BITS_PER_MP_LIMB == 64
+const struct bases __mp_bases[256] =
+{
+  /*  0 */ {0, 0.0, 0, 0},
+  /*  1 */ {0, 1e38, 0, 0},
+  /*  2 */ {64, 1.00000000, 0x1, 0x0},
+  /*  3 */ {40, 0.63092975, 0xa8b8b452291fe821L, 0x846d550e37b5063dL},
+  /*  4 */ {32, 0.50000000, 0x2L, 0x0L},
+  /*  5 */ {27, 0.43067656, 0x6765c793fa10079dL, 0x3ce9a36f23c0fc90L},
+  /*  6 */ {24, 0.38685281, 0x41c21cb8e1000000L, 0xf24f62335024a295L},
+  /*  7 */ {22, 0.35620719, 0x3642798750226111L, 0x2df495ccaa57147bL},
+  /*  8 */ {21, 0.33333333, 0x3L, 0x0L},
+  /*  9 */ {20, 0.31546488, 0xa8b8b452291fe821L, 0x846d550e37b5063dL},
+  /* 10 */ {19, 0.30103000, 0x8ac7230489e80000L, 0xd83c94fb6d2ac34aL},
+  /* 11 */ {18, 0.28906483, 0x4d28cb56c33fa539L, 0xa8adf7ae45e7577bL},
+  /* 12 */ {17, 0.27894295, 0x1eca170c00000000L, 0xa10c2bec5da8f8fL},
+  /* 13 */ {17, 0.27023815, 0x780c7372621bd74dL, 0x10f4becafe412ec3L},
+  /* 14 */ {16, 0.26264954, 0x1e39a5057d810000L, 0xf08480f672b4e86L},
+  /* 15 */ {16, 0.25595802, 0x5b27ac993df97701L, 0x6779c7f90dc42f48L},
+  /* 16 */ {16, 0.25000000, 0x4L, 0x0L},
+  /* 17 */ {15, 0.24465054, 0x27b95e997e21d9f1L, 0x9c71e11bab279323L},
+  /* 18 */ {15, 0.23981247, 0x5da0e1e53c5c8000L, 0x5dfaa697ec6f6a1cL},
+  /* 19 */ {15, 0.23540891, 0xd2ae3299c1c4aedbL, 0x3711783f6be7e9ecL},
+  /* 20 */ {14, 0.23137821, 0x16bcc41e90000000L, 0x6849b86a12b9b01eL},
+  /* 21 */ {14, 0.22767025, 0x2d04b7fdd9c0ef49L, 0x6bf097ba5ca5e239L},
+  /* 22 */ {14, 0.22424382, 0x5658597bcaa24000L, 0x7b8015c8d7af8f08L},
+  /* 23 */ {14, 0.22106473, 0xa0e2073737609371L, 0x975a24b3a3151b38L},
+  /* 24 */ {13, 0.21810429, 0xc29e98000000000L, 0x50bd367972689db1L},
+  /* 25 */ {13, 0.21533828, 0x14adf4b7320334b9L, 0x8c240c4aecb13bb5L},
+  /* 26 */ {13, 0.21274605, 0x226ed36478bfa000L, 0xdbd2e56854e118c9L},
+  /* 27 */ {13, 0.21030992, 0x383d9170b85ff80bL, 0x2351ffcaa9c7c4aeL},
+  /* 28 */ {13, 0.20801460, 0x5a3c23e39c000000L, 0x6b24188ca33b0636L},
+  /* 29 */ {13, 0.20584683, 0x8e65137388122bcdL, 0xcc3dceaf2b8ba99dL},
+  /* 30 */ {13, 0.20379505, 0xdd41bb36d259e000L, 0x2832e835c6c7d6b6L},
+  /* 31 */ {12, 0.20184909, 0xaee5720ee830681L, 0x76b6aa272e1873c5L},
+  /* 32 */ {12, 0.20000000, 0x5L, 0x0L},
+  /* 33 */ {12, 0.19823986, 0x172588ad4f5f0981L, 0x61eaf5d402c7bf4fL},
+  /* 34 */ {12, 0.19656163, 0x211e44f7d02c1000L, 0xeeb658123ffb27ecL},
+  /* 35 */ {12, 0.19495902, 0x2ee56725f06e5c71L, 0x5d5e3762e6fdf509L},
+  /* 36 */ {12, 0.19342640, 0x41c21cb8e1000000L, 0xf24f62335024a295L},
+  /* 37 */ {12, 0.19195872, 0x5b5b57f8a98a5dd1L, 0x66ae7831762efb6fL},
+  /* 38 */ {12, 0.19055141, 0x7dcff8986ea31000L, 0x47388865a00f544L},
+  /* 39 */ {12, 0.18920036, 0xabd4211662a6b2a1L, 0x7d673c33a123b54cL},
+  /* 40 */ {12, 0.18790182, 0xe8d4a51000000000L, 0x19799812dea11197L},
+  /* 41 */ {11, 0.18665241, 0x7a32956ad081b79L, 0xc27e62e0686feaeL},
+  /* 42 */ {11, 0.18544902, 0x9f49aaff0e86800L, 0x9b6e7507064ce7c7L},
+  /* 43 */ {11, 0.18428883, 0xce583bb812d37b3L, 0x3d9ac2bf66cfed94L},
+  /* 44 */ {11, 0.18316925, 0x109b79a654c00000L, 0xed46bc50ce59712aL},
+  /* 45 */ {11, 0.18208790, 0x1543beff214c8b95L, 0x813d97e2c89b8d46L},
+  /* 46 */ {11, 0.18104260, 0x1b149a79459a3800L, 0x2e81751956af8083L},
+  /* 47 */ {11, 0.18003133, 0x224edfb5434a830fL, 0xdd8e0a95e30c0988L},
+  /* 48 */ {11, 0.17905223, 0x2b3fb00000000000L, 0x7ad4dd48a0b5b167L},
+  /* 49 */ {11, 0.17810359, 0x3642798750226111L, 0x2df495ccaa57147bL},
+  /* 50 */ {11, 0.17718382, 0x43c33c1937564800L, 0xe392010175ee5962L},
+  /* 51 */ {11, 0.17629143, 0x54411b2441c3cd8bL, 0x84eaf11b2fe7738eL},
+  /* 52 */ {11, 0.17542506, 0x6851455acd400000L, 0x3a1e3971e008995dL},
+  /* 53 */ {11, 0.17458343, 0x80a23b117c8feb6dL, 0xfd7a462344ffce25L},
+  /* 54 */ {11, 0.17376534, 0x9dff7d32d5dc1800L, 0x9eca40b40ebcef8aL},
+  /* 55 */ {11, 0.17296969, 0xc155af6faeffe6a7L, 0x52fa161a4a48e43dL},
+  /* 56 */ {11, 0.17219543, 0xebb7392e00000000L, 0x1607a2cbacf930c1L},
+  /* 57 */ {10, 0.17144160, 0x50633659656d971L, 0x97a014f8e3be55f1L},
+  /* 58 */ {10, 0.17070728, 0x5fa8624c7fba400L, 0x568df8b76cbf212cL},
+  /* 59 */ {10, 0.16999162, 0x717d9faa73c5679L, 0x20ba7c4b4e6ef492L},
+  /* 60 */ {10, 0.16929381, 0x86430aac6100000L, 0xe81ee46b9ef492f5L},
+  /* 61 */ {10, 0.16861310, 0x9e64d9944b57f29L, 0x9dc0d10d51940416L},
+  /* 62 */ {10, 0.16794878, 0xba5ca5392cb0400L, 0x5fa8ed2f450272a5L},
+  /* 63 */ {10, 0.16730018, 0xdab2ce1d022cd81L, 0x2ba9eb8c5e04e641L},
+  /* 64 */ {10, 0.16666667, 0x6L, 0x0L},
+  /* 65 */ {10, 0.16604765, 0x12aeed5fd3e2d281L, 0xb67759cc00287bf1L},
+  /* 66 */ {10, 0.16544255, 0x15c3da1572d50400L, 0x78621feeb7f4ed33L},
+  /* 67 */ {10, 0.16485086, 0x194c05534f75ee29L, 0x43d55b5f72943bc0L},
+  /* 68 */ {10, 0.16427205, 0x1d56299ada100000L, 0x173decb64d1d4409L},
+  /* 69 */ {10, 0.16370566, 0x21f2a089a4ff4f79L, 0xe29fb54fd6b6074fL},
+  /* 70 */ {10, 0.16315122, 0x2733896c68d9a400L, 0xa1f1f5c210d54e62L},
+  /* 71 */ {10, 0.16260831, 0x2d2cf2c33b533c71L, 0x6aac7f9bfafd57b2L},
+  /* 72 */ {10, 0.16207652, 0x33f506e440000000L, 0x3b563c2478b72ee2L},
+  /* 73 */ {10, 0.16155547, 0x3ba43bec1d062211L, 0x12b536b574e92d1bL},
+  /* 74 */ {10, 0.16104477, 0x4455872d8fd4e400L, 0xdf86c03020404fa5L},
+  /* 75 */ {10, 0.16054409, 0x4e2694539f2f6c59L, 0xa34adf02234eea8eL},
+  /* 76 */ {10, 0.16005307, 0x5938006c18900000L, 0x6f46eb8574eb59ddL},
+  /* 77 */ {10, 0.15957142, 0x65ad9912474aa649L, 0x42459b481df47cecL},
+  /* 78 */ {10, 0.15909881, 0x73ae9ff4241ec400L, 0x1b424b95d80ca505L},
+  /* 79 */ {10, 0.15863496, 0x836612ee9c4ce1e1L, 0xf2c1b982203a0dacL},
+  /* 80 */ {10, 0.15817959, 0x9502f90000000000L, 0xb7cdfd9d7bdbab7dL},
+  /* 81 */ {10, 0.15773244, 0xa8b8b452291fe821L, 0x846d550e37b5063dL},
+  /* 82 */ {10, 0.15729325, 0xbebf59a07dab4400L, 0x57931eeaf85cf64fL},
+  /* 83 */ {10, 0.15686177, 0xd7540d4093bc3109L, 0x305a944507c82f47L},
+  /* 84 */ {10, 0.15643779, 0xf2b96616f1900000L, 0xe007ccc9c22781aL},
+  /* 85 */ {9, 0.15602107, 0x336de62af2bca35L, 0x3e92c42e000eeed4L},
+  /* 86 */ {9, 0.15561139, 0x39235ec33d49600L, 0x1ebe59130db2795eL},
+  /* 87 */ {9, 0.15520856, 0x3f674e539585a17L, 0x268859e90f51b89L},
+  /* 88 */ {9, 0.15481238, 0x4645b6958000000L, 0xd24cde0463108cfaL},
+  /* 89 */ {9, 0.15442266, 0x4dcb74afbc49c19L, 0xa536009f37adc383L},
+  /* 90 */ {9, 0.15403922, 0x56064e1d18d9a00L, 0x7cea06ce1c9ace10L},
+  /* 91 */ {9, 0.15366189, 0x5f04fe2cd8a39fbL, 0x58db032e72e8ba43L},
+  /* 92 */ {9, 0.15329049, 0x68d74421f5c0000L, 0x388cc17cae105447L},
+  /* 93 */ {9, 0.15292487, 0x738df1f6ab4827dL, 0x1b92672857620ce0L},
+  /* 94 */ {9, 0.15256487, 0x7f3afbc9cfb5e00L, 0x18c6a9575c2ade4L},
+  /* 95 */ {9, 0.15221035, 0x8bf187fba88f35fL, 0xd44da7da8e44b24fL},
+  /* 96 */ {9, 0.15186115, 0x99c600000000000L, 0xaa2f78f1b4cc6794L},
+  /* 97 */ {9, 0.15151715, 0xa8ce21eb6531361L, 0x843c067d091ee4ccL},
+  /* 98 */ {9, 0.15117821, 0xb92112c1a0b6200L, 0x62005e1e913356e3L},
+  /* 99 */ {9, 0.15084420, 0xcad7718b8747c43L, 0x4316eed01dedd518L},
+  /* 100 */ {9, 0.15051500, 0xde0b6b3a7640000L, 0x2725dd1d243aba0eL},
+  /* 101 */ {9, 0.15019048, 0xf2d8cf5fe6d74c5L, 0xddd9057c24cb54fL},
+  /* 102 */ {9, 0.14987054, 0x1095d25bfa712600L, 0xedeee175a736d2a1L},
+  /* 103 */ {9, 0.14955506, 0x121b7c4c3698faa7L, 0xc4699f3df8b6b328L},
+  /* 104 */ {9, 0.14924394, 0x13c09e8d68000000L, 0x9ebbe7d859cb5a7cL},
+  /* 105 */ {9, 0.14893706, 0x15876ccb0b709ca9L, 0x7c828b9887eb2179L},
+  /* 106 */ {9, 0.14863434, 0x17723c2976da2a00L, 0x5d652ab99001adcfL},
+  /* 107 */ {9, 0.14833567, 0x198384e9c259048bL, 0x4114f1754e5d7b32L},
+  /* 108 */ {9, 0.14804096, 0x1bbde41dfeec0000L, 0x274b7c902f7e0188L},
+  /* 109 */ {9, 0.14775011, 0x1e241d6e3337910dL, 0xfc9e0fbb32e210cL},
+  /* 110 */ {9, 0.14746305, 0x20b91cee9901ee00L, 0xf4afa3e594f8ea1fL},
+  /* 111 */ {9, 0.14717969, 0x237ff9079863dfefL, 0xcd85c32e9e4437b0L},
+  /* 112 */ {9, 0.14689994, 0x267bf47000000000L, 0xa9bbb147e0dd92a8L},
+  /* 113 */ {9, 0.14662372, 0x29b08039fbeda7f1L, 0x8900447b70e8eb82L},
+  /* 114 */ {9, 0.14635096, 0x2d213df34f65f200L, 0x6b0a92adaad5848aL},
+  /* 115 */ {9, 0.14608158, 0x30d201d957a7c2d3L, 0x4f990ad8740f0ee5L},
+  /* 116 */ {9, 0.14581551, 0x34c6d52160f40000L, 0x3670a9663a8d3610L},
+  /* 117 */ {9, 0.14555268, 0x3903f855d8f4c755L, 0x1f5c44188057be3cL},
+  /* 118 */ {9, 0.14529302, 0x3d8de5c8ec59b600L, 0xa2bea956c4e4977L},
+  /* 119 */ {9, 0.14503647, 0x4269541d1ff01337L, 0xed68b23033c3637eL},
+  /* 120 */ {9, 0.14478295, 0x479b38e478000000L, 0xc99cf624e50549c5L},
+  /* 121 */ {9, 0.14453241, 0x4d28cb56c33fa539L, 0xa8adf7ae45e7577bL},
+  /* 122 */ {9, 0.14428479, 0x5317871fa13aba00L, 0x8a5bc740b1c113e5L},
+  /* 123 */ {9, 0.14404003, 0x596d2f44de9fa71bL, 0x6e6c7efb81cfbb9bL},
+  /* 124 */ {9, 0.14379807, 0x602fd125c47c0000L, 0x54aba5c5cada5f10L},
+  /* 125 */ {9, 0.14355885, 0x6765c793fa10079dL, 0x3ce9a36f23c0fc90L},
+  /* 126 */ {9, 0.14332233, 0x6f15be069b847e00L, 0x26fb43de2c8cd2a8L},
+  /* 127 */ {9, 0.14308844, 0x7746b3e82a77047fL, 0x12b94793db8486a1L},
+  /* 128 */ {9, 0.14285714, 0x7L, 0x0L},
+  /* 129 */ {9, 0.14262838, 0x894953f7ea890481L, 0xdd5deca404c0156dL},
+  /* 130 */ {9, 0.14240211, 0x932abffea4848200L, 0xbd51373330291de0L},
+  /* 131 */ {9, 0.14217828, 0x9dacb687d3d6a163L, 0x9fa4025d66f23085L},
+  /* 132 */ {9, 0.14195685, 0xa8d8102a44840000L, 0x842530ee2db4949dL},
+  /* 133 */ {9, 0.14173777, 0xb4b60f9d140541e5L, 0x6aa7f2766b03dc25L},
+  /* 134 */ {9, 0.14152100, 0xc15065d4856e4600L, 0x53035ba7ebf32e8dL},
+  /* 135 */ {9, 0.14130649, 0xceb1363f396d23c7L, 0x3d12091fc9fb4914L},
+  /* 136 */ {9, 0.14109421, 0xdce31b2488000000L, 0x28b1cb81b1ef1849L},
+  /* 137 */ {9, 0.14088412, 0xebf12a24bca135c9L, 0x15c35be67ae3e2c9L},
+  /* 138 */ {9, 0.14067617, 0xfbe6f8dbf88f4a00L, 0x42a17bd09be1ff0L},
+  /* 139 */ {8, 0.14047033, 0x1ef156c084ce761L, 0x8bf461f03cf0bbfL},
+  /* 140 */ {8, 0.14026656, 0x20c4e3b94a10000L, 0xf3fbb43f68a32d05L},
+  /* 141 */ {8, 0.14006482, 0x22b0695a08ba421L, 0xd84f44c48564dc19L},
+  /* 142 */ {8, 0.13986509, 0x24b4f35d7a4c100L, 0xbe58ebcce7956abeL},
+  /* 143 */ {8, 0.13966731, 0x26d397284975781L, 0xa5fac463c7c134b7L},
+  /* 144 */ {8, 0.13947147, 0x290d74100000000L, 0x8f19241e28c7d757L},
+  /* 145 */ {8, 0.13927753, 0x2b63b3a37866081L, 0x799a6d046c0ae1aeL},
+  /* 146 */ {8, 0.13908545, 0x2dd789f4d894100L, 0x6566e37d746a9e40L},
+  /* 147 */ {8, 0.13889521, 0x306a35e51b58721L, 0x526887dbfb5f788fL},
+  /* 148 */ {8, 0.13870677, 0x331d01712e10000L, 0x408af3382b8efd3dL},
+  /* 149 */ {8, 0.13852011, 0x35f14200a827c61L, 0x2fbb374806ec05f1L},
+  /* 150 */ {8, 0.13833519, 0x38e858b62216100L, 0x1fe7c0f0afce87feL},
+  /* 151 */ {8, 0.13815199, 0x3c03b2c13176a41L, 0x11003d517540d32eL},
+  /* 152 */ {8, 0.13797047, 0x3f44c9b21000000L, 0x2f5810f98eff0dcL},
+  /* 153 */ {8, 0.13779062, 0x42ad23cef3113c1L, 0xeb72e35e7840d910L},
+  /* 154 */ {8, 0.13761241, 0x463e546b19a2100L, 0xd27de19593dc3614L},
+  /* 155 */ {8, 0.13743580, 0x49f9fc3f96684e1L, 0xbaf391fd3e5e6fc2L},
+  /* 156 */ {8, 0.13726078, 0x4de1c9c5dc10000L, 0xa4bd38c55228c81dL},
+  /* 157 */ {8, 0.13708732, 0x51f77994116d2a1L, 0x8fc5a8de8e1de782L},
+  /* 158 */ {8, 0.13691539, 0x563cd6bb3398100L, 0x7bf9265bea9d3a3bL},
+  /* 159 */ {8, 0.13674498, 0x5ab3bb270beeb01L, 0x69454b325983dccdL},
+  /* 160 */ {8, 0.13657605, 0x5f5e10000000000L, 0x5798ee2308c39df9L},
+  /* 161 */ {8, 0.13640859, 0x643dce0ec16f501L, 0x46e40ba0fa66a753L},
+  /* 162 */ {8, 0.13624257, 0x6954fe21e3e8100L, 0x3717b0870b0db3a7L},
+  /* 163 */ {8, 0.13607797, 0x6ea5b9755f440a1L, 0x2825e6775d11cdebL},
+  /* 164 */ {8, 0.13591477, 0x74322a1c0410000L, 0x1a01a1c09d1b4dacL},
+  /* 165 */ {8, 0.13575295, 0x79fc8b6ae8a46e1L, 0xc9eb0a8bebc8f3eL},
+  /* 166 */ {8, 0.13559250, 0x80072a66d512100L, 0xffe357ff59e6a004L},
+  /* 167 */ {8, 0.13543338, 0x86546633b42b9c1L, 0xe7dfd1be05fa61a8L},
+  /* 168 */ {8, 0.13527558, 0x8ce6b0861000000L, 0xd11ed6fc78f760e5L},
+  /* 169 */ {8, 0.13511908, 0x93c08e16a022441L, 0xbb8db609dd29ebfeL},
+  /* 170 */ {8, 0.13496386, 0x9ae49717f026100L, 0xa71aec8d1813d532L},
+  /* 171 */ {8, 0.13480991, 0xa25577ae24c1a61L, 0x93b612a9f20fbc02L},
+  /* 172 */ {8, 0.13465720, 0xaa15f068e610000L, 0x814fc7b19a67d317L},
+  /* 173 */ {8, 0.13450572, 0xb228d6bf7577921L, 0x6fd9a03f2e0a4b7cL},
+  /* 174 */ {8, 0.13435545, 0xba91158ef5c4100L, 0x5f4615a38d0d316eL},
+  /* 175 */ {8, 0.13420637, 0xc351ad9aec0b681L, 0x4f8876863479a286L},
+  /* 176 */ {8, 0.13405847, 0xcc6db6100000000L, 0x4094d8a3041b60ebL},
+  /* 177 */ {8, 0.13391173, 0xd5e85d09025c181L, 0x32600b8ed883a09bL},
+  /* 178 */ {8, 0.13376614, 0xdfc4e816401c100L, 0x24df8c6eb4b6d1f1L},
+  /* 179 */ {8, 0.13362168, 0xea06b4c72947221L, 0x18097a8ee151acefL},
+  /* 180 */ {8, 0.13347832, 0xf4b139365210000L, 0xbd48cc8ec1cd8e3L},
+  /* 181 */ {8, 0.13333607, 0xffc80497d520961L, 0x3807a8d67485fbL},
+  /* 182 */ {8, 0.13319491, 0x10b4ebfca1dee100L, 0xea5768860b62e8d8L},
+  /* 183 */ {8, 0.13305481, 0x117492de921fc141L, 0xd54faf5b635c5005L},
+  /* 184 */ {8, 0.13291577, 0x123bb2ce41000000L, 0xc14a56233a377926L},
+  /* 185 */ {8, 0.13277777, 0x130a8b6157bdecc1L, 0xae39a88db7cd329fL},
+  /* 186 */ {8, 0.13264079, 0x13e15dede0e8a100L, 0x9c10bde69efa7ab6L},
+  /* 187 */ {8, 0.13250483, 0x14c06d941c0ca7e1L, 0x8ac36c42a2836497L},
+  /* 188 */ {8, 0.13236988, 0x15a7ff487a810000L, 0x7a463c8b84f5ef67L},
+  /* 189 */ {8, 0.13223591, 0x169859ddc5c697a1L, 0x6a8e5f5ad090fd4bL},
+  /* 190 */ {8, 0.13210292, 0x1791c60f6fed0100L, 0x5b91a2943596fc56L},
+  /* 191 */ {8, 0.13197089, 0x18948e8c0e6fba01L, 0x4d4667b1c468e8f0L},
+  /* 192 */ {8, 0.13183981, 0x19a1000000000000L, 0x3fa39ab547994dafL},
+  /* 193 */ {8, 0.13170967, 0x1ab769203dafc601L, 0x32a0a9b2faee1e2aL},
+  /* 194 */ {8, 0.13158046, 0x1bd81ab557f30100L, 0x26357ceac0e96962L},
+  /* 195 */ {8, 0.13145216, 0x1d0367a69fed1ba1L, 0x1a5a6f65caa5859eL},
+  /* 196 */ {8, 0.13132477, 0x1e39a5057d810000L, 0xf08480f672b4e86L},
+  /* 197 */ {8, 0.13119827, 0x1f7b2a18f29ac3e1L, 0x4383340615612caL},
+  /* 198 */ {8, 0.13107265, 0x20c850694c2aa100L, 0xf3c77969ee4be5a2L},
+  /* 199 */ {8, 0.13094791, 0x222173cc014980c1L, 0xe00993cc187c5ec9L},
+  /* 200 */ {8, 0.13082402, 0x2386f26fc1000000L, 0xcd2b297d889bc2b6L},
+  /* 201 */ {8, 0.13070099, 0x24f92ce8af296d41L, 0xbb214d5064862b22L},
+  /* 202 */ {8, 0.13057879, 0x2678863cd0ece100L, 0xa9e1a7ca7ea10e20L},
+  /* 203 */ {8, 0.13045743, 0x280563f0a9472d61L, 0x99626e72b39ea0cfL},
+  /* 204 */ {8, 0.13033688, 0x29a02e1406210000L, 0x899a5ba9c13fafd9L},
+  /* 205 */ {8, 0.13021715, 0x2b494f4efe6d2e21L, 0x7a80a705391e96ffL},
+  /* 206 */ {8, 0.13009822, 0x2d0134ef21cbc100L, 0x6c0cfe23de23042aL},
+  /* 207 */ {8, 0.12998007, 0x2ec84ef4da2ef581L, 0x5e377df359c944ddL},
+  /* 208 */ {8, 0.12986271, 0x309f102100000000L, 0x50f8ac5fc8f53985L},
+  /* 209 */ {8, 0.12974613, 0x3285ee02a1420281L, 0x44497266278e35b7L},
+  /* 210 */ {8, 0.12963031, 0x347d6104fc324100L, 0x382316831f7ee175L},
+  /* 211 */ {8, 0.12951524, 0x3685e47dade53d21L, 0x2c7f377833b8946eL},
+  /* 212 */ {8, 0.12940092, 0x389ff6bb15610000L, 0x2157c761ab4163efL},
+  /* 213 */ {8, 0.12928734, 0x3acc1912ebb57661L, 0x16a7071803cc49a9L},
+  /* 214 */ {8, 0.12917448, 0x3d0acff111946100L, 0xc6781d80f8224fcL},
+  /* 215 */ {8, 0.12906235, 0x3f5ca2e692eaf841L, 0x294092d370a900bL},
+  /* 216 */ {8, 0.12895094, 0x41c21cb8e1000000L, 0xf24f62335024a295L},
+  /* 217 */ {8, 0.12884022, 0x443bcb714399a5c1L, 0xe03b98f103fad6d2L},
+  /* 218 */ {8, 0.12873021, 0x46ca406c81af2100L, 0xcee3d32cad2a9049L},
+  /* 219 */ {8, 0.12862089, 0x496e106ac22aaae1L, 0xbe3f9df9277fdadaL},
+  /* 220 */ {8, 0.12851224, 0x4c27d39fa5410000L, 0xae46f0d94c05e933L},
+  /* 221 */ {8, 0.12840428, 0x4ef825c296e43ca1L, 0x9ef2280fb437a33dL},
+  /* 222 */ {8, 0.12829698, 0x51dfa61f5ad88100L, 0x9039ff426d3f284bL},
+  /* 223 */ {8, 0.12819034, 0x54def7a6d2f16901L, 0x82178c6d6b51f8f4L},
+  /* 224 */ {8, 0.12808435, 0x57f6c10000000000L, 0x74843b1ee4c1e053L},
+  /* 225 */ {8, 0.12797901, 0x5b27ac993df97701L, 0x6779c7f90dc42f48L},
+  /* 226 */ {8, 0.12787431, 0x5e7268b9bbdf8100L, 0x5af23c74f9ad9fe9L},
+  /* 227 */ {8, 0.12777024, 0x61d7a7932ff3d6a1L, 0x4ee7eae2acdc617eL},
+  /* 228 */ {8, 0.12766680, 0x65581f53c8c10000L, 0x43556aa2ac262a0bL},
+  /* 229 */ {8, 0.12756398, 0x68f48a385b8320e1L, 0x3835949593b8ddd1L},
+  /* 230 */ {8, 0.12746176, 0x6cada69ed07c2100L, 0x2d837fbe78458762L},
+  /* 231 */ {8, 0.12736016, 0x70843718cdbf27c1L, 0x233a7e150a54a555L},
+  /* 232 */ {8, 0.12725915, 0x7479027ea1000000L, 0x19561984a50ff8feL},
+  /* 233 */ {8, 0.12715874, 0x788cd40268f39641L, 0xfd211159fe3490fL},
+  /* 234 */ {8, 0.12705891, 0x7cc07b437ecf6100L, 0x6aa563e655033e3L},
+  /* 235 */ {8, 0.12695967, 0x8114cc6220762061L, 0xfbb614b3f2d3b14cL},
+  /* 236 */ {8, 0.12686100, 0x858aa0135be10000L, 0xeac0f8837fb05773L},
+  /* 237 */ {8, 0.12676290, 0x8a22d3b53c54c321L, 0xda6e4c10e8615ca5L},
+  /* 238 */ {8, 0.12666537, 0x8ede496339f34100L, 0xcab755a8d01fa67fL},
+  /* 239 */ {8, 0.12656839, 0x93bde80aec3a1481L, 0xbb95a9ae71aa3e0cL},
+  /* 240 */ {8, 0.12647197, 0x98c29b8100000000L, 0xad0326c296b4f529L},
+  /* 241 */ {8, 0.12637609, 0x9ded549671832381L, 0x9ef9f21eed31b7c1L},
+  /* 242 */ {8, 0.12628075, 0xa33f092e0b1ac100L, 0x91747422be14b0b2L},
+  /* 243 */ {8, 0.12618595, 0xa8b8b452291fe821L, 0x846d550e37b5063dL},
+  /* 244 */ {8, 0.12609168, 0xae5b564ac3a10000L, 0x77df79e9a96c06f6L},
+  /* 245 */ {8, 0.12599794, 0xb427f4b3be74c361L, 0x6bc6019636c7d0c2L},
+  /* 246 */ {8, 0.12590471, 0xba1f9a938041e100L, 0x601c4205aebd9e47L},
+  /* 247 */ {8, 0.12581200, 0xc0435871d1110f41L, 0x54ddc59756f05016L},
+  /* 248 */ {8, 0.12571980, 0xc694446f01000000L, 0x4a0648979c838c18L},
+  /* 249 */ {8, 0.12562811, 0xcd137a5b57ac3ec1L, 0x3f91b6e0bb3a053dL},
+  /* 250 */ {8, 0.12553692, 0xd3c21bcecceda100L, 0x357c299a88ea76a5L},
+  /* 251 */ {8, 0.12544622, 0xdaa150410b788de1L, 0x2bc1e517aecc56e3L},
+  /* 252 */ {8, 0.12535601, 0xe1b24521be010000L, 0x225f56ceb3da9f5dL},
+  /* 253 */ {8, 0.12526629, 0xe8f62df12777c1a1L, 0x1951136d53ad63acL},
+  /* 254 */ {8, 0.12517705, 0xf06e445906fc0100L, 0x1093d504b3cd7d93L},
+  /* 255 */ {8, 0.12508829, 0xf81bc845c81bf801L, 0x824794d1ec1814fL},
+};
+#endif
diff --git a/mpn/msdos/asm-syntax.h b/mpn/msdos/asm-syntax.h
new file mode 100755
index 000000000..e6327e074
--- /dev/null
+++ b/mpn/msdos/asm-syntax.h
@@ -0,0 +1,2 @@
+#define ELF_SYNTAX
+#include "x86/syntax.h"
diff --git a/mpn/ns32k/add_n.s b/mpn/ns32k/add_n.s
new file mode 100644
index 000000000..dde2e15b5
--- /dev/null
+++ b/mpn/ns32k/add_n.s
@@ -0,0 +1,46 @@
+# ns32000 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+	.align 1
+.globl ___mpn_add_n
+___mpn_add_n:
+	save	[r3,r4,r5]
+	negd	28(sp),r3
+	movd	r3,r0
+	lshd	2,r0
+	movd	24(sp),r4
+	subd	r0,r4			# r4 -> to end of S2
+	movd	20(sp),r5
+	subd	r0,r5			# r5 -> to end of S1
+	movd	16(sp),r2
+	subd	r0,r2			# r2 -> to end of RES
+	subd	r0,r0			# cy = 0
+
+Loop:	movd	r5[r3:d],r0
+	addcd	r4[r3:d],r0
+	movd	r0,r2[r3:d]
+	acbd	1,r3,Loop
+
+	scsd	r0			# r0 = cy.
+	restore	[r5,r4,r3]
+	ret	0
diff --git a/mpn/ns32k/addmul_1.s b/mpn/ns32k/addmul_1.s
new file mode 100644
index 000000000..205bfe3b3
--- /dev/null
+++ b/mpn/ns32k/addmul_1.s
@@ -0,0 +1,48 @@
+# ns32000 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+	.align 1
+.globl ___mpn_addmul_1
+___mpn_addmul_1:
+	save	[r3,r4,r5,r6,r7]
+	negd	24(sp),r4
+	movd	r4,r0
+	lshd	2,r0
+	movd	20(sp),r5
+	subd	r0,r5			# r5 -> to end of S1
+	movd	16(sp),r6
+	subd	r0,r6			# r6 -> to end of RES
+	subd	r0,r0			# r0 = 0, cy = 0
+	movd	28(sp),r7		# r7 = s2_limb
+
+Loop:	movd	r5[r4:d],r2
+	meid	r7,r2			# r2 = low_prod, r3 = high_prod
+	addcd	r0,r2			# r2 = low_prod + cy_limb
+	movd	r3,r0			# r0 = new cy_limb
+	addcd	0,r0
+	addd	r2,r6[r4:d]
+	acbd	1,r4,Loop
+
+	addcd	0,r0
+	restore	[r7,r6,r5,r4,r3]
+	ret	0
diff --git a/mpn/ns32k/mul_1.s b/mpn/ns32k/mul_1.s
new file mode 100644
index 000000000..64e4abbba
--- /dev/null
+++ b/mpn/ns32k/mul_1.s
@@ -0,0 +1,47 @@
+# ns32000 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+	.align 1
+.globl ___mpn_mul_1
+___mpn_mul_1:
+	save	[r3,r4,r5,r6,r7]
+	negd	24(sp),r4
+	movd	r4,r0
+	lshd	2,r0
+	movd	20(sp),r5
+	subd	r0,r5			# r5 -> to end of S1
+	movd	16(sp),r6
+	subd	r0,r6			# r6 -> to end of RES
+	subd	r0,r0			# r0 = 0, cy = 0
+	movd	28(sp),r7		# r7 = s2_limb
+
+Loop:	movd	r5[r4:d],r2
+	meid	r7,r2			# r2 = low_prod, r3 = high_prod
+	addcd	r0,r2			# r2 = low_prod + cy_limb
+	movd	r3,r0			# r0 = new cy_limb
+	movd	r2,r6[r4:d]
+	acbd	1,r4,Loop
+
+	addcd	0,r0
+	restore	[r7,r6,r5,r4,r3]
+	ret	0
diff --git a/mpn/ns32k/sub_n.s b/mpn/ns32k/sub_n.s
new file mode 100644
index 000000000..ef6c889c5
--- /dev/null
+++ b/mpn/ns32k/sub_n.s
@@ -0,0 +1,46 @@
+# ns32000 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+# store difference in a third limb vector.
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+	.align 1
+.globl ___mpn_sub_n
+___mpn_sub_n:
+	save	[r3,r4,r5]
+	negd	28(sp),r3
+	movd	r3,r0
+	lshd	2,r0
+	movd	24(sp),r4
+	subd	r0,r4			# r4 -> to end of S2
+	movd	20(sp),r5
+	subd	r0,r5			# r5 -> to end of S1
+	movd	16(sp),r2
+	subd	r0,r2			# r2 -> to end of RES
+	subd	r0,r0			# cy = 0
+
+Loop:	movd	r5[r3:d],r0
+	subcd	r4[r3:d],r0
+	movd	r0,r2[r3:d]
+	acbd	1,r3,Loop
+
+	scsd	r0			# r0 = cy.
+	restore	[r5,r4,r3]
+	ret	0
diff --git a/mpn/ns32k/submul_1.s b/mpn/ns32k/submul_1.s
new file mode 100644
index 000000000..509309532
--- /dev/null
+++ b/mpn/ns32k/submul_1.s
@@ -0,0 +1,48 @@
+# ns32000 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+	.align 1
+.globl ___mpn_submul_1
+___mpn_submul_1:
+	save	[r3,r4,r5,r6,r7]
+	negd	24(sp),r4
+	movd	r4,r0
+	lshd	2,r0
+	movd	20(sp),r5
+	subd	r0,r5			# r5 -> to end of S1
+	movd	16(sp),r6
+	subd	r0,r6			# r6 -> to end of RES
+	subd	r0,r0			# r0 = 0, cy = 0
+	movd	28(sp),r7		# r7 = s2_limb
+
+Loop:	movd	r5[r4:d],r2
+	meid	r7,r2			# r2 = low_prod, r3 = high_prod
+	addcd	r0,r2			# r2 = low_prod + cy_limb
+	movd	r3,r0			# r0 = new cy_limb
+	addcd	0,r0
+	subd	r2,r6[r4:d]
+	acbd	1,r4,Loop
+
+	addcd	0,r0
+	restore	[r7,r6,r5,r4,r3]
+	ret	0
diff --git a/mpn/power/add_n.s b/mpn/power/add_n.s
new file mode 100644
index 000000000..9e1c94897
--- /dev/null
+++ b/mpn/power/add_n.s
@@ -0,0 +1,81 @@
+# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s1_ptr	r4
+# s2_ptr	r5
+# size		r6
+
+	.toc
+	.extern __mpn_add_n[DS]
+	.extern .__mpn_add_n
+.csect [PR]
+	.align 2
+	.globl __mpn_add_n
+	.globl .__mpn_add_n
+	.csect __mpn_add_n[DS]
+__mpn_add_n:
+	.long .__mpn_add_n, TOC[tc0], 0
+	.csect [PR]
+.__mpn_add_n:
+	andil.	10,6,1		# odd or even number of limbs?
+	l	8,0(4)		# load least significant s1 limb
+	l	0,0(5)		# load least significant s2 limb
+	cal	3,-4(3)		# offset res_ptr, it's updated before it's used
+	sri	10,6,1		# count for unrolled loop
+	a	7,0,8		# add least significant limbs, set cy
+	mtctr	10		# copy count into CTR
+	beq	0,Leven		# branch if even # of limbs (# of limbs >= 2)
+
+# We have an odd # of limbs.  Add the first limbs separately.
+	cmpi	1,10,0		# is count for unrolled loop zero?
+	bne	1,L1		# branch if not
+	st	7,4(3)
+	aze	3,10		# use the fact that r10 is zero...
+	br			# return
+
+# We added least significant limbs.  Now reload the next limbs to enter loop.
+L1:	lu	8,4(4)		# load s1 limb and update s1_ptr
+	lu	0,4(5)		# load s2 limb and update s2_ptr
+	stu	7,4(3)
+	ae	7,0,8		# add limbs, set cy
+Leven:	lu	9,4(4)		# load s1 limb and update s1_ptr
+	lu	10,4(5)		# load s2 limb and update s2_ptr
+	bdz	Lend		# If done, skip loop
+
+Loop:	lu	8,4(4)		# load s1 limb and update s1_ptr
+	lu	0,4(5)		# load s2 limb and update s2_ptr
+	ae	11,9,10		# add previous limbs with cy, set cy
+	stu	7,4(3)		# 
+	lu	9,4(4)		# load s1 limb and update s1_ptr
+	lu	10,4(5)		# load s2 limb and update s2_ptr
+	ae	7,0,8		# add previous limbs with cy, set cy
+	stu	11,4(3)		# 
+	bdn	Loop		# decrement CTR and loop back
+
+Lend:	ae	11,9,10		# add limbs with cy, set cy
+	st	7,4(3)		# 
+	st	11,8(3)		# 
+	lil	3,0		# load cy into ...
+	aze	3,3		# ... return value register
+	br
diff --git a/mpn/power/addmul_1.s b/mpn/power/addmul_1.s
new file mode 100644
index 000000000..2db69841c
--- /dev/null
+++ b/mpn/power/addmul_1.s
@@ -0,0 +1,123 @@
+# IBM POWER __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s1_ptr	r4
+# size		r5
+# s2_limb	r6
+
+# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
+# obtain that operation, we have to use the 32x32->64 signed multiplication
+# instruction, and add the appropriate compensation to the high limb of the
+# result.  We add the multiplicand if the multiplier has its most significant
+# bit set, and we add the multiplier if the multiplicand has its most
+# significant bit set.  We need to preserve the carry flag between each
+# iteration, so we have to compute the compensation carefully (the natural,
+# srai+and doesn't work).  Since the POWER architecture has a branch unit
+# we can branch in zero cycles, so that's how we perform the additions.
+
+	.toc
+	.csect .__mpn_addmul_1[PR]
+	.align 2
+	.globl __mpn_addmul_1
+	.globl .__mpn_addmul_1
+	.csect __mpn_addmul_1[DS]
+__mpn_addmul_1:
+	.long .__mpn_addmul_1[PR], TOC[tc0], 0
+	.csect .__mpn_addmul_1[PR]
+.__mpn_addmul_1:
+
+	cal	3,-4(3)
+	l	0,0(4)
+	cmpi	0,6,0
+	mtctr	5
+	mul	9,0,6
+	srai	7,0,31
+	and	7,7,6
+	mfmq	8
+	cax	9,9,7
+	l	7,4(3)
+	a	8,8,7		# add res_limb
+	blt	Lneg
+Lpos:	bdz	Lend
+
+Lploop:	lu	0,4(4)
+	stu	8,4(3)
+	cmpi	0,0,0
+	mul	10,0,6
+	mfmq	0
+	ae	8,0,9		# low limb + old_cy_limb + old cy
+	l	7,4(3)
+	aze	10,10		# propagate cy to new cy_limb
+	a	8,8,7		# add res_limb
+	bge	Lp0
+	cax	10,10,6		# adjust high limb for negative limb from s1
+Lp0:	bdz	Lend0
+	lu	0,4(4)
+	stu	8,4(3)
+	cmpi	0,0,0
+	mul	9,0,6
+	mfmq	0
+	ae	8,0,10
+	l	7,4(3)
+	aze	9,9
+	a	8,8,7
+	bge	Lp1
+	cax	9,9,6		# adjust high limb for negative limb from s1
+Lp1:	bdn	Lploop
+
+	b	Lend
+
+Lneg:	cax	9,9,0
+	bdz	Lend
+Lnloop:	lu	0,4(4)
+	stu	8,4(3)
+	cmpi	0,0,0
+	mul	10,0,6
+	mfmq	7
+	ae	8,7,9
+	l	7,4(3)
+	ae	10,10,0		# propagate cy to new cy_limb
+	a	8,8,7		# add res_limb
+	bge	Ln0
+	cax	10,10,6		# adjust high limb for negative limb from s1
+Ln0:	bdz	Lend0
+	lu	0,4(4)
+	stu	8,4(3)
+	cmpi	0,0,0
+	mul	9,0,6
+	mfmq	7
+	ae	8,7,10
+	l	7,4(3)
+	ae	9,9,0		# propagate cy to new cy_limb
+	a	8,8,7		# add res_limb
+	bge	Ln1
+	cax	9,9,6		# adjust high limb for negative limb from s1
+Ln1:	bdn	Lnloop
+	b	Lend
+
+Lend0:	cal	9,0(10)
+Lend:	st	8,4(3)
+	aze	3,9
+	br
diff --git a/mpn/power/lshift.s b/mpn/power/lshift.s
new file mode 100644
index 000000000..38169bf53
--- /dev/null
+++ b/mpn/power/lshift.s
@@ -0,0 +1,59 @@
+# IBM POWER __mpn_lshift -- 
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s_ptr		r4
+# size		r5
+# cnt		r6
+
+	.toc
+	.extern __mpn_lshift[DS]
+	.extern .__mpn_lshift
+.csect [PR]
+	.align 2
+	.globl __mpn_lshift
+	.globl .__mpn_lshift
+	.csect __mpn_lshift[DS]
+__mpn_lshift:
+	.long .__mpn_lshift, TOC[tc0], 0
+	.csect [PR]
+.__mpn_lshift:
+	sli	0,5,2
+	cax	9,3,0
+	cax	4,4,0
+	sfi	8,6,32
+	mtctr	5		# put limb count in CTR loop register
+	lu	0,-4(4)		# read most significant limb
+	sre	3,0,8		# compute carry out limb, and init MQ register
+	bdz	Lend2		# if just one limb, skip loop
+	lu	0,-4(4)		# read 2:nd most significant limb
+	sreq	7,0,8		# compute most significant limb of result
+	bdz	Lend		# if just two limb, skip loop
+Loop:	lu	0,-4(4)		# load next lower limb
+	stu	7,-4(9)		# store previous result during read latency
+	sreq	7,0,8		# compute result limb
+	bdn	Loop		# loop back until CTR is zero
+Lend:	stu	7,-4(9)		# store 2:nd least significant limb
+Lend2:	sle	7,0,6		# compute least significant limb
+	st      7,-4(9)		# store it"				\
+	br
diff --git a/mpn/power/mul_1.s b/mpn/power/mul_1.s
new file mode 100644
index 000000000..a72bce660
--- /dev/null
+++ b/mpn/power/mul_1.s
@@ -0,0 +1,110 @@
+# IBM POWER __mpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s1_ptr	r4
+# size		r5
+# s2_limb	r6
+
+# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
+# obtain that operation, we have to use the 32x32->64 signed multiplication
+# instruction, and add the appropriate compensation to the high limb of the
+# result.  We add the multiplicand if the multiplier has its most significant
+# bit set, and we add the multiplier if the multiplicand has its most
+# significant bit set.  We need to preserve the carry flag between each
+# iteration, so we have to compute the compensation carefully (the natural,
+# srai+and doesn't work).  Since the POWER architecture has a branch unit
+# we can branch in zero cycles, so that's how we perform the additions.
+
+	.toc
+	.csect .__mpn_mul_1[PR]
+	.align 2
+	.globl __mpn_mul_1
+	.globl .__mpn_mul_1
+	.csect __mpn_mul_1[DS]
+__mpn_mul_1:
+	.long .__mpn_mul_1[PR], TOC[tc0], 0
+	.csect .__mpn_mul_1[PR]
+.__mpn_mul_1:
+
+	cal	3,-4(3)
+	l	0,0(4)
+	cmpi	0,6,0
+	mtctr	5
+	mul	9,0,6
+	srai	7,0,31
+	and	7,7,6
+	mfmq	8
+	ai	0,0,0		# reset carry
+	cax	9,9,7
+	blt	Lneg
+Lpos:	bdz	Lend
+Lploop:	lu	0,4(4)
+	stu	8,4(3)
+	cmpi	0,0,0
+	mul	10,0,6
+	mfmq	0
+	ae	8,0,9
+	bge	Lp0
+	cax	10,10,6		# adjust high limb for negative limb from s1
+Lp0:	bdz	Lend0
+	lu	0,4(4)
+	stu	8,4(3)
+	cmpi	0,0,0
+	mul	9,0,6
+	mfmq	0
+	ae	8,0,10
+	bge	Lp1
+	cax	9,9,6		# adjust high limb for negative limb from s1
+Lp1:	bdn	Lploop
+	b	Lend
+
+Lneg:	cax	9,9,0
+	bdz	Lend
+Lnloop:	lu	0,4(4)
+	stu	8,4(3)
+	cmpi	0,0,0
+	mul	10,0,6
+	cax	10,10,0		# adjust high limb for negative s2_limb
+	mfmq	0
+	ae	8,0,9
+	bge	Ln0
+	cax	10,10,6		# adjust high limb for negative limb from s1
+Ln0:	bdz	Lend0
+	lu	0,4(4)
+	stu	8,4(3)
+	cmpi	0,0,0
+	mul	9,0,6
+	cax	9,9,0		# adjust high limb for negative s2_limb
+	mfmq	0
+	ae	8,0,10
+	bge	Ln1
+	cax	9,9,6		# adjust high limb for negative limb from s1
+Ln1:	bdn	Lnloop
+	b	Lend
+
+Lend0:	cal	9,0(10)
+Lend:	st	8,4(3)
+	aze	3,9
+	br
diff --git a/mpn/power/rshift.s b/mpn/power/rshift.s
new file mode 100644
index 000000000..30d408a24
--- /dev/null
+++ b/mpn/power/rshift.s
@@ -0,0 +1,57 @@
+# IBM POWER __mpn_rshift -- 
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s_ptr		r4
+# size		r5
+# cnt		r6
+
+	.toc
+	.extern __mpn_rshift[DS]
+	.extern .__mpn_rshift
+.csect [PR]
+	.align 2
+	.globl __mpn_rshift
+	.globl .__mpn_rshift
+	.csect __mpn_rshift[DS]
+__mpn_rshift:
+	.long .__mpn_rshift, TOC[tc0], 0
+	.csect [PR]
+.__mpn_rshift:
+	sfi	8,6,32
+	mtctr	5		# put limb count in CTR loop register
+	l	0,0(4)		# read least significant limb
+	ai	9,3,-4		# adjust res_ptr since it's offset in the stu:s
+	sle	3,0,8		# compute carry limb, and init MQ register
+	bdz	Lend2		# if just one limb, skip loop
+	lu	0,4(4)		# read 2:nd least significant limb
+	sleq	7,0,8		# compute least significant limb of result
+	bdz	Lend		# if just two limb, skip loop
+Loop:	lu	0,4(4)		# load next higher limb
+	stu	7,4(9)		# store previous result during read latency
+	sleq	7,0,8		# compute result limb
+	bdn	Loop		# loop back until CTR is zero
+Lend:	stu	7,4(9)		# store 2:nd most significant limb
+Lend2:	sre	7,0,6		# compute most significant limb
+	st      7,4(9)		# store it"				\
+	br
diff --git a/mpn/power/sub_n.s b/mpn/power/sub_n.s
new file mode 100644
index 000000000..30d4fee86
--- /dev/null
+++ b/mpn/power/sub_n.s
@@ -0,0 +1,82 @@
+# IBM POWER __mpn_sub_n -- Subtract two limb vectors of equal, non-zero length.
+
+# Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s1_ptr	r4
+# s2_ptr	r5
+# size		r6
+
+	.toc
+	.extern __mpn_sub_n[DS]
+	.extern .__mpn_sub_n
+.csect [PR]
+	.align 2
+	.globl __mpn_sub_n
+	.globl .__mpn_sub_n
+	.csect __mpn_sub_n[DS]
+__mpn_sub_n:
+	.long .__mpn_sub_n, TOC[tc0], 0
+	.csect [PR]
+.__mpn_sub_n:
+	andil.	10,6,1		# odd or even number of limbs?
+	l	8,0(4)		# load least significant s1 limb
+	l	0,0(5)		# load least significant s2 limb
+	cal	3,-4(3)		# offset res_ptr, it's updated before it's used
+	sri	10,6,1		# count for unrolled loop
+	sf	7,0,8		# subtract least significant limbs, set cy
+	mtctr	10		# copy count into CTR
+	beq	0,Leven		# branch if even # of limbs (# of limbs >= 2)
+
+# We have an odd # of limbs.  Add the first limbs separately.
+	cmpi	1,10,0		# is count for unrolled loop zero?
+	bne	1,L1		# branch if not
+	st	7,4(3)
+	sfe	3,0,0		# load !cy into ...
+	sfi	3,3,0		# ... return value register
+	br			# return
+
+# We added least significant limbs.  Now reload the next limbs to enter loop.
+L1:	lu	8,4(4)		# load s1 limb and update s1_ptr
+	lu	0,4(5)		# load s2 limb and update s2_ptr
+	stu	7,4(3)
+	sfe	7,0,8		# subtract limbs, set cy
+Leven:	lu	9,4(4)		# load s1 limb and update s1_ptr
+	lu	10,4(5)		# load s2 limb and update s2_ptr
+	bdz	Lend		# If done, skip loop
+
+Loop:	lu	8,4(4)		# load s1 limb and update s1_ptr
+	lu	0,4(5)		# load s2 limb and update s2_ptr
+	sfe	11,10,9		# subtract previous limbs with cy, set cy
+	stu	7,4(3)		# 
+	lu	9,4(4)		# load s1 limb and update s1_ptr
+	lu	10,4(5)		# load s2 limb and update s2_ptr
+	sfe	7,0,8		# subtract previous limbs with cy, set cy
+	stu	11,4(3)		# 
+	bdn	Loop		# decrement CTR and loop back
+
+Lend:	sfe	11,10,9		# subtract limbs with cy, set cy
+	st	7,4(3)		# 
+	st	11,8(3)		# 
+	sfe	3,0,0		# load !cy into ...
+	sfi	3,3,0		# ... return value register
+	br
diff --git a/mpn/power/submul_1.s b/mpn/power/submul_1.s
new file mode 100644
index 000000000..8e5946fe1
--- /dev/null
+++ b/mpn/power/submul_1.s
@@ -0,0 +1,128 @@
+# IBM POWER __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s1_ptr	r4
+# size		r5
+# s2_limb	r6
+
+# The RS/6000 has no unsigned 32x32->64 bit multiplication instruction.  To
+# obtain that operation, we have to use the 32x32->64 signed multiplication
+# instruction, and add the appropriate compensation to the high limb of the
+# result.  We add the multiplicand if the multiplier has its most significant
+# bit set, and we add the multiplier if the multiplicand has its most
+# significant bit set.  We need to preserve the carry flag between each
+# iteration, so we have to compute the compensation carefully (the natural,
+# srai+and doesn't work).  Since the POWER architecture has a branch unit
+# we can branch in zero cycles, so that's how we perform the additions.
+
+	.toc
+	.csect .__mpn_submul_1[PR]
+	.align 2
+	.globl __mpn_submul_1
+	.globl .__mpn_submul_1
+	.csect __mpn_submul_1[DS]
+__mpn_submul_1:
+	.long .__mpn_submul_1[PR], TOC[tc0], 0
+	.csect .__mpn_submul_1[PR]
+.__mpn_submul_1:
+
+	cal	3,-4(3)
+	l	0,0(4)
+	cmpi	0,6,0
+	mtctr	5
+	mul	9,0,6
+	srai	7,0,31
+	and	7,7,6
+	mfmq	11
+	cax	9,9,7
+	l	7,4(3)
+	sf	8,11,7		# add res_limb
+	a	11,8,11		# invert cy (r11 is junk)
+	blt	Lneg
+Lpos:	bdz	Lend
+
+Lploop:	lu	0,4(4)
+	stu	8,4(3)
+	cmpi	0,0,0
+	mul	10,0,6
+	mfmq	0
+	ae	11,0,9		# low limb + old_cy_limb + old cy
+	l	7,4(3)
+	aze	10,10		# propagate cy to new cy_limb
+	sf	8,11,7		# add res_limb
+	a	11,8,11		# invert cy (r11 is junk)
+	bge	Lp0
+	cax	10,10,6		# adjust high limb for negative limb from s1
+Lp0:	bdz	Lend0
+	lu	0,4(4)
+	stu	8,4(3)
+	cmpi	0,0,0
+	mul	9,0,6
+	mfmq	0
+	ae	11,0,10
+	l	7,4(3)
+	aze	9,9
+	sf	8,11,7
+	a	11,8,11		# invert cy (r11 is junk)
+	bge	Lp1
+	cax	9,9,6		# adjust high limb for negative limb from s1
+Lp1:	bdn	Lploop
+
+	b	Lend
+
+Lneg:	cax	9,9,0
+	bdz	Lend
+Lnloop:	lu	0,4(4)
+	stu	8,4(3)
+	cmpi	0,0,0
+	mul	10,0,6
+	mfmq	7
+	ae	11,7,9
+	l	7,4(3)
+	ae	10,10,0		# propagate cy to new cy_limb
+	sf	8,11,7		# add res_limb
+	a	11,8,11		# invert cy (r11 is junk)
+	bge	Ln0
+	cax	10,10,6		# adjust high limb for negative limb from s1
+Ln0:	bdz	Lend0
+	lu	0,4(4)
+	stu	8,4(3)
+	cmpi	0,0,0
+	mul	9,0,6
+	mfmq	7
+	ae	11,7,10
+	l	7,4(3)
+	ae	9,9,0		# propagate cy to new cy_limb
+	sf	8,11,7		# add res_limb
+	a	11,8,11		# invert cy (r11 is junk)
+	bge	Ln1
+	cax	9,9,6		# adjust high limb for negative limb from s1
+Ln1:	bdn	Lnloop
+	b	Lend
+
+Lend0:	cal	9,0(10)
+Lend:	st	8,4(3)
+	aze	3,9
+	br
diff --git a/mpn/powerpc32/add_n.s b/mpn/powerpc32/add_n.s
new file mode 100644
index 000000000..e2e97ee86
--- /dev/null
+++ b/mpn/powerpc32/add_n.s
@@ -0,0 +1,55 @@
+# PowerPC-32 __mpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s1_ptr	r4
+# s2_ptr	r5
+# size		r6
+
+	.toc
+	.extern __mpn_add_n[DS]
+	.extern .__mpn_add_n
+.csect [PR]
+	.align 2
+	.globl __mpn_add_n
+	.globl .__mpn_add_n
+	.csect __mpn_add_n[DS]
+__mpn_add_n:
+	.long .__mpn_add_n, TOC[tc0], 0
+	.csect [PR]
+.__mpn_add_n:
+	mtctr	6		# copy size into CTR
+	lwz	8,0(4)		# load least significant s1 limb
+	lwz	0,0(5)		# load least significant s2 limb
+	addi	3,3,-4		# offset res_ptr, it's updated before used
+	addc	7,0,8		# add least significant limbs, set cy
+	bdz	Lend		# If done, skip loop
+Loop:	lwzu	8,4(4)		# load s1 limb and update s1_ptr
+	lwzu	0,4(5)		# load s2 limb and update s2_ptr
+	stwu	7,4(3)		# store previous limb in load latency slot
+	adde	7,0,8		# add new limbs with cy, set cy
+	bdn	Loop		# decrement CTR and loop back
+Lend:	stw	7,4(3)		# store ultimate result limb
+	li	3,0		# load cy into ...
+	aze	3,3		# ... return value register
+	blr
diff --git a/mpn/powerpc32/addmul_1.s b/mpn/powerpc32/addmul_1.s
new file mode 100644
index 000000000..71a45e2ca
--- /dev/null
+++ b/mpn/powerpc32/addmul_1.s
@@ -0,0 +1,68 @@
+# PowerPC-32 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1995 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s1_ptr	r4
+# size		r5
+# s2_limb	r6
+
+# This is a fairly straightforward implementation.  The timing of the PC601
+# is hard to understand, so I will wait to optimize this until I have some
+# hardware to play with.
+
+# The code trivially generalizes to 64 bit limbs for the PC620.
+
+	.toc
+	.csect .__mpn_addmul_1[PR]
+	.align 2
+	.globl __mpn_addmul_1
+	.globl .__mpn_addmul_1
+	.csect __mpn_addmul_1[DS]
+__mpn_addmul_1:
+	.long .__mpn_addmul_1[PR], TOC[tc0], 0
+	.csect .__mpn_addmul_1[PR]
+.__mpn_addmul_1:
+	mtctr	5
+
+	lwz	0,0(4)
+	mullw	7,0,6
+	mulhwu	10,0,6
+	lwz	9,0(3)
+	addc	8,7,9
+	addi	3,3,-4
+	bdz	Lend
+
+Loop:	lwzu	0,4(4)
+	stwu	8,4(3)
+	mullw	8,0,6
+	adde	7,8,10
+	mulhwu	10,0,6
+	lwz	9,4(3)
+	addze	10,10
+	addc	8,7,9
+	bdn	Loop
+
+Lend:	stw	8,4(3)
+	addze	3,10
+	blr
diff --git a/mpn/powerpc32/lshift.s b/mpn/powerpc32/lshift.s
new file mode 100644
index 000000000..a93aa75c7
--- /dev/null
+++ b/mpn/powerpc32/lshift.s
@@ -0,0 +1,67 @@
+# PowerPC-32 __mpn_lshift --
+
+# Copyright (C) 1995 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s1_ptr	r4
+# size		r5
+# cnt		r6
+
+	.toc
+.csect	.text[PR]
+	.align	2
+	.globl	__mpn_lshift
+	.globl	.__mpn_lshift
+	.csect	__mpn_lshift[DS]
+__mpn_lshift:
+	.long	.__mpn_lshift,	TOC[tc0],	0
+	.csect	.text[PR]
+.__mpn_lshift:
+	mtctr	5		# copy size into CTR
+	slwi	0,5,2
+	add	7,3,0		# make r7 point at end of res
+	add	4,4,0		# make r4 point at end of s1
+	subfic	8,6,32
+	lwzu	11,-4(4)	# load first s1 limb
+	srw	3,11,8		# compute function return value
+	bdz	Lend1
+
+Loop:	lwzu	10,-4(4)
+	slw	9,11,6
+	srw	12,10,8
+	or	9,9,12
+	stwu	9,-4(7)
+	bdz	Lend2
+	lwzu	11,-4(4)
+	slw	9,10,6
+	srw	12,11,8
+	or	9,9,12
+	stwu	9,-4(7)
+	bdn	Loop
+
+Lend1:	slw	0,11,6
+	stw	0,-4(7)
+	blr
+
+Lend2:	slw	0,10,6
+	stw	0,-4(7)
+	blr
diff --git a/mpn/powerpc32/mul_1.s b/mpn/powerpc32/mul_1.s
new file mode 100644
index 000000000..a72dae774
--- /dev/null
+++ b/mpn/powerpc32/mul_1.s
@@ -0,0 +1,63 @@
+# PowerPC-32 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s1_ptr	r4
+# size		r5
+# s2_limb	r6
+
+# This is a fairly straightforward implementation.  The timing of the PC601
+# is hard to understand, so I will wait to optimize this until I have some
+# hardware to play with.
+
+# The code trivially generalizes to 64 bit limbs for the PC620.
+
+	.toc
+	.csect .__mpn_mul_1[PR]
+	.align 2
+	.globl __mpn_mul_1
+	.globl .__mpn_mul_1
+	.csect __mpn_mul_1[DS]
+__mpn_mul_1:
+	.long .__mpn_mul_1[PR], TOC[tc0], 0
+	.csect .__mpn_mul_1[PR]
+.__mpn_mul_1:
+	mtctr	5
+
+	lwz	0,0(4)
+	mullw	7,0,6
+	mulhwu	10,0,6
+	addic	3,3,-4		# adjust res_ptr and reset carry
+	bdz	Lend
+
+Loop:	lwzu	0,4(4)
+	stwu	7,4(3)
+	mullw	8,0,6
+	adde	7,8,10
+	mulhwu	10,0,6
+	bdn	Loop
+
+Lend:	stw	7,4(3)
+	addze	3,10
+	blr
diff --git a/mpn/powerpc32/rshift.s b/mpn/powerpc32/rshift.s
new file mode 100644
index 000000000..c957fcdf0
--- /dev/null
+++ b/mpn/powerpc32/rshift.s
@@ -0,0 +1,65 @@
+# PowerPC-32 __mpn_rshift --
+
+# Copyright (C) 1995 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s1_ptr	r4
+# size		r5
+# cnt		r6
+
+	.toc
+.csect	.text[PR]
+	.align	2
+	.globl	__mpn_rshift
+	.globl	.__mpn_rshift
+	.csect	__mpn_rshift[DS]
+__mpn_rshift:
+	.long	.__mpn_rshift,	TOC[tc0],	0
+	.csect	.text[PR]
+.__mpn_rshift:
+	mtctr	5		# copy size into CTR
+	addi	7,3,-4		# move adjusted res_ptr to free return reg
+	subfic	8,6,32
+	lwz	11,0(4)		# load first s1 limb
+	slw	3,11,8		# compute function return value
+	bdz	Lend1
+
+Loop:	lwzu	10,4(4)
+	srw	9,11,6
+	slw	12,10,8
+	or	9,9,12
+	stwu	9,4(7)
+	bdz	Lend2
+	lwzu	11,4(4)
+	srw	9,10,6
+	slw	12,11,8
+	or	9,9,12
+	stwu	9,4(7)
+	bdn	Loop
+
+Lend1:	srw	0,11,6
+	stw	0,4(7)
+	blr
+
+Lend2:	srw	0,10,6
+	stw	0,4(7)
+	blr
diff --git a/mpn/powerpc32/sub_n.s b/mpn/powerpc32/sub_n.s
new file mode 100644
index 000000000..c7634d680
--- /dev/null
+++ b/mpn/powerpc32/sub_n.s
@@ -0,0 +1,56 @@
+# PowerPC-32  __mpn_sub_n -- Subtract two limb vectors of the same length > 0
+# and store difference in a third limb vector.
+
+# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s1_ptr	r4
+# s2_ptr	r5
+# size		r6
+
+	.toc
+	.extern __mpn_sub_n[DS]
+	.extern .__mpn_sub_n
+.csect [PR]
+	.align 2
+	.globl __mpn_sub_n
+	.globl .__mpn_sub_n
+	.csect __mpn_sub_n[DS]
+__mpn_sub_n:
+	.long .__mpn_sub_n, TOC[tc0], 0
+	.csect [PR]
+.__mpn_sub_n:
+	mtctr	6		# copy size into CTR
+	lwz	8,0(4)		# load least significant s1 limb
+	lwz	0,0(5)		# load least significant s2 limb
+	addi	3,3,-4		# offset res_ptr, it's updated before used
+	subfc	7,0,8		# add least significant limbs, set cy
+	bdz	Lend		# If done, skip loop
+Loop:	lwzu	8,4(4)		# load s1 limb and update s1_ptr
+	lwzu	0,4(5)		# load s2 limb and update s2_ptr
+	stwu	7,4(3)		# store previous limb in load latency slot
+	subfe	7,0,8		# add new limbs with cy, set cy
+	bdn	Loop		# decrement CTR and loop back
+Lend:	stw	7,4(3)		# store ultimate result limb
+	subfe	3,0,0		# load !cy into ...
+	subfic	3,3,0		# ... return value register
+	blr
diff --git a/mpn/powerpc32/submul_1.s b/mpn/powerpc32/submul_1.s
new file mode 100644
index 000000000..a830ce87a
--- /dev/null
+++ b/mpn/powerpc32/submul_1.s
@@ -0,0 +1,70 @@
+# PowerPC-32 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright (C) 1995 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	r3
+# s1_ptr	r4
+# size		r5
+# s2_limb	r6
+
+# This is a fairly straightforward implementation.  The timing of the PC601
+# is hard to understand, so I will wait to optimize this until I have some
+# hardware to play with.
+
+# The code trivially generalizes to 64 bit limbs for the PC620.
+
+	.toc
+	.csect .__mpn_submul_1[PR]
+	.align 2
+	.globl __mpn_submul_1
+	.globl .__mpn_submul_1
+	.csect __mpn_submul_1[DS]
+__mpn_submul_1:
+	.long .__mpn_submul_1[PR], TOC[tc0], 0
+	.csect .__mpn_submul_1[PR]
+.__mpn_submul_1:
+	mtctr	5
+
+	lwz	0,0(4)
+	mullw	7,0,6
+	mulhwu	10,0,6
+	lwz	9,0(3)
+	subfc	8,7,9
+	addc	7,7,8		# invert cy (r7 is junk)
+	addi	3,3,-4
+	bdz	Lend
+
+Loop:	lwzu	0,4(4)
+	stwu	8,4(3)
+	mullw	8,0,6
+	adde	7,8,10
+	mulhwu	10,0,6
+	lwz	9,4(3)
+	addze	10,10
+	subfc	8,7,9
+	addc	7,7,8		# invert cy (r7 is junk)
+	bdn	Loop
+
+Lend:	stw	8,4(3)
+	addze	3,10
+	blr
diff --git a/mpn/powerpc64/gmp-mparam.h b/mpn/powerpc64/gmp-mparam.h
new file mode 100644
index 000000000..48eb85de6
--- /dev/null
+++ b/mpn/powerpc64/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
diff --git a/mpn/pyr/add_n.s b/mpn/pyr/add_n.s
new file mode 100644
index 000000000..082362e9d
--- /dev/null
+++ b/mpn/pyr/add_n.s
@@ -0,0 +1,54 @@
+.text
+	.align	2
+.globl	___mpn_add_n
+___mpn_add_n:
+	movw	$-1,tr0		# representation for carry clear
+
+	movw	pr3,tr2
+	andw	$3,tr2
+	beq	Lend0
+	subw	tr2,pr3
+
+Loop0:	rsubw	$0,tr0		# restore carry bit from carry-save register
+
+	movw	(pr1),tr1
+	addwc	(pr2),tr1
+	movw	tr1,(pr0)
+
+	subwb	tr0,tr0
+	addw	$4,pr0
+	addw	$4,pr1
+	addw	$4,pr2
+	addw	$-1,tr2
+	bne	Loop0
+
+	mtstw	pr3,pr3
+	beq	Lend
+Lend0:
+Loop:	rsubw	$0,tr0		# restore carry bit from carry-save register
+
+	movw	(pr1),tr1
+	addwc	(pr2),tr1
+	movw	tr1,(pr0)
+
+	movw	4(pr1),tr1
+	addwc	4(pr2),tr1
+	movw	tr1,4(pr0)
+
+	movw	8(pr1),tr1
+	addwc	8(pr2),tr1
+	movw	tr1,8(pr0)
+
+	movw	12(pr1),tr1
+	addwc	12(pr2),tr1
+	movw	tr1,12(pr0)
+
+	subwb	tr0,tr0
+	addw	$16,pr0
+	addw	$16,pr1
+	addw	$16,pr2
+	addw	$-4,pr3
+	bne	Loop
+Lend:
+	mnegw	tr0,pr0
+	ret
diff --git a/mpn/pyr/addmul_1.s b/mpn/pyr/addmul_1.s
new file mode 100644
index 000000000..dc4702541
--- /dev/null
+++ b/mpn/pyr/addmul_1.s
@@ -0,0 +1,23 @@
+.text
+	.align	2
+.globl	___mpn_addmul_1
+___mpn_addmul_1:
+	mova	(pr0)[pr2*4],pr0
+	mova	(pr1)[pr2*4],pr1
+	mnegw	pr2,pr2
+	movw	$0,tr3
+
+Loop:	movw	(pr1)[pr2*4],tr1
+	uemul	pr3,tr0
+	addw	tr3,tr1
+	movw	$0,tr3
+	addwc	tr0,tr3
+	movw	(pr0)[pr2*0x4],tr0
+	addw	tr0,tr1
+	addwc	$0,tr3
+	movw	tr1,(pr0)[pr2*4]
+	addw	$1,pr2
+	bne	Loop
+
+	movw	tr3,pr0
+	ret
diff --git a/mpn/pyr/mul_1.s b/mpn/pyr/mul_1.s
new file mode 100644
index 000000000..ddd919b79
--- /dev/null
+++ b/mpn/pyr/mul_1.s
@@ -0,0 +1,20 @@
+.text
+	.align	2
+.globl	___mpn_mul_1
+___mpn_mul_1:
+	mova	(pr0)[pr2*4],pr0
+	mova	(pr1)[pr2*4],pr1
+	mnegw	pr2,pr2
+	movw	$0,tr3
+
+Loop:	movw	(pr1)[pr2*4],tr1
+	uemul	pr3,tr0
+	addw	tr3,tr1
+	movw	$0,tr3
+	addwc	tr0,tr3
+	movw	tr1,(pr0)[pr2*4]
+	addw	$1,pr2
+	bne	Loop
+
+	movw	tr3,pr0
+	ret
diff --git a/mpn/pyr/sub_n.s b/mpn/pyr/sub_n.s
new file mode 100644
index 000000000..6b4117ff5
--- /dev/null
+++ b/mpn/pyr/sub_n.s
@@ -0,0 +1,54 @@
+.text
+	.align	2
+.globl	___mpn_sub_n
+___mpn_sub_n:
+	movw	$-1,tr0		# representation for carry clear
+
+	movw	pr3,tr2
+	andw	$3,tr2
+	beq	Lend0
+	subw	tr2,pr3
+
+Loop0:	rsubw	$0,tr0		# restore carry bit from carry-save register
+
+	movw	(pr1),tr1
+	subwb	(pr2),tr1
+	movw	tr1,(pr0)
+
+	subwb	tr0,tr0
+	addw	$4,pr0
+	addw	$4,pr1
+	addw	$4,pr2
+	addw	$-1,tr2
+	bne	Loop0
+
+	mtstw	pr3,pr3
+	beq	Lend
+Lend0:
+Loop:	rsubw	$0,tr0		# restore carry bit from carry-save register
+
+	movw	(pr1),tr1
+	subwb	(pr2),tr1
+	movw	tr1,(pr0)
+
+	movw	4(pr1),tr1
+	subwb	4(pr2),tr1
+	movw	tr1,4(pr0)
+
+	movw	8(pr1),tr1
+	subwb	8(pr2),tr1
+	movw	tr1,8(pr0)
+
+	movw	12(pr1),tr1
+	subwb	12(pr2),tr1
+	movw	tr1,12(pr0)
+
+	subwb	tr0,tr0
+	addw	$16,pr0
+	addw	$16,pr1
+	addw	$16,pr2
+	addw	$-4,pr3
+	bne	Loop
+Lend:
+	mnegw	tr0,pr0
+	ret
diff --git a/mpn/sh/add_n.s b/mpn/sh/add_n.s
new file mode 100644
index 000000000..93dad51e4
--- /dev/null
+++ b/mpn/sh/add_n.s
@@ -0,0 +1,47 @@
+! SH __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	r4
+! s1_ptr	r5
+! s2_ptr	r6
+! size		r7
+
+	.text
+	.align 2
+	.global	___mpn_add_n
+___mpn_add_n:
+	mov	#0,r3		! clear cy save reg
+
+Loop:	mov.l	@r5+,r1
+	mov.l	@r6+,r2
+	shlr	r3		! restore cy
+	addc	r2,r1
+	movt	r3		! save cy
+	mov.l	r1,@r4
+	dt	r7
+	bf.s	Loop
+	 add	#4,r4
+
+	rts
+	movt	r0		! return carry-out from most sign. limb
diff --git a/mpn/sh/sh2/addmul_1.s b/mpn/sh/sh2/addmul_1.s
new file mode 100644
index 000000000..19d81da3d
--- /dev/null
+++ b/mpn/sh/sh2/addmul_1.s
@@ -0,0 +1,53 @@
+! SH2 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+! the result to a second limb vector.
+
+! Copyright (C) 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	r4
+! s1_ptr	r5
+! size		r6
+! s2_limb	r7
+
+	.text
+	.align 1
+	.global	___mpn_addmul_1
+___mpn_addmul_1:
+	mov	#0,r2		! cy_limb = 0
+	mov	#0,r0		! Keep r0 = 0 for entire loop
+	clrt
+
+Loop:	mov.l	@r5+,r3
+	dmulu.l	r3,r7
+	sts	macl,r1
+	addc	r2,r1		! lo_prod += old cy_limb
+	sts	mach,r2		! new cy_limb = hi_prod
+	mov.l	@r4,r3
+	addc	r0,r2		! cy_limb += T, T = 0
+	addc	r3,r1
+	addc	r0,r2		! cy_limb += T, T = 0
+	dt	r6
+	mov.l	r1,@r4
+	bf.s	Loop
+	add	#4,r4
+
+	rts
+	mov	r2,r0
diff --git a/mpn/sh/sh2/mul_1.s b/mpn/sh/sh2/mul_1.s
new file mode 100644
index 000000000..7ca275671
--- /dev/null
+++ b/mpn/sh/sh2/mul_1.s
@@ -0,0 +1,50 @@
+! SH2 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright (C) 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	r4
+! s1_ptr	r5
+! size		r6
+! s2_limb	r7
+
+	.text
+	.align 1
+	.global	___mpn_mul_1
+___mpn_mul_1:
+	mov	#0,r2		! cy_limb = 0
+	mov	#0,r0		! Keep r0 = 0 for entire loop
+	clrt
+
+Loop:	mov.l	@r5+,r3
+	dmulu.l	r3,r7
+	sts	macl,r1
+	addc	r2,r1
+	sts	mach,r2
+	addc	r0,r2		! propagate carry to cy_limb (dt clobbers T)
+	dt	r6
+	mov.l	r1,@r4
+	bf.s	Loop
+	add	#4,r4
+
+	rts
+	mov	r2,r0
diff --git a/mpn/sh/sh2/submul_1.s b/mpn/sh/sh2/submul_1.s
new file mode 100644
index 000000000..9ef380ced
--- /dev/null
+++ b/mpn/sh/sh2/submul_1.s
@@ -0,0 +1,53 @@
+! SH2 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+! the result from a second limb vector.
+
+! Copyright (C) 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	r4
+! s1_ptr	r5
+! size		r6
+! s2_limb	r7
+
+	.text
+	.align 1
+	.global	___mpn_submul_1
+___mpn_submul_1:
+	mov	#0,r2		! cy_limb = 0
+	mov	#0,r0		! Keep r0 = 0 for entire loop
+	clrt
+
+Loop:	mov.l	@r5+,r3
+	dmulu.l	r3,r7
+	sts	macl,r1
+	addc	r2,r1		! lo_prod += old cy_limb
+	sts	mach,r2		! new cy_limb = hi_prod
+	mov.l	@r4,r3
+	addc	r0,r2		! cy_limb += T, T = 0
+	subc	r3,r1
+	addc	r0,r2		! cy_limb += T, T = 0
+	dt	r6
+	mov.l	r1,@r4
+	bf.s	Loop
+	add	#4,r4
+
+	rts
+	mov	r2,r0
diff --git a/mpn/sh/sub_n.s b/mpn/sh/sub_n.s
new file mode 100644
index 000000000..6b201f60f
--- /dev/null
+++ b/mpn/sh/sub_n.s
@@ -0,0 +1,47 @@
+! SH __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
+! difference in a third limb vector.
+
+! Copyright (C) 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	r4
+! s1_ptr	r5
+! s2_ptr	r6
+! size		r7
+
+	.text
+	.align 2
+	.global	___mpn_sub_n
+___mpn_sub_n:
+	mov	#0,r3		! clear cy save reg
+
+Loop:	mov.l	@r5+,r1
+	mov.l	@r6+,r2
+	shlr	r3		! restore cy
+	subc	r2,r1
+	movt	r3		! save cy
+	mov.l	r1,@r4
+	dt	r7
+	bf.s	Loop
+	 add	#4,r4
+
+	rts
+	movt	r0		! return carry-out from most sign. limb
diff --git a/mpn/sparc32/README b/mpn/sparc32/README
new file mode 100644
index 000000000..7c19df7bc
--- /dev/null
+++ b/mpn/sparc32/README
@@ -0,0 +1,36 @@
+This directory contains mpn functions for various SPARC chips.  Code that
+runs only on version 8 SPARC implementations, is in the v8 subdirectory.
+
+RELEVANT OPTIMIZATION ISSUES
+
+  Load and Store timing
+
+On most early SPARC implementations, the ST instructions takes multiple
+cycles, while a STD takes just a single cycle more than an ST.  For the CPUs
+in SPARCstation I and II, the times are 3 and 4 cycles, respectively.
+Therefore, combining two ST instrucitons into a STD when possible is a
+significant optimiation.
+
+Later SPARC implementations have single cycle ST.
+
+For SuperSPARC, we can perform just one memory instruction per cycle, even
+if up to two integer instructions can be executed in its pipeline.  For
+programs that perform so many memory operations that there are not enough
+non-memory operations to issue in parallel with all memory operations, using
+LDD and STD when possible helps.
+
+STATUS
+
+1. On a SuperSPARC, mpn_lshift and mpn_rshift run at 3 cycles/limb, or 2.5
+   cycles/limb asymptotically.  We could optimize speed for special counts
+   by using ADDXCC.
+
+2. On a SuperSPARC, mpn_add_n and mpn_sub_n runs at 2.5 cycles/limb, or 2
+   cycles/limb asymptotically.
+
+3. mpn_mul_1 runs at what is believed to be optimal speed.
+
+4. On SuperSPARC, mpn_addmul_1 and mpn_submul_1 could both be improved by a
+   cycle by avoiding one of the add instrucitons.  See a29k/addmul_1.
+
+The speed of the code for other SPARC implementations is uncertain.
diff --git a/mpn/sparc32/add_n.S b/mpn/sparc32/add_n.S
new file mode 100644
index 000000000..9852c256a
--- /dev/null
+++ b/mpn/sparc32/add_n.S
@@ -0,0 +1,226 @@
+! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+#define res_ptr	%o0
+#define s1_ptr	%o1
+#define s2_ptr	%o2
+#define size	%o3
+
+#include "sysdep.h"
+
+	.text
+	.align	4
+	.global	C_SYMBOL_NAME(__mpn_add_n)
+C_SYMBOL_NAME(__mpn_add_n):
+	xor	s2_ptr,res_ptr,%g1
+	andcc	%g1,4,%g0
+	bne	L1			! branch if alignment differs
+	nop
+! **  V1a  **
+L0:	andcc	res_ptr,4,%g0		! res_ptr unaligned? Side effect: cy=0
+	be	L_v1			! if no, branch
+	nop
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+	ld	[s1_ptr],%g4
+	add	s1_ptr,4,s1_ptr
+	ld	[s2_ptr],%g2
+	add	s2_ptr,4,s2_ptr
+	add	size,-1,size
+	addcc	%g4,%g2,%o4
+	st	%o4,[res_ptr]
+	add	res_ptr,4,res_ptr
+L_v1:	addx	%g0,%g0,%o4		! save cy in register
+	cmp	size,2			! if size < 2 ...
+	bl	Lend2			! ... branch to tail code
+	subcc	%g0,%o4,%g0		! restore cy
+
+	ld	[s1_ptr+0],%g4
+	addcc	size,-10,size
+	ld	[s1_ptr+4],%g1
+	ldd	[s2_ptr+0],%g2
+	blt	Lfin1
+	subcc	%g0,%o4,%g0		! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+Loop1:	addxcc	%g4,%g2,%o4
+	ld	[s1_ptr+8],%g4
+	addxcc	%g1,%g3,%o5
+	ld	[s1_ptr+12],%g1
+	ldd	[s2_ptr+8],%g2
+	std	%o4,[res_ptr+0]
+	addxcc	%g4,%g2,%o4
+	ld	[s1_ptr+16],%g4
+	addxcc	%g1,%g3,%o5
+	ld	[s1_ptr+20],%g1
+	ldd	[s2_ptr+16],%g2
+	std	%o4,[res_ptr+8]
+	addxcc	%g4,%g2,%o4
+	ld	[s1_ptr+24],%g4
+	addxcc	%g1,%g3,%o5
+	ld	[s1_ptr+28],%g1
+	ldd	[s2_ptr+24],%g2
+	std	%o4,[res_ptr+16]
+	addxcc	%g4,%g2,%o4
+	ld	[s1_ptr+32],%g4
+	addxcc	%g1,%g3,%o5
+	ld	[s1_ptr+36],%g1
+	ldd	[s2_ptr+32],%g2
+	std	%o4,[res_ptr+24]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-8,size
+	add	s1_ptr,32,s1_ptr
+	add	s2_ptr,32,s2_ptr
+	add	res_ptr,32,res_ptr
+	bge	Loop1
+	subcc	%g0,%o4,%g0		! restore cy
+
+Lfin1:	addcc	size,8-2,size
+	blt	Lend1
+	subcc	%g0,%o4,%g0		! restore cy
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1:	addxcc	%g4,%g2,%o4
+	ld	[s1_ptr+8],%g4
+	addxcc	%g1,%g3,%o5
+	ld	[s1_ptr+12],%g1
+	ldd	[s2_ptr+8],%g2
+	std	%o4,[res_ptr+0]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-2,size
+	add	s1_ptr,8,s1_ptr
+	add	s2_ptr,8,s2_ptr
+	add	res_ptr,8,res_ptr
+	bge	Loope1
+	subcc	%g0,%o4,%g0		! restore cy
+Lend1:	addxcc	%g4,%g2,%o4
+	addxcc	%g1,%g3,%o5
+	std	%o4,[res_ptr+0]
+	addx	%g0,%g0,%o4		! save cy in register
+
+	andcc	size,1,%g0
+	be	Lret1
+	subcc	%g0,%o4,%g0		! restore cy
+/* Add last limb */
+	ld	[s1_ptr+8],%g4
+	ld	[s2_ptr+8],%g2
+	addxcc	%g4,%g2,%o4
+	st	%o4,[res_ptr+8]
+
+Lret1:	retl
+	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
+
+L1:	xor	s1_ptr,res_ptr,%g1
+	andcc	%g1,4,%g0
+	bne	L2
+	nop
+! **  V1b  **
+	mov	s2_ptr,%g1
+	mov	s1_ptr,s2_ptr
+	b	L0
+	mov	%g1,s1_ptr
+
+! **  V2  **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
+   things can be aligned (that we care about) we now know that the alignment
+   of s1_ptr and s2_ptr are the same.  */
+
+L2:	cmp	size,1
+	be	Ljone
+	nop
+	andcc	s1_ptr,4,%g0		! s1_ptr unaligned? Side effect: cy=0
+	be	L_v2			! if no, branch
+	nop
+/* Add least significant limb separately to align s1_ptr and s2_ptr */
+	ld	[s1_ptr],%g4
+	add	s1_ptr,4,s1_ptr
+	ld	[s2_ptr],%g2
+	add	s2_ptr,4,s2_ptr
+	add	size,-1,size
+	addcc	%g4,%g2,%o4
+	st	%o4,[res_ptr]
+	add	res_ptr,4,res_ptr
+
+L_v2:	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-8,size
+	blt	Lfin2
+	subcc	%g0,%o4,%g0		! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+Loop2:	ldd	[s1_ptr+0],%g2
+	ldd	[s2_ptr+0],%o4
+	addxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+0]
+	addxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+4]
+	ldd	[s1_ptr+8],%g2
+	ldd	[s2_ptr+8],%o4
+	addxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+8]
+	addxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+12]
+	ldd	[s1_ptr+16],%g2
+	ldd	[s2_ptr+16],%o4
+	addxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+16]
+	addxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+20]
+	ldd	[s1_ptr+24],%g2
+	ldd	[s2_ptr+24],%o4
+	addxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+24]
+	addxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+28]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-8,size
+	add	s1_ptr,32,s1_ptr
+	add	s2_ptr,32,s2_ptr
+	add	res_ptr,32,res_ptr
+	bge	Loop2
+	subcc	%g0,%o4,%g0		! restore cy
+
+Lfin2:	addcc	size,8-2,size
+	blt	Lend2
+	subcc	%g0,%o4,%g0		! restore cy
+Loope2:	ldd	[s1_ptr+0],%g2
+	ldd	[s2_ptr+0],%o4
+	addxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+0]
+	addxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+4]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-2,size
+	add	s1_ptr,8,s1_ptr
+	add	s2_ptr,8,s2_ptr
+	add	res_ptr,8,res_ptr
+	bge	Loope2
+	subcc	%g0,%o4,%g0		! restore cy
+Lend2:	andcc	size,1,%g0
+	be	Lret2
+	subcc	%g0,%o4,%g0		! restore cy
+/* Add last limb */
+Ljone:	ld	[s1_ptr],%g4
+	ld	[s2_ptr],%g2
+	addxcc	%g4,%g2,%o4
+	st	%o4,[res_ptr]
+
+Lret2:	retl
+	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
diff --git a/mpn/sparc32/addmul_1.S b/mpn/sparc32/addmul_1.S
new file mode 100644
index 000000000..375d25db6
--- /dev/null
+++ b/mpn/sparc32/addmul_1.S
@@ -0,0 +1,147 @@
+! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+! the result to a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+#include "sysdep.h"
+
+.text
+	.align 4
+	.global	C_SYMBOL_NAME(__mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1):
+	! Make S1_PTR and RES_PTR point at the end of their blocks
+	! and put (- 4 x SIZE) in index/loop counter.
+	sll	%o2,2,%o2
+	add	%o0,%o2,%o4	! RES_PTR in o4 since o0 is retval
+	add	%o1,%o2,%o1
+	sub	%g0,%o2,%o2
+
+	cmp	%o3,0xfff
+	bgu	Large
+	nop
+
+	ld	[%o1+%o2],%o5
+	mov	0,%o0
+	b	L0
+	 add	%o4,-4,%o4
+Loop0:
+	addcc	%o5,%g1,%g1
+	ld	[%o1+%o2],%o5
+	addx	%o0,%g0,%o0
+	st	%g1,[%o4+%o2]
+L0:	wr	%g0,%o3,%y
+	sra	%o5,31,%g2
+	and	%o3,%g2,%g2
+	andcc	%g1,0,%g1
+	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,0,%g1
+	sra	%g1,20,%g4
+	sll	%g1,12,%g1
+ 	rd	%y,%g3
+	srl	%g3,20,%g3
+	or	%g1,%g3,%g1
+
+	addcc	%g1,%o0,%g1
+	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
+	addcc	%o2,4,%o2	! loop counter
+	bne	Loop0
+	 ld	[%o4+%o2],%o5
+
+	addcc	%o5,%g1,%g1
+	addx	%o0,%g0,%o0
+	retl
+	st	%g1,[%o4+%o2]
+
+
+Large:	ld	[%o1+%o2],%o5
+	mov	0,%o0
+	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0
+	b	L1
+	 add	%o4,-4,%o4
+Loop:
+	addcc	%o5,%g3,%g3
+	ld	[%o1+%o2],%o5
+	addx	%o0,%g0,%o0
+	st	%g3,[%o4+%o2]
+L1:	wr	%g0,%o5,%y
+	and	%o5,%g4,%g2
+	andcc	%g0,%g0,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%g0,%g1
+	rd	%y,%g3
+	addcc	%g3,%o0,%g3
+	addx	%g2,%g1,%o0
+	addcc	%o2,4,%o2
+	bne	Loop
+	 ld	[%o4+%o2],%o5
+
+	addcc	%o5,%g3,%g3
+	addx	%o0,%g0,%o0
+	retl
+	st	%g3,[%o4+%o2]
diff --git a/mpn/sparc32/lshift.S b/mpn/sparc32/lshift.S
new file mode 100644
index 000000000..4f0595f2f
--- /dev/null
+++ b/mpn/sparc32/lshift.S
@@ -0,0 +1,95 @@
+! sparc __mpn_lshift --
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	%o0
+! src_ptr	%o1
+! size		%o2
+! cnt		%o3
+
+#include "sysdep.h"
+
+	.text
+	.align	4
+	.global	C_SYMBOL_NAME(__mpn_lshift)
+C_SYMBOL_NAME(__mpn_lshift):
+	sll	%o2,2,%g1
+	add	%o1,%g1,%o1	! make %o1 point at end of src
+	ld	[%o1-4],%g2	! load first limb
+	sub	%g0,%o3,%o5	! negate shift count
+	add	%o0,%g1,%o0	! make %o0 point at end of res
+	add	%o2,-1,%o2
+	andcc	%o2,4-1,%g4	! number of limbs in first loop
+	srl	%g2,%o5,%g1	! compute function result
+	be	L0		! if multiple of 4 limbs, skip first loop
+	st	%g1,[%sp+80]
+
+	sub	%o2,%g4,%o2	! adjust count for main loop
+
+Loop0:	ld	[%o1-8],%g3
+	add	%o0,-4,%o0
+	add	%o1,-4,%o1
+	addcc	%g4,-1,%g4
+	sll	%g2,%o3,%o4
+	srl	%g3,%o5,%g1
+	mov	%g3,%g2
+	or	%o4,%g1,%o4
+	bne	Loop0
+	 st	%o4,[%o0+0]
+
+L0:	tst	%o2
+	be	Lend
+	 nop
+
+Loop:	ld	[%o1-8],%g3
+	add	%o0,-16,%o0
+	addcc	%o2,-4,%o2
+	sll	%g2,%o3,%o4
+	srl	%g3,%o5,%g1
+
+	ld	[%o1-12],%g2
+	sll	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	st	%o4,[%o0+12]
+	srl	%g2,%o5,%g1
+
+	ld	[%o1-16],%g3
+	sll	%g2,%o3,%o4
+	or	%g4,%g1,%g4
+	st	%g4,[%o0+8]
+	srl	%g3,%o5,%g1
+
+	ld	[%o1-20],%g2
+	sll	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	st	%o4,[%o0+4]
+	srl	%g2,%o5,%g1
+
+	add	%o1,-16,%o1
+	or	%g4,%g1,%g4
+	bne	Loop
+	 st	%g4,[%o0+0]
+
+Lend:	sll	%g2,%o3,%g2
+	st	%g2,[%o0-4]
+	retl
+	ld	[%sp+80],%o0
diff --git a/mpn/sparc32/mul_1.S b/mpn/sparc32/mul_1.S
new file mode 100644
index 000000000..142fd8ba2
--- /dev/null
+++ b/mpn/sparc32/mul_1.S
@@ -0,0 +1,199 @@
+! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+! ADD CODE FOR SMALL MULTIPLIERS!
+!1:	ld
+!	st
+!
+!2:	ld	,a
+!	addxcc	a,a,x
+!	st	x,
+!
+!3_unrolled:
+!	ld	,a
+!	addxcc	a,a,x1		! 2a + cy
+!	addx	%g0,%g0,x2
+!	addcc	a,x1,x		! 3a + c
+!	st	x,
+!
+!	ld	,a
+!	addxcc	a,a,y1
+!	addx	%g0,%g0,y2
+!	addcc	a,y1,x
+!	st	x,
+!
+!4_unrolled:
+!	ld	,a
+!	srl	a,2,x1		! 4a
+!	addxcc	y2,x1,x
+!	sll	a,30,x2
+!	st	x,
+!
+!	ld	,a
+!	srl	a,2,y1
+!	addxcc	x2,y1,y
+!	sll	a,30,y2
+!	st	x,
+!
+!5_unrolled:
+!	ld	,a
+!	srl	a,2,x1		! 4a
+!	addxcc	a,x1,x		! 5a + c
+!	sll	a,30,x2
+!	addx	%g0,x2,x2
+!	st	x,
+!
+!	ld	,a
+!	srl	a,2,y1
+!	addxcc	a,y1,x
+!	sll	a,30,y2
+!	addx	%g0,y2,y2
+!	st	x,
+!
+!8_unrolled:
+!	ld	,a
+!	srl	a,3,x1		! 8a
+!	addxcc	y2,x1,x
+!	sll	a,29,x2
+!	st	x,
+!
+!	ld	,a
+!	srl	a,3,y1
+!	addxcc	x2,y1,y
+!	sll	a,29,y2
+!	st	x,
+
+#include "sysdep.h"
+
+.text
+	.align 4
+	.global	C_SYMBOL_NAME(__mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1):
+	! Make S1_PTR and RES_PTR point at the end of their blocks
+	! and put (- 4 x SIZE) in index/loop counter.
+	sll	%o2,2,%o2
+	add	%o0,%o2,%o4	! RES_PTR in o4 since o0 is retval
+	add	%o1,%o2,%o1
+	sub	%g0,%o2,%o2
+
+	cmp	%o3,0xfff
+	bgu	Large
+	nop
+
+	ld	[%o1+%o2],%o5
+	mov	0,%o0
+	b	L0
+	 add	%o4,-4,%o4
+Loop0:
+	st	%g1,[%o4+%o2]
+L0:	wr	%g0,%o3,%y
+	sra	%o5,31,%g2
+	and	%o3,%g2,%g2
+	andcc	%g1,0,%g1
+	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,0,%g1
+	sra	%g1,20,%g4
+	sll	%g1,12,%g1
+ 	rd	%y,%g3
+	srl	%g3,20,%g3
+	or	%g1,%g3,%g1
+
+	addcc	%g1,%o0,%g1
+	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
+	addcc	%o2,4,%o2	! loop counter
+	bne,a	Loop0
+	 ld	[%o1+%o2],%o5
+
+	retl
+	st	%g1,[%o4+%o2]
+
+
+Large:	ld	[%o1+%o2],%o5
+	mov	0,%o0
+	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0
+	b	L1
+	 add	%o4,-4,%o4
+Loop:
+	st	%g3,[%o4+%o2]
+L1:	wr	%g0,%o5,%y
+	and	%o5,%g4,%g2	! g2 = S1_LIMB iff S2_LIMB < 0, else 0
+	andcc	%g0,%g0,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%g0,%g1
+	rd	%y,%g3
+	addcc	%g3,%o0,%g3
+	addx	%g2,%g1,%o0	! add sign-compensation and cy to hi limb
+	addcc	%o2,4,%o2	! loop counter
+	bne,a	Loop
+	 ld	[%o1+%o2],%o5
+
+	retl
+	st	%g3,[%o4+%o2]
diff --git a/mpn/sparc32/rshift.S b/mpn/sparc32/rshift.S
new file mode 100644
index 000000000..fea4f3b92
--- /dev/null
+++ b/mpn/sparc32/rshift.S
@@ -0,0 +1,92 @@
+! sparc __mpn_rshift --
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	%o0
+! src_ptr	%o1
+! size		%o2
+! cnt		%o3
+
+#include "sysdep.h"
+
+	.text
+	.align	4
+	.global	C_SYMBOL_NAME(__mpn_rshift)
+C_SYMBOL_NAME(__mpn_rshift):
+	ld	[%o1],%g2	! load first limb
+	sub	%g0,%o3,%o5	! negate shift count
+	add	%o2,-1,%o2
+	andcc	%o2,4-1,%g4	! number of limbs in first loop
+	sll	%g2,%o5,%g1	! compute function result
+	be	L0		! if multiple of 4 limbs, skip first loop
+	st	%g1,[%sp+80]
+
+	sub	%o2,%g4,%o2	! adjust count for main loop
+
+Loop0:	ld	[%o1+4],%g3
+	add	%o0,4,%o0
+	add	%o1,4,%o1
+	addcc	%g4,-1,%g4
+	srl	%g2,%o3,%o4
+	sll	%g3,%o5,%g1
+	mov	%g3,%g2
+	or	%o4,%g1,%o4
+	bne	Loop0
+	 st	%o4,[%o0-4]
+
+L0:	tst	%o2
+	be	Lend
+	 nop
+
+Loop:	ld	[%o1+4],%g3
+	add	%o0,16,%o0
+	addcc	%o2,-4,%o2
+	srl	%g2,%o3,%o4
+	sll	%g3,%o5,%g1
+
+	ld	[%o1+8],%g2
+	srl	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	st	%o4,[%o0-16]
+	sll	%g2,%o5,%g1
+
+	ld	[%o1+12],%g3
+	srl	%g2,%o3,%o4
+	or	%g4,%g1,%g4
+	st	%g4,[%o0-12]
+	sll	%g3,%o5,%g1
+
+	ld	[%o1+16],%g2
+	srl	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	st	%o4,[%o0-8]
+	sll	%g2,%o5,%g1
+
+	add	%o1,16,%o1
+	or	%g4,%g1,%g4
+	bne	Loop
+	 st	%g4,[%o0-4]
+
+Lend:	srl	%g2,%o3,%g2
+	st	%g2,[%o0-0]
+	retl
+	ld	[%sp+80],%o0
diff --git a/mpn/sparc32/sub_n.S b/mpn/sparc32/sub_n.S
new file mode 100644
index 000000000..b7a11958e
--- /dev/null
+++ b/mpn/sparc32/sub_n.S
@@ -0,0 +1,311 @@
+! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+#define res_ptr	%o0
+#define s1_ptr	%o1
+#define s2_ptr	%o2
+#define size	%o3
+
+#include "sysdep.h"
+
+	.text
+	.align	4
+	.global	C_SYMBOL_NAME(__mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n):
+	xor	s2_ptr,res_ptr,%g1
+	andcc	%g1,4,%g0
+	bne	L1			! branch if alignment differs
+	nop
+! **  V1a  **
+	andcc	res_ptr,4,%g0		! res_ptr unaligned? Side effect: cy=0
+	be	L_v1			! if no, branch
+	nop
+/* Add least significant limb separately to align res_ptr and s2_ptr */
+	ld	[s1_ptr],%g4
+	add	s1_ptr,4,s1_ptr
+	ld	[s2_ptr],%g2
+	add	s2_ptr,4,s2_ptr
+	add	size,-1,size
+	subcc	%g4,%g2,%o4
+	st	%o4,[res_ptr]
+	add	res_ptr,4,res_ptr
+L_v1:	addx	%g0,%g0,%o4		! save cy in register
+	cmp	size,2			! if size < 2 ...
+	bl	Lend2			! ... branch to tail code
+	subcc	%g0,%o4,%g0		! restore cy
+
+	ld	[s1_ptr+0],%g4
+	addcc	size,-10,size
+	ld	[s1_ptr+4],%g1
+	ldd	[s2_ptr+0],%g2
+	blt	Lfin1
+	subcc	%g0,%o4,%g0		! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+Loop1:	subxcc	%g4,%g2,%o4
+	ld	[s1_ptr+8],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[s1_ptr+12],%g1
+	ldd	[s2_ptr+8],%g2
+	std	%o4,[res_ptr+0]
+	subxcc	%g4,%g2,%o4
+	ld	[s1_ptr+16],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[s1_ptr+20],%g1
+	ldd	[s2_ptr+16],%g2
+	std	%o4,[res_ptr+8]
+	subxcc	%g4,%g2,%o4
+	ld	[s1_ptr+24],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[s1_ptr+28],%g1
+	ldd	[s2_ptr+24],%g2
+	std	%o4,[res_ptr+16]
+	subxcc	%g4,%g2,%o4
+	ld	[s1_ptr+32],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[s1_ptr+36],%g1
+	ldd	[s2_ptr+32],%g2
+	std	%o4,[res_ptr+24]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-8,size
+	add	s1_ptr,32,s1_ptr
+	add	s2_ptr,32,s2_ptr
+	add	res_ptr,32,res_ptr
+	bge	Loop1
+	subcc	%g0,%o4,%g0		! restore cy
+
+Lfin1:	addcc	size,8-2,size
+	blt	Lend1
+	subcc	%g0,%o4,%g0		! restore cy
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1:	subxcc	%g4,%g2,%o4
+	ld	[s1_ptr+8],%g4
+	subxcc	%g1,%g3,%o5
+	ld	[s1_ptr+12],%g1
+	ldd	[s2_ptr+8],%g2
+	std	%o4,[res_ptr+0]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-2,size
+	add	s1_ptr,8,s1_ptr
+	add	s2_ptr,8,s2_ptr
+	add	res_ptr,8,res_ptr
+	bge	Loope1
+	subcc	%g0,%o4,%g0		! restore cy
+Lend1:	subxcc	%g4,%g2,%o4
+	subxcc	%g1,%g3,%o5
+	std	%o4,[res_ptr+0]
+	addx	%g0,%g0,%o4		! save cy in register
+
+	andcc	size,1,%g0
+	be	Lret1
+	subcc	%g0,%o4,%g0		! restore cy
+/* Add last limb */
+	ld	[s1_ptr+8],%g4
+	ld	[s2_ptr+8],%g2
+	subxcc	%g4,%g2,%o4
+	st	%o4,[res_ptr+8]
+
+Lret1:	retl
+	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
+
+L1:	xor	s1_ptr,res_ptr,%g1
+	andcc	%g1,4,%g0
+	bne	L2
+	nop
+! **  V1b  **
+	andcc	res_ptr,4,%g0		! res_ptr unaligned? Side effect: cy=0
+	be	L_v1b			! if no, branch
+	nop
+/* Add least significant limb separately to align res_ptr and s1_ptr */
+	ld	[s2_ptr],%g4
+	add	s2_ptr,4,s2_ptr
+	ld	[s1_ptr],%g2
+	add	s1_ptr,4,s1_ptr
+	add	size,-1,size
+	subcc	%g2,%g4,%o4
+	st	%o4,[res_ptr]
+	add	res_ptr,4,res_ptr
+L_v1b:	addx	%g0,%g0,%o4		! save cy in register
+	cmp	size,2			! if size < 2 ...
+	bl	Lend2			! ... branch to tail code
+	subcc	%g0,%o4,%g0		! restore cy
+
+	ld	[s2_ptr+0],%g4
+	addcc	size,-10,size
+	ld	[s2_ptr+4],%g1
+	ldd	[s1_ptr+0],%g2
+	blt	Lfin1b
+	subcc	%g0,%o4,%g0		! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+Loop1b:	subxcc	%g2,%g4,%o4
+	ld	[s2_ptr+8],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[s2_ptr+12],%g1
+	ldd	[s1_ptr+8],%g2
+	std	%o4,[res_ptr+0]
+	subxcc	%g2,%g4,%o4
+	ld	[s2_ptr+16],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[s2_ptr+20],%g1
+	ldd	[s1_ptr+16],%g2
+	std	%o4,[res_ptr+8]
+	subxcc	%g2,%g4,%o4
+	ld	[s2_ptr+24],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[s2_ptr+28],%g1
+	ldd	[s1_ptr+24],%g2
+	std	%o4,[res_ptr+16]
+	subxcc	%g2,%g4,%o4
+	ld	[s2_ptr+32],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[s2_ptr+36],%g1
+	ldd	[s1_ptr+32],%g2
+	std	%o4,[res_ptr+24]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-8,size
+	add	s1_ptr,32,s1_ptr
+	add	s2_ptr,32,s2_ptr
+	add	res_ptr,32,res_ptr
+	bge	Loop1b
+	subcc	%g0,%o4,%g0		! restore cy
+
+Lfin1b:	addcc	size,8-2,size
+	blt	Lend1b
+	subcc	%g0,%o4,%g0		! restore cy
+/* Add blocks of 2 limbs until less than 2 limbs remain */
+Loope1b:subxcc	%g2,%g4,%o4
+	ld	[s2_ptr+8],%g4
+	subxcc	%g3,%g1,%o5
+	ld	[s2_ptr+12],%g1
+	ldd	[s1_ptr+8],%g2
+	std	%o4,[res_ptr+0]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-2,size
+	add	s1_ptr,8,s1_ptr
+	add	s2_ptr,8,s2_ptr
+	add	res_ptr,8,res_ptr
+	bge	Loope1b
+	subcc	%g0,%o4,%g0		! restore cy
+Lend1b:	subxcc	%g2,%g4,%o4
+	subxcc	%g3,%g1,%o5
+	std	%o4,[res_ptr+0]
+	addx	%g0,%g0,%o4		! save cy in register
+
+	andcc	size,1,%g0
+	be	Lret1b
+	subcc	%g0,%o4,%g0		! restore cy
+/* Add last limb */
+	ld	[s2_ptr+8],%g4
+	ld	[s1_ptr+8],%g2
+	subxcc	%g2,%g4,%o4
+	st	%o4,[res_ptr+8]
+
+Lret1b:	retl
+	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
+
+! **  V2  **
+/* If we come here, the alignment of s1_ptr and res_ptr as well as the
+   alignment of s2_ptr and res_ptr differ.  Since there are only two ways
+   things can be aligned (that we care about) we now know that the alignment
+   of s1_ptr and s2_ptr are the same.  */
+
+L2:	cmp	size,1
+	be	Ljone
+	nop
+	andcc	s1_ptr,4,%g0		! s1_ptr unaligned? Side effect: cy=0
+	be	L_v2			! if no, branch
+	nop
+/* Add least significant limb separately to align s1_ptr and s2_ptr */
+	ld	[s1_ptr],%g4
+	add	s1_ptr,4,s1_ptr
+	ld	[s2_ptr],%g2
+	add	s2_ptr,4,s2_ptr
+	add	size,-1,size
+	subcc	%g4,%g2,%o4
+	st	%o4,[res_ptr]
+	add	res_ptr,4,res_ptr
+
+L_v2:	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-8,size
+	blt	Lfin2
+	subcc	%g0,%o4,%g0		! restore cy
+/* Add blocks of 8 limbs until less than 8 limbs remain */
+Loop2:	ldd	[s1_ptr+0],%g2
+	ldd	[s2_ptr+0],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+0]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+4]
+	ldd	[s1_ptr+8],%g2
+	ldd	[s2_ptr+8],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+8]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+12]
+	ldd	[s1_ptr+16],%g2
+	ldd	[s2_ptr+16],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+16]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+20]
+	ldd	[s1_ptr+24],%g2
+	ldd	[s2_ptr+24],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+24]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+28]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-8,size
+	add	s1_ptr,32,s1_ptr
+	add	s2_ptr,32,s2_ptr
+	add	res_ptr,32,res_ptr
+	bge	Loop2
+	subcc	%g0,%o4,%g0		! restore cy
+
+Lfin2:	addcc	size,8-2,size
+	blt	Lend2
+	subcc	%g0,%o4,%g0		! restore cy
+Loope2:	ldd	[s1_ptr+0],%g2
+	ldd	[s2_ptr+0],%o4
+	subxcc	%g2,%o4,%g2
+	st	%g2,[res_ptr+0]
+	subxcc	%g3,%o5,%g3
+	st	%g3,[res_ptr+4]
+	addx	%g0,%g0,%o4		! save cy in register
+	addcc	size,-2,size
+	add	s1_ptr,8,s1_ptr
+	add	s2_ptr,8,s2_ptr
+	add	res_ptr,8,res_ptr
+	bge	Loope2
+	subcc	%g0,%o4,%g0		! restore cy
+Lend2:	andcc	size,1,%g0
+	be	Lret2
+	subcc	%g0,%o4,%g0		! restore cy
+/* Add last limb */
+Ljone:	ld	[s1_ptr],%g4
+	ld	[s2_ptr],%g2
+	subxcc	%g4,%g2,%o4
+	st	%o4,[res_ptr]
+
+Lret2:	retl
+	addx	%g0,%g0,%o0	! return carry-out from most sign. limb
diff --git a/mpn/sparc32/submul_1.S b/mpn/sparc32/submul_1.S
new file mode 100644
index 000000000..a8ebd501a
--- /dev/null
+++ b/mpn/sparc32/submul_1.S
@@ -0,0 +1,147 @@
+! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+! the result from a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+#include "sysdep.h"
+
+.text
+	.align 4
+	.global	C_SYMBOL_NAME(__mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1):
+	! Make S1_PTR and RES_PTR point at the end of their blocks
+	! and put (- 4 x SIZE) in index/loop counter.
+	sll	%o2,2,%o2
+	add	%o0,%o2,%o4	! RES_PTR in o4 since o0 is retval
+	add	%o1,%o2,%o1
+	sub	%g0,%o2,%o2
+
+	cmp	%o3,0xfff
+	bgu	Large
+	nop
+
+	ld	[%o1+%o2],%o5
+	mov	0,%o0
+	b	L0
+	 add	%o4,-4,%o4
+Loop0:
+	subcc	%o5,%g1,%g1
+	ld	[%o1+%o2],%o5
+	addx	%o0,%g0,%o0
+	st	%g1,[%o4+%o2]
+L0:	wr	%g0,%o3,%y
+	sra	%o5,31,%g2
+	and	%o3,%g2,%g2
+	andcc	%g1,0,%g1
+	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+ 	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,%o5,%g1
+	mulscc	%g1,0,%g1
+	sra	%g1,20,%g4
+	sll	%g1,12,%g1
+ 	rd	%y,%g3
+	srl	%g3,20,%g3
+	or	%g1,%g3,%g1
+
+	addcc	%g1,%o0,%g1
+	addx	%g2,%g4,%o0	! add sign-compensation and cy to hi limb
+	addcc	%o2,4,%o2	! loop counter
+	bne	Loop0
+	 ld	[%o4+%o2],%o5
+
+	subcc	%o5,%g1,%g1
+	addx	%o0,%g0,%o0
+	retl
+	st	%g1,[%o4+%o2]
+
+
+Large:	ld	[%o1+%o2],%o5
+	mov	0,%o0
+	sra	%o3,31,%g4	! g4 = mask of ones iff S2_LIMB < 0
+	b	L1
+	 add	%o4,-4,%o4
+Loop:
+	subcc	%o5,%g3,%g3
+	ld	[%o1+%o2],%o5
+	addx	%o0,%g0,%o0
+	st	%g3,[%o4+%o2]
+L1:	wr	%g0,%o5,%y
+	and	%o5,%g4,%g2
+	andcc	%g0,%g0,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%g0,%g1
+	rd	%y,%g3
+	addcc	%g3,%o0,%g3
+	addx	%g2,%g1,%o0
+	addcc	%o2,4,%o2
+	bne	Loop
+	 ld	[%o4+%o2],%o5
+
+	subcc	%o5,%g3,%g3
+	addx	%o0,%g0,%o0
+	retl
+	st	%g3,[%o4+%o2]
diff --git a/mpn/sparc32/udiv_fp.S b/mpn/sparc32/udiv_fp.S
new file mode 100644
index 000000000..d11227dff
--- /dev/null
+++ b/mpn/sparc32/udiv_fp.S
@@ -0,0 +1,145 @@
+! SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+! This is for v7 CPUs with a floating-point unit.
+
+! Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr	i0
+! n1		i1
+! n0		i2
+! d		i3
+
+#include "sysdep.h"
+#undef ret	/* Kludge for glibc */
+
+	.text
+	.align	8
+LC0:	.double	0r4294967296
+LC1:	.double	0r2147483648
+
+	.align	4
+	.global	C_SYMBOL_NAME(__udiv_qrnnd)
+C_SYMBOL_NAME(__udiv_qrnnd):
+	!#PROLOGUE# 0
+	save	%sp,-104,%sp
+	!#PROLOGUE# 1
+	st	%i1,[%fp-8]
+	ld	[%fp-8],%f10
+	sethi	%hi(LC0),%o7
+	fitod	%f10,%f4
+	ldd	[%o7+%lo(LC0)],%f8
+	cmp	%i1,0
+	bge	L248
+	mov	%i0,%i5
+	faddd	%f4,%f8,%f4
+L248:
+	st	%i2,[%fp-8]
+	ld	[%fp-8],%f10
+	fmuld	%f4,%f8,%f6
+	cmp	%i2,0
+	bge	L249
+	fitod	%f10,%f2
+	faddd	%f2,%f8,%f2
+L249:
+	st	%i3,[%fp-8]
+	faddd	%f6,%f2,%f2
+	ld	[%fp-8],%f10
+	cmp	%i3,0
+	bge	L250
+	fitod	%f10,%f4
+	faddd	%f4,%f8,%f4
+L250:
+	fdivd	%f2,%f4,%f2
+	sethi	%hi(LC1),%o7
+	ldd	[%o7+%lo(LC1)],%f4
+	fcmped	%f2,%f4
+	nop
+	fbge,a	L251
+	fsubd	%f2,%f4,%f2
+	fdtoi	%f2,%f2
+	st	%f2,[%fp-8]
+	b	L252
+	ld	[%fp-8],%i4
+L251:
+	fdtoi	%f2,%f2
+	st	%f2,[%fp-8]
+	ld	[%fp-8],%i4
+	sethi	%hi(-2147483648),%g2
+	xor	%i4,%g2,%i4
+L252:
+	wr	%g0,%i4,%y
+	sra	%i3,31,%g2
+	and	%i4,%g2,%g2
+	andcc	%g0,0,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,%i3,%g1
+	mulscc	%g1,0,%g1
+	add	%g1,%g2,%i0
+	rd	%y,%g3
+	subcc	%i2,%g3,%o7
+	subxcc	%i1,%i0,%g0
+	be	L253
+	cmp	%o7,%i3
+
+	add	%i4,-1,%i0
+	add	%o7,%i3,%o7
+	st	%o7,[%i5]
+	ret
+	restore
+L253:
+	blu	L246
+	mov	%i4,%i0
+	add	%i4,1,%i0
+	sub	%o7,%i3,%o7
+L246:
+	st	%o7,[%i5]
+	ret
+	restore
diff --git a/mpn/sparc32/udiv_nfp.S b/mpn/sparc32/udiv_nfp.S
new file mode 100644
index 000000000..118d8a4a2
--- /dev/null
+++ b/mpn/sparc32/udiv_nfp.S
@@ -0,0 +1,188 @@
+! SPARC v7 __udiv_qrnnd division support, used from longlong.h.
+! This is for v7 CPUs without a floating-point unit.
+
+! Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr	o0
+! n1		o1
+! n0		o2
+! d		o3
+
+#include "sysdep.h"
+
+	.text
+	.align 4
+	.global	C_SYMBOL_NAME(__udiv_qrnnd)
+C_SYMBOL_NAME(__udiv_qrnnd):
+	tst	%o3
+	bneg	Largedivisor
+	mov	8,%g1
+
+	b	Lp1
+	addxcc	%o2,%o2,%o2
+
+Lplop:	bcc	Ln1
+	addxcc	%o2,%o2,%o2
+Lp1:	addx	%o1,%o1,%o1
+	subcc	%o1,%o3,%o4
+	bcc	Ln2
+	addxcc	%o2,%o2,%o2
+Lp2:	addx	%o1,%o1,%o1
+	subcc	%o1,%o3,%o4
+	bcc	Ln3
+	addxcc	%o2,%o2,%o2
+Lp3:	addx	%o1,%o1,%o1
+	subcc	%o1,%o3,%o4
+	bcc	Ln4
+	addxcc	%o2,%o2,%o2
+Lp4:	addx	%o1,%o1,%o1
+	addcc	%g1,-1,%g1
+	bne	Lplop
+	subcc	%o1,%o3,%o4
+	bcc	Ln5
+	addxcc	%o2,%o2,%o2
+Lp5:	st	%o1,[%o0]
+	retl
+	xnor	%g0,%o2,%o0
+
+Lnlop:	bcc	Lp1
+	addxcc	%o2,%o2,%o2
+Ln1:	addx	%o4,%o4,%o4
+	subcc	%o4,%o3,%o1
+	bcc	Lp2
+	addxcc	%o2,%o2,%o2
+Ln2:	addx	%o4,%o4,%o4
+	subcc	%o4,%o3,%o1
+	bcc	Lp3
+	addxcc	%o2,%o2,%o2
+Ln3:	addx	%o4,%o4,%o4
+	subcc	%o4,%o3,%o1
+	bcc	Lp4
+	addxcc	%o2,%o2,%o2
+Ln4:	addx	%o4,%o4,%o4
+	addcc	%g1,-1,%g1
+	bne	Lnlop
+	subcc	%o4,%o3,%o1
+	bcc	Lp5
+	addxcc	%o2,%o2,%o2
+Ln5:	st	%o4,[%o0]
+	retl
+	xnor	%g0,%o2,%o0
+
+Largedivisor:
+	and	%o2,1,%o5	! %o5 = n0 & 1
+
+	srl	%o2,1,%o2
+	sll	%o1,31,%g2
+	or	%g2,%o2,%o2	! %o2 = lo(n1n0 >> 1)
+	srl	%o1,1,%o1	! %o1 = hi(n1n0 >> 1)
+
+	and	%o3,1,%g2
+	srl	%o3,1,%g3	! %g3 = floor(d / 2)
+	add	%g3,%g2,%g3	! %g3 = ceil(d / 2)
+
+	b	LLp1
+	addxcc	%o2,%o2,%o2
+
+LLplop:	bcc	LLn1
+	addxcc	%o2,%o2,%o2
+LLp1:	addx	%o1,%o1,%o1
+	subcc	%o1,%g3,%o4
+	bcc	LLn2
+	addxcc	%o2,%o2,%o2
+LLp2:	addx	%o1,%o1,%o1
+	subcc	%o1,%g3,%o4
+	bcc	LLn3
+	addxcc	%o2,%o2,%o2
+LLp3:	addx	%o1,%o1,%o1
+	subcc	%o1,%g3,%o4
+	bcc	LLn4
+	addxcc	%o2,%o2,%o2
+LLp4:	addx	%o1,%o1,%o1
+	addcc	%g1,-1,%g1
+	bne	LLplop
+	subcc	%o1,%g3,%o4
+	bcc	LLn5
+	addxcc	%o2,%o2,%o2
+LLp5:	add	%o1,%o1,%o1	! << 1
+	tst	%g2
+	bne	Oddp
+	add	%o5,%o1,%o1
+	st	%o1,[%o0]
+	retl
+	xnor	%g0,%o2,%o0
+
+LLnlop:	bcc	LLp1
+	addxcc	%o2,%o2,%o2
+LLn1:	addx	%o4,%o4,%o4
+	subcc	%o4,%g3,%o1
+	bcc	LLp2
+	addxcc	%o2,%o2,%o2
+LLn2:	addx	%o4,%o4,%o4
+	subcc	%o4,%g3,%o1
+	bcc	LLp3
+	addxcc	%o2,%o2,%o2
+LLn3:	addx	%o4,%o4,%o4
+	subcc	%o4,%g3,%o1
+	bcc	LLp4
+	addxcc	%o2,%o2,%o2
+LLn4:	addx	%o4,%o4,%o4
+	addcc	%g1,-1,%g1
+	bne	LLnlop
+	subcc	%o4,%g3,%o1
+	bcc	LLp5
+	addxcc	%o2,%o2,%o2
+LLn5:	add	%o4,%o4,%o4	! << 1
+	tst	%g2
+	bne	Oddn
+	add	%o5,%o4,%o4
+	st	%o4,[%o0]
+	retl
+	xnor	%g0,%o2,%o0
+
+Oddp:	xnor	%g0,%o2,%o2
+	! q' in %o2. r' in %o1
+	addcc	%o1,%o2,%o1
+	bcc	LLp6
+	addx	%o2,0,%o2
+	sub	%o1,%o3,%o1
+LLp6:	subcc	%o1,%o3,%g0
+	bcs	LLp7
+	subx	%o2,-1,%o2
+	sub	%o1,%o3,%o1
+LLp7:	st	%o1,[%o0]
+	retl
+	mov	%o2,%o0
+
+Oddn:	xnor	%g0,%o2,%o2
+	! q' in %o2. r' in %o4
+	addcc	%o4,%o2,%o4
+	bcc	LLn6
+	addx	%o2,0,%o2
+	sub	%o4,%o3,%o4
+LLn6:	subcc	%o4,%o3,%g0
+	bcs	LLn7
+	subx	%o2,-1,%o2
+	sub	%o4,%o3,%o4
+LLn7:	st	%o4,[%o0]
+	retl
+	mov	%o2,%o0
diff --git a/mpn/sparc32/v8/addmul_1.S b/mpn/sparc32/v8/addmul_1.S
new file mode 100644
index 000000000..fb9ea7cf0
--- /dev/null
+++ b/mpn/sparc32/v8/addmul_1.S
@@ -0,0 +1,124 @@
+! SPARC v8 __mpn_addmul_1 -- Multiply a limb vector with a limb and
+! add the result to a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+#include "sysdep.h"
+
+.text
+	.align 4
+	.global	C_SYMBOL_NAME(__mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1):
+	orcc	%g0,%g0,%g2
+	ld	[%o1+0],%o4	! 1
+
+	sll	%o2,4,%g1
+	and	%g1,(4-1)<<4,%g1
+#if PIC
+	mov	%o7,%g4			! Save return address register
+	call	1f
+	add	%o7,LL-1f,%g3
+1:	mov	%g4,%o7			! Restore return address register
+#else
+	sethi	%hi(LL),%g3
+	or	%g3,%lo(LL),%g3
+#endif
+	jmp	%g3+%g1
+	nop
+LL:
+LL00:	add	%o0,-4,%o0
+	b	Loop00		/* 4, 8, 12, ... */
+	add	%o1,-4,%o1
+	nop
+LL01:	b	Loop01		/* 1, 5, 9, ... */
+	nop
+	nop
+	nop
+LL10:	add	%o0,-12,%o0	/* 2, 6, 10, ... */
+	b	Loop10
+	add	%o1,4,%o1
+	nop
+LL11:	add	%o0,-8,%o0	/* 3, 7, 11, ... */
+	b	Loop11
+	add	%o1,-8,%o1
+	nop
+
+1:	addcc	%g3,%g2,%g3	! 1
+	ld	[%o1+4],%o4	! 2
+	rd	%y,%g2		! 1
+	addx	%g0,%g2,%g2
+	ld	[%o0+0],%g1	! 2
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+0]	! 1
+Loop00:	umul	%o4,%o3,%g3	! 2
+	ld	[%o0+4],%g1	! 2
+	addxcc	%g3,%g2,%g3	! 2
+	ld	[%o1+8],%o4	! 3
+	rd	%y,%g2		! 2
+	addx	%g0,%g2,%g2
+	nop
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+4]	! 2
+Loop11:	umul	%o4,%o3,%g3	! 3
+	addxcc	%g3,%g2,%g3	! 3
+	ld	[%o1+12],%o4	! 4
+	rd	%y,%g2		! 3
+	add	%o1,16,%o1
+	addx	%g0,%g2,%g2
+	ld	[%o0+8],%g1	! 2
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+8]	! 3
+Loop10:	umul	%o4,%o3,%g3	! 4
+	addxcc	%g3,%g2,%g3	! 4
+	ld	[%o1+0],%o4	! 1
+	rd	%y,%g2		! 4
+	addx	%g0,%g2,%g2
+	ld	[%o0+12],%g1	! 2
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+12]	! 4
+	add	%o0,16,%o0
+	addx	%g0,%g2,%g2
+Loop01:	addcc	%o2,-4,%o2
+	bg	1b
+	umul	%o4,%o3,%g3	! 1
+
+	addcc	%g3,%g2,%g3	! 4
+	rd	%y,%g2		! 4
+	addx	%g0,%g2,%g2
+	ld	[%o0+0],%g1	! 2
+	addcc	%g1,%g3,%g3
+	st	%g3,[%o0+0]	! 4
+	addx	%g0,%g2,%o0
+
+	retl
+	 nop
+
+
+!	umul, ld, addxcc, rd, st
+
+!	umul, ld, addxcc, rd, ld, addcc, st, addx
+
diff --git a/mpn/sparc32/v8/mul_1.S b/mpn/sparc32/v8/mul_1.S
new file mode 100644
index 000000000..b641feb45
--- /dev/null
+++ b/mpn/sparc32/v8/mul_1.S
@@ -0,0 +1,99 @@
+! SPARC v8 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+! store the product in a second limb vector.
+
+! Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+#include "sysdep.h"
+
+.text
+	.align	8
+	.global	C_SYMBOL_NAME(__mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1):
+	sll	%o2,4,%g1
+	and	%g1,(4-1)<<4,%g1
+#if PIC
+	mov	%o7,%g4			! Save return address register
+	call	1f
+	add	%o7,LL-1f,%g3
+1:	mov	%g4,%o7			! Restore return address register
+#else
+	sethi	%hi(LL),%g3
+	or	%g3,%lo(LL),%g3
+#endif
+	jmp	%g3+%g1
+	ld	[%o1+0],%o4	! 1
+LL:
+LL00:	add	%o0,-4,%o0
+	add	%o1,-4,%o1
+	b	Loop00		/* 4, 8, 12, ... */
+	orcc	%g0,%g0,%g2
+LL01:	b	Loop01		/* 1, 5, 9, ... */
+	orcc	%g0,%g0,%g2
+	nop
+	nop
+LL10:	add	%o0,-12,%o0	/* 2, 6, 10, ... */
+	add	%o1,4,%o1
+	b	Loop10
+	orcc	%g0,%g0,%g2
+	nop
+LL11:	add	%o0,-8,%o0	/* 3, 7, 11, ... */
+	add	%o1,-8,%o1
+	b	Loop11
+	orcc	%g0,%g0,%g2
+
+Loop:	addcc	%g3,%g2,%g3	! 1
+	ld	[%o1+4],%o4	! 2
+	st	%g3,[%o0+0]	! 1
+	rd	%y,%g2		! 1
+Loop00:	umul	%o4,%o3,%g3	! 2
+	addxcc	%g3,%g2,%g3	! 2
+	ld	[%o1+8],%o4	! 3
+	st	%g3,[%o0+4]	! 2
+	rd	%y,%g2		! 2
+Loop11:	umul	%o4,%o3,%g3	! 3
+	addxcc	%g3,%g2,%g3	! 3
+	ld	[%o1+12],%o4	! 4
+	add	%o1,16,%o1
+	st	%g3,[%o0+8]	! 3
+	rd	%y,%g2		! 3
+Loop10:	umul	%o4,%o3,%g3	! 4
+	addxcc	%g3,%g2,%g3	! 4
+	ld	[%o1+0],%o4	! 1
+	st	%g3,[%o0+12]	! 4
+	add	%o0,16,%o0
+	rd	%y,%g2		! 4
+	addx	%g0,%g2,%g2
+Loop01:	addcc	%o2,-4,%o2
+	bg	Loop
+	umul	%o4,%o3,%g3	! 1
+
+	addcc	%g3,%g2,%g3	! 4
+	st	%g3,[%o0+0]	! 4
+	rd	%y,%g2		! 4
+
+	retl
+	addx	%g0,%g2,%o0
diff --git a/mpn/sparc32/v8/submul_1.S b/mpn/sparc32/v8/submul_1.S
new file mode 100644
index 000000000..e40119d01
--- /dev/null
+++ b/mpn/sparc32/v8/submul_1.S
@@ -0,0 +1,58 @@
+! SPARC v8 __mpn_submul_1 -- Multiply a limb vector with a limb and
+! subtract the result from a second limb vector.
+
+! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+#include "sysdep.h"
+
+.text
+	.align 4
+	.global	C_SYMBOL_NAME(__mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1):
+	sub	%g0,%o2,%o2		! negate ...
+	sll	%o2,2,%o2		! ... and scale size
+	sub	%o1,%o2,%o1		! o1 is offset s1_ptr
+	sub	%o0,%o2,%g1		! g1 is offset res_ptr
+
+	mov	0,%o0			! clear cy_limb
+
+Loop:	ld	[%o1+%o2],%o4
+	ld	[%g1+%o2],%g2
+	umul	%o4,%o3,%o5
+	rd	%y,%g3
+	addcc	%o5,%o0,%o5
+	addx	%g3,0,%o0
+	subcc	%g2,%o5,%g2
+	addx	%o0,0,%o0
+	st	%g2,[%g1+%o2]
+
+	addcc	%o2,4,%o2
+	bne	Loop
+	 nop
+
+	retl
+	 nop
diff --git a/mpn/sparc32/v8/supersparc/udiv.S b/mpn/sparc32/v8/supersparc/udiv.S
new file mode 100644
index 000000000..ed688ee1d
--- /dev/null
+++ b/mpn/sparc32/v8/supersparc/udiv.S
@@ -0,0 +1,109 @@
+! SuperSPARC __udiv_qrnnd division support, used from longlong.h.
+! This is for SuperSPARC only, to compensate for its semi-functional
+! udiv instruction.
+
+! Copyright (C) 1993, 1994, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! rem_ptr	i0
+! n1		i1
+! n0		i2
+! d		i3
+
+#include "sysdep.h"
+#undef ret	/* Kludge for glibc */
+
+	.text
+	.align	8
+LC0:	.double	0r4294967296
+LC1:	.double	0r2147483648
+
+	.align	4
+	.global	C_SYMBOL_NAME(__udiv_qrnnd)
+C_SYMBOL_NAME(__udiv_qrnnd):
+	!#PROLOGUE# 0
+	save	%sp,-104,%sp
+	!#PROLOGUE# 1
+	st	%i1,[%fp-8]
+	ld	[%fp-8],%f10
+	sethi	%hi(LC0),%o7
+	fitod	%f10,%f4
+	ldd	[%o7+%lo(LC0)],%f8
+	cmp	%i1,0
+	bge	L248
+	mov	%i0,%i5
+	faddd	%f4,%f8,%f4
+L248:
+	st	%i2,[%fp-8]
+	ld	[%fp-8],%f10
+	fmuld	%f4,%f8,%f6
+	cmp	%i2,0
+	bge	L249
+	fitod	%f10,%f2
+	faddd	%f2,%f8,%f2
+L249:
+	st	%i3,[%fp-8]
+	faddd	%f6,%f2,%f2
+	ld	[%fp-8],%f10
+	cmp	%i3,0
+	bge	L250
+	fitod	%f10,%f4
+	faddd	%f4,%f8,%f4
+L250:
+	fdivd	%f2,%f4,%f2
+	sethi	%hi(LC1),%o7
+	ldd	[%o7+%lo(LC1)],%f4
+	fcmped	%f2,%f4
+	nop
+	fbge,a	L251
+	fsubd	%f2,%f4,%f2
+	fdtoi	%f2,%f2
+	st	%f2,[%fp-8]
+	b	L252
+	ld	[%fp-8],%i4
+L251:
+	fdtoi	%f2,%f2
+	st	%f2,[%fp-8]
+	ld	[%fp-8],%i4
+	sethi	%hi(-2147483648),%g2
+	xor	%i4,%g2,%i4
+L252:
+	umul	%i3,%i4,%g3
+	rd	%y,%i0
+	subcc	%i2,%g3,%o7
+	subxcc	%i1,%i0,%g0
+	be	L253
+	cmp	%o7,%i3
+
+	add	%i4,-1,%i0
+	add	%o7,%i3,%o7
+	st	%o7,[%i5]
+	ret
+	restore
+L253:
+	blu	L246
+	mov	%i4,%i0
+	add	%i4,1,%i0
+	sub	%o7,%i3,%o7
+L246:
+	st	%o7,[%i5]
+	ret
+	restore
diff --git a/mpn/sparc64/add_n.s b/mpn/sparc64/add_n.s
new file mode 100644
index 000000000..01d1f4956
--- /dev/null
+++ b/mpn/sparc64/add_n.s
@@ -0,0 +1,58 @@
+! SPARC v9 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+! sum in a third limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	%o0
+! s1_ptr	%o1
+! s2_ptr	%o2
+! size		%o3
+
+.section	".text"
+	.align 4
+	.global __mpn_add_n
+	.type	 __mpn_add_n,#function
+	.proc	04
+__mpn_add_n:
+	sub %g0,%o3,%g3
+	sllx %o3,3,%g1
+	add %o1,%g1,%o1			! make s1_ptr point at end
+	add %o2,%g1,%o2			! make s2_ptr point at end
+	add %o0,%g1,%o0			! make res_ptr point at end
+	mov 0,%o4			! clear carry variable
+	sllx %g3,3,%o5			! compute initial address index
+
+.Loop:	ldx [%o2+%o5],%g1		! load s2 limb
+	add %g3,1,%g3			! increment loop count
+	ldx [%o1+%o5],%g2		! load s1 limb
+	addcc %g1,%o4,%g1		! add s2 limb and carry variable
+	movcc %xcc,0,%o4		! if carry-out, o4 was 1; clear it
+	addcc %g1,%g2,%g1		! add s1 limb to sum
+	stx %g1,[%o0+%o5]		! store result
+	add %o5,8,%o5			! increment address index
+	brnz,pt %g3,.Loop
+	movcs %xcc,1,%o4		! if s1 add gave carry, record it
+
+	retl
+	mov %o4,%o0
+.LLfe1:
+	.size	 __mpn_add_n,.LLfe1-__mpn_add_n
diff --git a/mpn/sparc64/addmul_1.s b/mpn/sparc64/addmul_1.s
new file mode 100644
index 000000000..8d8639080
--- /dev/null
+++ b/mpn/sparc64/addmul_1.s
@@ -0,0 +1,89 @@
+! SPARC v9 __mpn_addmul_1 -- Multiply a limb vector with a single limb and
+! add the product to a second limb vector.
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+.section	".text"
+	.align 4
+	.global __mpn_addmul_1
+	.type	 __mpn_addmul_1,#function
+	.proc	016
+__mpn_addmul_1:
+	!#PROLOGUE#	0
+	save	%sp,-160,%sp
+	!#PROLOGUE#	1
+	sub	%g0,%i2,%o7
+	sllx	%o7,3,%g5
+	sub	%i1,%g5,%o3
+	sub	%i0,%g5,%o4
+	mov	0,%o0			! zero cy_limb
+
+	srl	%i3,0,%o1		! extract low 32 bits of s2_limb
+	srlx	%i3,32,%i3		! extract high 32 bits of s2_limb
+	mov	1,%o2
+	sllx	%o2,32,%o2		! o2 = 0x100000000
+
+	!   hi   !
+             !  mid-1 !
+             !  mid-2 !
+		 !   lo   !
+.Loop:
+	sllx	%o7,3,%g1
+	ldx	[%o3+%g1],%g5
+	srl	%g5,0,%i0		! zero hi bits
+	srlx	%g5,32,%g5
+	mulx	%o1,%i0,%i4		! lo product
+	mulx	%i3,%i0,%i1		! mid-1 product
+	mulx	%o1,%g5,%l2		! mid-2 product
+	mulx	%i3,%g5,%i5		! hi product
+	srlx	%i4,32,%i0		! extract high 32 bits of lo product...
+	add	%i1,%i0,%i1		! ...and add it to the mid-1 product
+	addcc	%i1,%l2,%i1		! add mid products
+	mov	0,%l0			! we need the carry from that add...
+	movcs	%xcc,%o2,%l0		! ...compute it and...
+	add	%i5,%l0,%i5		! ...add to bit 32 of the hi product
+	sllx	%i1,32,%i0		! align low bits of mid product
+	srl	%i4,0,%g5		! zero high 32 bits of lo product
+	add	%i0,%g5,%i0		! combine into low 64 bits of result
+	srlx	%i1,32,%i1		! extract high bits of mid product...
+	add	%i5,%i1,%i1		! ...and add them to the high result
+	addcc	%i0,%o0,%i0		! add cy_limb to low 64 bits of result
+	mov	0,%g5
+	movcs	%xcc,1,%g5
+	add	%o7,1,%o7
+	ldx	[%o4+%g1],%l1
+	addcc	%l1,%i0,%i0
+	movcs	%xcc,1,%g5
+	stx	%i0,[%o4+%g1]
+	brnz	%o7,.Loop
+	add	%i1,%g5,%o0		! compute new cy_limb
+
+	mov	%o0,%i0
+	ret
+	restore
+.LLfe1:
+	.size  __mpn_addmul_1,.LLfe1-__mpn_addmul_1
diff --git a/mpn/sparc64/gmp-mparam.h b/mpn/sparc64/gmp-mparam.h
new file mode 100644
index 000000000..a3c66974d
--- /dev/null
+++ b/mpn/sparc64/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 64
+#define BYTES_PER_MP_LIMB 8
+#define BITS_PER_LONGINT 64
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
diff --git a/mpn/sparc64/lshift.s b/mpn/sparc64/lshift.s
new file mode 100644
index 000000000..ad1f667fa
--- /dev/null
+++ b/mpn/sparc64/lshift.s
@@ -0,0 +1,96 @@
+! SPARC v9 __mpn_lshift --
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	%o0
+! src_ptr	%o1
+! size		%o2
+! cnt		%o3
+
+.section	".text"
+	.align 4
+	.global __mpn_lshift
+	.type	 __mpn_lshift,#function
+	.proc	04
+__mpn_lshift:
+	sllx	%o2,3,%g1
+	add	%o1,%g1,%o1	! make %o1 point at end of src
+	ldx	[%o1-8],%g2	! load first limb
+	sub	%g0,%o3,%o5	! negate shift count
+	add	%o0,%g1,%o0	! make %o0 point at end of res
+	add	%o2,-1,%o2
+	and	%o2,4-1,%g4	! number of limbs in first loop
+	srlx	%g2,%o5,%g1	! compute function result
+	brz,pn	%g4,.L0		! if multiple of 4 limbs, skip first loop
+	stx	%g1,[%sp+80]
+
+	sub	%o2,%g4,%o2	! adjust count for main loop
+
+.Loop0:	ldx	[%o1-16],%g3
+	add	%o0,-8,%o0
+	add	%o1,-8,%o1
+	add	%g4,-1,%g4
+	sllx	%g2,%o3,%o4
+	srlx	%g3,%o5,%g1
+	mov	%g3,%g2
+	or	%o4,%g1,%o4
+	brnz,pt	%g4,.Loop0
+	 stx	%o4,[%o0+0]
+
+.L0:	brz,pn	%o2,.Lend
+	 nop
+
+.Loop:	ldx	[%o1-16],%g3
+	add	%o0,-32,%o0
+	add	%o2,-4,%o2
+	sllx	%g2,%o3,%o4
+	srlx	%g3,%o5,%g1
+
+	ldx	[%o1-24],%g2
+	sllx	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	stx	%o4,[%o0+24]
+	srlx	%g2,%o5,%g1
+
+	ldx	[%o1-32],%g3
+	sllx	%g2,%o3,%o4
+	or	%g4,%g1,%g4
+	stx	%g4,[%o0+16]
+	srlx	%g3,%o5,%g1
+
+	ldx	[%o1-40],%g2
+	sllx	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	stx	%o4,[%o0+8]
+	srlx	%g2,%o5,%g1
+
+	add	%o1,-32,%o1
+	or	%g4,%g1,%g4
+	brnz,pt	%o2,.Loop
+	 stx	%g4,[%o0+0]
+
+.Lend:	sllx	%g2,%o3,%g2
+	stx	%g2,[%o0-8]
+	retl
+	ldx	[%sp+80],%o0
+.LLfe1:
+	.size	 __mpn_lshift,.LLfe1-__mpn_lshift
diff --git a/mpn/sparc64/mul_1.s b/mpn/sparc64/mul_1.s
new file mode 100644
index 000000000..91d6eb01b
--- /dev/null
+++ b/mpn/sparc64/mul_1.s
@@ -0,0 +1,86 @@
+! SPARC v9 __mpn_mul_1 -- Multiply a limb vector with a single limb and
+! store the product in a second limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+.section	".text"
+	.align 4
+	.global __mpn_mul_1
+	.type	 __mpn_mul_1,#function
+	.proc	016
+__mpn_mul_1:
+	!#PROLOGUE#	0
+	save	%sp,-160,%sp
+	!#PROLOGUE#	1
+	sub	%g0,%i2,%o7
+	sllx	%o7,3,%g5
+	sub	%i1,%g5,%o3
+	sub	%i0,%g5,%o4
+	mov	0,%o0			! zero cy_limb
+
+	srl	%i3,0,%o1		! extract low 32 bits of s2_limb
+	srlx	%i3,32,%i3		! extract high 32 bits of s2_limb
+	mov	1,%o2
+	sllx	%o2,32,%o2		! o2 = 0x100000000
+
+	!   hi   !
+             !  mid-1 !
+             !  mid-2 !
+		 !   lo   !
+.Loop:
+	sllx	%o7,3,%g1
+	ldx	[%o3+%g1],%g5
+	srl	%g5,0,%i0		! zero hi bits
+	srlx	%g5,32,%g5
+	mulx	%o1,%i0,%i4		! lo product
+	mulx	%i3,%i0,%i1		! mid-1 product
+	mulx	%o1,%g5,%l2		! mid-2 product
+	mulx	%i3,%g5,%i5		! hi product
+	srlx	%i4,32,%i0		! extract high 32 bits of lo product...
+	add	%i1,%i0,%i1		! ...and add it to the mid-1 product
+	addcc	%i1,%l2,%i1		! add mid products
+	mov	0,%l0			! we need the carry from that add...
+	movcs	%xcc,%o2,%l0		! ...compute it and...
+	add	%i5,%l0,%i5		! ...add to bit 32 of the hi product
+	sllx	%i1,32,%i0		! align low bits of mid product
+	srl	%i4,0,%g5		! zero high 32 bits of lo product
+	add	%i0,%g5,%i0		! combine into low 64 bits of result
+	srlx	%i1,32,%i1		! extract high bits of mid product...
+	add	%i5,%i1,%i1		! ...and add them to the high result
+	addcc	%i0,%o0,%i0		! add cy_limb to low 64 bits of result
+	mov	0,%g5
+	movcs	%xcc,1,%g5
+	add	%o7,1,%o7
+	stx	%i0,[%o4+%g1]
+	brnz	%o7,.Loop
+	add	%i1,%g5,%o0		! compute new cy_limb
+
+	mov	%o0,%i0
+	ret
+	restore
+.LLfe1:
+	.size  __mpn_mul_1,.LLfe1-__mpn_mul_1
diff --git a/mpn/sparc64/rshift.s b/mpn/sparc64/rshift.s
new file mode 100644
index 000000000..ff6a38016
--- /dev/null
+++ b/mpn/sparc64/rshift.s
@@ -0,0 +1,93 @@
+! SPARC v9 __mpn_rshift --
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	%o0
+! src_ptr	%o1
+! size		%o2
+! cnt		%o3
+
+.section	".text"
+	.align	4
+	.global	__mpn_rshift
+	.type	__mpn_rshift,#function
+	.proc	04
+__mpn_rshift:
+	ldx	[%o1],%g2	! load first limb
+	sub	%g0,%o3,%o5	! negate shift count
+	add	%o2,-1,%o2
+	and	%o2,4-1,%g4	! number of limbs in first loop
+	sllx	%g2,%o5,%g1	! compute function result
+	brz,pn	%g4,.L0		! if multiple of 4 limbs, skip first loop
+	stx	%g1,[%sp+80]
+
+	sub	%o2,%g4,%o2	! adjust count for main loop
+
+.Loop0:	ldx	[%o1+8],%g3
+	add	%o0,8,%o0
+	add	%o1,8,%o1
+	add	%g4,-1,%g4
+	srlx	%g2,%o3,%o4
+	sllx	%g3,%o5,%g1
+	mov	%g3,%g2
+	or	%o4,%g1,%o4
+	brnz,pt	%g4,.Loop0
+	 stx	%o4,[%o0-8]
+
+.L0:	brz,pn	%o2,.Lend
+	 nop
+
+.Loop:	ldx	[%o1+8],%g3
+	add	%o0,32,%o0
+	add	%o2,-4,%o2
+	srlx	%g2,%o3,%o4
+	sllx	%g3,%o5,%g1
+
+	ldx	[%o1+16],%g2
+	srlx	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	stx	%o4,[%o0-32]
+	sllx	%g2,%o5,%g1
+
+	ldx	[%o1+24],%g3
+	srlx	%g2,%o3,%o4
+	or	%g4,%g1,%g4
+	stx	%g4,[%o0-24]
+	sllx	%g3,%o5,%g1
+
+	ldx	[%o1+32],%g2
+	srlx	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	stx	%o4,[%o0-16]
+	sllx	%g2,%o5,%g1
+
+	add	%o1,32,%o1
+	or	%g4,%g1,%g4
+	brnz	%o2,.Loop
+	 stx	%g4,[%o0-8]
+
+.Lend:	srlx	%g2,%o3,%g2
+	stx	%g2,[%o0-0]
+	retl
+	ldx	[%sp+80],%o0
+.LLfe1:
+	.size	__mpn_rshift,.LLfe1-__mpn_rshift
diff --git a/mpn/sparc64/sub_n.s b/mpn/sparc64/sub_n.s
new file mode 100644
index 000000000..d4842b8cd
--- /dev/null
+++ b/mpn/sparc64/sub_n.s
@@ -0,0 +1,58 @@
+! SPARC v9 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	%o0
+! s1_ptr	%o1
+! s2_ptr	%o2
+! size		%o3
+
+.section	".text"
+	.align 4
+	.global __mpn_sub_n
+	.type	 __mpn_sub_n,#function
+	.proc	04
+__mpn_sub_n:
+	sub %g0,%o3,%g3
+	sllx %o3,3,%g1
+	add %o1,%g1,%o1			! make s1_ptr point at end
+	add %o2,%g1,%o2			! make s2_ptr point at end
+	add %o0,%g1,%o0			! make res_ptr point at end
+	mov 0,%o4			! clear carry variable
+	sllx %g3,3,%o5			! compute initial address index
+
+.Loop:	ldx [%o2+%o5],%g1		! load s2 limb
+	add %g3,1,%g3			! increment loop count
+	ldx [%o1+%o5],%g2		! load s1 limb
+	addcc %g1,%o4,%g1		! add s2 limb and carry variable
+	movcc %xcc,0,%o4		! if carry-out, o4 was 1; clear it
+	subcc %g1,%g2,%g1		! subtract s1 limb from sum
+	stx %g1,[%o0+%o5]		! store result
+	add %o5,8,%o5			! increment address index
+	brnz,pt %g3,.Loop
+	movcs %xcc,1,%o4		! if s1 subtract gave carry, record it
+
+	retl
+	mov %o4,%o0
+.LLfe1:
+	.size	 __mpn_sub_n,.LLfe1-__mpn_sub_n
diff --git a/mpn/sparc64/submul_1.s b/mpn/sparc64/submul_1.s
new file mode 100644
index 000000000..e79624347
--- /dev/null
+++ b/mpn/sparc64/submul_1.s
@@ -0,0 +1,89 @@
+! SPARC v9 __mpn_submul_1 -- Multiply a limb vector with a single limb and
+! subtract the product from a second limb vector.
+
+! Copyright (C) 1996 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	o0
+! s1_ptr	o1
+! size		o2
+! s2_limb	o3
+
+.section	".text"
+	.align 4
+	.global __mpn_submul_1
+	.type	 __mpn_submul_1,#function
+	.proc	016
+__mpn_submul_1:
+	!#PROLOGUE#	0
+	save	%sp,-160,%sp
+	!#PROLOGUE#	1
+	sub	%g0,%i2,%o7
+	sllx	%o7,3,%g5
+	sub	%i1,%g5,%o3
+	sub	%i0,%g5,%o4
+	mov	0,%o0			! zero cy_limb
+
+	srl	%i3,0,%o1		! extract low 32 bits of s2_limb
+	srlx	%i3,32,%i3		! extract high 32 bits of s2_limb
+	mov	1,%o2
+	sllx	%o2,32,%o2		! o2 = 0x100000000
+
+	!   hi   !
+             !  mid-1 !
+             !  mid-2 !
+		 !   lo   !
+.Loop:
+	sllx	%o7,3,%g1
+	ldx	[%o3+%g1],%g5
+	srl	%g5,0,%i0		! zero hi bits
+	srlx	%g5,32,%g5
+	mulx	%o1,%i0,%i4		! lo product
+	mulx	%i3,%i0,%i1		! mid-1 product
+	mulx	%o1,%g5,%l2		! mid-2 product
+	mulx	%i3,%g5,%i5		! hi product
+	srlx	%i4,32,%i0		! extract high 32 bits of lo product...
+	add	%i1,%i0,%i1		! ...and add it to the mid-1 product
+	addcc	%i1,%l2,%i1		! add mid products
+	mov	0,%l0			! we need the carry from that add...
+	movcs	%xcc,%o2,%l0		! ...compute it and...
+	add	%i5,%l0,%i5		! ...add to bit 32 of the hi product
+	sllx	%i1,32,%i0		! align low bits of mid product
+	srl	%i4,0,%g5		! zero high 32 bits of lo product
+	add	%i0,%g5,%i0		! combine into low 64 bits of result
+	srlx	%i1,32,%i1		! extract high bits of mid product...
+	add	%i5,%i1,%i1		! ...and add them to the high result
+	addcc	%i0,%o0,%i0		! add cy_limb to low 64 bits of result
+	mov	0,%g5
+	movcs	%xcc,1,%g5
+	add	%o7,1,%o7
+	ldx	[%o4+%g1],%l1
+	subcc	%l1,%i0,%i0
+	movcs	%xcc,1,%g5
+	stx	%i0,[%o4+%g1]
+	brnz	%o7,.Loop
+	add	%i1,%g5,%o0		! compute new cy_limb
+
+	mov	%o0,%i0
+	ret
+	restore
+.LLfe1:
+	.size  __mpn_submul_1,.LLfe1-__mpn_submul_1
diff --git a/mpn/sysv.h b/mpn/sysv.h
new file mode 100644
index 000000000..87c250902
--- /dev/null
+++ b/mpn/sysv.h
@@ -0,0 +1 @@
+#define C_SYMBOL_NAME(name) name
diff --git a/mpn/tests/add_n.c b/mpn/tests/add_n.c
new file mode 100644
index 000000000..c27d34710
--- /dev/null
+++ b/mpn/tests/add_n.c
@@ -0,0 +1,211 @@
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+unsigned long
+cputime ()
+{
+    struct rusage rus;
+
+    getrusage (0, &rus);
+    return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+unsigned long
+cputime ()
+{
+  return CLOCK_TO_MILLISEC (clock ());
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 10000000
+#endif
+#ifndef SIZE
+#define SIZE 328
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+
+mp_limb_t
+#if __STDC__
+refmpn_add_n (mp_ptr res_ptr,
+	       mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+refmpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     register mp_srcptr s2_ptr;
+     mp_size_t size;
+#endif
+{
+  register mp_limb_t x, y, cy;
+  register mp_size_t j;
+
+  /* The loop counter and index J goes from -SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  s1_ptr -= j;
+  s2_ptr -= j;
+  res_ptr -= j;
+
+  cy = 0;
+  do
+    {
+      y = s2_ptr[j];
+      x = s1_ptr[j];
+      y += cy;			/* add previous carry to one addend */
+      cy = (y < cy);		/* get out carry from that addition */
+      y = x + y;		/* add other addend */
+      cy = (y < x) + cy;	/* get out carry from that add, combine */
+      res_ptr[j] = y;
+    }
+  while (++j != 0);
+
+  return cy;
+}
+
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  mp_limb_t s1[SIZE];
+  mp_limb_t s2[SIZE];
+  mp_limb_t dx[SIZE+1];
+  mp_limb_t dy[SIZE+1];
+  int cyx, cyy;
+  int i;
+  long t0, t;
+  int test;
+  mp_size_t size;
+
+  for (test = 0; ; test++)
+    {
+#ifdef RANDOM
+      size = (random () % SIZE + 1);
+#else
+      size = SIZE;
+#endif
+
+      mpn_random2 (s1, size);
+      mpn_random2 (s2, size);
+
+      dx[size] = 0x12345678;
+      dy[size] = 0x12345678;
+
+#ifdef PRINT
+      mpn_print (s1, size);
+      mpn_print (s2, size);
+#endif
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyx = refmpn_add_n (dx, s1, s2, size);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("refmpn_add_n:   %ldms (%.2f cycles/limb)\n",
+	      t,
+	      ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+      printf ("%d ", cyx); mpn_print (dx, size);
+#endif
+
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyx = mpn_add_n (dx, s1, s2, size);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("mpn_add_n:   %ldms (%.2f cycles/limb)\n",
+	      t,
+	      ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+      printf ("%d ", cyx); mpn_print (dx, size);
+#endif
+
+#ifndef NOCHECK
+      /* Put garbage in the destination.  */
+      for (i = 0; i < size; i++)
+	{
+	  dx[i] = 0x7654321;
+	  dy[i] = 0x1234567;
+	}
+
+      cyx = refmpn_add_n (dx, s1, s2, size);
+      cyy = mpn_add_n (dy, s1, s2, size);
+      if (cyx != cyy || mpn_cmp (dx, dy, size) != 0
+	  || dx[size] != 0x12345678 || dy[size] != 0x12345678)
+	{
+#ifndef PRINT
+	  printf ("%d ", cyx); mpn_print (dx, size);
+	  printf ("%d ", cyy); mpn_print (dy, size);
+#endif
+	  abort();
+	}
+#endif
+    }
+}
+
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  mp_size_t i;
+
+  for (i = size - 1; i >= 0; i--)
+    {
+      printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]);
+#ifdef SPACE
+      if (i != 0)
+	printf (" ");
+#endif
+    }
+  puts ("");
+}
diff --git a/mpn/tests/addmul_1.c b/mpn/tests/addmul_1.c
new file mode 100644
index 000000000..23952a1cc
--- /dev/null
+++ b/mpn/tests/addmul_1.c
@@ -0,0 +1,223 @@
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+unsigned long
+cputime ()
+{
+    struct rusage rus;
+
+    getrusage (0, &rus);
+    return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+unsigned long
+cputime ()
+{
+  return CLOCK_TO_MILLISEC (clock ());
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 20000000
+#endif
+#ifndef SIZE
+#define SIZE 496
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+mp_limb_t
+refmpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     mp_size_t s1_size;
+     register mp_limb_t s2_limb;
+{
+  register mp_limb_t cy_limb;
+  register mp_size_t j;
+  register mp_limb_t prod_high, prod_low;
+  register mp_limb_t x;
+
+  /* The loop counter and index J goes from -SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -s1_size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  res_ptr -= j;
+  s1_ptr -= j;
+
+  cy_limb = 0;
+  do
+    {
+      umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
+
+      prod_low += cy_limb;
+      cy_limb = (prod_low < cy_limb) + prod_high;
+
+      x = res_ptr[j];
+      prod_low = x + prod_low;
+      cy_limb += (prod_low < x);
+      res_ptr[j] = prod_low;
+    }
+  while (++j != 0);
+
+  return cy_limb;
+}
+
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  mp_limb_t s1[SIZE];
+  mp_limb_t dx[SIZE+2];
+  mp_limb_t dy[SIZE+2];
+  mp_limb_t cyx, cyy;
+  int i;
+  long t0, t;
+  int test;
+  mp_limb_t xlimb;
+  mp_size_t size;
+  double cyc;
+
+  for (test = 0; ; test++)
+    {
+#ifdef RANDOM
+      size = (random () % SIZE + 1);
+#else
+      size = SIZE;
+#endif
+
+      mpn_random2 (s1, size);
+      mpn_random2 (dy+1, size);
+
+      if (random () % 0x100 == 0)
+	xlimb = 0;
+      else
+	mpn_random2 (&xlimb, 1);
+
+      dy[size+1] = 0x12345678;
+      dy[0] = 0x87654321;
+
+#if defined (PRINT) || defined (XPRINT)
+      printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb);
+#endif
+#ifdef PRINT
+      mpn_print (dy+1, size);
+      mpn_print (s1, size);
+#endif
+
+      MPN_COPY (dx, dy, size+2);
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyx = refmpn_addmul_1 (dx+1, s1, size, xlimb);
+      t = cputime() - t0;
+#if TIMES != 1
+      cyc = ((double) t * CLOCK) / (OPS * 1000.0);
+      printf ("refmpn_addmul_1: %5ldms (%.2f cycles/limb) [%.2f Gb/s]\n",
+	      t,
+	      cyc,
+	      CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB);
+#endif
+
+      MPN_COPY (dx, dy, size+2);
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyy = mpn_addmul_1 (dx+1, s1, size, xlimb);
+      t = cputime() - t0;
+#if TIMES != 1
+      cyc = ((double) t * CLOCK) / (OPS * 1000.0);
+      printf ("mpn_addmul_1:    %5ldms (%.2f cycles/limb) [%.2f Gb/s]\n",
+	      t,
+	      cyc,
+	      CLOCK/cyc*BITS_PER_MP_LIMB*BITS_PER_MP_LIMB);
+#endif
+
+      MPN_COPY (dx, dy, size+2);
+      cyx = refmpn_addmul_1 (dx+1, s1, size, xlimb);
+      cyy = mpn_addmul_1 (dy+1, s1, size, xlimb);
+
+#ifdef PRINT
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+      mpn_print (dx+1, size);
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+      mpn_print (dy+1, size);
+#endif
+
+#ifndef NOCHECK
+      if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
+	  || dx[size+1] != 0x12345678 || dx[0] != 0x87654321)
+	{
+#ifndef PRINT
+	  printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+	  mpn_print (dx+1, size);
+	  printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+	  mpn_print (dy+1, size);
+#endif
+	  abort();
+	}
+#endif
+    }
+}
+
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  mp_size_t i;
+
+  for (i = size - 1; i >= 0; i--)
+    {
+      printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]);
+#ifdef SPACE
+      if (i != 0)
+	printf (" ");
+#endif
+    }
+  puts ("");
+}
diff --git a/mpn/tests/divmod_1.c b/mpn/tests/divmod_1.c
new file mode 100644
index 000000000..f6b541ee4
--- /dev/null
+++ b/mpn/tests/divmod_1.c
@@ -0,0 +1,120 @@
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+unsigned long
+cputime ()
+{
+    struct rusage rus;
+
+    getrusage (0, &rus);
+    return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+unsigned long
+cputime ()
+{
+  return CLOCK_TO_MILLISEC (clock ());
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 20000000
+#endif
+#ifndef SIZE
+#define SIZE 1000
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+main ()
+{
+  mp_limb_t nptr[SIZE];
+  mp_limb_t qptr[SIZE];
+  mp_limb_t pptr[SIZE];
+  mp_limb_t dlimb, rlimb, plimb;
+  mp_size_t nsize, qsize, psize;
+  int test;
+
+  for (test = 0; ; test++)
+    {
+#ifdef RANDOM
+      nsize = random () % SIZE + 1;
+#else
+      nsize = SIZE;
+#endif
+
+      mpn_random2 (nptr, nsize);
+
+      mpn_random2 (&dlimb, 1);
+      if (dlimb == 0)
+	abort ();
+
+      rlimb = mpn_divmod_1 (qptr, nptr, nsize, dlimb);
+      qsize = nsize - (qptr[nsize - 1] == 0);
+      if (qsize == 0)
+	{
+	  plimb = rlimb;
+	  psize = qsize;
+	}
+      else
+	{
+	  plimb = mpn_mul_1 (pptr, qptr, qsize, dlimb);
+	  psize = qsize;
+	  plimb += mpn_add_1 (pptr, pptr, psize, rlimb);
+	}
+      if (plimb != 0)
+	pptr[psize++] = plimb;
+
+
+      if (nsize != psize || mpn_cmp (nptr, pptr, nsize) != 0)
+	abort ();
+    }
+}
diff --git a/mpn/tests/divrem.c b/mpn/tests/divrem.c
new file mode 100644
index 000000000..6eafc99e8
--- /dev/null
+++ b/mpn/tests/divrem.c
@@ -0,0 +1,129 @@
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+unsigned long
+cputime ()
+{
+    struct rusage rus;
+
+    getrusage (0, &rus);
+    return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+unsigned long
+cputime ()
+{
+  return CLOCK_TO_MILLISEC (clock ());
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 20000000
+#endif
+#ifndef SIZE
+#define SIZE 100
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+main ()
+{
+  mp_limb_t nptr[2 * SIZE];
+  mp_limb_t dptr[SIZE];
+  mp_limb_t qptr[2 * SIZE];
+  mp_limb_t pptr[2 * SIZE];
+  mp_limb_t rptr[2 * SIZE];
+  mp_size_t nsize, dsize, qsize, rsize, psize;
+  int test;
+  mp_limb_t qlimb;
+
+  for (test = 0; ; test++)
+    {
+#ifdef RANDOM
+      nsize = random () % (2 * SIZE) + 1;
+      dsize = random () % nsize + 1;
+#else
+      nsize = 2 * SIZE;
+      dsize = SIZE;
+#endif
+
+      mpn_random2 (nptr, nsize);
+      mpn_random2 (dptr, dsize);
+      dptr[dsize - 1] |= (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
+
+      MPN_COPY (rptr, nptr, nsize);
+      qlimb = mpn_divrem (qptr, (mp_size_t) 0, rptr, nsize, dptr, dsize);
+      rsize = dsize;
+      qsize = nsize - dsize;
+      qptr[qsize] = qlimb;
+      qsize += qlimb;
+      if (qsize == 0 || qsize > 2 * SIZE)
+	{
+	  continue;		/* bogus */
+	}
+      else
+	{
+	  mp_limb_t cy;
+	  if (qsize > dsize)
+	    mpn_mul (pptr, qptr, qsize, dptr, dsize);
+	  else
+	    mpn_mul (pptr, dptr, dsize, qptr, qsize);
+	  psize = qsize + dsize;
+	  psize -= pptr[psize - 1] == 0;
+	  cy = mpn_add (pptr, pptr, psize, rptr, rsize);
+	  pptr[psize] = cy;
+	  psize += cy;
+	}
+
+      if (nsize != psize || mpn_cmp (nptr, pptr, nsize) != 0)
+	abort ();
+    }
+}
diff --git a/mpn/tests/lshift.c b/mpn/tests/lshift.c
new file mode 100644
index 000000000..f50c5dceb
--- /dev/null
+++ b/mpn/tests/lshift.c
@@ -0,0 +1,226 @@
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+unsigned long
+cputime ()
+{
+    struct rusage rus;
+
+    getrusage (0, &rus);
+    return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+unsigned long
+cputime ()
+{
+  return CLOCK_TO_MILLISEC (clock ());
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 10000000
+#endif
+#ifndef SIZE
+#define SIZE 496
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+mp_limb_t
+refmpn_lshift (wp, up, usize, cnt)
+     register mp_ptr wp;
+     register mp_srcptr up;
+     mp_size_t usize;
+     register unsigned int cnt;
+{
+  register mp_limb_t high_limb, low_limb;
+  register unsigned sh_1, sh_2;
+  register mp_size_t i;
+  mp_limb_t retval;
+
+#ifdef DEBUG
+  if (usize == 0 || cnt == 0)
+    abort ();
+#endif
+
+  sh_1 = cnt;
+#if 0
+  if (sh_1 == 0)
+    {
+      if (wp != up)
+	{
+	  /* Copy from high end to low end, to allow specified input/output
+	     overlapping.  */
+	  for (i = usize - 1; i >= 0; i--)
+	    wp[i] = up[i];
+	}
+      return 0;
+    }
+#endif
+
+  wp += 1;
+  sh_2 = BITS_PER_MP_LIMB - sh_1;
+  i = usize - 1;
+  low_limb = up[i];
+  retval = low_limb >> sh_2;
+  high_limb = low_limb;
+  while (--i >= 0)
+    {
+      low_limb = up[i];
+      wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
+      high_limb = low_limb;
+    }
+  wp[i] = high_limb << sh_1;
+
+  return retval;
+}
+
+#ifndef CNT
+#define CNT 4
+#endif
+
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  mp_limb_t s1[SIZE];
+  mp_limb_t dx[SIZE+2];
+  mp_limb_t dy[SIZE+2];
+  mp_limb_t cyx, cyy;
+  int i;
+  long t0, t;
+  int test;
+  int cnt = CNT;
+  mp_size_t size;
+
+  for (test = 0; ; test++)
+    {
+#ifdef RANDOM
+      size = (random () % SIZE + 1);
+#else
+      size = SIZE;
+#endif
+      mpn_random2 (s1, size);
+
+      dx[size+1] = 0x12345678;
+      dy[size+1] = 0x12345678;
+      dx[0] = 0x87654321;
+      dy[0] = 0x87654321;
+
+#ifdef PRINT
+      mpn_print (s1, size);
+#endif
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyx = refmpn_lshift (dx+1, s1, size, cnt);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("refmpn_lshift: %5ldms (%.2f cycles/limb)\n",
+	      t,
+	      ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); mpn_print (dx+1, size);
+#endif
+
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyy = mpn_lshift (dx+1, s1, size, cnt);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("mpn_lshift:  %5ldms (%.2f cycles/limb)\n",
+	      t,
+	      ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); mpn_print (dx+1, size);
+#endif
+
+#ifndef NOCHECK
+      /* Put garbage in the destination.  */
+      for (i = 1; i <= size; i++)
+	{
+	  dx[i] = 0x7654321;
+	  dy[i] = 0x1234567;
+	}
+
+      cyx = refmpn_lshift (dx+1, s1, size, cnt);
+      cyy = mpn_lshift (dy+1, s1, size, cnt);
+
+      if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
+	  || dx[size+1] != 0x12345678 || dx[0] != 0x87654321)
+	{
+#ifndef PRINT
+	  printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+	  mpn_print (dx+1, size);
+	  printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+	  mpn_print (dy+1, size);
+#endif
+	  abort();
+	}
+#endif
+    }
+}
+
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  mp_size_t i;
+
+  for (i = size - 1; i >= 0; i--)
+    {
+      printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]);
+#ifdef SPACE
+      if (i != 0)
+	printf (" ");
+#endif
+    }
+  puts ("");
+}
diff --git a/mpn/tests/mul_1.c b/mpn/tests/mul_1.c
new file mode 100644
index 000000000..2b522fa79
--- /dev/null
+++ b/mpn/tests/mul_1.c
@@ -0,0 +1,212 @@
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+unsigned long
+cputime ()
+{
+    struct rusage rus;
+
+    getrusage (0, &rus);
+    return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+unsigned long
+cputime ()
+{
+  return CLOCK_TO_MILLISEC (clock ());
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 20000000
+#endif
+#ifndef SIZE
+#define SIZE 496
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+mp_limb_t
+refmpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     mp_size_t s1_size;
+     register mp_limb_t s2_limb;
+{
+  register mp_limb_t cy_limb;
+  register mp_size_t j;
+  register mp_limb_t prod_high, prod_low;
+
+  /* The loop counter and index J goes from -S1_SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -s1_size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  s1_ptr -= j;
+  res_ptr -= j;
+
+  cy_limb = 0;
+  do
+    {
+      umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
+
+      prod_low += cy_limb;
+      cy_limb = (prod_low < cy_limb) + prod_high;
+
+      res_ptr[j] = prod_low;
+    }
+  while (++j != 0);
+
+  return cy_limb;
+}
+
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  mp_limb_t s1[SIZE];
+  mp_limb_t dx[SIZE+2];
+  mp_limb_t dy[SIZE+2];
+  mp_limb_t cyx, cyy;
+  int i;
+  long t0, t;
+  int test;
+  mp_limb_t xlimb;
+  mp_size_t size;
+
+  for (test = 0; ; test++)
+    {
+#ifdef RANDOM
+      size = (random () % SIZE + 1);
+#else
+      size = SIZE;
+#endif
+
+      mpn_random2 (s1, size);
+      mpn_random2 (dy+1, size);
+
+      if (random () % 0x100 == 0)
+	xlimb = 0;
+      else
+	mpn_random2 (&xlimb, 1);
+
+      dy[size+1] = 0x12345678;
+      dy[0] = 0x87654321;
+
+#if defined (PRINT) || defined (XPRINT)
+      printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb);
+#endif
+#ifdef PRINT
+      mpn_print (s1, size);
+#endif
+
+      MPN_COPY (dx, dy, size+2);
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyx = refmpn_mul_1 (dx+1, s1, size, xlimb);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("refmpn_mul_1: %5ldms (%.2f cycles/limb)\n",
+	      t,
+	      ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+
+      MPN_COPY (dx, dy, size+2);
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyy = mpn_mul_1 (dx+1, s1, size, xlimb);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("mpn_mul_1:    %5ldms (%.2f cycles/limb)\n",
+	      t,
+	      ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+
+      cyx = refmpn_mul_1 (dx+1, s1, size, xlimb);
+      cyy = mpn_mul_1 (dy+1, s1, size, xlimb);
+
+#ifdef PRINT
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+      mpn_print (dx+1, size);
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+      mpn_print (dy+1, size);
+#endif
+
+#ifndef NOCHECK
+      if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
+	  || dx[size+1] != 0x12345678 || dx[0] != 0x87654321)
+	{
+#ifndef PRINT
+	  printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+	  mpn_print (dx+1, size);
+	  printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+	  mpn_print (dy+1, size);
+#endif
+	  abort();
+	}
+#endif
+    }
+}
+
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  mp_size_t i;
+
+  for (i = size - 1; i >= 0; i--)
+    {
+      printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]);
+#ifdef SPACE
+      if (i != 0)
+	printf (" ");
+#endif
+    }
+  puts ("");
+}
diff --git a/mpn/tests/rshift.c b/mpn/tests/rshift.c
new file mode 100644
index 000000000..2482bf340
--- /dev/null
+++ b/mpn/tests/rshift.c
@@ -0,0 +1,227 @@
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+unsigned long
+cputime ()
+{
+    struct rusage rus;
+
+    getrusage (0, &rus);
+    return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+unsigned long
+cputime ()
+{
+  return CLOCK_TO_MILLISEC (clock ());
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 10000000
+#endif
+#ifndef SIZE
+#define SIZE 496
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+mp_limb_t
+refmpn_rshift (wp, up, usize, cnt)
+     register mp_ptr wp;
+     register mp_srcptr up;
+     mp_size_t usize;
+     register unsigned int cnt;
+{
+  register mp_limb_t high_limb, low_limb;
+  register unsigned sh_1, sh_2;
+  register mp_size_t i;
+  mp_limb_t retval;
+
+#ifdef DEBUG
+  if (usize == 0 || cnt == 0)
+    abort ();
+#endif
+
+  sh_1 = cnt;
+#if 0
+  if (sh_1 == 0)
+    {
+      if (wp != up)
+	{
+	  /* Copy from low end to high end, to allow specified input/output
+	     overlapping.  */
+	  for (i = 0; i < usize; i++)
+	    wp[i] = up[i];
+	}
+      return 0;
+    }
+#endif
+
+  wp -= 1;
+  sh_2 = BITS_PER_MP_LIMB - sh_1;
+  high_limb = up[0];
+  retval = high_limb << sh_2;
+  low_limb = high_limb;
+
+  for (i = 1; i < usize; i++)
+    {
+      high_limb = up[i];
+      wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
+      low_limb = high_limb;
+    }
+  low_limb >>= sh_1;
+  wp[i] = low_limb;
+
+  return retval;
+}
+
+#ifndef CNT
+#define CNT 4
+#endif
+
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  mp_limb_t s1[SIZE];
+  mp_limb_t dx[SIZE+2];
+  mp_limb_t dy[SIZE+2];
+  mp_limb_t cyx, cyy;
+  int i;
+  long t0, t;
+  int test;
+  int cnt = CNT;
+  mp_size_t size;
+
+  for (test = 0; ; test++)
+    {
+#ifdef RANDOM
+      size = (random () % SIZE + 1);
+#else
+      size = SIZE;
+#endif
+      mpn_random2 (s1, size);
+
+      dx[size+1] = 0x12345678;
+      dy[size+1] = 0x12345678;
+      dx[0] = 0x87654321;
+      dy[0] = 0x87654321;
+
+#ifdef PRINT
+      mpn_print (s1, size);
+#endif
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyx = refmpn_rshift (dx+1, s1, size, cnt);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("refmpn_rshift: %5ldms (%.2f cycles/limb)\n",
+	      t,
+	      ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx); mpn_print (dx+1, size);
+#endif
+
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyy = mpn_rshift (dx+1, s1, size, cnt);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("mpn_rshift:  %5ldms (%.2f cycles/limb)\n",
+	      t,
+	      ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy); mpn_print (dx+1, size);
+#endif
+
+#ifndef NOCHECK
+      /* Put garbage in the destination.  */
+      for (i = 1; i <= size; i++)
+	{
+	  dx[i] = 0x7654321;
+	  dy[i] = 0x1234567;
+	}
+
+      cyx = refmpn_rshift (dx+1, s1, size, cnt);
+      cyy = mpn_rshift (dy+1, s1, size, cnt);
+
+      if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
+	  || dx[size+1] != 0x12345678 || dx[0] != 0x87654321)
+	{
+#ifndef PRINT
+	  printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+	  mpn_print (dx+1, size);
+	  printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+	  mpn_print (dy+1, size);
+#endif
+	  abort();
+	}
+#endif
+    }
+}
+
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  mp_size_t i;
+
+  for (i = size - 1; i >= 0; i--)
+    {
+      printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]);
+#ifdef SPACE
+      if (i != 0)
+	printf (" ");
+#endif
+    }
+  puts ("");
+}
diff --git a/mpn/tests/sub_n.c b/mpn/tests/sub_n.c
new file mode 100644
index 000000000..2b9031bbf
--- /dev/null
+++ b/mpn/tests/sub_n.c
@@ -0,0 +1,211 @@
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+unsigned long
+cputime ()
+{
+    struct rusage rus;
+
+    getrusage (0, &rus);
+    return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+unsigned long
+cputime ()
+{
+  return CLOCK_TO_MILLISEC (clock ());
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 10000000
+#endif
+#ifndef SIZE
+#define SIZE 328
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+
+mp_limb_t
+#if __STDC__
+refmpn_sub_n (mp_ptr res_ptr,
+	       mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+refmpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     register mp_srcptr s2_ptr;
+     mp_size_t size;
+#endif
+{
+  register mp_limb_t x, y, cy;
+  register mp_size_t j;
+
+  /* The loop counter and index J goes from -SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  s1_ptr -= j;
+  s2_ptr -= j;
+  res_ptr -= j;
+
+  cy = 0;
+  do
+    {
+      y = s2_ptr[j];
+      x = s1_ptr[j];
+      y += cy;			/* add previous carry to subtrahend */
+      cy = (y < cy);		/* get out carry from that addition */
+      y = x - y;		/* main subtract */
+      cy = (y > x) + cy;	/* get out carry from the subtract, combine */
+      res_ptr[j] = y;
+    }
+  while (++j != 0);
+
+  return cy;
+}
+
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  mp_limb_t s1[SIZE];
+  mp_limb_t s2[SIZE];
+  mp_limb_t dx[SIZE+1];
+  mp_limb_t dy[SIZE+1];
+  int cyx, cyy;
+  int i;
+  long t0, t;
+  int test;
+  mp_size_t size;
+
+  for (test = 0; ; test++)
+    {
+#ifdef RANDOM
+      size = (random () % SIZE + 1);
+#else
+      size = SIZE;
+#endif
+
+      mpn_random2 (s1, size);
+      mpn_random2 (s2, size);
+
+      dx[size] = 0x12345678;
+      dy[size] = 0x12345678;
+
+#ifdef PRINT
+      mpn_print (s1, size);
+      mpn_print (s2, size);
+#endif
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyx = refmpn_sub_n (dx, s1, s2, size);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("refmpn_sub_n:   %ldms (%.2f cycles/limb)\n",
+	      t,
+	      ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+      printf ("%d ", cyx); mpn_print (dx, size);
+#endif
+
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyx = mpn_sub_n (dx, s1, s2, size);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("mpn_sub_n:   %ldms (%.2f cycles/limb)\n",
+	      t,
+	      ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+#ifdef PRINT
+      printf ("%d ", cyx); mpn_print (dx, size);
+#endif
+
+#ifndef NOCHECK
+      /* Put garbage in the destination.  */
+      for (i = 0; i < size; i++)
+	{
+	  dx[i] = 0x7654321;
+	  dy[i] = 0x1234567;
+	}
+
+      cyx = refmpn_sub_n (dx, s1, s2, size);
+      cyy = mpn_sub_n (dy, s1, s2, size);
+      if (cyx != cyy || mpn_cmp (dx, dy, size) != 0
+	  || dx[size] != 0x12345678 || dy[size] != 0x12345678)
+	{
+#ifndef PRINT
+	  printf ("%d ", cyx); mpn_print (dx, size);
+	  printf ("%d ", cyy); mpn_print (dy, size);
+#endif
+	  abort();
+	}
+#endif
+    }
+}
+
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  mp_size_t i;
+
+  for (i = size - 1; i >= 0; i--)
+    {
+      printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]);
+#ifdef SPACE
+      if (i != 0)
+	printf (" ");
+#endif
+    }
+  puts ("");
+}
diff --git a/mpn/tests/submul_1.c b/mpn/tests/submul_1.c
new file mode 100644
index 000000000..0e464e613
--- /dev/null
+++ b/mpn/tests/submul_1.c
@@ -0,0 +1,218 @@
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef USG
+#include <sys/time.h>
+#include <sys/resource.h>
+
+unsigned long
+cputime ()
+{
+    struct rusage rus;
+
+    getrusage (0, &rus);
+    return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
+}
+#else
+#include <time.h>
+
+#ifndef CLOCKS_PER_SEC
+#define CLOCKS_PER_SEC 1000000
+#endif
+
+#if CLOCKS_PER_SEC >= 10000
+#define CLOCK_TO_MILLISEC(cl) ((cl) / (CLOCKS_PER_SEC / 1000))
+#else
+#define CLOCK_TO_MILLISEC(cl) ((cl) * 1000 / CLOCKS_PER_SEC)
+#endif
+
+unsigned long
+cputime ()
+{
+  return CLOCK_TO_MILLISEC (clock ());
+}
+#endif
+
+#define M * 1000000
+
+#ifndef CLOCK
+#if defined (__m88k__)
+#define CLOCK 20 M
+#elif defined (__i386__)
+#define CLOCK (16.666667 M)
+#elif defined (__m68k__)
+#define CLOCK (20 M)
+#elif defined (_IBMR2)
+#define CLOCK (25 M)
+#elif defined (__sparc__)
+#define CLOCK (20 M)
+#elif defined (__sun__)
+#define CLOCK (20 M)
+#elif defined (__mips)
+#define CLOCK (40 M)
+#elif defined (__hppa__)
+#define CLOCK (50 M)
+#elif defined (__alpha)
+#define CLOCK (133 M)
+#else
+#error "Don't know CLOCK of your machine"
+#endif
+#endif
+
+#ifndef OPS
+#define OPS 20000000
+#endif
+#ifndef SIZE
+#define SIZE 496
+#endif
+#ifndef TIMES
+#define TIMES OPS/SIZE
+#else
+#undef OPS
+#define OPS (SIZE*TIMES)
+#endif
+
+mp_limb_t
+refmpn_submul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     mp_size_t s1_size;
+     register mp_limb_t s2_limb;
+{
+  register mp_limb_t cy_limb;
+  register mp_size_t j;
+  register mp_limb_t prod_high, prod_low;
+  register mp_limb_t x;
+
+  /* The loop counter and index J goes from -SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -s1_size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  res_ptr -= j;
+  s1_ptr -= j;
+
+  cy_limb = 0;
+  do
+    {
+      umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
+
+      prod_low += cy_limb;
+      cy_limb = (prod_low < cy_limb) + prod_high;
+
+      x = res_ptr[j];
+      prod_low = x - prod_low;
+      cy_limb += (prod_low > x);
+      res_ptr[j] = prod_low;
+    }
+  while (++j != 0);
+
+  return cy_limb;
+}
+
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  mp_limb_t s1[SIZE];
+  mp_limb_t dx[SIZE+2];
+  mp_limb_t dy[SIZE+2];
+  mp_limb_t cyx, cyy;
+  int i;
+  long t0, t;
+  int test;
+  mp_limb_t xlimb;
+  mp_size_t size;
+
+  for (test = 0; ; test++)
+    {
+#ifdef RANDOM
+      size = (random () % SIZE + 1);
+#else
+      size = SIZE;
+#endif
+
+      mpn_random2 (s1, size);
+      mpn_random2 (dy+1, size);
+
+      if (random () % 0x100 == 0)
+	xlimb = 0;
+      else
+	mpn_random2 (&xlimb, 1);
+
+      dy[size+1] = 0x12345678;
+      dy[0] = 0x87654321;
+
+#if defined (PRINT) || defined (XPRINT)
+      printf ("xlimb=%*lX\n", (int) (2 * sizeof(mp_limb_t)), xlimb);
+#endif
+#ifdef PRINT
+      mpn_print (dy+1, size);
+      mpn_print (s1, size);
+#endif
+
+      MPN_COPY (dx, dy, size+2);
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyx = refmpn_submul_1 (dx+1, s1, size, xlimb);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("refmpn_submul_1: %5ldms (%.2f cycles/limb)\n",
+	      t,
+	      ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+
+      MPN_COPY (dx, dy, size+2);
+      t0 = cputime();
+      for (i = 0; i < TIMES; i++)
+	cyy = mpn_submul_1 (dx+1, s1, size, xlimb);
+      t = cputime() - t0;
+#if TIMES != 1
+      printf ("mpn_submul_1:    %5ldms (%.2f cycles/limb)\n",
+	      t,
+	      ((double) t * CLOCK) / (OPS * 1000.0));
+#endif
+
+      MPN_COPY (dx, dy, size+2);
+      cyx = refmpn_submul_1 (dx+1, s1, size, xlimb);
+      cyy = mpn_submul_1 (dy+1, s1, size, xlimb);
+
+#ifdef PRINT
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+      mpn_print (dx+1, size);
+      printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+      mpn_print (dy+1, size);
+#endif
+
+#ifndef NOCHECK
+      if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
+	  || dx[size+1] != 0x12345678 || dx[0] != 0x87654321)
+	{
+#ifndef PRINT
+	  printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyx);
+	  mpn_print (dx+1, size);
+	  printf ("%*lX ", (int) (2 * sizeof(mp_limb_t)), cyy);
+	  mpn_print (dy+1, size);
+#endif
+	  abort();
+	}
+#endif
+    }
+}
+
+mpn_print (mp_ptr p, mp_size_t size)
+{
+  mp_size_t i;
+
+  for (i = size - 1; i >= 0; i--)
+    {
+      printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]);
+#ifdef SPACE
+      if (i != 0)
+	printf (" ");
+#endif
+    }
+  puts ("");
+}
diff --git a/mpn/tests/tst-addsub.c b/mpn/tests/tst-addsub.c
new file mode 100644
index 000000000..e02b9d5cc
--- /dev/null
+++ b/mpn/tests/tst-addsub.c
@@ -0,0 +1,164 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#define ADD 1
+#define SUB 2
+
+#ifndef METHOD
+#define METHOD ADD
+#endif
+
+#if METHOD == ADD
+#define REFCALL refmpn_add_n
+#define TESTCALL mpn_add_n
+#endif
+
+#if METHOD == SUB
+#define REFCALL refmpn_sub_n
+#define TESTCALL mpn_sub_n
+#endif
+
+mp_limb_t refmpn_add_n ();
+mp_limb_t refmpn_sub_n ();
+
+#define SIZE 100
+
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  mp_size_t alloc_size, max_size, size, i, cumul_size;
+  mp_ptr s1, s2, dx, dy;
+  int s1_align, s2_align, d_align;
+  long pass, n_passes;
+  mp_limb_t cx, cy;
+
+  max_size = SIZE;
+  n_passes = 1000000;
+
+  argc--; argv++;
+  if (argc)
+    {
+      max_size = atol (*argv);
+      argc--; argv++;
+    }
+
+  alloc_size = max_size + 32;
+  s1 = malloc (alloc_size * BYTES_PER_MP_LIMB);
+  s2 = malloc (alloc_size * BYTES_PER_MP_LIMB);
+  dx = malloc (alloc_size * BYTES_PER_MP_LIMB);
+  dy = malloc (alloc_size * BYTES_PER_MP_LIMB);
+
+  cumul_size = 0;
+  for (pass = 0; pass < n_passes; pass++)
+    {
+      cumul_size += size;
+      if (cumul_size >= 1000000)
+	{
+	  cumul_size -= 1000000;
+	  printf ("%d ", pass); fflush (stdout);
+	}
+      s1_align = random () % 32;
+      s2_align = random () % 32;
+      d_align = random () % 32;
+
+      size = random () % max_size + 1;
+
+      mpn_random2 (s1 + s1_align, size);
+      mpn_random2 (s2 + s2_align, size);
+
+      for (i = 0; i < alloc_size; i++)
+	dx[i] = dy[i] = i + 0x9876500;
+
+      cx = TESTCALL (dx + d_align, s1 + s1_align, s2 + s2_align, size);
+      cy = REFCALL (dy + d_align, s1 + s1_align, s2 + s2_align, size);
+
+      if (cx != cy || mpn_cmp (dx, dy, alloc_size) != 0)
+	abort ();
+    }
+
+  printf ("%d passes OK\n", n_passes);
+  exit (0);
+}
+
+mp_limb_t
+#if __STDC__
+refmpn_add_n (mp_ptr res_ptr,
+	      mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+refmpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     register mp_srcptr s2_ptr;
+     mp_size_t size;
+#endif
+{
+  register mp_limb_t x, y, cy;
+  register mp_size_t j;
+
+  /* The loop counter and index J goes from -SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  s1_ptr -= j;
+  s2_ptr -= j;
+  res_ptr -= j;
+
+  cy = 0;
+  do
+    {
+      y = s2_ptr[j];
+      x = s1_ptr[j];
+      y += cy;			/* add previous carry to one addend */
+      cy = (y < cy);		/* get out carry from that addition */
+      y = x + y;		/* add other addend */
+      cy = (y < x) + cy;	/* get out carry from that add, combine */
+      res_ptr[j] = y;
+    }
+  while (++j != 0);
+
+  return cy;
+}
+
+mp_limb_t
+#if __STDC__
+refmpn_sub_n (mp_ptr res_ptr,
+	       mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+#else
+refmpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
+     register mp_ptr res_ptr;
+     register mp_srcptr s1_ptr;
+     register mp_srcptr s2_ptr;
+     mp_size_t size;
+#endif
+{
+  register mp_limb_t x, y, cy;
+  register mp_size_t j;
+
+  /* The loop counter and index J goes from -SIZE to -1.  This way
+     the loop becomes faster.  */
+  j = -size;
+
+  /* Offset the base pointers to compensate for the negative indices.  */
+  s1_ptr -= j;
+  s2_ptr -= j;
+  res_ptr -= j;
+
+  cy = 0;
+  do
+    {
+      y = s2_ptr[j];
+      x = s1_ptr[j];
+      y += cy;			/* add previous carry to subtrahend */
+      cy = (y < cy);		/* get out carry from that addition */
+      y = x - y;		/* main subtract */
+      cy = (y > x) + cy;	/* get out carry from the subtract, combine */
+      res_ptr[j] = y;
+    }
+  while (++j != 0);
+
+  return cy;
+}
diff --git a/mpn/vax/add_n.s b/mpn/vax/add_n.s
new file mode 100644
index 000000000..d4764e23a
--- /dev/null
+++ b/mpn/vax/add_n.s
@@ -0,0 +1,48 @@
+# VAX __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+# sum in a third limb vector.
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	(sp + 4)
+# s1_ptr	(sp + 8)
+# s2_ptr	(sp + 12)
+# size		(sp + 16)
+
+.text
+	.align 1
+.globl ___mpn_add_n
+___mpn_add_n:
+	.word	0x0
+	movl	16(ap),r0
+	movl	12(ap),r1
+	movl	8(ap),r2
+	movl	4(ap),r3
+	subl2	r4,r4
+
+Loop:
+	movl	(r2)+,r4
+	adwc	(r1)+,r4
+	movl	r4,(r3)+
+	jsobgtr	r0,Loop
+
+	adwc	r0,r0
+	ret
diff --git a/mpn/vax/addmul_1.s b/mpn/vax/addmul_1.s
new file mode 100644
index 000000000..746d95ba7
--- /dev/null
+++ b/mpn/vax/addmul_1.s
@@ -0,0 +1,126 @@
+# VAX __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+# the result to a second limb vector.
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	(sp + 4)
+# s1_ptr	(sp + 8)
+# size		(sp + 12)
+# s2_limb	(sp + 16)
+
+.text
+	.align 1
+.globl ___mpn_addmul_1
+___mpn_addmul_1:
+	.word	0xfc0
+	movl	12(ap),r4
+	movl	8(ap),r8
+	movl	4(ap),r9
+	movl	16(ap),r6
+	jlss	s2_big
+
+	clrl	r3
+	incl	r4
+	ashl	$-1,r4,r7
+	jlbc	r4,L1
+	clrl	r11
+
+# Loop for S2_LIMB < 0x80000000
+Loop1:	movl	(r8)+,r1
+	jlss	L1n0
+	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	$0,r3
+	addl2	r2,(r9)+
+	adwc	$0,r3
+L1:	movl	(r8)+,r1
+	jlss	L1n1
+L1p1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	$0,r11
+	addl2	r10,(r9)+
+	adwc	$0,r11
+
+	jsobgtr	r7,Loop1
+	movl	r11,r0
+	ret
+
+L1n0:	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	r6,r3
+	addl2	r2,(r9)+
+	adwc	$0,r3
+	movl	(r8)+,r1
+	jgeq	L1p1
+L1n1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r6,r11
+	addl2	r10,(r9)+
+	adwc	$0,r11
+
+	jsobgtr	r7,Loop1
+	movl	r11,r0
+	ret
+
+
+s2_big:	clrl	r3
+	incl	r4
+	ashl	$-1,r4,r7
+	jlbc	r4,L2
+	clrl	r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2:	movl	(r8)+,r1
+	jlss	L2n0
+	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	r1,r3
+	addl2	r2,(r9)+
+	adwc	$0,r3
+L2:	movl	(r8)+,r1
+	jlss	L2n1
+L2p1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r1,r11
+	addl2	r10,(r9)+
+	adwc	$0,r11
+
+	jsobgtr	r7,Loop2
+	movl	r11,r0
+	ret
+
+L2n0:	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	r6,r3
+	addl2	r2,(r9)+
+	adwc	r1,r3
+	movl	(r8)+,r1
+	jgeq	L2p1
+L2n1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r6,r11
+	addl2	r10,(r9)+
+	adwc	r1,r11
+
+	jsobgtr	r7,Loop2
+	movl	r11,r0
+	ret
diff --git a/mpn/vax/gmp-mparam.h b/mpn/vax/gmp-mparam.h
new file mode 100644
index 000000000..d909cd2a5
--- /dev/null
+++ b/mpn/vax/gmp-mparam.h
@@ -0,0 +1,29 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+#define IEEE_DOUBLE_BIG_ENDIAN 0
diff --git a/mpn/vax/mul_1.s b/mpn/vax/mul_1.s
new file mode 100644
index 000000000..e2ff5a1bc
--- /dev/null
+++ b/mpn/vax/mul_1.s
@@ -0,0 +1,123 @@
+# VAX __mpn_mul_1 -- Multiply a limb vector with a limb and store
+# the result in a second limb vector.
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	(sp + 4)
+# s1_ptr	(sp + 8)
+# size		(sp + 12)
+# s2_limb	(sp + 16)
+
+.text
+	.align 1
+.globl ___mpn_mul_1
+___mpn_mul_1:
+	.word	0xfc0
+	movl	12(ap),r4
+	movl	8(ap),r8
+	movl	4(ap),r9
+	movl	16(ap),r6
+	jlss	s2_big
+
+# One might want to combine the addl2 and the store below, but that
+# is actually just slower according to my timing tests.  (VAX 3600)
+
+	clrl	r3
+	incl	r4
+	ashl	$-1,r4,r7
+	jlbc	r4,L1
+	clrl	r11
+
+# Loop for S2_LIMB < 0x80000000
+Loop1:	movl	(r8)+,r1
+	jlss	L1n0
+	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	$0,r3
+	movl	r2,(r9)+
+L1:	movl	(r8)+,r1
+	jlss	L1n1
+L1p1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	$0,r11
+	movl	r10,(r9)+
+
+	jsobgtr	r7,Loop1
+	movl	r11,r0
+	ret
+
+L1n0:	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	r6,r3
+	movl	r2,(r9)+
+	movl	(r8)+,r1
+	jgeq	L1p1
+L1n1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r6,r11
+	movl	r10,(r9)+
+
+	jsobgtr	r7,Loop1
+	movl	r11,r0
+	ret
+
+
+s2_big:	clrl	r3
+	incl	r4
+	ashl	$-1,r4,r7
+	jlbc	r4,L2
+	clrl	r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2:	movl	(r8)+,r1
+	jlss	L2n0
+	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	r1,r3
+	movl	r2,(r9)+
+L2:	movl	(r8)+,r1
+	jlss	L2n1
+L2p1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r1,r11
+	movl	r10,(r9)+
+
+	jsobgtr	r7,Loop2
+	movl	r11,r0
+	ret
+
+L2n0:	emul	r1,r6,$0,r2
+	addl2	r1,r3
+	addl2	r11,r2
+	adwc	r6,r3
+	movl	r2,(r9)+
+	movl	(r8)+,r1
+	jgeq	L2p1
+L2n1:	emul	r1,r6,$0,r10
+	addl2	r1,r11
+	addl2	r3,r10
+	adwc	r6,r11
+	movl	r10,(r9)+
+
+	jsobgtr	r7,Loop2
+	movl	r11,r0
+	ret
diff --git a/mpn/vax/sub_n.s b/mpn/vax/sub_n.s
new file mode 100644
index 000000000..a891c4425
--- /dev/null
+++ b/mpn/vax/sub_n.s
@@ -0,0 +1,48 @@
+# VAX __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
+# difference in a third limb vector.
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	(sp + 4)
+# s1_ptr	(sp + 8)
+# s2_ptr	(sp + 12)
+# size		(sp + 16)
+
+.text
+	.align 1
+.globl ___mpn_sub_n
+___mpn_sub_n:
+	.word	0x0
+	movl	16(ap),r0
+	movl	12(ap),r1
+	movl	8(ap),r2
+	movl	4(ap),r3
+	subl2	r4,r4
+
+Loop:
+	movl	(r2)+,r4
+	sbwc	(r1)+,r4
+	movl	r4,(r3)+
+	jsobgtr	r0,Loop
+
+	adwc	r0,r0
+	ret
diff --git a/mpn/vax/submul_1.s b/mpn/vax/submul_1.s
new file mode 100644
index 000000000..c473937ca
--- /dev/null
+++ b/mpn/vax/submul_1.s
@@ -0,0 +1,126 @@
+# VAX __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+# the result from a second limb vector.
+
+# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+# This file is part of the GNU MP Library.
+
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or (at your
+# option) any later version.
+
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+# License for more details.
+
+# You should have received a copy of the GNU Library General Public License
+# along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+# the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+# MA 02111-1307, USA.
+
+
+# INPUT PARAMETERS
+# res_ptr	(sp + 4)
+# s1_ptr	(sp + 8)
+# size		(sp + 12)
+# s2_limb	(sp + 16)
+
+.text
+	.align 1
+.globl ___mpn_submul_1
+___mpn_submul_1:
+	.word	0xfc0
+	movl	12(ap),r4
+	movl	8(ap),r8
+	movl	4(ap),r9
+	movl	16(ap),r6
+	jlss	s2_big
+
+	clrl	r3
+	incl	r4
+	ashl	$-1,r4,r7
+	jlbc	r4,L1
+	clrl	r11
+
+# Loop for S2_LIMB < 0x80000000
+Loop1:	movl	(r8)+,r1
+	jlss	L1n0
+	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	$0,r3
+	subl2	r2,(r9)+
+	adwc	$0,r3
+L1:	movl	(r8)+,r1
+	jlss	L1n1
+L1p1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	$0,r11
+	subl2	r10,(r9)+
+	adwc	$0,r11
+
+	jsobgtr	r7,Loop1
+	movl	r11,r0
+	ret
+
+L1n0:	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	r6,r3
+	subl2	r2,(r9)+
+	adwc	$0,r3
+	movl	(r8)+,r1
+	jgeq	L1p1
+L1n1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r6,r11
+	subl2	r10,(r9)+
+	adwc	$0,r11
+
+	jsobgtr	r7,Loop1
+	movl	r11,r0
+	ret
+
+
+s2_big:	clrl	r3
+	incl	r4
+	ashl	$-1,r4,r7
+	jlbc	r4,L2
+	clrl	r11
+
+# Loop for S2_LIMB >= 0x80000000
+Loop2:	movl	(r8)+,r1
+	jlss	L2n0
+	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	r1,r3
+	subl2	r2,(r9)+
+	adwc	$0,r3
+L2:	movl	(r8)+,r1
+	jlss	L2n1
+L2p1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r1,r11
+	subl2	r10,(r9)+
+	adwc	$0,r11
+
+	jsobgtr	r7,Loop2
+	movl	r11,r0
+	ret
+
+L2n0:	emul	r1,r6,$0,r2
+	addl2	r11,r2
+	adwc	r6,r3
+	subl2	r2,(r9)+
+	adwc	r1,r3
+	movl	(r8)+,r1
+	jgeq	L2p1
+L2n1:	emul	r1,r6,$0,r10
+	addl2	r3,r10
+	adwc	r6,r11
+	subl2	r10,(r9)+
+	adwc	r1,r11
+
+	jsobgtr	r7,Loop2
+	movl	r11,r0
+	ret
diff --git a/mpn/x86/add_n.S b/mpn/x86/add_n.S
new file mode 100644
index 000000000..9c1b13322
--- /dev/null
+++ b/mpn/x86/add_n.S
@@ -0,0 +1,106 @@
+/* i80386 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+sum in a third limb vector.
+
+Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s1_ptr	(sp + 8)
+  s2_ptr	(sp + 12)
+  size		(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(__mpn_add_n)
+C_SYMBOL_NAME(__mpn_add_n:)
+	pushl %edi
+	pushl %esi
+
+	movl 12(%esp),%edi		/* res_ptr */
+	movl 16(%esp),%esi		/* s1_ptr */
+	movl 20(%esp),%edx		/* s2_ptr */
+	movl 24(%esp),%ecx		/* size */
+
+	movl	%ecx,%eax
+	shrl	$3,%ecx			/* compute count for unrolled loop */
+	negl	%eax
+	andl	$7,%eax			/* get index where to start loop */
+	jz	Loop			/* necessary special case for 0 */
+	incl	%ecx			/* adjust loop count */
+	shll	$2,%eax			/* adjustment for pointers... */
+	subl	%eax,%edi		/* ... since they are offset ... */
+	subl	%eax,%esi		/* ... by a constant when we ... */
+	subl	%eax,%edx		/* ... enter the loop */
+	shrl	$2,%eax			/* restore previous value */
+#ifdef PIC
+/* Calculate start address in loop for PIC.  Due to limitations in some
+   assemblers, Loop-L0-3 cannot be put into the leal */
+	call	L0
+L0:	leal	(%eax,%eax,8),%eax
+	addl	(%esp),%eax
+	addl	$(Loop-L0-3),%eax 
+	addl	$4,%esp
+#else
+/* Calculate start address in loop for non-PIC.  */
+ 	leal	(Loop - 3)(%eax,%eax,8),%eax
+#endif
+	jmp	*%eax			/* jump into loop */
+	ALIGN (3)
+Loop:	movl	(%esi),%eax
+	adcl	(%edx),%eax
+	movl	%eax,(%edi)
+	movl	4(%esi),%eax
+	adcl	4(%edx),%eax
+	movl	%eax,4(%edi)
+	movl	8(%esi),%eax
+	adcl	8(%edx),%eax
+	movl	%eax,8(%edi)
+	movl	12(%esi),%eax
+	adcl	12(%edx),%eax
+	movl	%eax,12(%edi)
+	movl	16(%esi),%eax
+	adcl	16(%edx),%eax
+	movl	%eax,16(%edi)
+	movl	20(%esi),%eax
+	adcl	20(%edx),%eax
+	movl	%eax,20(%edi)
+	movl	24(%esi),%eax
+	adcl	24(%edx),%eax
+	movl	%eax,24(%edi)
+	movl	28(%esi),%eax
+	adcl	28(%edx),%eax
+	movl	%eax,28(%edi)
+	leal	32(%edi),%edi
+	leal	32(%esi),%esi
+	leal	32(%edx),%edx
+	decl	%ecx
+	jnz	Loop
+
+	sbbl	%eax,%eax
+	negl	%eax
+
+	popl %esi
+	popl %edi
+	ret
diff --git a/mpn/x86/addmul_1.S b/mpn/x86/addmul_1.S
new file mode 100644
index 000000000..c11209d92
--- /dev/null
+++ b/mpn/x86/addmul_1.S
@@ -0,0 +1,76 @@
+/* i80386 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+   the result to a second limb vector.
+
+Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+   INPUT PARAMETERS
+   res_ptr	(sp + 4)
+   s1_ptr	(sp + 8)
+   size		(sp + 12)
+   s2_limb	(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+	TEXT
+	ALIGN (3)
+	GLOBL	C_SYMBOL_NAME(__mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1:)
+
+	INSN1(push,l	,R(edi))
+	INSN1(push,l	,R(esi))
+	INSN1(push,l	,R(ebx))
+	INSN1(push,l	,R(ebp))
+
+	INSN2(mov,l	,R(res_ptr),MEM_DISP(esp,20))
+	INSN2(mov,l	,R(s1_ptr),MEM_DISP(esp,24))
+	INSN2(mov,l	,R(size),MEM_DISP(esp,28))
+	INSN2(mov,l	,R(s2_limb),MEM_DISP(esp,32))
+
+	INSN2(lea,l	,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+	INSN2(lea,l	,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+	INSN1(neg,l	,R(size))
+	INSN2(xor,l	,R(ebx),R(ebx))
+	ALIGN (3)
+Loop:
+	INSN2(mov,l	,R(eax),MEM_INDEX(s1_ptr,size,4))
+	INSN1(mul,l	,R(s2_limb))
+	INSN2(add,l	,R(eax),R(ebx))
+	INSN2(adc,l	,R(edx),$0)
+	INSN2(add,l	,MEM_INDEX(res_ptr,size,4),R(eax))
+	INSN2(adc,l	,R(edx),$0)
+	INSN2(mov,l	,R(ebx),R(edx))
+
+	INSN1(inc,l	,R(size))
+	INSN1(jnz,	,Loop)
+	INSN2(mov,l	,R(eax),R(ebx))
+
+	INSN1(pop,l	,R(ebp))
+	INSN1(pop,l	,R(ebx))
+	INSN1(pop,l	,R(esi))
+	INSN1(pop,l	,R(edi))
+	ret
diff --git a/mpn/x86/gmp-mparam.h b/mpn/x86/gmp-mparam.h
new file mode 100644
index 000000000..d909cd2a5
--- /dev/null
+++ b/mpn/x86/gmp-mparam.h
@@ -0,0 +1,29 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 32
+#define BYTES_PER_MP_LIMB 4
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 32
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
+
+#define IEEE_DOUBLE_BIG_ENDIAN 0
diff --git a/mpn/x86/lshift.S b/mpn/x86/lshift.S
new file mode 100644
index 000000000..8173b92cb
--- /dev/null
+++ b/mpn/x86/lshift.S
@@ -0,0 +1,85 @@
+/* i80386 __mpn_lshift -- 
+
+Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s_ptr		(sp + 8)
+  size		(sp + 12)
+  cnt		(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(__mpn_lshift)
+C_SYMBOL_NAME(__mpn_lshift:)
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+
+	movl	16(%esp),%edi		/* res_ptr */
+	movl	20(%esp),%esi		/* s_ptr */
+	movl	24(%esp),%edx		/* size */
+	movl	28(%esp),%ecx		/* cnt */
+
+	subl	$4,%esi			/* adjust s_ptr */
+
+	movl	(%esi,%edx,4),%ebx	/* read most significant limb */
+	xorl	%eax,%eax
+	shldl	%cl,%ebx,%eax		/* compute carry limb */
+	decl	%edx
+	jz	Lend
+	pushl	%eax			/* push carry limb onto stack */
+	testb	$1,%edx
+	jnz	L1			/* enter loop in the middle */
+	movl	%ebx,%eax
+
+	ALIGN (3)
+Loop:	movl	(%esi,%edx,4),%ebx	/* load next lower limb */
+	shldl	%cl,%ebx,%eax		/* compute result limb */
+	movl	%eax,(%edi,%edx,4)	/* store it */
+	decl	%edx
+L1:	movl	(%esi,%edx,4),%eax
+	shldl	%cl,%eax,%ebx
+	movl	%ebx,(%edi,%edx,4)
+	decl	%edx
+	jnz	Loop
+
+	shll	%cl,%eax		/* compute least significant limb */
+	movl	%eax,(%edi)		/* store it */
+
+	popl	%eax			/* pop carry limb */
+
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+Lend:	shll	%cl,%ebx		/* compute least significant limb */
+	movl	%ebx,(%edi)		/* store it */
+
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
diff --git a/mpn/x86/mul_1.S b/mpn/x86/mul_1.S
new file mode 100644
index 000000000..b27139998
--- /dev/null
+++ b/mpn/x86/mul_1.S
@@ -0,0 +1,75 @@
+/* i80386 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+   the result in a second limb vector.
+
+Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+   INPUT PARAMETERS
+   res_ptr	(sp + 4)
+   s1_ptr	(sp + 8)
+   size		(sp + 12)
+   s2_limb	(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+	TEXT
+	ALIGN (3)
+	GLOBL	C_SYMBOL_NAME(__mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1:)
+
+	INSN1(push,l	,R(edi))
+	INSN1(push,l	,R(esi))
+	INSN1(push,l	,R(ebx))
+	INSN1(push,l	,R(ebp))
+
+	INSN2(mov,l	,R(res_ptr),MEM_DISP(esp,20))
+	INSN2(mov,l	,R(s1_ptr),MEM_DISP(esp,24))
+	INSN2(mov,l	,R(size),MEM_DISP(esp,28))
+	INSN2(mov,l	,R(s2_limb),MEM_DISP(esp,32))
+
+	INSN2(lea,l	,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+	INSN2(lea,l	,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+	INSN1(neg,l	,R(size))
+	INSN2(xor,l	,R(ebx),R(ebx))
+	ALIGN (3)
+Loop:
+	INSN2(mov,l	,R(eax),MEM_INDEX(s1_ptr,size,4))
+	INSN1(mul,l	,R(s2_limb))
+	INSN2(add,l	,R(eax),R(ebx))
+	INSN2(mov,l	,MEM_INDEX(res_ptr,size,4),R(eax))
+	INSN2(adc,l	,R(edx),$0)
+	INSN2(mov,l	,R(ebx),R(edx))
+
+	INSN1(inc,l	,R(size))
+	INSN1(jnz,	,Loop)
+	INSN2(mov,l	,R(eax),R(ebx))
+
+	INSN1(pop,l	,R(ebp))
+	INSN1(pop,l	,R(ebx))
+	INSN1(pop,l	,R(esi))
+	INSN1(pop,l	,R(edi))
+	ret
diff --git a/mpn/x86/pentium/README b/mpn/x86/pentium/README
new file mode 100644
index 000000000..d73b08268
--- /dev/null
+++ b/mpn/x86/pentium/README
@@ -0,0 +1,26 @@
+This directory contains mpn functions optimized for Intel Pentium
+processors.
+
+RELEVANT OPTIMIZATION ISSUES
+
+1. Pentium doesn't allocate cache lines on writes, unlike most other modern
+processors.  Since the functions in the mpn class do array writes, we have to
+handle allocating the destination cache lines by reading a word from it in the
+loops, to achieve the best performance.
+
+2. Pairing of memory operations requires that the two issued operations refer
+to different cache banks.  The simplest way to insure this is to read/write
+two words from the same object.  If we make operations on different objects,
+they might or might not be to the same cache bank.
+
+STATUS
+
+1. mpn_lshift and mpn_rshift run at about 6 cycles/limb, but the Pentium
+documentation indicates that they should take only 43/8 = 5.375 cycles/limb,
+or 5 cycles/limb asymptotically.
+
+2. mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb.  Due to loop
+overhead and other delays (cache refill?), they run at or near 2.5 cycles/limb.
+
+3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they
+should...
diff --git a/mpn/x86/pentium/add_n.S b/mpn/x86/pentium/add_n.S
new file mode 100644
index 000000000..ac6f2819b
--- /dev/null
+++ b/mpn/x86/pentium/add_n.S
@@ -0,0 +1,130 @@
+/* Pentium __mpn_add_n -- Add two limb vectors of the same length > 0 and store
+   sum in a third limb vector.
+
+Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+   INPUT PARAMETERS
+   res_ptr	(sp + 4)
+   s1_ptr	(sp + 8)
+   s2_ptr	(sp + 12)
+   size		(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(__mpn_add_n)
+C_SYMBOL_NAME(__mpn_add_n:)
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp
+
+	movl	20(%esp),%edi		/* res_ptr */
+	movl	24(%esp),%esi		/* s1_ptr */
+	movl	28(%esp),%ebp		/* s2_ptr */
+	movl	32(%esp),%ecx		/* size */
+
+	movl	(%ebp),%ebx
+
+	decl	%ecx
+	movl	%ecx,%edx
+	shrl	$3,%ecx
+	andl	$7,%edx
+	testl	%ecx,%ecx		/* zero carry flag */
+	jz	Lend
+	pushl	%edx
+
+	ALIGN (3)
+Loop:	movl	28(%edi),%eax		/* fetch destination cache line */
+	leal	32(%edi),%edi
+
+L1:	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	adcl	%ebx,%eax
+	movl	4(%ebp),%ebx
+	adcl	%ebx,%edx
+	movl	8(%ebp),%ebx
+	movl	%eax,-32(%edi)
+	movl	%edx,-28(%edi)
+
+L2:	movl	8(%esi),%eax
+	movl	12(%esi),%edx
+	adcl	%ebx,%eax
+	movl	12(%ebp),%ebx
+	adcl	%ebx,%edx
+	movl	16(%ebp),%ebx
+	movl	%eax,-24(%edi)
+	movl	%edx,-20(%edi)
+
+L3:	movl	16(%esi),%eax
+	movl	20(%esi),%edx
+	adcl	%ebx,%eax
+	movl	20(%ebp),%ebx
+	adcl	%ebx,%edx
+	movl	24(%ebp),%ebx
+	movl	%eax,-16(%edi)
+	movl	%edx,-12(%edi)
+
+L4:	movl	24(%esi),%eax
+	movl	28(%esi),%edx
+	adcl	%ebx,%eax
+	movl	28(%ebp),%ebx
+	adcl	%ebx,%edx
+	movl	32(%ebp),%ebx
+	movl	%eax,-8(%edi)
+	movl	%edx,-4(%edi)
+
+	leal	32(%esi),%esi
+	leal	32(%ebp),%ebp
+	decl	%ecx
+	jnz	Loop
+
+	popl	%edx
+Lend:
+	decl	%edx			/* test %edx w/o clobbering carry */
+	js	Lend2
+	incl	%edx
+Loop2:
+	leal	4(%edi),%edi
+	movl	(%esi),%eax
+	adcl	%ebx,%eax
+	movl	4(%ebp),%ebx
+	movl	%eax,-4(%edi)
+	leal	4(%esi),%esi
+	leal	4(%ebp),%ebp
+	decl	%edx
+	jnz	Loop2
+Lend2:
+	movl	(%esi),%eax
+	adcl	%ebx,%eax
+	movl	%eax,(%edi)
+
+	sbbl	%eax,%eax
+	negl	%eax
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
diff --git a/mpn/x86/pentium/addmul_1.S b/mpn/x86/pentium/addmul_1.S
new file mode 100644
index 000000000..7cfa5db68
--- /dev/null
+++ b/mpn/x86/pentium/addmul_1.S
@@ -0,0 +1,83 @@
+/* Pentium __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+   the result to a second limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+   INPUT PARAMETERS
+   res_ptr	(sp + 4)
+   s1_ptr	(sp + 8)
+   size		(sp + 12)
+   s2_limb	(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+	TEXT
+	ALIGN (3)
+	GLOBL	C_SYMBOL_NAME(__mpn_addmul_1)
+C_SYMBOL_NAME(__mpn_addmul_1:)
+
+	INSN1(push,l	,R(edi))
+	INSN1(push,l	,R(esi))
+	INSN1(push,l	,R(ebx))
+	INSN1(push,l	,R(ebp))
+
+	INSN2(mov,l	,R(res_ptr),MEM_DISP(esp,20))
+	INSN2(mov,l	,R(s1_ptr),MEM_DISP(esp,24))
+	INSN2(mov,l	,R(size),MEM_DISP(esp,28))
+	INSN2(mov,l	,R(s2_limb),MEM_DISP(esp,32))
+
+	INSN2(lea,l	,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+	INSN2(lea,l	,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+	INSN1(neg,l	,R(size))
+	INSN2(xor,l	,R(ebx),R(ebx))
+	ALIGN (3)
+
+Loop:	INSN2(adc,l	,R(ebx),$0)
+	INSN2(mov,l	,R(eax),MEM_INDEX(s1_ptr,size,4))
+
+	INSN1(mul,l	,R(s2_limb))
+
+	INSN2(add,l	,R(eax),R(ebx))
+	INSN2(mov,l	,R(ebx),MEM_INDEX(res_ptr,size,4))
+
+	INSN2(adc,l	,R(edx),$0)
+	INSN2(add,l	,R(ebx),R(eax))
+
+	INSN2(mov,l	,MEM_INDEX(res_ptr,size,4),R(ebx))
+	INSN1(inc,l	,R(size))
+
+	INSN2(mov,l	,R(ebx),R(edx))
+	INSN1(jnz,	,Loop)
+
+	INSN2(adc,l	,R(ebx),$0)
+	INSN2(mov,l	,R(eax),R(ebx))
+	INSN1(pop,l	,R(ebp))
+	INSN1(pop,l	,R(ebx))
+	INSN1(pop,l	,R(esi))
+	INSN1(pop,l	,R(edi))
+	ret
diff --git a/mpn/x86/pentium/lshift.S b/mpn/x86/pentium/lshift.S
new file mode 100644
index 000000000..b29898356
--- /dev/null
+++ b/mpn/x86/pentium/lshift.S
@@ -0,0 +1,217 @@
+/* Pentium optimized __mpn_lshift -- 
+
+Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s_ptr		(sp + 8)
+  size		(sp + 12)
+  cnt		(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(__mpn_lshift)
+C_SYMBOL_NAME(__mpn_lshift:)
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp
+
+	movl	20(%esp),%edi		/* res_ptr */
+	movl	24(%esp),%esi		/* s_ptr */
+	movl	28(%esp),%ebp		/* size */
+	movl	32(%esp),%ecx		/* cnt */
+
+/* We can use faster code for shift-by-1 under certain conditions.  */
+	cmp	$1,%ecx
+	jne	Lnormal
+	leal	4(%esi),%eax
+	cmpl	%edi,%eax
+	jnc	Lspecial		/* jump if s_ptr + 1 >= res_ptr */
+	leal	(%esi,%ebp,4),%eax
+	cmpl	%eax,%edi
+	jnc	Lspecial		/* jump if res_ptr >= s_ptr + size */
+
+Lnormal:
+	leal	-4(%edi,%ebp,4),%edi
+	leal	-4(%esi,%ebp,4),%esi
+
+	movl	(%esi),%edx
+	subl	$4,%esi
+	xorl	%eax,%eax
+	shldl	%cl,%edx,%eax		/* compute carry limb */
+	pushl	%eax			/* push carry limb onto stack */
+
+	decl	%ebp
+	pushl	%ebp
+	shrl	$3,%ebp
+	jz	Lend
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+Loop:	movl	-28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebx
+
+	movl	(%esi),%eax
+	movl	-4(%esi),%edx
+	shldl	%cl,%eax,%ebx
+	shldl	%cl,%edx,%eax
+	movl	%ebx,(%edi)
+	movl	%eax,-4(%edi)
+
+	movl	-8(%esi),%ebx
+	movl	-12(%esi),%eax
+	shldl	%cl,%ebx,%edx
+	shldl	%cl,%eax,%ebx
+	movl	%edx,-8(%edi)
+	movl	%ebx,-12(%edi)
+
+	movl	-16(%esi),%edx
+	movl	-20(%esi),%ebx
+	shldl	%cl,%edx,%eax
+	shldl	%cl,%ebx,%edx
+	movl	%eax,-16(%edi)
+	movl	%edx,-20(%edi)
+
+	movl	-24(%esi),%eax
+	movl	-28(%esi),%edx
+	shldl	%cl,%eax,%ebx
+	shldl	%cl,%edx,%eax
+	movl	%ebx,-24(%edi)
+	movl	%eax,-28(%edi)
+
+	subl	$32,%esi
+	subl	$32,%edi
+	decl	%ebp
+	jnz	Loop
+
+Lend:	popl	%ebp
+	andl	$7,%ebp
+	jz	Lend2
+Loop2:	movl	(%esi),%eax
+	shldl	%cl,%eax,%edx
+	movl	%edx,(%edi)
+	movl	%eax,%edx
+	subl	$4,%esi
+	subl	$4,%edi
+	decl	%ebp
+	jnz	Loop2
+
+Lend2:	shll	%cl,%edx		/* compute least significant limb */
+	movl	%edx,(%edi)		/* store it */
+
+	popl	%eax			/* pop carry limb */
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+/* We loop from least significant end of the arrays, which is only
+   permissable if the source and destination don't overlap, since the
+   function is documented to work for overlapping source and destination.
+*/
+
+Lspecial:
+	movl	(%esi),%edx
+	addl	$4,%esi
+
+	decl	%ebp
+	pushl	%ebp
+	shrl	$3,%ebp
+
+	addl	%edx,%edx
+	incl	%ebp
+	decl	%ebp
+	jz	LLend
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+LLoop:	movl	28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebx
+
+	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	adcl	%eax,%eax
+	movl	%ebx,(%edi)
+	adcl	%edx,%edx
+	movl	%eax,4(%edi)
+
+	movl	8(%esi),%ebx
+	movl	12(%esi),%eax
+	adcl	%ebx,%ebx
+	movl	%edx,8(%edi)
+	adcl	%eax,%eax
+	movl	%ebx,12(%edi)
+
+	movl	16(%esi),%edx
+	movl	20(%esi),%ebx
+	adcl	%edx,%edx
+	movl	%eax,16(%edi)
+	adcl	%ebx,%ebx
+	movl	%edx,20(%edi)
+
+	movl	24(%esi),%eax
+	movl	28(%esi),%edx
+	adcl	%eax,%eax
+	movl	%ebx,24(%edi)
+	adcl	%edx,%edx
+	movl	%eax,28(%edi)
+
+	leal	32(%esi),%esi		/* use leal not to clobber carry */
+	leal	32(%edi),%edi
+	decl	%ebp
+	jnz	LLoop
+
+LLend:	popl	%ebp
+	sbbl	%eax,%eax		/* save carry in %eax */
+	andl	$7,%ebp
+	jz	LLend2
+	addl	%eax,%eax		/* restore carry from eax */
+LLoop2:	movl	%edx,%ebx
+	movl	(%esi),%edx
+	adcl	%edx,%edx
+	movl	%ebx,(%edi)
+
+	leal	4(%esi),%esi		/* use leal not to clobber carry */
+	leal	4(%edi),%edi
+	decl	%ebp
+	jnz	LLoop2
+
+	jmp	LL1
+LLend2:	addl	%eax,%eax		/* restore carry from eax */
+LL1:	movl	%edx,(%edi)		/* store last limb */
+
+	sbbl	%eax,%eax
+	negl	%eax
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
diff --git a/mpn/x86/pentium/mul_1.S b/mpn/x86/pentium/mul_1.S
new file mode 100644
index 000000000..4ac3050a6
--- /dev/null
+++ b/mpn/x86/pentium/mul_1.S
@@ -0,0 +1,79 @@
+/* Pentium __mpn_mul_1 -- Multiply a limb vector with a limb and store
+   the result in a second limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+   INPUT PARAMETERS
+   res_ptr	(sp + 4)
+   s1_ptr	(sp + 8)
+   size		(sp + 12)
+   s2_limb	(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+	TEXT
+	ALIGN (3)
+	GLOBL	C_SYMBOL_NAME(__mpn_mul_1)
+C_SYMBOL_NAME(__mpn_mul_1:)
+
+	INSN1(push,l	,R(edi))
+	INSN1(push,l	,R(esi))
+	INSN1(push,l	,R(ebx))
+	INSN1(push,l	,R(ebp))
+
+	INSN2(mov,l	,R(res_ptr),MEM_DISP(esp,20))
+	INSN2(mov,l	,R(s1_ptr),MEM_DISP(esp,24))
+	INSN2(mov,l	,R(size),MEM_DISP(esp,28))
+	INSN2(mov,l	,R(s2_limb),MEM_DISP(esp,32))
+
+	INSN2(lea,l	,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+	INSN2(lea,l	,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+	INSN1(neg,l	,R(size))
+	INSN2(xor,l	,R(ebx),R(ebx))
+	ALIGN (3)
+
+Loop:	INSN2(adc,l	,R(ebx),$0)
+	INSN2(mov,l	,R(eax),MEM_INDEX(s1_ptr,size,4))
+
+	INSN1(mul,l	,R(s2_limb))
+
+	INSN2(add,l	,R(ebx),R(eax))
+
+	INSN2(mov,l	,MEM_INDEX(res_ptr,size,4),R(ebx))
+	INSN1(inc,l	,R(size))
+
+	INSN2(mov,l	,R(ebx),R(edx))
+	INSN1(jnz,	,Loop)
+
+	INSN2(adc,l	,R(ebx),$0)
+	INSN2(mov,l	,R(eax),R(ebx))
+	INSN1(pop,l	,R(ebp))
+	INSN1(pop,l	,R(ebx))
+	INSN1(pop,l	,R(esi))
+	INSN1(pop,l	,R(edi))
+	ret
diff --git a/mpn/x86/pentium/rshift.S b/mpn/x86/pentium/rshift.S
new file mode 100644
index 000000000..38398edb1
--- /dev/null
+++ b/mpn/x86/pentium/rshift.S
@@ -0,0 +1,217 @@
+/* Pentium optimized __mpn_rshift -- 
+
+Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s_ptr		(sp + 8)
+  size		(sp + 12)
+  cnt		(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(__mpn_rshift)
+C_SYMBOL_NAME(__mpn_rshift:)
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp
+
+	movl	20(%esp),%edi		/* res_ptr */
+	movl	24(%esp),%esi		/* s_ptr */
+	movl	28(%esp),%ebp		/* size */
+	movl	32(%esp),%ecx		/* cnt */
+
+/* We can use faster code for shift-by-1 under certain conditions.  */
+	cmp	$1,%ecx
+	jne	Lnormal
+	leal	4(%edi),%eax
+	cmpl	%esi,%eax
+	jnc	Lspecial		/* jump if res_ptr + 1 >= s_ptr */
+	leal	(%edi,%ebp,4),%eax
+	cmpl	%eax,%esi
+	jnc	Lspecial		/* jump if s_ptr >= res_ptr + size */
+
+Lnormal:
+	movl	(%esi),%edx
+	addl	$4,%esi
+	xorl	%eax,%eax
+	shrdl	%cl,%edx,%eax		/* compute carry limb */
+	pushl	%eax			/* push carry limb onto stack */
+
+	decl	%ebp
+	pushl	%ebp
+	shrl	$3,%ebp
+	jz	Lend
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+Loop:	movl	28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebx
+
+	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	shrdl	%cl,%eax,%ebx
+	shrdl	%cl,%edx,%eax
+	movl	%ebx,(%edi)
+	movl	%eax,4(%edi)
+
+	movl	8(%esi),%ebx
+	movl	12(%esi),%eax
+	shrdl	%cl,%ebx,%edx
+	shrdl	%cl,%eax,%ebx
+	movl	%edx,8(%edi)
+	movl	%ebx,12(%edi)
+
+	movl	16(%esi),%edx
+	movl	20(%esi),%ebx
+	shrdl	%cl,%edx,%eax
+	shrdl	%cl,%ebx,%edx
+	movl	%eax,16(%edi)
+	movl	%edx,20(%edi)
+
+	movl	24(%esi),%eax
+	movl	28(%esi),%edx
+	shrdl	%cl,%eax,%ebx
+	shrdl	%cl,%edx,%eax
+	movl	%ebx,24(%edi)
+	movl	%eax,28(%edi)
+
+	addl	$32,%esi
+	addl	$32,%edi
+	decl	%ebp
+	jnz	Loop
+
+Lend:	popl	%ebp
+	andl	$7,%ebp
+	jz	Lend2
+Loop2:	movl	(%esi),%eax
+	shrdl	%cl,%eax,%edx		/* compute result limb */
+	movl	%edx,(%edi)
+	movl	%eax,%edx
+	addl	$4,%esi
+	addl	$4,%edi
+	decl	%ebp
+	jnz	Loop2
+
+Lend2:	shrl	%cl,%edx		/* compute most significant limb */
+	movl	%edx,(%edi)		/* store it */
+
+	popl	%eax			/* pop carry limb */
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+/* We loop from least significant end of the arrays, which is only
+   permissable if the source and destination don't overlap, since the
+   function is documented to work for overlapping source and destination.
+*/
+
+Lspecial:
+	leal	-4(%edi,%ebp,4),%edi
+	leal	-4(%esi,%ebp,4),%esi
+
+	movl	(%esi),%edx
+	subl	$4,%esi
+
+	decl	%ebp
+	pushl	%ebp
+	shrl	$3,%ebp
+
+	shrl	$1,%edx
+	incl	%ebp
+	decl	%ebp
+	jz	LLend
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+LLoop:	movl	-28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebx
+
+	movl	(%esi),%eax
+	movl	-4(%esi),%edx
+	rcrl	$1,%eax
+	movl	%ebx,(%edi)
+	rcrl	$1,%edx
+	movl	%eax,-4(%edi)
+
+	movl	-8(%esi),%ebx
+	movl	-12(%esi),%eax
+	rcrl	$1,%ebx
+	movl	%edx,-8(%edi)
+	rcrl	$1,%eax
+	movl	%ebx,-12(%edi)
+
+	movl	-16(%esi),%edx
+	movl	-20(%esi),%ebx
+	rcrl	$1,%edx
+	movl	%eax,-16(%edi)
+	rcrl	$1,%ebx
+	movl	%edx,-20(%edi)
+
+	movl	-24(%esi),%eax
+	movl	-28(%esi),%edx
+	rcrl	$1,%eax
+	movl	%ebx,-24(%edi)
+	rcrl	$1,%edx
+	movl	%eax,-28(%edi)
+
+	leal	-32(%esi),%esi		/* use leal not to clobber carry */
+	leal	-32(%edi),%edi
+	decl	%ebp
+	jnz	LLoop
+
+LLend:	popl	%ebp
+	sbbl	%eax,%eax		/* save carry in %eax */
+	andl	$7,%ebp
+	jz	LLend2
+	addl	%eax,%eax		/* restore carry from eax */
+LLoop2:	movl	%edx,%ebx
+	movl	(%esi),%edx
+	rcrl	$1,%edx
+	movl	%ebx,(%edi)
+
+	leal	-4(%esi),%esi		/* use leal not to clobber carry */
+	leal	-4(%edi),%edi
+	decl	%ebp
+	jnz	LLoop2
+
+	jmp	LL1
+LLend2:	addl	%eax,%eax		/* restore carry from eax */
+LL1:	movl	%edx,(%edi)		/* store last limb */
+
+	movl	$0,%eax
+	rcrl	$1,%eax
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
diff --git a/mpn/x86/pentium/sub_n.S b/mpn/x86/pentium/sub_n.S
new file mode 100644
index 000000000..d1a2bc084
--- /dev/null
+++ b/mpn/x86/pentium/sub_n.S
@@ -0,0 +1,130 @@
+/* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0
+   and store difference in a third limb vector.
+
+Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+   INPUT PARAMETERS
+   res_ptr	(sp + 4)
+   s1_ptr	(sp + 8)
+   s2_ptr	(sp + 12)
+   size		(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(__mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n:)
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp
+
+	movl	20(%esp),%edi		/* res_ptr */
+	movl	24(%esp),%esi		/* s1_ptr */
+	movl	28(%esp),%ebp		/* s2_ptr */
+	movl	32(%esp),%ecx		/* size */
+
+	movl	(%ebp),%ebx
+
+	decl	%ecx
+	movl	%ecx,%edx
+	shrl	$3,%ecx
+	andl	$7,%edx
+	testl	%ecx,%ecx		/* zero carry flag */
+	jz	Lend
+	pushl	%edx
+
+	ALIGN (3)
+Loop:	movl	28(%edi),%eax		/* fetch destination cache line */
+	leal	32(%edi),%edi
+
+L1:	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	sbbl	%ebx,%eax
+	movl	4(%ebp),%ebx
+	sbbl	%ebx,%edx
+	movl	8(%ebp),%ebx
+	movl	%eax,-32(%edi)
+	movl	%edx,-28(%edi)
+
+L2:	movl	8(%esi),%eax
+	movl	12(%esi),%edx
+	sbbl	%ebx,%eax
+	movl	12(%ebp),%ebx
+	sbbl	%ebx,%edx
+	movl	16(%ebp),%ebx
+	movl	%eax,-24(%edi)
+	movl	%edx,-20(%edi)
+
+L3:	movl	16(%esi),%eax
+	movl	20(%esi),%edx
+	sbbl	%ebx,%eax
+	movl	20(%ebp),%ebx
+	sbbl	%ebx,%edx
+	movl	24(%ebp),%ebx
+	movl	%eax,-16(%edi)
+	movl	%edx,-12(%edi)
+
+L4:	movl	24(%esi),%eax
+	movl	28(%esi),%edx
+	sbbl	%ebx,%eax
+	movl	28(%ebp),%ebx
+	sbbl	%ebx,%edx
+	movl	32(%ebp),%ebx
+	movl	%eax,-8(%edi)
+	movl	%edx,-4(%edi)
+
+	leal	32(%esi),%esi
+	leal	32(%ebp),%ebp
+	decl	%ecx
+	jnz	Loop
+
+	popl	%edx
+Lend:
+	decl	%edx			/* test %edx w/o clobbering carry */
+	js	Lend2
+	incl	%edx
+Loop2:
+	leal	4(%edi),%edi
+	movl	(%esi),%eax
+	sbbl	%ebx,%eax
+	movl	4(%ebp),%ebx
+	movl	%eax,-4(%edi)
+	leal	4(%esi),%esi
+	leal	4(%ebp),%ebp
+	decl	%edx
+	jnz	Loop2
+Lend2:
+	movl	(%esi),%eax
+	sbbl	%ebx,%eax
+	movl	%eax,(%edi)
+
+	sbbl	%eax,%eax
+	negl	%eax
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
diff --git a/mpn/x86/pentium/submul_1.S b/mpn/x86/pentium/submul_1.S
new file mode 100644
index 000000000..adf2d63e6
--- /dev/null
+++ b/mpn/x86/pentium/submul_1.S
@@ -0,0 +1,83 @@
+/* Pentium __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+   the result from a second limb vector.
+
+Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+   INPUT PARAMETERS
+   res_ptr	(sp + 4)
+   s1_ptr	(sp + 8)
+   size		(sp + 12)
+   s2_limb	(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+	TEXT
+	ALIGN (3)
+	GLOBL	C_SYMBOL_NAME(__mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1:)
+
+	INSN1(push,l	,R(edi))
+	INSN1(push,l	,R(esi))
+	INSN1(push,l	,R(ebx))
+	INSN1(push,l	,R(ebp))
+
+	INSN2(mov,l	,R(res_ptr),MEM_DISP(esp,20))
+	INSN2(mov,l	,R(s1_ptr),MEM_DISP(esp,24))
+	INSN2(mov,l	,R(size),MEM_DISP(esp,28))
+	INSN2(mov,l	,R(s2_limb),MEM_DISP(esp,32))
+
+	INSN2(lea,l	,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+	INSN2(lea,l	,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+	INSN1(neg,l	,R(size))
+	INSN2(xor,l	,R(ebx),R(ebx))
+	ALIGN (3)
+
+Loop:	INSN2(adc,l	,R(ebx),$0)
+	INSN2(mov,l	,R(eax),MEM_INDEX(s1_ptr,size,4))
+
+	INSN1(mul,l	,R(s2_limb))
+
+	INSN2(add,l	,R(eax),R(ebx))
+	INSN2(mov,l	,R(ebx),MEM_INDEX(res_ptr,size,4))
+
+	INSN2(adc,l	,R(edx),$0)
+	INSN2(sub,l	,R(ebx),R(eax))
+
+	INSN2(mov,l	,MEM_INDEX(res_ptr,size,4),R(ebx))
+	INSN1(inc,l	,R(size))
+
+	INSN2(mov,l	,R(ebx),R(edx))
+	INSN1(jnz,	,Loop)
+
+	INSN2(adc,l	,R(ebx),$0)
+	INSN2(mov,l	,R(eax),R(ebx))
+	INSN1(pop,l	,R(ebp))
+	INSN1(pop,l	,R(ebx))
+	INSN1(pop,l	,R(esi))
+	INSN1(pop,l	,R(edi))
+	ret
diff --git a/mpn/x86/rshift.S b/mpn/x86/rshift.S
new file mode 100644
index 000000000..9abbf9a45
--- /dev/null
+++ b/mpn/x86/rshift.S
@@ -0,0 +1,87 @@
+/* i80386 __mpn_rshift -- 
+
+Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s_ptr		(sp + 8)
+  size		(sp + 12)
+  cnt		(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(__mpn_rshift)
+C_SYMBOL_NAME(__mpn_rshift:)
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+
+	movl	16(%esp),%edi		/* res_ptr */
+	movl	20(%esp),%esi		/* s_ptr */
+	movl	24(%esp),%edx		/* size */
+	movl	28(%esp),%ecx		/* cnt */
+
+	leal	-4(%edi,%edx,4),%edi
+	leal	(%esi,%edx,4),%esi
+	negl	%edx
+
+	movl	(%esi,%edx,4),%ebx	/* read least significant limb */
+	xorl	%eax,%eax
+	shrdl	%cl,%ebx,%eax		/* compute carry limb */
+	incl	%edx
+	jz	Lend
+	pushl	%eax			/* push carry limb onto stack */
+	testb	$1,%edx
+	jnz	L1			/* enter loop in the middle */
+	movl	%ebx,%eax
+
+	ALIGN (3)
+Loop:	movl	(%esi,%edx,4),%ebx	/* load next higher limb */
+	shrdl	%cl,%ebx,%eax		/* compute result limb */
+	movl	%eax,(%edi,%edx,4)	/* store it */
+	incl	%edx
+L1:	movl	(%esi,%edx,4),%eax
+	shrdl	%cl,%eax,%ebx
+	movl	%ebx,(%edi,%edx,4)
+	incl	%edx
+	jnz	Loop
+
+	shrl	%cl,%eax		/* compute most significant limb */
+	movl	%eax,(%edi)		/* store it */
+
+	popl	%eax			/* pop carry limb */
+
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+Lend:	shrl	%cl,%ebx		/* compute most significant limb */
+	movl	%ebx,(%edi)		/* store it */
+
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
diff --git a/mpn/x86/sub_n.S b/mpn/x86/sub_n.S
new file mode 100644
index 000000000..ec93d1bd5
--- /dev/null
+++ b/mpn/x86/sub_n.S
@@ -0,0 +1,106 @@
+/* i80386 __mpn_sub_n -- Add two limb vectors of the same length > 0 and store
+   sum in a third limb vector.
+
+Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+  INPUT PARAMETERS
+  res_ptr	(sp + 4)
+  s1_ptr	(sp + 8)
+  s2_ptr	(sp + 12)
+  size		(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(__mpn_sub_n)
+C_SYMBOL_NAME(__mpn_sub_n:)
+	pushl %edi
+	pushl %esi
+
+	movl 12(%esp),%edi		/* res_ptr */
+	movl 16(%esp),%esi		/* s1_ptr */
+	movl 20(%esp),%edx		/* s2_ptr */
+	movl 24(%esp),%ecx		/* size */
+
+	movl	%ecx,%eax
+	shrl	$3,%ecx			/* compute count for unrolled loop */
+	negl	%eax
+	andl	$7,%eax			/* get index where to start loop */
+	jz	Loop			/* necessary special case for 0 */
+	incl	%ecx			/* adjust loop count */
+	shll	$2,%eax			/* adjustment for pointers... */
+	subl	%eax,%edi		/* ... since they are offset ... */
+	subl	%eax,%esi		/* ... by a constant when we ... */
+	subl	%eax,%edx		/* ... enter the loop */
+	shrl	$2,%eax			/* restore previous value */
+#ifdef PIC
+/* Calculate start address in loop for PIC.  Due to limitations in some
+   assemblers, Loop-L0-3 cannot be put into the leal */
+	call	L0
+L0:	leal	(%eax,%eax,8),%eax
+	addl	(%esp),%eax
+	addl	$(Loop-L0-3),%eax 
+	addl	$4,%esp
+#else
+/* Calculate start address in loop for non-PIC.  */
+ 	leal	(Loop - 3)(%eax,%eax,8),%eax
+#endif
+	jmp	*%eax			/* jump into loop */
+	ALIGN (3)
+Loop:	movl	(%esi),%eax
+	sbbl	(%edx),%eax
+	movl	%eax,(%edi)
+	movl	4(%esi),%eax
+	sbbl	4(%edx),%eax
+	movl	%eax,4(%edi)
+	movl	8(%esi),%eax
+	sbbl	8(%edx),%eax
+	movl	%eax,8(%edi)
+	movl	12(%esi),%eax
+	sbbl	12(%edx),%eax
+	movl	%eax,12(%edi)
+	movl	16(%esi),%eax
+	sbbl	16(%edx),%eax
+	movl	%eax,16(%edi)
+	movl	20(%esi),%eax
+	sbbl	20(%edx),%eax
+	movl	%eax,20(%edi)
+	movl	24(%esi),%eax
+	sbbl	24(%edx),%eax
+	movl	%eax,24(%edi)
+	movl	28(%esi),%eax
+	sbbl	28(%edx),%eax
+	movl	%eax,28(%edi)
+	leal	32(%edi),%edi
+	leal	32(%esi),%esi
+	leal	32(%edx),%edx
+	decl	%ecx
+	jnz	Loop
+
+	sbbl	%eax,%eax
+	negl	%eax
+
+	popl %esi
+	popl %edi
+	ret
diff --git a/mpn/x86/submul_1.S b/mpn/x86/submul_1.S
new file mode 100644
index 000000000..730e73204
--- /dev/null
+++ b/mpn/x86/submul_1.S
@@ -0,0 +1,76 @@
+/* i80386 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
+   the result from a second limb vector.
+
+Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/*
+   INPUT PARAMETERS
+   res_ptr	(sp + 4)
+   s1_ptr	(sp + 8)
+   size		(sp + 12)
+   s2_limb	(sp + 16)
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+	TEXT
+	ALIGN (3)
+	GLOBL	C_SYMBOL_NAME(__mpn_submul_1)
+C_SYMBOL_NAME(__mpn_submul_1:)
+
+	INSN1(push,l	,R(edi))
+	INSN1(push,l	,R(esi))
+	INSN1(push,l	,R(ebx))
+	INSN1(push,l	,R(ebp))
+
+	INSN2(mov,l	,R(res_ptr),MEM_DISP(esp,20))
+	INSN2(mov,l	,R(s1_ptr),MEM_DISP(esp,24))
+	INSN2(mov,l	,R(size),MEM_DISP(esp,28))
+	INSN2(mov,l	,R(s2_limb),MEM_DISP(esp,32))
+
+	INSN2(lea,l	,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+	INSN2(lea,l	,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+	INSN1(neg,l	,R(size))
+	INSN2(xor,l	,R(ebx),R(ebx))
+	ALIGN (3)
+Loop:
+	INSN2(mov,l	,R(eax),MEM_INDEX(s1_ptr,size,4))
+	INSN1(mul,l	,R(s2_limb))
+	INSN2(add,l	,R(eax),R(ebx))
+	INSN2(adc,l	,R(edx),$0)
+	INSN2(sub,l	,MEM_INDEX(res_ptr,size,4),R(eax))
+	INSN2(adc,l	,R(edx),$0)
+	INSN2(mov,l	,R(ebx),R(edx))
+
+	INSN1(inc,l	,R(size))
+	INSN1(jnz,	,Loop)
+	INSN2(mov,l	,R(eax),R(ebx))
+
+	INSN1(pop,l	,R(ebp))
+	INSN1(pop,l	,R(ebx))
+	INSN1(pop,l	,R(esi))
+	INSN1(pop,l	,R(edi))
+	ret
diff --git a/mpn/x86/syntax.h b/mpn/x86/syntax.h
new file mode 100644
index 000000000..c53c73c03
--- /dev/null
+++ b/mpn/x86/syntax.h
@@ -0,0 +1,62 @@
+/* asm.h -- Definitions for x86 syntax variations.
+
+Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.  */
+
+
+#undef ALIGN
+
+#if defined (BSD_SYNTAX) || defined (ELF_SYNTAX)
+#define R(r) %r
+#define MEM(base)(base)
+#define MEM_DISP(base,displacement)displacement(R(base))
+#define MEM_INDEX(base,index,size)(R(base),R(index),size)
+#ifdef __STDC__
+#define INSN1(mnemonic,size_suffix,dst)mnemonic##size_suffix dst
+#define INSN2(mnemonic,size_suffix,dst,src)mnemonic##size_suffix src,dst
+#else
+#define INSN1(mnemonic,size_suffix,dst)mnemonic/**/size_suffix dst
+#define INSN2(mnemonic,size_suffix,dst,src)mnemonic/**/size_suffix src,dst
+#endif
+#define TEXT .text
+#if defined (BSD_SYNTAX)
+#define ALIGN(log) .align log
+#endif
+#if defined (ELF_SYNTAX)
+#define ALIGN(log) .align 1<<(log)
+#endif
+#define GLOBL .globl
+#endif
+
+#ifdef INTEL_SYNTAX
+#define R(r) r
+#define MEM(base)[base]
+#define MEM_DISP(base,displacement)[base+(displacement)]
+#define MEM_INDEX(base,index,size)[base+index*size]
+#define INSN1(mnemonic,size_suffix,dst)mnemonic dst
+#define INSN2(mnemonic,size_suffix,dst,src)mnemonic dst,src
+#define TEXT .text
+#define ALIGN(log) .align log
+#define GLOBL .globl
+#endif
+
+#ifdef BROKEN_ALIGN
+#undef ALIGN
+#define ALIGN(log) .align log,0x90
+#endif
diff --git a/mpn/z8000/add_n.s b/mpn/z8000/add_n.s
new file mode 100644
index 000000000..a50fc3ef5
--- /dev/null
+++ b/mpn/z8000/add_n.s
@@ -0,0 +1,53 @@
+! Z8000 __mpn_add_n -- Add two limb vectors of equal, non-zero length.
+
+! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	r7
+! s1_ptr	r6
+! s2_ptr	r5
+! size		r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp.  We'd
+! then add 2x the number of words written to r7...
+
+	unseg
+	.text
+	even
+	global ___mpn_add_n
+___mpn_add_n:
+	pop	r0,@r6
+	pop	r1,@r5
+	add	r0,r1
+	ld	@r7,r0
+	dec	r4
+	jr	eq,Lend
+Loop:	pop	r0,@r6
+	pop	r1,@r5
+	adc	r0,r1
+	inc	r7,#2
+	ld	@r7,r0
+	dec	r4
+	jr	ne,Loop
+Lend:	ld	r2,r4		! use 0 already in r4
+	adc	r2,r2
+	ret	t
diff --git a/mpn/z8000/gmp-mparam.h b/mpn/z8000/gmp-mparam.h
new file mode 100644
index 000000000..e0a303e97
--- /dev/null
+++ b/mpn/z8000/gmp-mparam.h
@@ -0,0 +1,27 @@
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#define BITS_PER_MP_LIMB 16
+#define BYTES_PER_MP_LIMB 2
+#define BITS_PER_LONGINT 32
+#define BITS_PER_INT 16
+#define BITS_PER_SHORTINT 16
+#define BITS_PER_CHAR 8
diff --git a/mpn/z8000/mul_1.s b/mpn/z8000/mul_1.s
new file mode 100644
index 000000000..f1126b5ab
--- /dev/null
+++ b/mpn/z8000/mul_1.s
@@ -0,0 +1,68 @@
+! Z8000 __mpn_mul_1 -- Multiply a limb vector with a limb and store
+! the result in a second limb vector.
+
+! Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	r7
+! s1_ptr	r6
+! size		r5
+! s2_limb	r4
+
+	unseg
+	.text
+	even
+	global ___mpn_mul_1
+___mpn_mul_1:
+	sub	r2,r2		! zero carry limb
+	and	r4,r4
+	jr	mi,Lneg
+
+Lpos:	pop	r1,@r6
+	ld	r9,r1
+	mult	rr8,r4
+	and	r1,r1		! shift msb of loaded limb into cy
+	jr	mi,Lp		! branch if loaded limb's msb is set
+	add	r8,r4		! hi_limb += sign_comp2
+Lp:	add	r9,r2		! lo_limb += cy_limb
+	xor	r2,r2
+	adc	r2,r8
+	ld	@r7,r9
+	inc	r7,#2
+	dec	r5
+	jr	ne,Lpos
+	ret t
+
+Lneg:	pop	r1,@r6
+	ld	r9,r1
+	mult	rr8,r4
+	add	r8,r1		! hi_limb += sign_comp1
+	and	r1,r1
+	jr	mi,Ln
+	add	r8,r4		! hi_limb += sign_comp2
+Ln:	add	r9,r2		! lo_limb += cy_limb
+	xor	r2,r2
+	adc	r2,r8
+	ld	@r7,r9
+	inc	r7,#2
+	dec	r5
+	jr	ne,Lneg
+	ret t
diff --git a/mpn/z8000/sub_n.s b/mpn/z8000/sub_n.s
new file mode 100644
index 000000000..272c671bd
--- /dev/null
+++ b/mpn/z8000/sub_n.s
@@ -0,0 +1,54 @@
+! Z8000 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+! store difference in a third limb vector.
+
+! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	r7
+! s1_ptr	r6
+! s2_ptr	r5
+! size		r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp.  We'd
+! then add 2x the number of words written to r7...
+
+	unseg
+	.text
+	even
+	global ___mpn_sub_n
+___mpn_sub_n:
+	pop	r0,@r6
+	pop	r1,@r5
+	sub	r0,r1
+	ld	@r7,r0
+	dec	r4
+	jr	eq,Lend
+Loop:	pop	r0,@r6
+	pop	r1,@r5
+	sbc	r0,r1
+	inc	r7,#2
+	ld	@r7,r0
+	dec	r4
+	jr	ne,Loop
+Lend:	ld	r2,r4		! use 0 already in r4
+	adc	r2,r2
+	ret	t
diff --git a/mpn/z8000x/add_n.s b/mpn/z8000x/add_n.s
new file mode 100644
index 000000000..c5c0d4275
--- /dev/null
+++ b/mpn/z8000x/add_n.s
@@ -0,0 +1,56 @@
+! Z8000 (32 bit limb version) __mpn_add_n -- Add two limb vectors of equal,
+! non-zero length.
+
+! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	r7
+! s1_ptr	r6
+! s2_ptr	r5
+! size		r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp.  We'd
+! then add 2x the number of words written to r7...
+
+	segm
+	.text
+	even
+	global ___mpn_add_n
+___mpn_add_n:
+	popl	rr0,@r6
+	popl	rr8,@r5
+	addl	rr0,rr8
+	ldl	@r7,rr0
+	dec	r4
+	jr	eq,Lend
+Loop:	popl	rr0,@r6
+	popl	rr8,@r5
+	adc	r1,r9
+	adc	r0,r8
+	inc	r7,#4
+	ldl	@r7,rr0
+	dec	r4
+	jr	ne,Loop
+Lend:	ld	r2,r4		! use 0 already in r4
+	ld	r3,r4
+	adc	r2,r2
+	ret	t
diff --git a/mpn/z8000x/sub_n.s b/mpn/z8000x/sub_n.s
new file mode 100644
index 000000000..9eeece69d
--- /dev/null
+++ b/mpn/z8000x/sub_n.s
@@ -0,0 +1,56 @@
+! Z8000 (32 bit limb version) __mpn_sub_n -- Subtract two limb vectors of the
+! same length > 0 and store difference in a third limb vector.
+
+! Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+
+! This file is part of the GNU MP Library.
+
+! The GNU MP Library is free software; you can redistribute it and/or modify
+! it under the terms of the GNU Library General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or (at your
+! option) any later version.
+
+! The GNU MP Library is distributed in the hope that it will be useful, but
+! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+! License for more details.
+
+! You should have received a copy of the GNU Library General Public License
+! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+! MA 02111-1307, USA.
+
+
+! INPUT PARAMETERS
+! res_ptr	r7
+! s1_ptr	r6
+! s2_ptr	r5
+! size		r4
+
+! If we are really crazy, we can use push to write a few result words
+! backwards, using push just because it is faster than reg+disp.  We'd
+! then add 2x the number of words written to r7...
+
+	segm
+	.text
+	even
+	global ___mpn_sub_n
+___mpn_sub_n:
+	popl	rr0,@r6
+	popl	rr8,@r5
+	subl	rr0,rr8
+	ldl	@r7,rr0
+	dec	r4
+	jr	eq,Lend
+Loop:	popl	rr0,@r6
+	popl	rr8,@r5
+	sbc	r1,r9
+	sbc	r0,r8
+	inc	r7,#4
+	ldl	@r7,rr0
+	dec	r4
+	jr	ne,Loop
+Lend:	ld	r2,r4		! use 0 already in r4
+	ld	r3,r4
+	adc	r2,r2
+	ret	t
author	tege <tege@gmplib.org>	1996-05-08 09:10:48 +0200
committer	tege <tege@gmplib.org>	1996-05-08 09:10:48 +0200
commit	c6d715868f53b08c62a129ffd77fb585fd89c43b (patch)
tree	82f36d2d8cbe7e07ad3e18d5c6e047e8796d861e /mpn
download	gmp-c6d715868f53b08c62a129ffd77fb585fd89c43b.tar.gz